# Source Code

## Import library

In [1]:
import os
import time
from operator import itemgetter

In [2]:
W: int #capable weight
m: int #number of classes
w = [] #weight of each item
v = [] #value of each item
c = [] #class of each item
n = len(v)

## Implementation

In [3]:
# initialize a class to store information of decision tree
class Node:
    def __init__(self, index, value, weight):
        # path: path from the root to the node
        # index: index of this node in the given item list
        # value: value of nodes on path from root to this node (including this node)
        # weight: weight of nodes on path from root to this node (including this node)
        self.path = []
        self.index = index
        self.value = value
        self.weight = weight

In [4]:
class Priority_Queue:
    def __init__(self):
        self.pqueue = []
        self.length = 0
    
    def insert(self, node):
        i = 0
        while i < len(self.pqueue):
            if self.pqueue[i].bound > node.bound:
                break
            i+=1
        self.pqueue.insert(i,node)
        self.length += 1

    def print_pqueue(self):
        for i in list(range(len(self.pqueue))):
            print ("pqueue",i, "=", self.pqueue[i].bound)
                    
    def remove(self):
        try:
            result = self.pqueue.pop()
            self.length -= 1
        except: 
            print("Priority queue is empty, cannot pop from empty list.")
        else:
            return result

In [5]:
# check if the considered solution satisfies the condition:
# select at least one item from each class or not
def check_knapsack (knapsack, m, c):
    knapsack_class = set()
    for i in knapsack:
        knapsack_class.add(c[i])
    # if the set of items contains all given class, then we return true
    if (len(knapsack_class) == m): return True
    #else return false
    return False

In [6]:
# function to sort item in term of the "profit" (value / weight) of each item in descending order
def sort_values_and_weights(values, weights, n, c):
    sortList = []
    for i in range(n):
        sortList.append((values[i], weights[i], values[i]/weights[i], i, c[i]))

    # sort by the "profit" value of item, but the output list is sorted in ascending order
    sortList = sorted(sortList, key=itemgetter(2))
    # so we reverse it to have a sorted list in descending order
    sortList.reverse()

    # initialize list to get essential value from the sorted list
    sorted_values = []
    sorted_weights = []
    indexes = []
    classes = []

    for a in sortList:
        # get items' values in sorted list
        sorted_values.append(a[0])
        # get items' weights in sorted list
        sorted_weights.append(a[1])
        # get items' indexes in sorted list
        indexes.append(a[3])
        # get items' classes in sorted list
        classes.append(a[4])

    return sorted_values, sorted_weights, indexes, classes

In [7]:
# Returns bound of value in subtree rooted with 'node'
def bound(node, n, W, values, weights):

    # if weight overcomes the knapsack capacity, return
    # 0 as expected bound
    if node.weight >= W:
        return 0

    else:
        # initialize bound on value by current value
        bound = node.value
        # initialize essential information for calculating
        total_weight = node.weight
        j = node.index + 1

        # start from the item after current item and sum up all value until
        # reaching the last item or total weight exceed the capability
        # these codes are to calculate bound and find the first item that
        # does not fit completely in the knapsack
        while ((j < n) and (total_weight + weights[j] <= W)):
            total_weight = total_weight + weights[j]
            bound = bound + values[j]
            j = j + 1

        k = j
        # if it's still not the last item
        if k < n:
            # then calculate the fraction of item's value that 
            # still fits in the knapsack 
            bound = bound + (W - total_weight) * values[k] / weights[k]

        return bound

In [8]:
# implement the algorithm
def branch_and_bound_knapsack(W, values, weights, c, m):
    n = len(values)

    # sorting item on basis of value per weight
    values, weights, indexes, c = sort_values_and_weights(values, weights, n, c)

    # initializing a priority queue for traversing
    pq = Priority_Queue()
    # initializing a list to store result
    result = []

    # start with a node of index -1 to
    # act as previous node
    v = Node(-1, 0, 0)
    # initializing highest value as 0
    highest_value = 0

    total_weight = 0

    v.bound = bound(v, n, W, values, weights)

    pq.insert(v)

    while pq.length != 0:
        # get the first node in the queue
        v = pq.remove()

        if(v.bound > highest_value):
            # if it's the last item, then stop branching
            if v.index == n-1:
                continue

            # current node
            u = Node(0, 0, 0)

            # compute the value and weight of current node
            # by adding current node's value and weight to 
            # the accumulated value and weight stored in previous node
            u.index = v.index + 1
            u.value = v.value + values[u.index]
            u.weight = v.weight + weights[u.index]

            # set the path from root to current node

            u.path = v.path.copy()

            u.path.append(indexes[u.index]) # adds current item to path

            # if total weight until now is less than the capability
            # and total value is higher than current highest value
            # then update highest value and result path

            if (u.weight <= W and u.value > highest_value): 
                if check_knapsack(u.path, m, c):
                    # update highest_value
                    total_weight = u.weight
                    highest_value = u.value
                    result = u.path

            # get the upper bound to decide whether we should continue
            # add more nodes and traverse on this branch that have current node        
            u.bound = bound(u, n, W, values, weights)
            
            # if the upper bound is not higher than current highest value
            # then it is useless to keep considering this branch
            # else we push this node into queue for further consideration
            if u.bound > highest_value:
                pq.insert(u)

            # create a child node indicate branch that does not contain 
            # current node
            u2 = Node(u.index, v.value, v.weight)
            
            # do the same thing with this node (calculate upper bound
            # and check whether we should add this node into queue)
            u2.bound = bound(u2, n, W, values, weights)
            u2.path = v.path.copy()

            if u2.bound > highest_value:
                pq.insert(u2)

    # after iterating through all possible outcomes and found the most suitable solution
    # we save result as a list of 0 and 1; 1 for chosen item and 0 for others
    result_path = [0] * len(w)
    for i in result:
        result_path[i] = 1

    # return result
    return result_path, highest_value, total_weight

In [9]:
# set path for input files
input_dir='data/input/' 
for file in os.listdir(input_dir):
    # open each file
    with open(input_dir+file) as f:
        # check if that file is .txt file or not
        if (file[-4:]!='.txt'): continue
        # read 5 input strings from file to variables
        capacity,class_num,weight,val,label=f.readlines()
        # set value for W and m
        W,m=int(capacity),int(class_num)
        # set value for w 
        w=weight.replace(' ','').replace('\n','')
        w=[float(num) for num in weight.split(',')]
        # set value for v
        v=val.replace(' ','').replace('\n','')
        v=[float(num) for num in val.split(',')]
        # set value for c
        c=label.replace(' ','').replace('\n','')
        c=[float(num) for num in label.split(',')]

        start = time.time()
        (result, highest_value, total_weight) = (branch_and_bound_knapsack(W, v, w, c, m))
        end = time.time()

        # print the result list, highest value and total time needed to run the algorithm
        print(file)
        print(f'The result is: {result}\nWith the value of: {highest_value} and total weight: {total_weight})')
        print(f'Complete searching in: {end-start} seconds\n')

input_001.txt
The result is: [0, 1, 0, 0, 1, 0, 0, 0, 0, 1]
With the value of: 117.0 and total weight: 100.0)
Complete searching in: 0.0 seconds

input_002.txt
The result is: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
With the value of: 1000.0 and total weight: 10.0)
Complete searching in: 0.0 seconds

input_003.txt
The result is: [1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
With the value of: 14211.0 and total weight: 8729.0)
Complete searching in: 0.000997304916381836 seconds

input_004.txt
The result is: [0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]
With the value of: 6107.0 and total weight: 4923.0)
Complete searching in: 0.05583500862121582 seconds

input_005 (trap).txt
The result is: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
With the value of: 0 and total weight: 0)
Complete searching in: 0.0009975433349609375 seconds

input_006.txt
The result is: [1, 1, 1, 1, 1,

# Report

## Algorithm Description

The tree's branches, which stand in for subsets of the solution set, are investigated in Branch and Bound. This algorithm's basic principle is to recursively branch and prune the search tree by determining an upper bound, or the highest possible value, and then deciding whether or not to continue traversing that branch. We can eliminate that branch to save time if it is unable to generate a better solution than the best one so far discovered.

The idea after calculating upper bound, which is the maximum potential value that branch can get, is that: if the upper bound for some node is not exceed the highest value, meaning that it not worth keeping this branch since we will never get the optimal solution on this branch, thus we can safely discarded it from the tree. 

## Pseudo Code

*This is pseudo code for function checking if the list contains all classes or not*

```
check_knapsack(knapsack: list, m, c) return bool
    knapsack_class is set
    FOR i IN knapsack
        BEGIN
            knapsack_class add c[i]
        END
    IF (size knapsack_lass = m)
        return True
        ENDIF
    return False
```

*This is pseudo code for function sorting items based on value per weight*

```
sort_value_and_weight(values, weights, n, c) return sorted_values, sorted_weights, indexes, classes
    sortList <- []
    FOR i < n:
        BEGIN
        sortList add (values[i], weights[i], values[i]/weights[i], i, c[i])
        END

    sort sortList based on the third element in list
    reverse sortList

    sorted_values <- []
    sorted_weights <- []
    indexes <- []
    classes <- []

    FOR a IN sortList:
        BEGIN
        sorted_values <- add a[0]
        sorted_weights <- add a[1]
        indexes <- add a[3]
        classes <- add a[3]
        END

    return sorted_values, sorted_weights, indexes, classes
```
*This is the pseudo code for function calculating bound*

```
bound(node, n, W, values, weights) return int
    IF node.weight >= W
        return 0
        ENDIF

    else:
        bound <- node.value
        total_weight <- node.weight
        j <- node.index + 1

        WHILE (j < n) AND total_weight + weights[j] <= W
            total_weight += weights[j]
            bound += values[j]
            j += 1

        if j < n:
            bound += (W - total_weight) * (values[j]/weights[j])
            ENDIF

        return bound
```

*This is the pseudo code for function implementing branch and bound algorithm*

```
branch_and_bound_knapsack(W, values, weights, c, m) return highest_value, result

    n <- size of values

    values, weights, indexes, c <- sort_values_and_weights(values, weights, n, c)

    pq <- []
    result = []

    v <- Node(-1, 0, 0)
    node <- Node(0, 0, 0)
    highest_value <- 0

    pq add v

    WHILE pq:
        
        v pop first element in pq

        if(v.index = -1)
            node.index <- 0
            ENDIF

        if node.index = n-1
            CONTINUE
            ENDIF

        node.index <- v.index + 1

        node.value <- v.value + values[node.index]
        node.weight <- v.weight + weights[node.index]

        node.path copy v.path
        node.path add indexes[node.index]

        IF node.weight <= W AND node.value > highest_value
            IF check_knapsack(node.path, m, c)
                highest_value <- node.value
                result <- node.path
                ENDIF
            ENDIF

        node.bound <- bound(node, n, W, values, weights)

        if node.bound > highest_value
            pq add node
            ENDIF

        node <- Node(node.index, v.value, v.weight)
        node.bound <- bound(node, n, W, values, weights)
        node.path copy v.path

        if node.bound > highest_value
            pq add node
            ENDIF

    return highest_value, result
```

## Algorithm Explanation

To implement this algorithm, first we have to sort the item list on basis of value / weight to make sure that we prioritize those that have larger value and smaller weight so that we can add them first when calculating bound and not miss them. 

Then we implement a function to calculate bound. The idea is that we will continuously add item's value in the sorted list until it reach the capability of the knapsack. If the knapsack still has some weights left, we calculate the fraction of the next item that still fits in the knapsack. In the end, we sum up all values from the current item to the first n-1 items and a fraction of item s.

$$\text{bound(node s) }= v[s] + \sum_{i=s+1}^{n}v[i]+((W - \sum_{i=s+1}^{n}w[i]) / w[n+1]) *  v[n+1]$$

where the nodes from index s+1 to n denote items can be fully fit into the knapsack and the node at n+1 is fit into the knapsack whose capacity is W with a fractional value. 

For the main function implement the algorithm, we first start with an empty queue and dummy node, then perform best-first traversal on the remaining nodes from the root node. I implement this step by using a priority queue. Each node will branch out two child nodes represent for decisions of whether we take the parent node or not. At each node update the current weights and values, calculate the bound and update the global optimum or prune if necessary

## Evaluate

Comparing to Brute Fore, Branch and Bound complete the search much more faster (each test cases needs about half a second), and the I also did not get into trouble with any memory-related issue, so it is reasonable to say that Branch and Bound does not use as much memory as Brute Force. Additionally, similar to Brute Force, Branch and Bound is a global search algorithm; thus, it also returns the best solution for the problem. 

## Comments

Other heuristics beyond the one I used to trim the tree may also be utilized by Branch and Bound. Calculating the lower and upper bounds for each node in state space is one example. Kill that node if the lower bound exceeds the higher bound. If not, choose a node with the lowest lower bound. Alternatively, another heuristic strategy is to stop branching when the distance between the upper and lower boundaries is below a certain threshold.

Additionally, a variety of queue data structures can be used. An implementation using a FIFO queue produces a breadth-first search. In a stack (LIFO queue), a depth-first algorithm will be produced. While a priority queue that sorts nodes on their lower bound may be utilized to produce the best-first branch and bound technique, which is what I used

## Conclusion

The effectiveness of the algorithm significantly relies on a suitable heuristic function for the search space's branch boundaries. The procedure degenerates into an exhaustive search if bounds are not supplied. Branch and Bound can rapidly and efficiently arrive to the answer with the right heuristic function, though. Consequently, it may be stated that this method successfully solves issues, particularly those involving optimization.