In [2]:
import time
import numpy as np
import itertools
np.set_printoptions(suppress=True)
import sys
sys.setrecursionlimit(3000)

In this programming problem and the next you'll code up the knapsack algorithm from lecture.

Let's start with a warm-up. Download the text file below.

This file describes a knapsack instance, and it has the following format:

[knapsack_size][number_of_items]

[value_1] [weight_1]

[value_2] [weight_2]

...

For example, the third line of the file is "50074 659", indicating that the second item has value 50074 and size 659, respectively.

You can assume that all numbers are positive. You should assume that item weights and the knapsack capacity are integers.

In the box below, type in the value of the optimal solution.

ADVICE: If you're not getting the correct answer, try debugging your algorithm using some small test cases. And then post them to the discussion forum!


In [30]:
def knapsack_problem_small(filename):
    first = True
    counter = 0 
    with open('week12_file/'+filename) as f:
        for line in f:
            line = line.split() # to deal with blank 
            if line and not first:            # lines (ie skip them)
                value = int(line[0])
                weight = int(line[1])
                knapsack_instance[counter,0] = value
                knapsack_instance[counter,1] = weight
                counter+=1
            else:
                knapsack_size = int(line[0])
                num_items = int(line[1])
                knapsack_instance = np.zeros((num_items,2),dtype=int)
                first = False
                
    #A[i,x] = value of best solution that: 1) uses only the first i item 2) has total size <= x          
    A = np.zeros((num_items,knapsack_size),dtype=int)
    
    for i in range(1,num_items):
        for x in range(knapsack_size):
            vi = knapsack_instance[i,0]
            wi = knapsack_instance[i,1]
            #edge cases if weight is more than knapsack size
            if wi>x:
                A[i,x] = A[i-1,x]
            else:
            #Compute the best solution considering two cases: 1) item i excluded 2) item i included
                A[i,x] = max(A[i-1,x],A[i-1,x-wi]+vi)
    
    return A[-1,-1]
    

In [31]:
start_time = time.time()
print(knapsack_problem_small('week12_test1.txt')) #147
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(knapsack_problem_small('week12_test2.txt')) #210
print("--- %s seconds ---" % (time.time() - start_time))

147
--- 0.014214038848876953 seconds ---
210
--- 0.0043487548828125 seconds ---


In [60]:
start_time = time.time()
print(knapsack_problem_small('week12_1.txt')) #2493893
print("--- %s seconds ---" % (time.time() - start_time))

2493893
--- 1.9353950023651123 seconds ---


This problem also asks you to solve a knapsack instance, but a much bigger one.

Download the text file below.

This file describes a knapsack instance, and it has the following format:

[knapsack_size][number_of_items]

[value_1] [weight_1]

[value_2] [weight_2]

...

For example, the third line of the file is "50074 834558", indicating that the second item has value 50074 and size 834558, respectively. As before, you should assume that item weights and the knapsack capacity are integers.

This instance is so big that the straightforward iterative implemetation uses an infeasible amount of time and space. So you will have to be creative to compute an optimal solution. One idea is to go back to a recursive implementation, solving subproblems --- and, of course, caching the results to avoid redundant work --- only on an "as needed" basis. Also, be sure to think about appropriate data structures for storing and looking up solutions to subproblems.

In the box below, type in the value of the optimal solution.

ADVICE: If you're not getting the correct answer, try debugging your algorithm using some small test cases. And then post them to the discussion forum!

In [25]:
def knapsack_recursive(knapsack_instance,i,x,memoization=None):
    '''Recursive approach of knapsack probllem. It utilizes hash table to store up the subproblem solution.'''
    
    #Base case
    if i == 0:
        return 0
    vi = knapsack_instance[i,0]
    wi = knapsack_instance[i,1]
    
    if memoization is None:
        memoization= {}
    
    #If we have not computed the subproblem before, recursively solves the solution.
    #If we have solved, retrieved it from hash table
    if memoization.get((i-1,x)) == None:
        S1 = knapsack_recursive(knapsack_instance,i-1,x,memoization)
        memoization[(i-1,x)] = S1
    else:
        S1 = memoization[(i-1,x)]
    
    #if weight is bigger than the knapsack size, return S1
    if wi >x:
        return S1
    else:
        if memoization.get((i-1,x-wi)) == None:
            S2 = knapsack_recursive(knapsack_instance,i-1,x-wi,memoization)
            memoization[(i-1,x-wi)] = S2 
        else:
            S2 = memoization[(i-1,x-wi)]
    return max(S1,S2+vi)

In [60]:
def knapsack_problem_big_recursive(filename):
    
    first = True
    counter = 0 
    with open('week12_file/'+filename) as f:
        for line in f:
            line = line.split() # to deal with blank 
            if line and not first:            # lines (ie skip them)
                value = int(line[0])
                weight = int(line[1])
                knapsack_instance[counter,0] = value
                knapsack_instance[counter,1] = weight
                counter+=1
            else:
                knapsack_size = int(line[0])
                num_items = int(line[1])
                knapsack_instance = np.zeros((num_items,2),dtype=int)
                first = False
                
    #Sort will make the recursion faster, but the sorting itself takes time
    #np.sort(knapsack_instance,axis=1)
    optim_value = knapsack_recursive(knapsack_instance,num_items-1,knapsack_size-1)
    
    return optim_value
    

In [61]:
start_time = time.time()
print(knapsack_problem_big_recursive('week12_test1.txt')) #147
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(knapsack_problem_big_recursive('week12_test2.txt')) #210
print("--- %s seconds ---" % (time.time() - start_time))

147
--- 0.004553079605102539 seconds ---
210
--- 0.0032570362091064453 seconds ---


In [62]:
start_time = time.time()
print(knapsack_problem_big_recursive('week12_1.txt')) #2493893
print("--- %s seconds ---" % (time.time() - start_time))

2493893
--- 3.2261688709259033 seconds ---


In [63]:
start_time = time.time()
print(knapsack_problem_big_recursive('week12_2.txt')) #4243395
print("--- %s seconds ---" % (time.time() - start_time))

4243395
--- 50.0116491317749 seconds ---


In [28]:
def knapsack_problem_big_iterative(filename):
    
    first = True
    counter = 0 
    with open('week12_file/'+filename) as f:
        for line in f:
            line = line.split() # to deal with blank 
            if line and not first:            # lines (ie skip them)
                value = int(line[0])
                weight = int(line[1])
                knapsack_instance[counter,0] = value
                knapsack_instance[counter,1] = weight
                counter+=1
            else:
                knapsack_size = int(line[0])
                num_items = int(line[1])
                knapsack_instance = np.zeros((num_items,2),dtype=int)
                first = False
    A = np.zeros((knapsack_size),dtype=int)
    
    #The trick is to scan the array in reversing order, i.e., from big end to small end. In other words,
    #we do the computation from big knapsack size to small knapsack size.
    
    #A[x]=max{A[x],A[x-wi]+vi}
    
    #The reason is that A[x] is updated by either unchanging or using A[x-wi] which
    #represents smaller knapsack size than x. Consequently, by such reversed scan,
    #you will never erase your previous result.
    
    for i in range(1,num_items):
        for x in range(knapsack_size-1,0,-1):
            vi = knapsack_instance[i,0]
            wi = knapsack_instance[i,1]
            if wi <=x:
                A[x] = max(A[x],A[x-wi]+vi)
    return A[-1]

In [29]:
start_time = time.time()
print(knapsack_problem_big_iterative('week12_test1.txt')) #147
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(knapsack_problem_big_iterative('week12_test2.txt')) #210
print("--- %s seconds ---" % (time.time() - start_time))

147
--- 0.011773109436035156 seconds ---
210
--- 0.002624988555908203 seconds ---


In [23]:
start_time = time.time()
print(knapsack_problem_big_iterative('week12_1.txt')) #2493893
print("--- %s seconds ---" % (time.time() - start_time))

2493893
--- 1.6817128658294678 seconds ---


In [None]:
start_time = time.time()
print(knapsack_problem_big_iterative('week12_2.txt')) #4243395
print("--- %s seconds ---" % (time.time() - start_time))