In [171]:
import time
import numpy as np
import pandas as pd
import math
from heapdict import heapdict
np.set_printoptions(suppress=True)

In this programming problem and the next you'll code up the greedy algorithms from lecture for minimizing the weighted sum of completion times.

Download the text file below.

jobs.txt
This file describes a set of jobs with positive and integral weights and lengths. It has the format

[number_of_jobs]

[job_1_weight] [job_1_length]

[job_2_weight] [job_2_length]

...

For example, the third line of the file is "74 59", indicating that the second job has weight 74 and length 59.

You should NOT assume that job weights or lengths are distinct.

Your task in this problem is to run the greedy algorithm that schedules jobs in decreasing order of the difference (weight - length). Recall from lecture that this algorithm is not always optimal. IMPORTANT: if two jobs have equal difference (weight - length), you should schedule the job with higher weight first. Beware: if you break ties in a different way, you are likely to get the wrong answer. You should report the sum of weighted completion times of the resulting schedule --- a positive integer --- in the box below.

ADVICE: If you get the wrong answer, try out some small test cases to debug your algorithm (and post your test cases to the discussion forum).

In [45]:
def scheduling_difference(filename):
    """Weighted completion-time sum of the greedy (weight - length) schedule.

    Reads 'week9_file/<filename>', skips its first row (the job count) and
    treats every remaining row as a space-separated "weight length" pair.
    Jobs are ordered by decreasing (weight - length); among jobs with the
    same difference, the one with the larger weight goes first.

    Returns the sum over all jobs of weight * completion time, where a job's
    completion time is the running total of lengths up to and including it.
    """
    jobs = pd.read_csv('week9_file/' + filename, skiprows=[0], sep=" ", header=None)
    jobs.columns = ["weight", "length"]
    jobs['difference'] = jobs['weight'] - jobs['length']
    # Secondary key 'weight' implements the required tie-break.
    ordered = jobs.sort_values(by=['difference', 'weight'], ascending=False)
    finish_times = ordered['length'].cumsum()
    return finish_times.dot(ordered['weight'])

In [46]:
# Time a single run of the (weight - length) greedy schedule on week9_1.txt
# and print the resulting weighted completion-time sum.
start_time = time.time()
print(scheduling_difference('week9_1.txt'))
print("--- %s seconds ---" % (time.time() - start_time))

69119377652
--- 0.05358290672302246 seconds ---


For this problem, use the same data set as in the previous problem.

Your task now is to run the greedy algorithm that schedules jobs (optimally) in decreasing order of the ratio (weight/length). In this algorithm, it does not matter how you break ties. You should report the sum of weighted completion times of the resulting schedule --- a positive integer --- in the box below.

In [47]:
def scheduling_ratio(filename):
    """Weighted completion-time sum of the greedy (weight / length) schedule.

    Reads 'week9_file/<filename>', skips its first row (the job count) and
    treats every remaining row as a space-separated "weight length" pair.
    Jobs are ordered by decreasing weight/length ratio.

    Returns the sum over all jobs of weight * completion time, where a job's
    completion time is the running total of lengths up to and including it.
    """
    jobs = pd.read_csv('week9_file/' + filename, skiprows=[0], sep=" ", header=None)
    jobs.columns = ["weight", "length"]
    jobs['ratio'] = jobs['weight'] / jobs['length']
    ordered = jobs.sort_values(by=['ratio'], ascending=False)
    finish_times = ordered['length'].cumsum()
    return finish_times.dot(ordered['weight'])

In [48]:
# Time a single run of the (weight / length) greedy schedule on the same
# week9_1.txt data set and print the resulting weighted completion-time sum.
start_time = time.time()
print(scheduling_ratio('week9_1.txt'))
print("--- %s seconds ---" % (time.time() - start_time))

67311454237
--- 0.028902769088745117 seconds ---


In this programming problem you'll code up Prim's minimum spanning tree algorithm.

Download the text file below.

edges.txt
This file describes an undirected graph with integer edge costs. It has the format

[number_of_nodes] [number_of_edges]

[one_node_of_edge_1] [other_node_of_edge_1] [edge_1_cost]

[one_node_of_edge_2] [other_node_of_edge_2] [edge_2_cost]

...

For example, the third line of the file is "2 3 -8874", indicating that there is an edge connecting vertex #2 and vertex #3 that has cost -8874.

You should NOT assume that edge costs are positive, nor should you assume that they are distinct.

Your task is to run Prim's minimum spanning tree algorithm on this graph. You should report the overall cost of a minimum spanning tree --- an integer, which may or may not be negative --- in the box below.

IMPLEMENTATION NOTES: This graph is small enough that the straightforward O(mn) time implementation of Prim's algorithm should work fine. OPTIONAL: For those of you seeking an additional challenge, try implementing a heap-based version. The simpler approach, which should already give you a healthy speed-up, is to maintain relevant edges in a heap (with keys = edge costs). The superior approach stores the unprocessed vertices in the heap, as described in lecture. Note this requires a heap that supports deletions, and you'll probably need to maintain some kind of mapping between vertices and their positions in the heap.

In [142]:
def read_input(filename):
    """Parse an undirected, weighted edge list into an adjacency mapping.

    The file 'week9_file/<filename>' must start with a header line
    "[number_of_nodes] [number_of_edges]"; every later non-blank line is
    "[node] [other_node] [cost]" with integer fields. Blank lines anywhere
    in the file are skipped.

    Returns:
        (G, num_nodes, num_edges) where G maps each vertex to a list of
        (vertex, neighbour, cost) tuples. Every edge is stored under both of
        its endpoints. num_nodes and num_edges are the header values.

    Raises:
        ValueError: if the file contains no header line at all.
    """
    G = {}
    num_nodes = num_edges = None
    with open('week9_file/' + filename) as f:
        for raw_line in f:
            fields = raw_line.split()
            if not fields:
                # Blank line: skip it instead of mistaking it for the header.
                continue
            if num_nodes is None:
                # First non-blank line is the header.
                num_nodes, num_edges = int(fields[0]), int(fields[1])
                continue
            v, w, weight = int(fields[0]), int(fields[1]), int(fields[2])
            # Undirected graph: record the edge from both endpoints.
            G.setdefault(v, []).append((v, w, weight))
            G.setdefault(w, []).append((w, v, weight))

    if num_nodes is None:
        raise ValueError("no '[number_of_nodes] [number_of_edges]' header found in " + filename)
    return G, num_nodes, num_edges

In [153]:
def prims_algorithm(filename):
    """Cost of a minimum spanning tree via the straightforward Prim's algorithm.

    The graph is loaded with read_input(filename). Starting from vertex 1,
    each round scans every edge leaving the already-spanned vertices and adds
    the cheapest one that reaches a vertex not yet spanned, until num_nodes
    vertices (as given by the file header) have been spanned.

    Returns:
        The sum of the costs of the chosen edges.

    Raises:
        ValueError: if no edge leaves the spanned set before num_nodes
            vertices have been reached (for example the graph is
            disconnected, or vertex 1 has no edges).
    """
    G, num_nodes, _ = read_input(filename)
    spanned = [1]    # insertion order; keeps the scan (and tie-breaking) deterministic
    in_tree = {1}    # same vertices as a set, for O(1) membership tests
    total_cost = 0

    while len(spanned) < num_nodes:
        crossing = [
            edge
            for u in spanned
            for edge in G.get(u, ())
            if edge[1] not in in_tree
        ]
        if not crossing:
            raise ValueError(
                "no edge leaves the spanned vertices; graph is not connected from vertex 1"
            )
        # min() keeps the first cheapest edge in scan order.
        _, new_vertex, cost = min(crossing, key=lambda edge: edge[2])
        spanned.append(new_vertex)
        in_tree.add(new_vertex)
        total_cost += cost

    return total_cost

In [156]:
# Run and time the straightforward Prim implementation on the four small test
# graphs. The trailing comment on each call records the cost the printed
# value is compared against.
start_time = time.time()
print(prims_algorithm('week9_2_test1.txt')) #-97121
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(prims_algorithm('week9_2_test2.txt')) #-64386
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(prims_algorithm('week9_2_test3.txt')) #-7430
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(prims_algorithm('week9_2_test4.txt')) #-12829
print("--- %s seconds ---" % (time.time() - start_time))

-97121
--- 0.0035152435302734375 seconds ---
-64386
--- 0.0031881332397460938 seconds ---
-7430
--- 0.008777856826782227 seconds ---
-12829
--- 0.00092315673828125 seconds ---


In [183]:
# Run and time the straightforward Prim implementation on the full
# assignment graph (week9_2.txt).
start_time = time.time()
print(prims_algorithm('week9_2.txt')) #-3612829
print("--- %s seconds ---" % (time.time() - start_time))

-3612829
--- 4.225016117095947 seconds ---


In [181]:
def prims_algorithm_heap(filename):
    """Cost of a minimum spanning tree via Prim's algorithm with a vertex heap.

    The graph is loaded with read_input(filename). Every vertex other than
    the start vertex (1) is kept in a heapdict keyed by the cheapest known
    edge cost connecting it to the spanned set (math.inf until such an edge
    is seen). Each round pops the vertex with the smallest key, adds that key
    to the total and lowers the keys of its still-unspanned neighbours.

    Returns:
        The sum of the costs of the chosen edges.

    Raises:
        KeyError: if vertex 1 does not appear in the graph.
        ValueError: if some vertex cannot be reached from vertex 1 (its key
            is still math.inf when it is popped).
    """
    G, _, _ = read_input(filename)
    start = 1

    frontier = heapdict()
    for vertex in G:
        if vertex != start:
            frontier[vertex] = math.inf

    total_cost = 0
    newly_spanned = start
    while True:
        # The start vertex is relaxed exactly like every later vertex, so
        # parallel edges keep their minimum cost and self-loops are ignored.
        for _, neighbour, cost in G[newly_spanned]:
            if neighbour in frontier and cost < frontier[neighbour]:
                frontier[neighbour] = cost
        # Emptiness check by length; truthiness of the vertex labels
        # (e.g. a vertex named 0) must not end the loop early.
        if not frontier:
            break
        newly_spanned, key = frontier.popitem()
        if key == math.inf:
            raise ValueError(
                "vertex %s is not reachable from vertex %s" % (newly_spanned, start)
            )
        total_cost += key

    return total_cost

In [182]:
# Run and time the heap-based Prim implementation on the same four small test
# graphs. The trailing comment on each call records the cost the printed
# value is compared against.
start_time = time.time()
print(prims_algorithm_heap('week9_2_test1.txt')) #-97121
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(prims_algorithm_heap('week9_2_test2.txt')) #-64386
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(prims_algorithm_heap('week9_2_test3.txt')) #-7430
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(prims_algorithm_heap('week9_2_test4.txt')) #-12829
print("--- %s seconds ---" % (time.time() - start_time))

-97121
--- 0.01171731948852539 seconds ---
-64386
--- 0.0029458999633789062 seconds ---
-7430
--- 0.001644134521484375 seconds ---
-12829
--- 0.001481771469116211 seconds ---


In [184]:
# Run and time the heap-based Prim implementation on the full assignment
# graph (week9_2.txt).
start_time = time.time()
print(prims_algorithm_heap('week9_2.txt')) #-3612829
print("--- %s seconds ---" % (time.time() - start_time))

-3612829
--- 0.10876202583312988 seconds ---


In [93]:
G,num_nodes,num_edges = read_input('week9_2.txt')

In [94]:
a = [(1,2),(3,4)]

In [95]:
b = []

In [96]:
# Scratch experiment: `*a` unpacks the two tuples in `a` into two positional
# arguments, which list.append() rejects -- hence the TypeError recorded for
# this cell.
b.append(*a)
print(b)

TypeError: append() takes exactly one argument (2 given)

In [97]:
G.get(10) != None

True

In [98]:
min(G[1], key = lambda t: t[2])

(1, 397, -5942)

In [99]:
print(prims_algorithm('week9_2_test1.txt'))

-56742


In [101]:
a =np.loadtxt('week9_file/week9_2_test1.txt',skiprows=1)
print(a)

[[    1.     2. -1833.]
 [    2.     3. -6643.]
 [    3.     4.  4589.]
 [    4.     5.  5877.]
 [    5.     6.  3712.]
 [    6.     7.   -32.]
 [    7.     8. -5199.]
 [    8.     9. -6875.]
 [    9.    10.   678.]
 [   10.    11. -6776.]
 [   11.    12. -1288.]
 [   12.    13. -1756.]
 [   13.    14.  4956.]
 [   14.    15.   395.]
 [   15.    16. -9792.]
 [   16.    17.  2817.]
 [   17.    18.  8432.]
 [   18.    19.  4996.]
 [   19.    20. -4048.]
 [   20.    21.  6410.]
 [   21.    22. -9713.]
 [   22.    23. -7890.]
 [   23.    24.  6979.]
 [   24.    25. -4616.]
 [   25.    26.  2224.]
 [   26.    27. -9646.]
 [   27.    28. -6172.]
 [   28.    29.   186.]
 [   29.    30. -7284.]
 [   30.    31.  2251.]
 [   31.    32.  3275.]
 [   32.    33. -2248.]
 [   33.    34. -6452.]
 [   34.    35.  3831.]
 [   35.    36.  3987.]
 [   36.    37. -3058.]
 [   37.    38. -8416.]
 [   38.    39.  4086.]
 [   39.    40.  9962.]
 [    1.    17. -7663.]
 [    2.    19.  7288.]
 [    4.     8. 

In [43]:
# NOTE(review): two_sum_problem_dict / two_sum_problem_sort are not defined in
# any cell visible in this notebook -- presumably leftovers from an earlier
# (week 8) notebook; confirm before relying on Restart & Run All.
start_time = time.time()
print(two_sum_problem_dict('week8_test2.txt',0,4))
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(two_sum_problem_sort('week8_test2.txt',0,4))
print("--- %s seconds ---" % (time.time() - start_time))


NameError: name 'time' is not defined

In [71]:
start_time = time.time()
print(two_sum_problem_dict('week8_test3.txt',-10000,10000))
print("--- %s seconds ---" % (time.time() - start_time))
start_time = time.time()
print(two_sum_problem_sort('week8_test3.txt',-10000,10000))
print("--- %s seconds ---" % (time.time() - start_time))

13
--- 0.30687427520751953 seconds ---
13
--- 0.0005242824554443359 seconds ---


In [72]:
start_time = time.time()
print(two_sum_problem_sort('week8.txt',-10000,10000)) #427
print("--- %s seconds ---" % (time.time() - start_time))

427
--- 2.2963762283325195 seconds ---


In [None]:
start_time = time.time()
print(two_sum_problem_dict('week8.txt',-10000,10000)) #427
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
print(numb)

In [None]:
my_dict = {}
some_key = 1
my_dict[some_key] = my_dict.get(some_key, 0) + 1


In [None]:
for k,v in my_dict.items():
    print(k,v)

In [None]:
my_dict[some_key] = my_dict.get(some_key, 0) + 1

In [None]:
for k,v in my_dict.items():
    print(k,v)

In [None]:
for k in my_dict.keys():
    print(k)

In [None]:
 # Leftmost insertion index for 2 in the sorted list.
 np.searchsorted([1,2,3,4,5], 2,'left')

In [None]:
 np.searchsorted([1,2,3,4,5], 4,'right')

In [None]:
rng = np.arange(1,4)

In [None]:
print(rng)

In [9]:
arr = [2,3,3,4,5,122,3,4]
arr.sort()

In [11]:
# Scratch experiment: `arr` is a plain Python list here, so the chained
# comparison 4 < arr compares an int with a list -- the TypeError recorded
# for this cell.
arr[np.where(4<arr<8)]

TypeError: unorderable types: int() < list()

In [12]:
a = np.array([1,2,3,4,5,6,7,8,9])
b = a[(a>2) & (a<8)]

In [18]:
b

array([3, 4, 5, 6, 7])

In [19]:
b + 3

array([ 6,  7,  8,  9, 10])

In [48]:
import bisect

In [50]:
 bisect.bisect_right([1, 5, 9, 12, 18, 35], 6)

2