## Code up some greedy algorithms, starting with a scheduling problem

In [8]:
## Load the raw scheduling input; every line (count header included) is kept as a string
with open('Downloads/algo2jobs.txt') as jobFile:
    jobs = list(jobFile)

In [9]:
print(jobs[0], len(jobs))

10000
 10001


In [10]:
jobs[1] 
## job weight, job length

'8 50\n'

In [12]:
## Skip the count line, then parse each "weight length" line into an (int, int) tuple.
## NOTE: this rebinds `jobs` from raw lines to tuples, so the cell cannot be re-run
## on its own -- re-run the load cell first.
jobs = [
    (int(weight), int(length))
    for weight, length in (line.strip('\n').split() for line in jobs[1:])
]

In [13]:
jobs[0]

(8, 50)

In [14]:
jobs[9999]

(68, 15)

Problem 1:  Your task in this problem is to run the greedy algorithm that schedules jobs in decreasing order of the difference (weight - length). Recall from lecture that this algorithm is not always optimal. IMPORTANT: if two jobs have equal difference (weight - length), you should schedule the job with higher weight first. Beware: if you break ties in a different way, you are likely to get the wrong answer. You should report the sum of weighted completion times of the resulting schedule --- a positive integer --- as the answer.

In [26]:
## Greedy #1: largest (weight - length) first; equal differences go to the heavier job.
jobs1 = list(jobs)
jobs1.sort(key=lambda job: (job[0] - job[1], job[0]), reverse=True)

In [27]:
jobs1[:10]

[(99, 1),
 (100, 3),
 (100, 3),
 (99, 2),
 (99, 2),
 (98, 1),
 (98, 2),
 (98, 2),
 (98, 2),
 (99, 4)]

In [32]:
## Sum of weighted completion times for the schedule in jobs1.
## `time` is the running clock: it is the completion time of the job just scheduled.
answer = 0
time = 0
for weight, length in jobs1:
    time += length
    answer += time * weight
print(answer)

69119377652


Problem 2:  Your task now is to run the greedy algorithm that schedules jobs (optimally) in decreasing order of the ratio (weight/length). In this algorithm, it does not matter how you break ties. You should report the sum of weighted completion times of the resulting schedule --- a positive integer --- as the answer.

In [29]:
## Greedy #2: ascending length/weight, i.e. descending weight/length for positive values.
jobs2 = list(jobs)
jobs2.sort(key=lambda job: job[1] / job[0])

In [30]:
jobs2[:10]

[(99, 1),
 (98, 1),
 (95, 1),
 (95, 1),
 (93, 1),
 (93, 1),
 (92, 1),
 (88, 1),
 (87, 1),
 (86, 1)]

In [33]:
## Same weighted-completion-time sum as for Problem 1, now over the ratio-ordered schedule.
time = 0
answer = 0
for job in jobs2:
    time += job[1]            # clock advances by this job's length
    answer += time * job[0]   # weight * completion time
print(answer)

67311454237


## Now for some Prim's MST

In [37]:
## Load the raw edge file into the notebook, one string per line.
## The file describes an undirected graph with integer edge costs.
with open('Downloads/algo2edges.txt') as edgeFile:
    graph = list(edgeFile)

In [38]:
graph[0]   ## numNodes, numEdges

'500 2184\n'

In [39]:
graph[1]   ## u, v, length

'1 2 6807\n'

In [46]:
'2 1 6807\n' in graph

False

In [40]:
len(graph)

2185

In [53]:
## Adjacency map: node label -> set of (neighbor label, edge cost) pairs.
## Each undirected edge is recorded under both of its endpoints.
graphDict = {}
for line in graph[1:]:  # graph[0] is the "numNodes numEdges" header
    fields = line.strip('\n').split()
    tail, head, cost = fields[0], fields[1], int(fields[2])
    graphDict.setdefault(tail, set()).add((head, cost))
    graphDict.setdefault(head, set()).add((tail, cost))

In [54]:
graphDict['1']

{('132', -151),
 ('171', 8358),
 ('2', 6807),
 ('244', 6723),
 ('310', 9791),
 ('316', 569),
 ('324', -1612),
 ('397', -5942),
 ('414', 3655)}

In [55]:
graphDict['2']

{('1', 6807),
 ('104', -8744),
 ('157', 1036),
 ('173', -7751),
 ('25', 6267),
 ('3', -8874),
 ('309', -7230),
 ('39', 907)}

In [56]:
## Flat edge list: one [u, v, cost] list of strings per edge line (header line skipped)
graphList = []
for row in graph[1:]:
    graphList.append(row.strip('\n').split())

In [57]:
graphList[:3]

[['1', '2', '6807'], ['2', '3', '-8874'], ['3', '4', '-1055']]

In [58]:
## Convert each edge's cost field (third entry) from text to int, in place
for edge in graphList:
    edge[2] = int(edge[2])

In [59]:
graphList[:3]

[['1', '2', 6807], ['2', '3', -8874], ['3', '4', -1055]]

In [65]:
## Freeze every edge into a tuple so edges are hashable (e.g. usable as members of the tree's edge set)
graphList = list(map(tuple, graphList))

In [66]:
graphList[:3]

[('1', '2', 6807), ('2', '3', -8874), ('3', '4', -1055)]

In [67]:
## Collect every edge whose first endpoint has the larger node number
bkwds = {edge for edge in graphList if int(edge[0]) > int(edge[1])}

In [68]:
len(bkwds)

0

OK, so the smaller-numbered endpoint is always listed first in each edge tuple, which may make it easier to look up edges later.

## Implement a min-heap keyed on edge length that supports log(n)-time extraction of the minimum and log(n)-time key decreases for arbitrary items (the minimum itself can be read in constant time)

In [47]:
## First, a way to compare heap nodes by shortest length key. 
def shorter(node1, node2):
    """Return True when node1's key (its last element) is no larger than node2's."""
    key1 = node1[-1]
    key2 = node2[-1]
    return key1 <= key2

In [48]:
## Next, a utility to swap in place two array items while optionally maintaining a list of their locations in the array
def swap(arr, i, j, locator=None):
    """Exchange arr[i] and arr[j] in place, optionally recording their new positions.

    Parameters
    ----------
    arr : mutable sequence of indexable items; item[0] is used as the item's id.
    i, j : indices to exchange. They are stored in `locator` exactly as passed,
        so a negative index is recorded as that negative number.
    locator : optional dict mapping item id -> index in `arr`. When given, the
        entries for both exchanged items are updated.

    Returns
    -------
    None
    """
    # Test against None, not truthiness: an empty dict is still a valid locator
    # and must receive the position updates.
    if locator is not None:
        locator[arr[i][0]] = j
        locator[arr[j][0]] = i
    arr[i], arr[j] = arr[j], arr[i]

In [76]:
def bubbleUp(array, newIndex, locDict=None):
    """Sift the item at newIndex toward the root of the array-backed heap.

    The item is exchanged with its parent for as long as the parent's key is
    strictly larger (per `shorter`). `locDict`, when supplied, is handed to
    `swap` so item positions stay recorded. Returns None.
    """
    child = newIndex
    while child > 0:
        parent = (child - 1) // 2
        if not shorter(array[parent], array[child]):
            swap(array, child, parent, locDict)
            child = parent
        else:
            # parent is already no larger than the child: heap order holds
            return

In [75]:
def bubbleDown(array, newIndex=0, locDict=None):
    """Sift the item at newIndex toward the leaves of the array-backed heap.

    newIndex defaults to 0 for the usual case where the last item has just been
    moved to the root. At each level the item is exchanged with its smaller
    child unless it is already no larger than that child (per `shorter`).
    `locDict`, when supplied, is handed to `swap`. Returns None.
    """
    size = len(array)
    parent = newIndex
    left = 2 * parent + 1
    while left < size:
        right = left + 1
        if right == size:
            # only a left child exists, and it is the last slot of the array
            if not shorter(array[parent], array[left]):
                swap(array, left, parent, locDict)
            return
        # ties between the children go to the left one
        smaller = left if shorter(array[left], array[right]) else right
        if shorter(array[parent], array[smaller]):
            return
        swap(array, parent, smaller, locDict)
        parent = smaller
        left = 2 * parent + 1

### Prim's with heap:

In [178]:
## Initialise Prim's algorithm: seed the tree with one edge and build the heap of
## not-yet-spanned nodes, each keyed by the cheapest known link into the tree.
## Seeding with the globally cheapest edge is not necessary, but seems appropriate.
shortest = min(graphList, key=lambda x: x[-1])  ## (e.g., ('152', '414', -10000))
mst = {shortest}  # store edges of the MST in case needed
spanned = {shortest[0], shortest[1]}  ## nodes that have already been pulled into the tree
notSpanned = set(graphDict.keys()) - spanned
heap = []  ## array-backed min-heap of (node, cost) entries
## shortestLink[node] = (tree node it would attach to, cost); ('-1', inf) means "no link known yet".
shortestLink = {node: ('-1', float('inf')) for node in notSpanned}
## The two seed endpoints deliberately get no shortestLink entry; later loops test
## `not in spanned` before indexing shortestLink, which avoids key errors for them.
for v in graphDict[shortest[0]] - {(shortest[1], shortest[-1])}:  ## don't want to append shortest[1] to the heap
    ## to start out, any distance to the first tree node is the shortest so far
    i = len(heap)
    heap.append(v)
    bubbleUp(heap, i)  ## no locDict here: positions are recorded in one pass once the heap is fully built
    shortestLink[v[0]] = (shortest[0], v[-1])  ## keep track of how each non-spanned node is attached to spanned, if at all

## Every node with no link to shortest[0] joins the heap with an infinite key.
## Appending without bubbleUp is enough: inf is never smaller than any key already present.
for v in notSpanned:
    if shortestLink[v][-1] == float('inf'):
        heap.append((v, float('inf')))
## Record the heap index of each unattached vertex so its entry can be found and updated later:
locs = dict()
for i, v in enumerate(heap):
    locs[v[0]] = i

In [179]:
mst  ## just curious what the shortest edge is

{('152', '414', -10000)}

In [180]:
spanned

{'152', '414'}

In [181]:
heap[4]

('250', -2644)

In [182]:
locs['250']

4

In [183]:
graphDict['414']

{('1', 3655),
 ('152', -10000),
 ('252', 6273),
 ('286', 5381),
 ('31', -2222),
 ('413', 7295),
 ('415', -2987),
 ('53', -507)}

In [184]:
## shortest[1] is in the tree too, so its edges must also be offered to the unattached nodes.
### This could be folded into the main loop below, but a separate cell makes the starting state easier to inspect
for link in graphDict[shortest[1]]:
    target, cost = link[0], link[-1]
    if target in spanned:
        continue
    if not cost < shortestLink[target][-1]:
        continue
    shortestLink[target] = (shortest[1], cost)
    slot = locs[target]
    heap[slot] = link
    ## the key only ever decreases here, so sifting up from the current slot restores heap order
    bubbleUp(heap, newIndex=slot, locDict=locs)
    

In [185]:
heap[:14]

[('60', -5617),
 ('227', -4796),
 ('31', -2222),
 ('415', -2987),
 ('250', -2644),
 ('363', 1994),
 ('63', -1476),
 ('393', 663),
 ('53', -507),
 ('391', 5083),
 ('151', -1850),
 ('139', 3671),
 ('286', 5381),
 ('51', inf)]

In [186]:
while heap:  ## each pass moves the closest unattached node into the tree, until the heap is empty
    ## extract-min: move the root to the tail, pop it off, and (below) sift the new root down
    swap(heap, 0, -1, locs)
    newEdge = heap.pop()  ## e.g., ('60', -5617)
    newVert = newEdge[0]  ## e.g., '60'
    attachCost = newEdge[-1]
    mst.add((newVert, shortestLink[newVert][0], attachCost))  ## e.g., ('60', '152', -5617)
    spanned.add(newVert)
    notSpanned.remove(newVert)  ## this set is not read by the loop, but is kept in sync
    bubbleDown(heap, locDict=locs)  # the former last element now sits at the root; restore heap order
    for link in graphDict[newVert]:
        target, cost = link[0], link[-1]
        ## the spanned test must come first: the two seed endpoints have no shortestLink entry
        if target in spanned:
            continue
        if not cost < shortestLink[target][-1]:
            continue
        shortestLink[target] = (newVert, cost)
        slot = locs[target]
        heap[slot] = link
        bubbleUp(heap, newIndex=slot, locDict=locs)



In [171]:
shortestLink['60']

('152', -5617)

In [172]:
graphDict['60']

{('151', 5579),
 ('152', -5617),
 ('158', 5691),
 ('281', -2544),
 ('337', 302),
 ('427', 2681),
 ('454', 632),
 ('456', 4219),
 ('483', -953),
 ('59', 1910),
 ('61', 4401),
 ('75', -4458),
 ('98', 1851)}

In [174]:
shortestLink['226']

('227', -5058)

In [175]:
mst

{('1', '397', -5942),
 ('10', '9', -5012),
 ('100', '202', -7606),
 ('101', '22', -9660),
 ('102', '398', -8299),
 ('103', '454', -9992),
 ('104', '2', -8744),
 ('105', '478', -4873),
 ('106', '459', -7473),
 ('107', '31', -9611),
 ('108', '66', -8381),
 ('109', '65', -7488),
 ('11', '176', -9470),
 ('110', '132', -8834),
 ('111', '146', -9966),
 ('112', '496', -5901),
 ('113', '373', -5059),
 ('114', '113', -9800),
 ('115', '364', -5521),
 ('116', '197', -6213),
 ('117', '313', -8467),
 ('118', '117', -9012),
 ('119', '247', -1467),
 ('12', '11', -6736),
 ('120', '373', -9213),
 ('121', '382', -8779),
 ('122', '87', -8824),
 ('123', '124', -8728),
 ('124', '157', -8496),
 ('125', '124', -6474),
 ('126', '466', -7322),
 ('127', '94', -2436),
 ('128', '354', -9071),
 ('129', '318', -7647),
 ('13', '12', -7604),
 ('130', '237', -9047),
 ('131', '132', -7293),
 ('132', '363', -7224),
 ('133', '365', -8417),
 ('134', '133', -4059),
 ('135', '220', -9436),
 ('136', '259', -9993),
 ('137', '

In [176]:
len(mst)

499

In [177]:
## Total cost of the tree: add up the last field (the cost) of every stored edge
total = sum(edge[-1] for edge in mst)
print(total)   ## Assignment answer

-3612829
