# Dijkstra's shortest path algorithm, including a heap implementation from scratch

In [27]:
# Read the raw adjacency-list file; each text line becomes one list element
with open('Downloads/dijkstraData.txt') as dataFile:
    graph = list(dataFile)

In [28]:
# Sanity check: how many lines were read from the file
len(graph)

200

In [29]:
# Peek at the lexicographically smallest raw line to see the record format
min(graph)

'1\t80,982\t163,8164\t170,2620\t145,648\t200,8021\t173,2069\t92,647\t26,4122\t140,546\t11,1913\t160,6461\t27,7905\t40,9047\t150,2183\t61,9146\t159,7420\t198,1724\t114,508\t104,6647\t30,4612\t99,2367\t138,7896\t169,8700\t49,2437\t125,2909\t117,2597\t55,6399\t\n'

In [30]:
# ...and at the lexicographically largest raw line (string comparison, not numeric)
max(graph)

'99\t104,9140\t18,5717\t174,5675\t157,6818\t132,6234\t182,2897\t151,4990\t183,3577\t59,671\t133,2090\t23,6485\t153,4560\t31,8583\t74,7031\t1,2367\t127,1408\t37,7757\t193,4566\t194,5832\t38,6169\t\n'

### Looks like graph needs some pre-processing

In [31]:
# Remove the tab/newline characters that surround every raw line
graph = [rawLine.strip('\t\n') for rawLine in graph]

In [32]:
# Break each line into its tab-separated fields: the vertex id first, then its edges
graph = [record.split('\t') for record in graph]

In [50]:
# Register every vertex with an empty edge set.
# NOTE: pop(0) removes the leading vertex id from each record in `graph`, so only
# the edge fields remain afterwards; re-running this cell would pop again.
graphDict = {}
for fields in graph:
    vertex = int(fields.pop(0))
    graphDict[vertex] = set()

In [53]:
# Split every remaining field on the comma into a two-item list of strings
graph = [[field.split(',') for field in fields] for fields in graph]

In [79]:
# First record after splitting: a list of two-string pairs, one pair per edge
print(graph[0])

[['80', '982'], ['163', '8164'], ['170', '2620'], ['145', '648'], ['200', '8021'], ['173', '2069'], ['92', '647'], ['26', '4122'], ['140', '546'], ['11', '1913'], ['160', '6461'], ['27', '7905'], ['40', '9047'], ['150', '2183'], ['61', '9146'], ['159', '7420'], ['198', '1724'], ['114', '508'], ['104', '6647'], ['30', '4612'], ['99', '2367'], ['138', '7896'], ['169', '8700'], ['49', '2437'], ['125', '2909'], ['117', '2597'], ['55', '6399']]


In [58]:
# Fill each vertex's edge set with (head, length) integer pairs.
# Iterates over however many records were loaded instead of a hard-coded 200.
# The record at position i (0-based) is stored under vertex i + 1, so this
# relies on the file listing vertices 1, 2, 3, ... in order.
for i, edges in enumerate(graph):
    for e in edges:
        graphDict[i + 1].add((int(e[0]), int(e[1])))

In [78]:
# Edges leaving vertex 1, as a set of (head, length) integer tuples
print(graphDict[1])

{(159, 7420), (145, 648), (125, 2909), (104, 6647), (138, 7896), (114, 508), (30, 4612), (92, 647), (163, 8164), (198, 1724), (80, 982), (99, 2367), (150, 2183), (61, 9146), (173, 2069), (140, 546), (55, 6399), (26, 4122), (200, 8021), (11, 1913), (170, 2620), (49, 2437), (27, 7905), (40, 9047), (169, 8700), (117, 2597), (160, 6461)}


## Guess we need a reverse adjacency list as well, for referencing edges by heads

In [60]:
# One fresh, empty set per vertex id 1..200; filled later with (tail, length) pairs
revGraph = dict((h, set()) for h in range(1, 201))

In [70]:
# Mirror every edge: an edge tail -> head is filed under its head as (tail, length)
for tail, outEdges in graphDict.items():
    for head, dist in outEdges:
        revGraph[head].add((tail, dist))

In [77]:
# Edges arriving at vertex 1, as (tail, length) tuples, for comparison with graphDict[1]
print(revGraph[1])

{(159, 7420), (145, 648), (125, 2909), (104, 6647), (138, 7896), (114, 508), (30, 4612), (92, 647), (163, 8164), (198, 1724), (80, 982), (99, 2367), (150, 2183), (61, 9146), (173, 2069), (140, 546), (55, 6399), (26, 4122), (200, 8021), (11, 1913), (170, 2620), (49, 2437), (27, 7905), (40, 9047), (169, 8700), (117, 2597), (160, 6461)}


## Hmm, revGraph[1] is identical to graphDict[1], so apparently this is an undirected graph and the reverse dict was unnecessary (the spot check of vertices 13 and 135 below agrees)

In [75]:
# Spot check on another vertex: the edges stored for vertex 13
print(graphDict[13])

{(135, 861), (50, 758), (45, 1794), (178, 4194), (144, 9987), (151, 9629), (57, 6850), (163, 3870), (29, 2784), (97, 7026), (43, 5400), (105, 6821), (59, 9801), (77, 8638), (172, 2070)}


In [76]:
# ...and the edges stored for vertex 135 (presumably to confirm the 13-135 edge shows up in both directions)
print(graphDict[135])

{(23, 4560), (48, 8869), (10, 4650), (20, 6434), (194, 5726), (190, 1431), (78, 6919), (122, 6345), (144, 6396), (82, 519), (150, 9732), (174, 6977), (158, 8348), (127, 1752), (184, 7629), (134, 276), (13, 861), (4, 7582), (173, 5664)}


## Now to implement a heap structure that will allow for O(log n)-time deletion of edges and constant-time lookup of the min-length edge (extracting it from the heap is also O(log n))

In [82]:
## First, a way to compare heap nodes by "greedy dijkstra" key.
##  Seems like heap items will be in the form of (node, score). 
def shorter(edge1, edge2):
    """Return True when edge1's score is less than or equal to edge2's.

    The score is taken from the last element of each item, so ties count
    as "shorter".
    """
    score1, score2 = edge1[-1], edge2[-1]
    return score1 <= score2

In [138]:
## Next, a utility to swap in place two array items while maintaining a list of their locations in the array
def swap(arr, i, j, locator=None):
    """Exchange arr[i] and arr[j] in place, optionally tracking their positions.

    Parameters:
        arr: mutable sequence whose items have their key at position 0.
        i, j: indices of the two items to exchange.
        locator: optional mapping or list indexed by item key. When given,
            the entry for each swapped item is set to the index it moves to.
            Indices are stored exactly as passed, so a negative index such
            as -1 is recorded as -1.

    Returns:
        None; arr (and locator, when given) are modified in place.
    """
    # Compare against None rather than relying on truthiness: an empty dict
    # is falsy, and the old `if locator:` check silently skipped bookkeeping for it.
    if locator is not None:
        locator[arr[i][0]] = j
        locator[arr[j][0]] = i
    arr[i], arr[j] = arr[j], arr[i]

In [125]:
def bubbleUp(array, newIndex, locList=None):
    """Sift the item at newIndex towards the root until its parent is not larger.

    array is an implicit binary heap stored in a list; locList, when given,
    is passed to swap() so the recorded positions follow every exchange.
    """
    child = newIndex
    while child > 0:
        parent = (child - 1) // 2
        if shorter(array[parent], array[child]):
            break  # parent already scores <= child: heap order restored
        swap(array, child, parent, locList)
        child = parent

In [126]:
def bubbleDown(array, newIndex=0, locList=None):
    """Sift the item at newIndex towards the leaves until no child scores lower.

    newIndex defaults to 0 for the usual case where the last item has just
    been moved to the root. locList, when given, is passed to swap() so the
    recorded positions follow every exchange.
    """
    size = len(array)
    parent = newIndex
    while True:
        left = 2 * parent + 1
        if left >= size:
            return  # parent is a leaf
        right = left + 1
        if right >= size:
            # Lone left child at the very end of the array: one comparison settles it
            if not shorter(array[parent], array[left]):
                swap(array, left, parent, locList)
            return
        # Pick the lower-scoring child; the left child wins ties
        if shorter(array[left], array[right]):
            smallest = left
        else:
            smallest = right
        if shorter(array[parent], array[smallest]):
            return
        swap(array, parent, smallest, locList)
        parent = smallest

## Dijkstra's with heap

In [142]:
## Build the heap
source = 1
attached = {source}                             # vertices already given a distance
unattached = set(graphDict.keys()) - attached   # vertices still waiting in the heap
heap = []
# Direct neighbours of the source start with the edge length as their score
for edge in graphDict[source]:
    heap.append(edge)
    bubbleUp(heap, len(heap) - 1)
# Every other vertex starts at infinity. Appending them after the finite entries
# keeps the heap ordered, since no parent can score higher than infinity.
# (Uses `source` here; the previous version indexed graphDict[1] directly.)
directNeighbours = {t[0] for t in graphDict[source]}
for node in unattached - directNeighbours:
    heap.append((node, float('inf')))

# locs[v] is the heap index of vertex v, or -1 if v is not in the heap.
# Sized from the largest vertex id rather than a hard-coded 201.
locs = [-1] * (max(graphDict) + 1)
for i, v in enumerate(heap):
    locs[v[0]] = i

In [143]:
# Number of entries placed in the heap (the source itself is not added)
len(heap)

199

In [144]:
# Inspect the initial heap: (vertex, score) pairs, with inf for non-neighbours of the source
heap

[(114, 508),
 (140, 546),
 (145, 648),
 (92, 647),
 (80, 982),
 (150, 2183),
 (173, 2069),
 (55, 6399),
 (26, 4122),
 (11, 1913),
 (198, 1724),
 (125, 2909),
 (99, 2367),
 (61, 9146),
 (30, 4612),
 (159, 7420),
 (104, 6647),
 (163, 8164),
 (200, 8021),
 (138, 7896),
 (170, 2620),
 (49, 2437),
 (27, 7905),
 (40, 9047),
 (169, 8700),
 (117, 2597),
 (160, 6461),
 (2, inf),
 (3, inf),
 (4, inf),
 (5, inf),
 (6, inf),
 (7, inf),
 (8, inf),
 (9, inf),
 (10, inf),
 (12, inf),
 (13, inf),
 (14, inf),
 (15, inf),
 (16, inf),
 (17, inf),
 (18, inf),
 (19, inf),
 (20, inf),
 (21, inf),
 (22, inf),
 (23, inf),
 (24, inf),
 (25, inf),
 (28, inf),
 (29, inf),
 (31, inf),
 (32, inf),
 (33, inf),
 (34, inf),
 (35, inf),
 (36, inf),
 (37, inf),
 (38, inf),
 (39, inf),
 (41, inf),
 (42, inf),
 (43, inf),
 (44, inf),
 (45, inf),
 (46, inf),
 (47, inf),
 (48, inf),
 (50, inf),
 (51, inf),
 (52, inf),
 (53, inf),
 (54, inf),
 (56, inf),
 (57, inf),
 (58, inf),
 (59, inf),
 (60, inf),
 (62, inf),
 (63, inf)

In [145]:
# Heap index recorded for vertex 114
locs[114]

0

In [146]:
numNodes = max(graphDict)   # largest vertex id; distList is indexed directly by vertex id
UNREACHABLE = 1000000       # return a million in this assignment for nodes not connected to source
distList = [UNREACHABLE for _ in range(numNodes + 1)]
distList[source] = 0
while heap:
    # If the smallest remaining score is infinite, nothing left in the heap can be
    # reached from the source. Stop so those vertices keep the UNREACHABLE value
    # instead of being overwritten with inf.
    if heap[0][1] == float('inf'):
        break
    # Extract the minimum: move it to the end, pop it, then repair the root
    swap(heap, 0, -1, locs)
    newEdge = heap.pop()
    node, dist = newEdge
    distList[node] = dist
    attached.add(node)
    unattached.remove(node)
    bubbleDown(heap, locList=locs)   # since the previous last element was swapped into the removed min node's spot to maintain shape
    ## Now rescore any new crossing edges and bubble them up, since scores can only go down
    for otherEnd in graphDict[node]:
        head, length = otherEnd
        if head in unattached:
            candidate = dist + length
            position = locs[head]
            if candidate < heap[position][1]:
                heap[position] = (head, candidate)
                bubbleUp(heap, newIndex=position, locList=locs)
    

In [147]:
# Distance from the source for every vertex, indexed by vertex id (index 0 is never assigned)
distList

[1000000,
 0,
 2971,
 2644,
 3056,
 2525,
 2818,
 2599,
 1875,
 745,
 3205,
 1551,
 2906,
 2394,
 1803,
 2942,
 1837,
 3111,
 2284,
 1044,
 2351,
 3630,
 4028,
 2650,
 3653,
 2249,
 2150,
 1222,
 2090,
 3540,
 2303,
 3455,
 3004,
 2551,
 2656,
 998,
 2236,
 2610,
 3548,
 1851,
 4091,
 2732,
 2040,
 3312,
 2142,
 3438,
 2937,
 2979,
 2757,
 2437,
 3152,
 2503,
 2817,
 2420,
 3369,
 2862,
 2609,
 2857,
 3668,
 2947,
 2592,
 1676,
 2573,
 2498,
 2047,
 826,
 3393,
 2535,
 4636,
 3650,
 743,
 1265,
 1539,
 3007,
 4286,
 2720,
 3220,
 2298,
 2795,
 2806,
 982,
 2976,
 2052,
 3997,
 2656,
 1193,
 2461,
 1608,
 3046,
 3261,
 2018,
 2786,
 647,
 3542,
 3415,
 2186,
 2398,
 4248,
 3515,
 2367,
 2970,
 3536,
 2478,
 1826,
 2551,
 3368,
 2303,
 2540,
 1169,
 3140,
 2317,
 2535,
 1759,
 1899,
 508,
 2399,
 3513,
 2597,
 2176,
 1090,
 2328,
 2818,
 1306,
 2805,
 2057,
 2618,
 1694,
 3285,
 1203,
 676,
 1820,
 1445,
 2468,
 2029,
 1257,
 1533,
 2417,
 3599,
 2494,
 4101,
 546,
 1889,
 2616,
 2141,
 

In [148]:
# Print the distances for the ten vertices the quiz asks about, one per line
quizVertices = (7, 37, 59, 82, 99, 115, 133, 165, 188, 197)
print(*(distList[v] for v in quizVertices), sep='\n')

2599
2610
2947
2052
2367
2399
2029
2442
2505
3068


In [None]:
# answers for quiz question, hopefully:  2599,2610,2947,2052,2367,2399,2029,2442,2505,3068