>### HW 7.0: Shortest path graph distances (toy networks)

>In this part of your assignment you will develop the base of your code for the week.

>Write MRJob classes to find shortest path graph distances, 
as described in the lectures. In addition to finding the distances, 
your code should also output a distance-minimizing path between the source and target.
Work locally for this part of the assignment, and use 
both the undirected and directed toy networks.

>To prove that your code works, run the following jobs:

>- shortest path in the undirected network from node 1 to node 4  
Solution: 1,5,4 

>- shortest path in the directed network from node 1 to node 5  
Solution: 1,2,4,5

>and report your output---make sure it is correct!

In [1]:
%%writefile shortest_path.py
from mrjob import MRJob, MRStep
import json

class Node():
    ''' A graph node together with its search bookkeeping.

        Attributes:
            ID       -- node identifier; used as the key of the node's
                        own record in sendNode().
            links    -- dict of adjacent nodes, keyed by neighbour id.
            distance -- distance recorded for this node; -1 is the
                        default value.
            STATE    -- 'U' by default, 'Q' after setQueued(), 'V'
                        after setVisited().
    '''

    def __init__(self, nodeid, links=None, distance=-1, state='U'):
        ''' Build a node.

            links may be a JSON-encoded object (as read from a record),
            an already decoded dict, or None for "no links". A dict is
            copied so nodes never share or mutate the caller's mapping.
        '''
        if links is None:
            self.links = {}
        elif isinstance(links, dict):
            self.links = dict(links)
        else:
            self.links = json.loads(links)
        self.distance = distance
        self.STATE = state
        self.ID = nodeid

    def setDistance(self, distance):
        ''' Overwrite the stored distance. '''
        self.distance = distance

    def setVisited(self):
        ''' Mark the node with state 'V'. '''
        self.STATE = 'V'

    def setQueued(self):
        ''' Mark the node with state 'Q'. '''
        self.STATE = 'Q'

    def sendNode(self):
        ''' Yield one (key, record) pair describing this node.

            The record is tab separated: ID, JSON links, distance, state.
        '''
        # str.join only accepts strings, so the numeric distance (and a
        # possibly non-string id) must be converted explicitly.
        record = '\t'.join([str(self.ID), json.dumps(self.links),
                            str(self.distance), self.STATE])
        yield self.ID, record

    def sendQueuedNodes(self):
        ''' Yield one (key, record) pair per neighbour of this node.

            Each record marks the neighbour as 'Q' at distance + 1 and
            carries an empty link set.
        '''
        next_distance = str(self.distance + 1)
        for neighbour in self.links:
            # Key by the neighbour: the record describes that node, so it
            # has to be grouped with the neighbour's other records.
            yield neighbour, '\t'.join([str(neighbour), '{}', next_distance, 'Q'])

class ShortestPathBFS(MRJob):
    ''' One expansion pass of a breadth-first search over a node file.

        Every input line is expected to hold four tab-separated fields:
        nodeID, links (a JSON object keyed by neighbour id), distance
        (an int, -1 when no distance has been recorded) and state
        ('U', 'Q' or 'V').

        The mapper expands each 'Q' node by one hop and marks it 'V';
        the reducer merges all records that share a node id. Records
        are serialised by this class so that the format written by the
        mapper and the format parsed by the reducer are defined in one
        place.
    '''

    # Precedence used when merging the states seen for one node: a 'V'
    # record wins over 'Q', which wins over the default 'U'.
    STATE_RANK = {'U': 0, 'Q': 1, 'V': 2}

    @staticmethod
    def _format_record(nodeID, links, distance, state):
        ''' Serialise a node as "ID<TAB>links<TAB>distance<TAB>state". '''
        return '\t'.join([str(nodeID), json.dumps(links), str(distance), state])

    def process_node_occurances(self, nodeID, nodeinfo):
        ''' Parse nodes within reducer

            nodeinfo is a tab-separated record whose last three fields
            are links, distance and state; a leading node id field, if
            present, is ignored because the key already carries it.
            Returns a Node. Raises ValueError on a malformed record.
        '''
        links, distance, state = nodeinfo.split('\t')[-3:]
        return Node(nodeID, links, int(distance), state)

    def mapper(self, _, line):
        ''' Read each node from temp file
            and send node / queued nodes
            to stream
        '''
        line = line.strip()
        if not line:
            # Blank lines carry no node; skip them instead of failing
            # on the four-way unpack below.
            return

        # read line as a node
        nodeID, links, distance, state = line.split('\t')
        current_node = Node(nodeID, links, int(distance), state)

        # send queued nodes
        if current_node.STATE == 'Q':
            next_distance = current_node.distance + 1
            for neighbour in current_node.links:
                # Keyed by the neighbour so the record is merged with
                # that node's own entry in the reducer.
                yield neighbour, self._format_record(neighbour, {}, next_distance, 'Q')
            current_node.setVisited()

        # send current node
        yield nodeID, self._format_record(nodeID, current_node.links,
                                          current_node.distance,
                                          current_node.STATE)

    def reducer(self, nodeID, occurances):
        ''' Join all information for each node

            Emits a single record per node holding the union of all
            links, the smallest recorded distance (-1 if none was
            recorded) and the highest-precedence state.
        '''
        node_distance = -1
        node_links = {}
        node_state = 'U'
        rank = self.STATE_RANK

        for occurance in occurances:
            n = self.process_node_occurances(nodeID, occurance)

            # -1 means "no distance", so it must never win the minimum
            # and must always be replaced by the first real distance.
            if n.distance != -1 and (node_distance == -1 or n.distance < node_distance):
                node_distance = n.distance

            node_links.update(n.links)

            # Merge by precedence so the result does not depend on the
            # order in which records arrive.
            if rank.get(n.STATE, 0) > rank.get(node_state, 0):
                node_state = n.STATE

        # send node
        yield nodeID, self._format_record(nodeID, node_links, node_distance, node_state)


Writing shortest_path.py


In [None]:
from shortest_path import ShortestPathBFS

# Graph file handed to the job as its input path.
SOURCE_FILE = 'Data/directed_toy.txt'
# Presumably the location for intermediate files between passes; it is
# not used anywhere in this cell yet.
TEMP_FILE = 'Data/'
# The input path is SOURCE_FILE; GRAPH_STRUCTURE_FILE was never defined
# and raised NameError here.
args = [SOURCE_FILE, '--strict-protocols']
mrjob = ShortestPathBFS(args=args)

