# The London Railway Network

The cell below defines the abstract class whose API you will need to impement. Do NOT modify it.

In [418]:
# DO NOT MODIFY THIS CELL

from abc import ABC, abstractmethod  

class AbstractLondonRailwayMapper(ABC):
    
    # constructor
    @abstractmethod
    def __init__(self):
        pass           
        
    # data initialisation
    @abstractmethod
    def loadStationsAndLines(self):
        pass

    # returns the minimum number of stops to connect station "fromS" to station  "toS"
    # fromS : str
    # toS : str
    # numStops : int
    @abstractmethod
    def minStops(self, fromS, toS):     
        numStops = -1
        return numStops    
    
    # returns the minimum distance in miles to connect station "fromS" to station  "toS"
    # fromS : str
    # toS : str
    # minDistance : float
    @abstractmethod
    def minDistance(self, fromS, toS):
        minDistance = -1.0
        return minDistance
    
    # given an unordered list of station names, returns a new railway line 
    # (represented as a list of adjacent station names), connecting all such stations 
    # and such that the sum of the distances (in miles) between adjacent stations is minimised
    # inputList : set<str>
    # outputList : list<str>
    @abstractmethod
    def newRailwayLine(self, inputList):
        outputList = []
        return outputList

Use the cell below to define any data structure and auxiliary python function you may need. Leave the implementation of the main API to the next code cell instead.

In [419]:
# AVL Tree

class AVLTreeNode:

    def __init__(self, key, value):
        self.key = key
        self.left = None
        self.right = None
        self.height = 1
        self.value = value # [index, lat, lon]

class AVLTree:

    def get(self, node, key):

        if node.key == key:
            return node
        elif key < node.key and node.left:
            return self.get(node.left, key)
        elif key > node.key and node.right:
            return self.get(node.right, key)
        else:
            return None

    def put(self, node, key, value):

        if (node is None):
            return AVLTreeNode(key, value)
        elif key < node.key:
            node.left = self.put(node.left, key, value)
        else:
            node.right = self.put(node.right, key, value)

        node.height = 1 + max(self.getHeight(node.left), self.getHeight(node.right))

        balance = self.getBalance(node)
        
        if balance > 1:
            if key < node.left.key:
                return self.rightRotate(node)
            else:
                node.left = self.leftRotate(node.left)
                return self.rightRotate(node)

        if balance < -1:
            if key > node.right.key:
                return self.leftRotate(node)
            else:
                node.right = self.rightRotate(node.right)
                return self.leftRotate(node)

        return node

    def leftRotate(self, node):
        savedRight = node.right
        savedRightLeft = savedRight.left
        savedRight.left = node
        node.right = savedRightLeft
        node.height = 1 + max(self.getHeight(node.left),
                           self.getHeight(node.right))
        savedRight.height = 1 + max(self.getHeight(savedRight.left),
                           self.getHeight(savedRight.right))
        return savedRight

    def rightRotate(self, node):
        savedLeft = node.left
        savedLeftRight = savedLeft.right
        savedLeft.right = node
        node.left = savedLeftRight
        node.height = 1 + max(self.getHeight(node.left),
                           self.getHeight(node.right))
        savedLeft.height = 1 + max(self.getHeight(savedLeft.left),
                           self.getHeight(savedLeft.right))
        return savedLeft


    def getHeight(self, node):
        if node is None:
            return 0
        return node.height

    def getBalance(self, node):
        if node is None:
            return 0
        return self.getHeight(node.left) - self.getHeight(node.right)



In [420]:
# Queue

class QueueNode:
    def __init__(self, data = None):
        self.data = data
        self.next = None
        self.prev = None

class MyQueue:

    def __init__(self):
        self.size = 0
        self.head = None
        self.tail = None

    def enqueue(self, data):
        node = QueueNode(data)
        if (self.head == None):
            self.head = node
            self.tail = node
        else:
            self.tail.next = node
            node.prev = self.tail
            self.tail = node
        self.size += 1

    def dequeue(self):
        data = self.head.data
        self.head = self.head.next
        self.size -= 1
        if (self.size == 0):
            self.tail = self.head
        return data

    def isEmpty(self):
        return self.size == 0



In [421]:
# Min-Oriented Priority Queue using Binary Heap

# TODO Fix the queue
class MinPQ:

    def __init__(self):
        self.size = 0
        self.list = [None]

    def enqueue(self, value, key):
        self.size += 1
        # self.array[self.size] = (value, key)
        self.list.append((value, key))
        self.swim(self.size)

    def swim(self, i):
        while (i > 1 and self.list[i//2][1] > self.list[i][1]):
            self.swap(i, i//2)
            i = i // 2

    def dequeue(self):
        min = self.list[1]
        self.swap(1, self.size)
        self.size -= 1
        self.list.pop()
        self.sink(1)
        return min

    def sink(self, i):
        while (i*2 <= self.size):
            childPos = i*2
            if (childPos < self.size and self.list[childPos][1] > self.list[childPos + 1][1]):
                childPos += 1
            if (self.list[i][1] < self.list[childPos][1]): 
                break
            self.swap(i, childPos)
            i = childPos

    def swap(self, i, j):
        self.list[i], self.list[j] = self.list[j], self.list[i]

    def isEmpty(self):
        return self.size == 0


# class MinPQ:
#     def __init__(self, max_size):
#         self.max_size = max_size
#         self.heap = [0 for _ in range(max_size + 1)]
#         self.size = 0

#     def enqueue(self, val, priority):
#         if self.size >= self.max_size:
#             print("message")
#             return
#         self.size += 1
#         self.heap[self.size] = (val, priority)
#         self.swim_up(self.size)

#     def dequeue(self):
#         min_elem = self.heap[1]
#         self.heap[1] = self.heap[self.size]
#         self.heapify(1)
#         self.heap[self.size] = 0
#         self.size -= 1
#         return min_elem

#     def isEmpty(self):
#         if self.size > 0:
#             return False
#         return True

#     def swim_up(self, index):
#         temp = index
#         parent_index = self.get_parent(temp)
#         if parent_index != 0:
#             while (self.heap[temp][1] < self.heap[parent_index][1]):
#                 self.swap(temp, parent_index)
#                 temp = self.get_parent(temp)
#                 parent_index = self.get_parent(temp)
#                 if parent_index == 0:
#                     return

#     def get_parent(self, index):
#         return index // 2

#     def get_right_child(self, index):
#         return (2 * index) + 1

#     def get_left_child(self, index):
#         return 2 * index

#     def is_leaf(self, index):
#         if index <= self.size and index > self.size // 2:
#             return True
#         return False

#     def swap(self, x, y):
#         temp = self.heap[x]
#         self.heap[x] = self.heap[y]
#         self.heap[y] = temp

#     def heapify(self, index):
#         lc_index = self.get_left_child(index)

        


In [422]:
import csv
from math import radians, sin, cos, asin, sqrt

# ADD YOUR DATA STRUCTURE DEFINITIONS AND HELPER CODE HERE



class DataLoader:

    def __init__(self):
        self.stationsTree = AVLTree()
        self.root = None
        self.stationsCount = -1
        self.graph = None

    def distance(self, longitude1, latitude1, longitude2, latitude2):
        
        longitude1 = radians(longitude1)
        longitude2 = radians(longitude2)
        latitude1 = radians(latitude1)
        latitude2 = radians(latitude2)
        
        # Haversine formula 
        dlon = longitude2 - longitude1 
        dlat = latitude2 - latitude1
        x = sin(dlat / 2)**2 + cos(latitude1) * cos(latitude2) * sin(dlon / 2)**2
        c = 2 * asin(sqrt(x)) 
        radius = 6371
        return(c * radius)

    def loadFiles(self):

        with open('londonstations.csv') as stationsFile:
            reader = csv.reader(stationsFile, delimiter=',')
            for row in reader:
                if (self.stationsCount == -1):
                    self.stationsCount += 1
                    continue
                self.root = self.stationsTree.put(self.root, row[0], [self.stationsCount, float(row[1]), float(row[2])])
                self.stationsCount += 1

        self.graph = WeightedGraph(self.stationsCount)
        print(self.graph)

        with open('londonrailwaylines.csv') as linesFile:
            reader = csv.reader(linesFile, delimiter=',')
            lineCount = 0
            for row in reader:
                if (lineCount == 0): 
                    lineCount += 1
                    continue
                lineName = row[0]
                fromStation = row[1]
                toStation = row[2]
                fromStationNode = self.stationsTree.get(self.root, fromStation)
                toStationNode = self.stationsTree.get(self.root, toStation)

                dist = self.distance(fromStationNode.value[2], fromStationNode.value[1], toStationNode.value[2], toStationNode.value[1])
                edge = Edge(fromStationNode.value[0], toStationNode.value[0], dist, lineName)
                self.graph.addEdge(edge)
                lineCount += 1

        return self.graph, self.stationsTree, self.root



class WeightedGraph:

    def __init__(self, V):
        self.V = V
        self.adj = []
        for _ in range (0, V):
            self.adj.append([])

    def addEdge(self, e):
        v = e.endPoint()
        w = e.otherEndPoint(v)
        self.adj[v].append(e)
        self.adj[w].append(e)

    def adjList(self, v):
        return self.adj[v]

    def getVertexCount(self):
        return self.V

class Edge:

    def __init__(self, v, w, weight, lineName):
        self.v = v
        self.w = w
        self.weight = weight
        self.lineName = lineName

    def getLineName(self):
        return self.lineName

    def endPoint(self):
        return self.v

    def otherEndPoint(self, vertex):
        if vertex == self.v: return self.w
        else: return self.v

    def compareTo(self, edge):
        if (self.weight < edge.weight): return -1
        elif (self.weight < edge.weight): return +1
        else: return 0

    def getWeight(self):
        return self.weight

class MinStopsBFS:

    def __init__(self, graph):
        self.graph = graph
        self.distToSource = [-1] * self.graph.getVertexCount()
    
    def minStops(self, start, end):
        queue = MyQueue()    # Implement a queue
        queue.enqueue(start)

        self.distToSource[start] = 0

        while (not queue.isEmpty()):
            v = queue.dequeue()
            for edge in self.graph.adjList(v):
                w = edge.otherEndPoint(v)
                if (self.distToSource[w] == -1):
                    queue.enqueue(w)
                    self.distToSource[w] = self.distToSource[v] + 1
        return self.distToSource[end]

class DijkstraSP:

    def __init__(self, graph, source):
        self.distTo = [20000000 for v in range(0, graph.getVertexCount())]
        self.distTo[source] = 0
        self.visited = [False for v in range(0, graph.getVertexCount())]
        self.graph = graph

        self.pq = MinPQ()
        self.pq.enqueue(source, 0)


        while (not self.pq.isEmpty()):
            curVertex = self.pq.dequeue()[0]
            if (self.visited[curVertex]): continue
            for edge in self.graph.adjList(curVertex):
                self.relax(edge, curVertex)

            self.visited[curVertex] = True

        # print(self.distTo)

    def relax(self, edge, fromVertex):
        toVertex = edge.otherEndPoint(fromVertex)
        if (self.distTo[toVertex] > self.distTo[fromVertex] + edge.getWeight()):
            self.distTo[toVertex] = self.distTo[fromVertex] + edge.getWeight()
            self.pq.enqueue(toVertex, self.distTo[toVertex])

    def minDistances(self):
        return self.distTo

class ShortestPath:

    def __init__(self, graph, source):
        self.dijkstra = DijkstraSP(graph, source)

    def findMinDistance(self, destination):
        distTo = self.dijkstra.minDistances()
        return distTo[destination]








In [423]:
import csv

class LondonRailwayMapper(AbstractLondonRailwayMapper):
    
    def __init__(self):
        # ADD YOUR CODE HERE
        self.graph = None
        self.stations = None
        self.rootStationsTree = None

        pass           
     
    
        
    def loadStationsAndLines(self):
        # ADD YOUR CODE HERE
        print("here1")
        loader = DataLoader()
        self.graph, self.stations, self.rootStationsTree = loader.loadFiles()
        print("Graph: ", self.graph)
        print("here2")
    
    

    def minStops(self, fromS, toS):     
        numStops = -1
        # ADD YOUR CODE HERE
        finder = MinStopsBFS(self.graph)
        numStops = finder.minStops(self.stations.get(self.rootStationsTree, fromS).value[0], self.stations.get(self.rootStationsTree, toS).value[0])
        
        return numStops    
    
    
    
    def minDistance(self, fromS, toS):
        minDistance = -1.0
        # ADD YOUR CODE HERE
        # In order to implement this requirement I decided to use the Dijkstra (binary heap) 
        # algorithm. This because it is O(E * logV) in typical 
        finder = ShortestPath(self.graph, self.stations.get(self.rootStationsTree, fromS).value[0])
        minDistance = finder.findMinDistance(self.stations.get(self.rootStationsTree, toS).value[0])
        
        return minDistance
    
    
    
    
    def newRailwayLine(self, inputList):
        outputList = []
        # ADD YOUR CODE HERE

        
        return outputList

Use the cell below for all python code needed to test the `LondonRailwayMapper` class above.

In [424]:
import timeit

# ADD YOUR TEST CODE HERE

mapper = LondonRailwayMapper()
mapper.loadStationsAndLines()

stops0 = mapper.minStops("Green Park", "Covent Garden")
stops1 = mapper.minStops("Covent Garden", "Green Park")
stops2 = mapper.minStops("Willesden Junction", "Upminster")
stops3 = mapper.minStops("Upminster", "Willesden Junction")
stops4 = mapper.minStops("Upminster", "Upminster")

dist0 = mapper.minDistance("Green Park", "Covent Garden")
dist1 = mapper.minDistance("Covent Garden", "Green Park")
dist2 = mapper.minDistance("Willesden Junction", "Upminster")
dist3 = mapper.minDistance("Upminster", "Willesden Junction")
dist4 = mapper.minDistance("Upminster", "Upminster")
dist5 = mapper.minDistance("Upminster", "Centrale")

print("Stops:")

print(stops0, stops1, stops2, stops3, stops4)
print(dist0, dist1, dist2, dist3, dist4, dist5)



here1
<__main__.WeightedGraph object at 0x00000274E5AD54C0>
Graph:  <__main__.WeightedGraph object at 0x00000274E5AD54C0>
here2
Stops:
3 3 9 9 0
1.412704746239717 1.4127047462397173 35.87378600705385 35.87378600705386 0 38.68253797289412


The cell below exemplifies the test code I will invoke on your submission. Do NOT modify it. 

In [425]:
# DO NOT MODIFY THIS CELL

import timeit

testMapper = LondonRailwayMapper()

#
# testing the loadStationsAndLines() API 
#
starttime = timeit.default_timer()
testMapper.loadStationsAndLines()
endtime = timeit.default_timer()
print("\nExecution time to load:", round(endtime-starttime,3))

#
# testing the minStops() and minStops() API on a sample of from/to station pairs  
#
fromList = ["Baker Street", "Epping", "Canonbury", "Vauxhall"]
toList = ["North Wembley", "Belsize Park", "Balham", "Leytonstone"]

for i in range(len(fromList)):
    starttime = timeit.default_timer()
    stops = testMapper.minStops(fromList[i], toList[i])
    endtime = timeit.default_timer()
    print("\nExecution time minStops:", round(endtime-starttime,3))

    starttime = timeit.default_timer()
    dist = testMapper.minStops(fromList[i], toList[i])
    endtime = timeit.default_timer()
    print("Execution time minDistance:", round(endtime-starttime,3))

    print("From", fromList[i], "to", toList[i], "in", stops, "stops and", dist, "miles")  
    
#
# testing the newRailwayLine() API on a small list of stations  
#
stationsList = ["Queens Park", "Chigwell", "Moorgate", "Swiss Cottage", "Liverpool Street", "Highgate"]

starttime = timeit.default_timer()
newLine = testMapper.newRailwayLine(stationsList)
endtime = timeit.default_timer()

print("\n\nStation list", stationsList)
print("New station line", newLine)
print("Total track length from", newLine[0], "to", newLine[len(newLine)-1], ":", testMapper.minDistance(newLine[0], newLine[len(newLine)-1]), "miles")
print("Execution time newLine:", round(endtime-starttime,3))

#
# testing the newRailwayLine() API on a big list of stations  
#
stationsList = ["Abbey Road", "Barbican", "Bethnal Green", "Cambridge Heath", "Covent Garden", "Dollis Hill", "East Finchley", "Finchley Road and Frognal", "Great Portland Street", "Hackney Wick", "Isleworth", "Kentish Town West", "Leyton", "Marble Arch", "North Wembley", "Old Street", "Pimlico", "Queens Park", "Richmond", "Shepherds Bush", "Tottenham Hale", "Uxbridge", "Vauxhall", "Wapping"]

starttime = timeit.default_timer()
newLine = testMapper.newRailwayLine(stationsList)
endtime = timeit.default_timer()

print("\n\nStation list", stationsList)
print("New station line", newLine)
print("Total track length from", newLine[0], "to", newLine[len(newLine)-1], ":", testMapper.minDistance(newLine[0], newLine[len(newLine)-1]), "miles")
print("Execution time newLine:", round(endtime-starttime,3))

here1
<__main__.WeightedGraph object at 0x00000274E6250910>
Graph:  <__main__.WeightedGraph object at 0x00000274E6250910>
here2

Execution time to load: 0.02

Execution time minStops: 0.005
Execution time minDistance: 0.002
From Baker Street to North Wembley in 6 stops and 6 miles

Execution time minStops: 0.002
Execution time minDistance: 0.002
From Epping to Belsize Park in 17 stops and 17 miles

Execution time minStops: 0.001
Execution time minDistance: 0.002
From Canonbury to Balham in 10 stops and 10 miles

Execution time minStops: 0.003
Execution time minDistance: 0.002
From Vauxhall to Leytonstone in 6 stops and 6 miles


Station list ['Queens Park', 'Chigwell', 'Moorgate', 'Swiss Cottage', 'Liverpool Street', 'Highgate']
New station line []


IndexError: list index out of range