# The London Railway Network

The cell below defines the abstract class whose API you will need to impement. Do NOT modify it.

In [208]:
# DO NOT MODIFY THIS CELL

from abc import ABC, abstractmethod  

class AbstractLondonRailwayMapper(ABC):
    
    # constructor
    @abstractmethod
    def __init__(self):
        pass           
        
    # data initialisation
    @abstractmethod
    def loadStationsAndLines(self):
        pass

    # returns the minimum number of stops to connect station "fromS" to station  "toS"
    # fromS : str
    # toS : str
    # numStops : int
    @abstractmethod
    def minStops(self, fromS, toS):     
        numStops = -1
        return numStops    
    
    # returns the minimum distance in miles to connect station "fromS" to station  "toS"
    # fromS : str
    # toS : str
    # minDistance : float
    @abstractmethod
    def minDistance(self, fromS, toS):
        minDistance = -1.0
        return minDistance
    
    # given an unordered list of station names, returns a new railway line 
    # (represented as a list of adjacent station names), connecting all such stations 
    # and such that the sum of the distances (in miles) between adjacent stations is minimised
    # inputList : set<str>
    # outputList : list<str>
    @abstractmethod
    def newRailwayLine(self, inputList):
        outputList = []
        return outputList

Use the cell below to define any data structure and auxiliary python function you may need. Leave the implementation of the main API to the next code cell instead.

In [209]:
# Queue

'''
To implement a 
'''

class QueueNode:
    def __init__(self, data = None):
        self.data = data
        self.next = None
        self.prev = None

class MyQueue:

    def __init__(self):
        self.size = 0
        self.head = None
        self.tail = None

    def enqueue(self, data):
        node = QueueNode(data)
        if (self.head == None):
            self.head = node
            self.tail = node
        else:
            self.tail.next = node
            node.prev = self.tail
            self.tail = node
        self.size += 1

    def dequeue(self):
        data = self.head.data
        self.head = self.head.next
        self.size -= 1
        if (self.size == 0):
            self.tail = self.head
        return data

    def isEmpty(self):
        return self.size == 0



In [210]:
# Min-Oriented Priority Queue using Binary Heap

# TODO Fix the queue
class MinPQ:

    def __init__(self):
        self.size = 0
        self.list = [None]

    def enqueue(self, value, key):
        self.size += 1
        # self.array[self.size] = (value, key)
        self.list.append((value, key))
        self.swim(self.size)

    def swim(self, i):
        while (i > 1 and self.list[i//2][1] > self.list[i][1]):
            self.swap(i, i//2)
            i = i // 2

    def dequeue(self):
        min = self.list[1]
        self.swap(1, self.size)
        self.size -= 1
        self.list.pop()
        self.sink(1)
        return min

    def sink(self, i):
        while (i*2 <= self.size):
            childPos = i*2
            if (childPos < self.size and self.list[childPos][1] > self.list[childPos + 1][1]):
                childPos += 1
            if (self.list[i][1] < self.list[childPos][1]): 
                break
            self.swap(i, childPos)
            i = childPos

    def swap(self, i, j):
        self.list[i], self.list[j] = self.list[j], self.list[i]

    def isEmpty(self):
        return self.size == 0


        


In [211]:
import csv
from math import radians, sin, cos, asin, sqrt
from pandas import *   #TODO delete this import

# ADD YOUR DATA STRUCTURE DEFINITIONS AND HELPER CODE HERE



class DataLoader:

    '''
    In my DataLoader class I decied to store a pèython dictionary stationsDict containing all the information about stations
    using the following format: {"stationName" : [ keyForGraph, lat, lon ], ...}. The reason behind chosing a dictionary is
    that it allows us to retrieve data in O(1) on average. This will be extremely useful in every situation in which we need
    to access by name information about a station. This occures, for example, when reading the "londonrailwaylines.csv" file
    to load the edges. The worstcase scenario for a dictionary retrival is O(N), where N is the number of stations. However,
    this case is extremely rare, because of the way the hasmap implementing the python default dictionary is structured.  
    '''

    def __init__(self):
        self.stationsDict = {} # Dictionary of tipe {"stationName" : [ keyForGraph, lat, lon ]}
        self.stationsCount = -1
        self.graph = None

    def distance(self, longitude1, latitude1, longitude2, latitude2):
        
        longitude1 = radians(longitude1)
        longitude2 = radians(longitude2)
        latitude1 = radians(latitude1)
        latitude2 = radians(latitude2)
        
        # Haversine formula 
        dlon = longitude2 - longitude1 
        dlat = latitude2 - latitude1
        x = sin(dlat / 2)**2 + cos(latitude1) * cos(latitude2) * sin(dlon / 2)**2
        c = 2 * asin(sqrt(x)) 
        radius = 6371
        return(c * radius)

    def loadFiles(self):

        with open('londonstations.csv') as stationsFile:
            reader = csv.reader(stationsFile, delimiter=',')
            for row in reader:
                if (self.stationsCount == -1):
                    self.stationsCount += 1
                    continue
                self.stationsDict[row[0]] = [self.stationsCount, float(row[1]), float(row[2])]
                self.stationsCount += 1

        self.graph = WeightedGraph(self.stationsCount)
        print(self.graph)

        with open('londonrailwaylines.csv') as linesFile:
            reader = csv.reader(linesFile, delimiter=',')
            lineCount = 0
            for row in reader:
                if (lineCount == 0): 
                    lineCount += 1
                    continue
                lineName = row[0]
                fromStation = row[1]
                toStation = row[2]
                fromStationValue = self.stationsDict.get(fromStation)
                toStationValue = self.stationsDict.get(toStation)

                dist = self.distance(fromStationValue[2], fromStationValue[1], toStationValue[2], toStationValue[1])
                edge = Edge(fromStationValue[0], toStationValue[0], dist, lineName)
                self.graph.addEdge(edge)
                lineCount += 1

        return self.graph, self.stationsDict



class WeightedGraph:

    def __init__(self, V):
        self.V = V
        self.adj = []
        for _ in range (0, V):
            self.adj.append([])

    def addEdge(self, e):
        v = e.endPoint()
        w = e.otherEndPoint(v)
        self.adj[v].append(e)
        self.adj[w].append(e)

    def adjList(self, v):
        return self.adj[v]

    def getVertexCount(self):
        return self.V

class Edge:

    def __init__(self, v, w, weight, lineName):
        self.v = v
        self.w = w
        self.weight = weight
        self.lineName = lineName

    def getLineName(self):
        return self.lineName

    def endPoint(self):
        return self.v

    def otherEndPoint(self, vertex):
        if vertex == self.v: return self.w
        else: return self.v

    def compareTo(self, edge):
        if (self.weight < edge.weight): return -1
        elif (self.weight < edge.weight): return +1
        else: return 0

    def getWeight(self):
        return self.weight

class MinStopsBFS:

    def __init__(self, graph):
        self.graph = graph
        self.distToSource = [-1] * self.graph.getVertexCount()
    
    def minStops(self, start, end):
        queue = MyQueue()    # Implement a queue
        queue.enqueue(start)

        self.distToSource[start] = 0

        while (not queue.isEmpty()):
            v = queue.dequeue()
            for edge in self.graph.adjList(v):
                w = edge.otherEndPoint(v)
                if (self.distToSource[w] == -1):
                    queue.enqueue(w)
                    self.distToSource[w] = self.distToSource[v] + 1
        return self.distToSource[end]

class DijkstraSP:

    def __init__(self, graph, source):
        self.distTo = [float('inf') for v in range(0, graph.getVertexCount())]
        self.distTo[source] = 0
        self.visited = [False for v in range(0, graph.getVertexCount())]
        self.graph = graph

        self.pq = MinPQ()
        self.pq.enqueue(source, 0)


        while (not self.pq.isEmpty()):
            curVertex = self.pq.dequeue()[0]
            if (self.visited[curVertex]): continue
            for edge in self.graph.adjList(curVertex):
                self.relax(edge, curVertex)

            self.visited[curVertex] = True


    def relax(self, edge, fromVertex):
        toVertex = edge.otherEndPoint(fromVertex)
        if (self.distTo[toVertex] > self.distTo[fromVertex] + edge.getWeight()):
            self.distTo[toVertex] = self.distTo[fromVertex] + edge.getWeight()
            self.pq.enqueue(toVertex, self.distTo[toVertex])

    def minDistances(self):
        return self.distTo

class ShortestPath:

    def __init__(self, graph, source):
        self.dijkstra = DijkstraSP(graph, source)

    def findMinDistance(self, destination):
        distTo = self.dijkstra.minDistances()
        return distTo[destination]

class NewRailwayConstructor:

    def __init__(self, stations, inputList):
        # Construct a weighted graph in matrix format
        self.inputList = inputList
        self.stations = stations
        self.graphMatrix = []
        self.graphMatrix.append([0]*(len(self.inputList) + 1))
        for i in range(0, len(self.inputList)):
            self.graphMatrix.append([0])
        print(DataFrame(self.graphMatrix))
        for i in range(0, len(self.inputList)):
            for j in range(0, len(self.inputList)):
                fromStationValue = self.stations.get(self.inputList[i])
                toStationValue = self.stations.get(self.inputList[j])
                loader = DataLoader()
                dist = loader.distance(fromStationValue[2], fromStationValue[1], toStationValue[2], toStationValue[1])
                self.graphMatrix[i + 1].append(dist)
        print(DataFrame(self.graphMatrix))

    def findNewLine(self):
        generator = self.permutations([i in range (0, len(self.inputList))])
        minDist = float('inf')
        minPath = None
        for permutation in generator:
            print("hi")
            dist = 0
            cur = s
            for i in permutation:
                currentWeight += self.graphMatrix[cur + 1][i + 1]
                cur = i
            currentWeight += self.graphMatrix[k][s]

            if (currentWeight < minDist):
                minDist = currentWeight
                minPath = permutation

        # Generate result from permutation




    def permutations(self, elems):
        if len(elems) <= 1:
            yield elems
        else:
            for permutation in self.permutations(elems[1:]):
                for i in range(len(elems)):
                    yield permutation[:i] + elems[0:1] + permutation[i:]










In [212]:
import csv

class LondonRailwayMapper(AbstractLondonRailwayMapper):
    
    def __init__(self):
        # ADD YOUR CODE HERE
        self.graph = None
        self.stations = None
    
        
    def loadStationsAndLines(self):
        # ADD YOUR CODE HERE
        loader = DataLoader()
        self.graph, self.stations = loader.loadFiles()
    
    

    def minStops(self, fromS, toS):     
        numStops = -1
        # ADD YOUR CODE HERE
        finder = MinStopsBFS(self.graph)
        numStops = finder.minStops(self.stations.get(fromS)[0], self.stations.get(toS)[0])
        
        return numStops    
    
    
    
    def minDistance(self, fromS, toS):
        minDistance = -1.0
        # ADD YOUR CODE HERE
        # In order to implement this requirement I decided to use the Dijkstra (binary heap) 
        # algorithm. This because it is O(E * logV) in typical 
        finder = ShortestPath(self.graph, self.stations.get(fromS)[0])
        minDistance = finder.findMinDistance(self.stations.get(toS)[0])
        
        return minDistance
    
    
    
    
    def newRailwayLine(self, inputList):
        outputList = []
        # ADD YOUR CODE HERE

        newrailwayConstructor = NewRailwayConstructor(self.stations, inputList)
        newrailwayConstructor.findNewLine()
        # REMEMBER TO USE THIS:
        # https://cs.stackexchange.com/questions/43549/what-tsp-variant-doesnt-return-to-start-point
        # You can reduce to a normal TSP variant by adding a dummy city that is distance 0 away from 
        # each of the existing cities. (See also this answer on StackOverflow.)

        
        return outputList

Use the cell below for all python code needed to test the `LondonRailwayMapper` class above.

In [213]:
import timeit

# ADD YOUR TEST CODE HERE

mapper = LondonRailwayMapper()
mapper.loadStationsAndLines()
# dict = mapper.stations

stops0 = mapper.minStops("Green Park", "Covent Garden")
stops1 = mapper.minStops("Covent Garden", "Green Park")
stops2 = mapper.minStops("Willesden Junction", "Upminster")
stops3 = mapper.minStops("Upminster", "Willesden Junction")
stops4 = mapper.minStops("Upminster", "Upminster")

dist0 = mapper.minDistance("Green Park", "Covent Garden")
dist1 = mapper.minDistance("Covent Garden", "Green Park")
dist2 = mapper.minDistance("Willesden Junction", "Upminster")
dist3 = mapper.minDistance("Upminster", "Willesden Junction")
dist4 = mapper.minDistance("Upminster", "Upminster")
dist5 = mapper.minDistance("Upminster", "Centrale")

mapper.newRailwayLine(["Upminster", "Willesden Junction", "Centrale"])
# print("Stops:")

# print(stops0, stops1, stops2, stops3, stops4)
# print(dist0, dist1, dist2, dist3, dist4, dist5)

# starttime = timeit.default_timer()
# for i in range(0, 10000):
#     dict.get("Covent Garden")
# endtime = timeit.default_timer()
# print(endtime - starttime)


<__main__.WeightedGraph object at 0x00000200E7C6A910>
   0    1    2    3
0  0  0.0  0.0  0.0
1  0  NaN  NaN  NaN
2  0  NaN  NaN  NaN
3  0  NaN  NaN  NaN
   0          1          2          3
0  0   0.000000   0.000000   0.000000
1  0   0.000000  34.378079  31.933420
2  0  34.378079   0.000000  19.920606
3  0  31.933420  19.920606   0.000000
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi


[]

The cell below exemplifies the test code I will invoke on your submission. Do NOT modify it. 

In [214]:
# DO NOT MODIFY THIS CELL

import timeit

testMapper = LondonRailwayMapper()

#
# testing the loadStationsAndLines() API 
#
starttime = timeit.default_timer()
testMapper.loadStationsAndLines()
endtime = timeit.default_timer()
print("\nExecution time to load:", round(endtime-starttime,3))

#
# testing the minStops() and minStops() API on a sample of from/to station pairs  
#
fromList = ["Baker Street", "Epping", "Canonbury", "Vauxhall"]
toList = ["North Wembley", "Belsize Park", "Balham", "Leytonstone"]

for i in range(len(fromList)):
    starttime = timeit.default_timer()
    stops = testMapper.minStops(fromList[i], toList[i])
    endtime = timeit.default_timer()
    print("\nExecution time minStops:", round(endtime-starttime,3))

    starttime = timeit.default_timer()
    dist = testMapper.minStops(fromList[i], toList[i])
    endtime = timeit.default_timer()
    print("Execution time minDistance:", round(endtime-starttime,3))

    print("From", fromList[i], "to", toList[i], "in", stops, "stops and", dist, "miles")  
    
#
# testing the newRailwayLine() API on a small list of stations  
#
stationsList = ["Queens Park", "Chigwell", "Moorgate", "Swiss Cottage", "Liverpool Street", "Highgate"]

starttime = timeit.default_timer()
newLine = testMapper.newRailwayLine(stationsList)
endtime = timeit.default_timer()

print("\n\nStation list", stationsList)
print("New station line", newLine)
print("Total track length from", newLine[0], "to", newLine[len(newLine)-1], ":", testMapper.minDistance(newLine[0], newLine[len(newLine)-1]), "miles")
print("Execution time newLine:", round(endtime-starttime,3))

#
# testing the newRailwayLine() API on a big list of stations  
#
stationsList = ["Abbey Road", "Barbican", "Bethnal Green", "Cambridge Heath", "Covent Garden", "Dollis Hill", "East Finchley", "Finchley Road and Frognal", "Great Portland Street", "Hackney Wick", "Isleworth", "Kentish Town West", "Leyton", "Marble Arch", "North Wembley", "Old Street", "Pimlico", "Queens Park", "Richmond", "Shepherds Bush", "Tottenham Hale", "Uxbridge", "Vauxhall", "Wapping"]

starttime = timeit.default_timer()
newLine = testMapper.newRailwayLine(stationsList)
endtime = timeit.default_timer()

print("\n\nStation list", stationsList)
print("New station line", newLine)
print("Total track length from", newLine[0], "to", newLine[len(newLine)-1], ":", testMapper.minDistance(newLine[0], newLine[len(newLine)-1]), "miles")
print("Execution time newLine:", round(endtime-starttime,3))

<__main__.WeightedGraph object at 0x00000200E7066F70>

Execution time to load: 0.013

Execution time minStops: 0.003
Execution time minDistance: 0.003
From Baker Street to North Wembley in 6 stops and 6 miles

Execution time minStops: 0.003
Execution time minDistance: 0.004
From Epping to Belsize Park in 17 stops and 17 miles

Execution time minStops: 0.002
Execution time minDistance: 0.003
From Canonbury to Balham in 10 stops and 10 miles

Execution time minStops: 0.003
Execution time minDistance: 0.003
From Vauxhall to Leytonstone in 6 stops and 6 miles
   0    1    2    3    4    5    6
0  0  0.0  0.0  0.0  0.0  0.0  0.0
1  0  NaN  NaN  NaN  NaN  NaN  NaN
2  0  NaN  NaN  NaN  NaN  NaN  NaN
3  0  NaN  NaN  NaN  NaN  NaN  NaN
4  0  NaN  NaN  NaN  NaN  NaN  NaN
5  0  NaN  NaN  NaN  NaN  NaN  NaN
6  0  NaN  NaN  NaN  NaN  NaN  NaN
   0          1          2          3          4          5          6
0  0   0.000000   0.000000   0.000000   0.000000   0.000000   0.000000
1  0   0.000000 

IndexError: list index out of range