In [18]:
import heapq
from collections import deque

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd


In [4]:


# header=None: the first line of the file is read as data, and the column
# names are supplied explicitly instead.
df = pd.read_csv(
    'tubedata.csv',
    header=None,
    names=['start', 'end', 'tube_line', 'average_time', 'zone1', 'zone2'],
)
df


Unnamed: 0,start,end,tube_line,average_time,zone1,zone2
0,Harrow & Wealdstone,Kenton,Bakerloo,3,5,0
1,Kenton,South Kenton,Bakerloo,2,4,0
2,South Kenton,North Wembley,Bakerloo,2,4,0
3,North Wembley,Wembley Central,Bakerloo,2,4,0
4,Wembley Central,Stonebridge Park,Bakerloo,3,4,0
...,...,...,...,...,...,...
369,Victoria,Pimlico,Victoria,3,1,0
370,Pimlico,Vauxhall,Victoria,1,1,0
371,Vauxhall,Stockwell,Victoria,3,1,2
372,Stockwell,Brixton,Victoria,2,2,0


In [15]:
def step_dictionary(df):
    """Build neighbour and zone lookups from a table of tube connections.

    Columns are read by position: 0 = start station, 1 = end station,
    3 = cost of the step, 4 = main zone, 5 = secondary zone
    (column 2 is not used here).

    Args:
        df: DataFrame with one row per direct connection between two stations.

    Returns:
        A ``(station_dict, zone_dict)`` tuple:

        * ``station_dict`` maps a station name to a list of
          ``(neighbour_name, cost)`` tuples, e.g.
          ``{"Mile End": [("Stepney Green", 2), ("Wembley", 1)]}``.
          Every row is recorded in both directions.
        * ``zone_dict`` maps a station name to the set of zone values seen
          for it (values are stored as they appear in ``df``).
    """
    station_dict = {}
    zone_dict = {}

    # name=None yields plain positional tuples, which avoids the deprecated
    # positional indexing (row[0]) on a label-indexed Series from iterrows().
    for row in df.itertuples(index=False, name=None):
        start_station, end_station = row[0], row[1]
        act_cost = int(row[3])
        zone1, zone2 = row[4], row[5]

        # Each row is one tube "step"; record it in both directions.
        station_dict.setdefault(start_station, []).append((end_station, act_cost))
        station_dict.setdefault(end_station, []).append((start_station, act_cost))

        start_zones = zone_dict.setdefault(start_station, set())
        end_zones = zone_dict.setdefault(end_station, set())

        # The main zone always applies to the starting station.
        start_zones.add(zone1)

        # 0 is the "no secondary zone" sentinel. The column may be parsed as
        # strings ("0") or as numbers (0), so both spellings are accepted;
        # comparing against "0" alone is always true for numeric columns.
        has_secondary_zone = zone2 != 0 and str(zone2).strip() != "0"

        if has_secondary_zone:
            start_zones.add(zone2)
            # A real secondary zone is the main zone of the ending station.
            end_zones.add(zone2)
        else:
            # Otherwise the ending station shares the starting station's zone.
            end_zones.add(zone1)

    return station_dict, zone_dict

In [10]:
# Build the neighbour and zone lookups from the loaded connection table.
station_dict, zone_dict = step_dictionary(df)
# Display the (neighbour, cost) list recorded for one station as a quick check.
station_dict['Epping']

[('Theydon Bois', 2)]

In [16]:
# Build the station graph first, so the name -> index lookup and the matrix
# below are always derived from the same station_dict.
station_dict, zone_dict = step_dictionary(df)

# Lookup tables between station names and integer matrix indices:
# station_names[i] is the name at index i, inv_station_dict[name] is its index.
station_names = list(station_dict)
inv_station_dict = {station: i for i, station in enumerate(station_names)}

# Adjacency matrix of step costs; 0 means "no direct connection".
n = len(station_names)
adj_matrix = [[0] * n for _ in range(n)]
for station, children in station_dict.items():
    matrix_row = adj_matrix[inv_station_dict[station]]
    for child, cost in children:
        child_idx = inv_station_dict[child]
        # The same pair of stations can appear in more than one row of the
        # table; keep the cheapest cost rather than whichever row came last.
        if matrix_row[child_idx] == 0 or cost < matrix_row[child_idx]:
            matrix_row[child_idx] = cost

In [None]:
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
# supported way to build a graph from an adjacency array.
G = nx.from_numpy_array(np.array(adj_matrix))
plt.figure(figsize=(30, 30))
# Node ids are matrix indices; label each node with its station name instead.
labels = dict(enumerate(station_names))
nx.draw(G, with_labels=True, labels=labels)
plt.show()

In [27]:
# Breadth First Search

def bfs(start, end, inv_station_dict, adj_matrix):
    """Breadth-first search for a route between two stations.

    Args:
        start: Name of the departure station.
        end: Name of the destination station.
        inv_station_dict: Mapping of station name -> row/column index in
            ``adj_matrix``.
        adj_matrix: Square matrix (list of lists or array); a non-zero entry
            ``[i][j]`` marks a direct connection from station ``i`` to ``j``.

    Returns:
        The list of station names from ``start`` to ``end`` (inclusive) found
        by expanding stations in FIFO order, or ``[]`` if ``end`` cannot be
        reached.

    Raises:
        KeyError: If a station that has to be expanded is not a key of
            ``inv_station_dict``.
    """
    # Invert the lookup locally so the function does not rely on a
    # notebook-level ``station_names`` global being defined and in sync.
    index_to_name = {idx: name for name, idx in inv_station_dict.items()}

    frontier = deque([(start, [start])])    # FIFO queue; popleft() is O(1)
    visited = set()
    while frontier:
        vertex, path = frontier.popleft()
        if vertex in visited:
            continue
        if vertex == end:                   # GOAL-TEST
            return path
        visited.add(vertex)
        for child_idx, link_cost in enumerate(adj_matrix[inv_station_dict[vertex]]):
            if link_cost == 0:              # 0 indicates no connection
                continue
            child = index_to_name[child_idx]
            # Already-expanded stations would be discarded when popped anyway.
            if child not in visited:
                frontier.append((child, path + [child]))

    return []                               # Return failure


# Depth First Search

def dfs(start, end, inv_station_dict, adj_matrix):
    """Depth-first search for a route between two stations.

    Args:
        start: Name of the departure station.
        end: Name of the destination station.
        inv_station_dict: Mapping of station name -> row/column index in
            ``adj_matrix``.
        adj_matrix: Square matrix (list of lists or array); a non-zero entry
            ``[i][j]`` marks a direct connection from station ``i`` to ``j``.

    Returns:
        The list of station names from ``start`` to ``end`` (inclusive) found
        by expanding the most recently discovered station first, or ``[]`` if
        ``end`` cannot be reached. The route is not necessarily the shortest.

    Raises:
        KeyError: If a station that has to be expanded is not a key of
            ``inv_station_dict``.
    """
    # Invert the lookup locally so the function does not rely on a
    # notebook-level ``station_names`` global being defined and in sync.
    index_to_name = {idx: name for name, idx in inv_station_dict.items()}

    stack = [(start, [start])]              # A stack is a LIFO queue
    visited = set()
    while stack:
        vertex, path = stack.pop()
        if vertex in visited:
            continue
        if vertex == end:                   # GOAL-TEST
            return path
        visited.add(vertex)
        for child_idx, link_cost in enumerate(adj_matrix[inv_station_dict[vertex]]):
            if link_cost == 0:              # 0 indicates no connection
                continue
            child = index_to_name[child_idx]
            # Already-expanded stations would be discarded when popped anyway.
            if child not in visited:
                stack.append((child, path + [child]))

    return []                               # Return failure


# Uniform Cost Search  
def ucs(start, end, inv_station_dict, adj_matrix):
    """Uniform-cost search for the cheapest route between two stations.

    Args:
        start: Name of the departure station.
        end: Name of the destination station.
        inv_station_dict: Mapping of station name -> row/column index in
            ``adj_matrix``.
        adj_matrix: Square matrix (list of lists or array) of step costs; a
            zero entry means "no direct connection".

    Returns:
        The list of station names from ``start`` to ``end`` (inclusive) with
        the lowest total cost, or ``[]`` if ``end`` cannot be reached.

    Raises:
        KeyError: If a station that has to be expanded is not a key of
            ``inv_station_dict``.
    """
    # Invert the lookup locally so the function does not rely on a
    # notebook-level ``station_names`` global being defined and in sync.
    index_to_name = {idx: name for name, idx in inv_station_dict.items()}

    # Heap entries are (cost, insertion_order, station, path). Cost comes
    # first so the heap orders by it; the insertion counter breaks ties
    # without ever comparing station names or paths.
    insertion_order = 0
    frontier = [(0, insertion_order, start, [start])]
    visited = set()
    while frontier:
        cost, _, vertex, path = heapq.heappop(frontier)
        if vertex in visited:
            continue
        if vertex == end:                   # GOAL-TEST
            return path
        visited.add(vertex)
        for child_idx, child_cost in enumerate(adj_matrix[inv_station_dict[vertex]]):
            if child_cost == 0:             # child_cost = 0 indicates no connection
                continue
            child = index_to_name[child_idx]
            # Already-expanded stations would be discarded when popped anyway.
            if child not in visited:
                insertion_order += 1
                heapq.heappush(
                    frontier,
                    (cost + child_cost, insertion_order, child, path + [child]),
                )

    return []                               # Return failure

In [33]:
dfs('Mile End', 'Epping', inv_station_dict, adj_matrix)

['Mile End',
 'Bow Road',
 'Bromley-by-Bow',
 'West Ham',
 'Stratford',
 'Leyton',
 'Leytonstone',
 'Snaresbrook',
 'South Woodford',
 'Woodford',
 'Buckhurst Hill',
 'Loughton',
 'Debden',
 'Theydon Bois',
 'Epping']

In [35]:
ucs('Mile End', 'Epping', inv_station_dict, adj_matrix)

['Mile End',
 'Stratford',
 'Leyton',
 'Leytonstone',
 'Snaresbrook',
 'South Woodford',
 'Woodford',
 'Buckhurst Hill',
 'Loughton',
 'Debden',
 'Theydon Bois',
 'Epping']