https://www.earthdatascience.org/tutorials/introduction-to-leaflet-animated-maps/

https://medium.com/@kumartan1912/spatial-visualization-folium-maps-python-43c3bc150603
    
https://www.analyticsvidhya.com/blog/2020/06/guide-geospatial-analysis-folium-python/

https://www.doogal.co.uk/london_stations.php

https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads

https://www.gov.uk/guidance/about-the-price-paid-data

https://www.freemaptools.com/download-uk-postcode-lat-lng.htm

https://towardsdatascience.com/geocode-with-python-161ec1e62b89

https://www.doogal.co.uk/

In [1]:
import os
import webbrowser

import folium
from folium import plugins
from folium.plugins import HeatMap

import numpy as np
import pandas as pd
import networkx as nx 

from copy import copy, deepcopy
from collections import OrderedDict

import matplotlib
from branca.element import Figure

# Show full cell text when frames are displayed (no column-width truncation).
pd.set_option('max_colwidth', None)

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas warnings raised by later cells.
warnings.filterwarnings("ignore")

### Configurations

In [2]:
# Folder the input CSV files and the marker logo are read from.
in_dir             = 'input'
# Folder the generated HTML map is written to.
out_dir            = 'output'
# Constant added to every station's discounted waiting time (boarding links).
# Presumably minutes: the route printer labels its total in minutes.
transition_time    = 6
# Constant added to every station-to-station travel time.
station_stop_time  = 0.7
# Multiplier applied to a station's raw waiting time before transition_time is added.
wait_time_discount = 0.5

### Input Data - Nodes and connections

In [3]:
# Station reference table; its Station / Latitude / Longitude columns are used
# by later cells to place stations on the map.
infile = os.path.join(in_dir, 'Tube_station_coordinates.csv')
tube_df = pd.read_csv(infile)
tube_df.head()

Unnamed: 0,Station,Zone,Postcode,Latitude,Longitude,Easting,Northing
0,Abbey Road,3,E15 3NB,51.531952,0.003723,539081,183352
1,Abbey Wood,4,SE2 9RH,51.490784,0.120272,547297,179002
2,Acton Central,2,W3 6BH,51.508758,-0.26343,520613,180299
3,Acton Main Line,3,W3 9EH,51.516887,-0.26769,520296,181196
4,Acton Town,3,W3 8HN,51.503071,-0.280303,519457,179639


In [4]:
infile = os.path.join(in_dir, 'Tube_connections_time.csv')

# Load the line-level links, expose the travel time as `Time`, and add the
# fixed per-stop allowance to every link.
tube_connections_df = (
    pd.read_csv(infile)
    .rename(columns={'Travel_time': 'Time'})
    .assign(Time=lambda links: links['Time'] + station_stop_time)
)

tube_connections_df.head()

Unnamed: 0,Station,Connection,Time
0,Chesham (MET),Chalfont and Latimer (MET),6.7
1,Amersham (MET),Chalfont and Latimer (MET),3.7
2,Watford (MET),Croxley (MET),2.7
3,Croxley (MET),Moor Park (MET),2.7
4,Chalfont and Latimer (MET),Chorleywood (MET),3.7


In [5]:
# Mirror every link (swap the two endpoints, keep the time) so that each
# connection can be travelled in both directions.
tube_connections_reversed_df = pd.DataFrame({
    'Station': tube_connections_df['Connection'],
    'Connection': tube_connections_df['Station'],
    'Time': tube_connections_df['Time'],
})

tube_connections_reversed_df.head()

Unnamed: 0,Station,Connection,Time
0,Chalfont and Latimer (MET),Chesham (MET),6.7
1,Chalfont and Latimer (MET),Amersham (MET),3.7
2,Croxley (MET),Watford (MET),2.7
3,Moor Park (MET),Croxley (MET),2.7
4,Chorleywood (MET),Chalfont and Latimer (MET),3.7


In [6]:
infile = os.path.join(in_dir, 'Tube_stations_waiting_time.csv')

# Boarding links (station -> line platform): the raw waiting time is discounted
# and the fixed transition time is added on top, so waiting for a train is
# accounted for here.
tube_stations_df = (
    pd.read_csv(infile)
    .rename(columns={'Wait_time': 'Time'})
    .assign(Time=lambda waits: wait_time_discount * waits['Time'] + transition_time)
)
tube_stations_df.head()

Unnamed: 0,Station,Connection,Time
0,Chesham,Chesham (MET),21.0
1,Amersham,Amersham (MET),13.5
2,Watford,Watford (MET),12.0
3,Croxley,Croxley (MET),12.0
4,Chalfont and Latimer,Chalfont and Latimer (MET),13.5


In [7]:
# Number of distinct station names in the waiting-time table.
len(tube_stations_df['Station'].unique())

340

In [8]:
# Reverse of the boarding links (line platform -> station). These always cost
# 0, so only boarding a line is charged a waiting/transition time.
tube_stations_reversed_df = pd.DataFrame({
    'Station': tube_stations_df['Connection'],
    'Connection': tube_stations_df['Station'],
    'Time': 0,
})
# tube_stations_reversed_df['Time'] = tube_stations_df['Time']
tube_stations_reversed_df.head()

Unnamed: 0,Station,Connection,Time
0,Chesham (MET),Chesham,0
1,Amersham (MET),Amersham,0
2,Watford (MET),Watford,0
3,Croxley (MET),Croxley,0
4,Chalfont and Latimer (MET),Chalfont and Latimer,0


In [9]:
# All directed edges of the travel graph, in this order: line links, mirrored
# line links, boarding links (station -> platform) and their zero-cost reverses.
dfs = [tube_connections_df, tube_connections_reversed_df,
       tube_stations_df, tube_stations_reversed_df,
      ]

# Concatenate once instead of growing a frame inside a loop: same rows, same
# order, original row labels kept, but no repeated copying and no concat
# against an empty placeholder frame.
df = pd.concat(dfs, sort=False)

In [10]:
# Preview the combined edge list.
df.head()

Unnamed: 0,Station,Connection,Time
0,Chesham (MET),Chalfont and Latimer (MET),6.7
1,Amersham (MET),Chalfont and Latimer (MET),3.7
2,Watford (MET),Croxley (MET),2.7
3,Croxley (MET),Moor Park (MET),2.7
4,Chalfont and Latimer (MET),Chorleywood (MET),3.7


### Build Directional Graph

In [11]:
G = nx.DiGraph()

# One directed edge per row of `df`, with the row's time stored as a float
# under the 'length' edge attribute (a repeated pair keeps the last time seen).
G.add_weighted_edges_from(
    ((start, end, float(time))
     for start, end, time in zip(df.Station.values, df.Connection.values, df.Time.values)),
    weight='length',
)

In [18]:
def print_path_and_length(G, source, target):
    """Print the quickest route from `source` to `target`, one hop per line.

    Parameters
    ----------
    G : networkx graph whose edges carry their cost in the 'length' attribute.
    source, target : node names in `G`.

    Each printed line shows the two ends of a hop and that hop's cost; the
    final line shows the total in minutes. Nothing is returned.

    Raises whatever networkx raises when a node is missing or when `target`
    cannot be reached from `source`.
    """
    # A single Dijkstra run yields both the total cost and the node sequence.
    length, path = nx.single_source_dijkstra(G, source, target, weight='length')

    for S, T in zip(path[:-1], path[1:]):
        # Consecutive nodes of the path are joined by an edge, so the hop cost
        # is simply that edge's weight. Re-running a shortest-path search per
        # hop is unnecessary and could return a longer node list when an
        # equal-cost detour exists (e.g. through zero-cost edges).
        _time = G[S][T]['length']
        print(f"{S :<30} -->    {T :<30} in {_time :<5}")

    print(f"Total time: {length :.1f} minutes")

In [19]:
# Example: a short journey that stays on one line.
print_path_and_length(G, 'Liverpool Street', "Moorgate")

Liverpool Street               -->    Liverpool Street (MET)         in 8.0  
Liverpool Street (MET)         -->    Moorgate (MET)                 in 1.7  
Moorgate (MET)                 -->    Moorgate                       in 0.0  
Total time: 9.7 minutes


In [20]:
# Example: a longer journey that changes line on the way.
print_path_and_length(G, "Cyprus", "Piccadilly Circus")

Cyprus                         -->    Cyprus (DLR)                   in 9.0  
Cyprus (DLR)                   -->    Beckton Park (DLR)             in 1.7  
Beckton Park (DLR)             -->    Royal Albert (DLR)             in 2.7  
Royal Albert (DLR)             -->    Prince Regent (DLR)            in 1.7  
Prince Regent (DLR)            -->    Custom House (DLR)             in 2.7  
Custom House (DLR)             -->    Royal Victoria (DLR)           in 1.7  
Royal Victoria (DLR)           -->    Canning Town (DLR)             in 2.7  
Canning Town (DLR)             -->    Canning Town                   in 0.0  
Canning Town                   -->    Canning Town (JUB)             in 7.5  
Canning Town (JUB)             -->    North Greenwich (JUB)          in 1.7  
North Greenwich (JUB)          -->    Canary Wharf (JUB)             in 1.7  
Canary Wharf (JUB)             -->    Canada Water (JUB)             in 2.7  
Canada Water (JUB)             -->    Bermondsey (JUB)          

In [21]:
# Example: a journey into Liverpool Street along a single line.
print_path_and_length(G, 'Leyton', "Liverpool Street")
# print_path_and_length(G, "Liverpool Street", 'Northwood')

Leyton                         -->    Leyton (CEN)                   in 7.0  
Leyton (CEN)                   -->    Stratford (CEN)                in 1.7  
Stratford (CEN)                -->    Mile End (CEN)                 in 3.7  
Mile End (CEN)                 -->    Bethnal Green (CEN)            in 1.7  
Bethnal Green (CEN)            -->    Liverpool Street (CEN)         in 1.7  
Liverpool Street (CEN)         -->    Liverpool Street               in 0.0  
Total time: 15.8 minutes


### Get Shortest Travel Time from Source

In [22]:
# Distinct station names from the waiting-time table. This array is read as a
# module-level global by the per-station version of get_single_source_dijkstra.
tube_stations = tube_stations_df['Station'].unique()

In [23]:
def get_single_source_dijkstra(G, source):
    """Return the shortest 'length' distance from `source` to every node it can reach.

    The result is the distance dictionary produced by
    nx.single_source_dijkstra (keyed by node name).

    NOTE(review): a later cell defines another function with this same name;
    once both cells have run, that later definition is the one being called.
    """
    distances, _paths = nx.single_source_dijkstra(G, source, weight='length')
    return distances

In [24]:
def get_single_source_dijkstra(G, source):
    """Return the quickest travel time from every tube station to `source`.

    Parameters
    ----------
    G : directed networkx graph with edge costs in the 'length' attribute.
    source : node name every journey ends at.

    Returns
    -------
    dict mapping each name in the module-level `tube_stations` array to the
    shortest-path cost of travelling station -> `source` in `G`.

    Raises
    ------
    nx.NodeNotFound  if `source` or one of the stations is not a node of `G`.
    nx.NetworkXNoPath if a station cannot reach `source`.
    """
    # The cost of station -> source in G equals the cost of source -> station
    # in the reversed graph, so one Dijkstra run on a reversed *view* replaces
    # one run per station. (Sums are accumulated in the opposite order, so
    # results may differ from a per-station run in the last float digit.)
    time_to_source = nx.single_source_dijkstra_path_length(
        G.reverse(copy=False), source, weight='length')

    travel_time = dict()
    for station in tube_stations:
        if station not in G:
            raise nx.NodeNotFound(f"Node {station} not found in graph")
        if station not in time_to_source:
            raise nx.NetworkXNoPath(f"No path from {station} to {source}.")
        travel_time[station] = time_to_source[station]
    return travel_time

In [25]:
# Travel-time lookup for Liverpool Street. The name `travel_time` is assigned
# again further down when the journey-time map is built.
travel_time = get_single_source_dijkstra(G, 'Liverpool Street')

In [26]:
%%time
# Map with one logo marker per row of tube_df, centred on a chosen station.
# Width and height are passed straight to folium.Map.
width, height = (950, 580)

centre = 'Liverpool Street'
# 2-D array of [Latitude, Longitude] rows for the station(s) named `centre`.
loc = tube_df[tube_df.Station==centre][['Latitude', 'Longitude']].values

m1 = folium.Map(width=width, height=height, location=loc,
                zoom_start=10, min_zoom=10, max_zoom=16)

# Add markers
locs = tube_df[['Latitude', 'Longitude']].values
# Path of the icon image, relative to the input folder.
logo_url = os.path.join(in_dir, 'tube_logo.png')

# NOTE: the loop variable reuses the name `loc`, so after this loop the global
# `loc` holds the last marker position rather than the centre coordinates.
for loc in locs:     
    folium.Marker(location=loc, icon=folium.features.CustomIcon(logo_url, icon_size=(14, 13))).add_to(m1)


CPU times: user 84.6 ms, sys: 32.1 ms, total: 117 ms
Wall time: 125 ms


In [27]:
# m1

In [28]:
# filepath = 'folium_tube_map_logo.html'
# m1.save(filepath)

### Heatmap ref:
https://www.w3schools.com/colors/colors_picker.asp

In [29]:
def heatmap(minimum, maximum, value):
    """Map `value` within [minimum, maximum] to an (r, g, b) tuple.

    The position x = (value - minimum) / (maximum - minimum) selects the colour:
    blue -> cyan for x <= 0.25 (green rises as (4x)**1.1 rather than linearly),
    cyan -> green for x <= 0.5, and yellow -> red above 0.5.

    NOTE(review): a later cell defines another `heatmap` with the same
    signature; once both cells have run, that later definition is used.
    """
    lo, hi = float(minimum), float(maximum)
    x = (value - lo) / (hi - lo)

    if x <= 0.25:
        return 0, (4*x)**1.1, 1
    if x <= 0.5:
        return 0, 1, 2-4*x
    return 1, 2-2*x, 0

In [30]:
def heatmap(minimum, maximum, value):
    """Map `value` within [minimum, maximum] to an (r, g, b) tuple in [0, 1].

    The position x = (value - minimum) / (maximum - minimum) runs through four
    linear ramps: dark blue -> cyan -> yellow -> magenta -> dark red.

    Parameters
    ----------
    minimum, maximum : bounds of the scale (converted to float).
    value : number to colour.

    Returns
    -------
    (r, g, b) tuple of numbers between 0 and 1.

    Values outside the bounds saturate at the nearest end of the scale, and a
    zero-width scale (maximum == minimum) maps everything to the low end, so
    the result is always a valid colour instead of an out-of-range component
    or a division by zero.
    """
    minimum, maximum = float(minimum), float(maximum)
    span = maximum - minimum

    # Degenerate scale: nothing to interpolate, use the low end.
    x = (value - minimum) / span if span else 0.0
    # Saturate out-of-range values (NaN passes through unchanged).
    x = min(max(x, 0.0), 1.0)

    # Intensity of the dominant channel at the two ends of the scale.
    a = 0.5

    if x <= 0.25:
        r, g, b = 0, 4*x, a + 4*(1-a)*x
    elif x <= 0.5:
        r, g, b = 4*x-1, 1, 2-4*x
    elif x <= 0.75:
        r, g, b = 1, 3-4*x, 4*x-2
    else:
        r, g, b = (4-3*a)-4*(1-a)*x, 0, 4-4*x

    return r, g, b

In [31]:
def get_journey_time_df(source):
    """Attach the journey time for `source` to every station's coordinates.

    Reads the module-level `G` and `tube_df` and calls
    get_single_source_dijkstra(G, source).

    Returns
    -------
    (journey_time_df, travel_time)
        journey_time_df : new frame with columns Latitude, Longitude and
            journey_time, one row per row of tube_df, sorted by journey_time
            in descending order. Stations without an entry in `travel_time`
            get NaN and sort last.
        travel_time : the dict returned by get_single_source_dijkstra.
    """
    travel_time = get_single_source_dijkstra(G, source)

    # assign() builds a new frame, so no column is ever written onto a slice
    # of tube_df; map() yields NaN for station names missing from the dict.
    journey_time_df = (
        tube_df[['Latitude', 'Longitude']]
        .assign(journey_time=tube_df['Station'].map(travel_time))
        .sort_values(by=['journey_time'], ascending=False)
    )

    return journey_time_df, travel_time

In [32]:
def get_tube_lines_coords():
    """Group the coordinates of every station-to-station link by tube line.

    Reads the module-level `tube_connections_df`, whose Station / Connection
    values are expected to end in " (XXX)" with a three-character line code,
    and `tube_df` for the station coordinates.

    Returns
    -------
    OrderedDict keyed by line code ('MET', 'DIS', ...). Each value is a list
    with one entry per link, in input order; an entry is a one-element list
    wrapping the segment, i.e. [[[lat1, lon1], [lat2, lon2]]].

    A link is filed under the line code of its first endpoint. Links with an
    endpoint missing from tube_df are reported on stdout and skipped. A line
    code that is not in the list below raises KeyError.
    """
    tube_lines = ['MET', 'DIS', 'HAM', 'PIC', 'BAK', 'CEN', 'CIR',
                  'JUB', 'NOR', 'VIC', 'WAT', 'OGD', 'DLR']

    lines = OrderedDict((line, []) for line in tube_lines)

    # Build the name -> [lat, lon] lookup once instead of scanning tube_df for
    # every endpoint; setdefault keeps the first row when a name is repeated.
    station_coords = {}
    for name, coord in zip(tube_df.Station.values,
                           tube_df[['Latitude', 'Longitude']].values.tolist()):
        station_coords.setdefault(name, coord)

    def _coord_of(station):
        # Fresh [lat, lon] list for `station`, or None (with a message) if unknown.
        coord = station_coords.get(station)
        if coord is None:
            print(f"{station} not in tube_df")
            return None
        return list(coord)

    A = tube_connections_df.Station.values
    B = tube_connections_df.Connection.values

    for s1, s2 in zip(A, B):
        # Strip the trailing " (XXX)" to get the station name; the code inside
        # the brackets is the line.
        station1, line1 = s1[:-6], s1[-5:][1:-1].upper()
        station2 = s2[:-6]

        coord1 = _coord_of(station1)
        coord2 = _coord_of(station2)

        if coord1 and coord2:
            lines[line1].append([[coord1, coord2]])

    return lines
    

In [33]:
# Per-line link coordinates, computed once; get_journey_time_graph reads this
# module-level name when it draws the tube lines.
lines = get_tube_lines_coords()

In [34]:
def get_journey_time_graph(source):
    """Build the folium map of journey times for `source`.

    The map has two toggleable layers: one translucent circle per station,
    coloured by its journey time (tooltip = time rounded to an integer), and
    the tube lines drawn segment by segment.

    Reads the module-level `tube_df` and `lines`, and calls
    get_journey_time_df and heatmap.

    Parameters
    ----------
    source : station name the journey times refer to.

    Returns
    -------
    (m, travel_time) : the folium.Map and the travel-time dict obtained from
    get_journey_time_df(source).
    """
    #####################################
    # Base Map
    #####################################
    # The view is always centred on this station, whatever `source` is.
    centre = "King's Cross"
    loc = tube_df[tube_df.Station==centre][['Latitude', 'Longitude']].values

    m = folium.Map(location=loc, zoom_start=11, min_zoom=10, max_zoom=16)

    #####################################
    # Adding Feature Groups
    #####################################
    fg1 = folium.FeatureGroup(name='Journey Time', show=True)
    m.add_child(fg1)

    fg2 = folium.FeatureGroup(name='Tube lines', show=True)
    m.add_child(fg2)

    folium.LayerControl().add_to(m)

    #####################################
    # Journey Time markers
    #####################################
    journey_time_df, travel_time = get_journey_time_df(source)

    # The colour scale runs from 0 to the largest journey time.
    maxval = np.nanmax(journey_time_df.journey_time.values)

    # Stations without a journey time (NaN) get no marker.
    reachable = (journey_time_df[['Latitude', 'Longitude', 'journey_time']]
                 .dropna(subset=['journey_time']))

    for x, y, t in reachable.values:
        color = matplotlib.colors.to_hex(heatmap(0, maxval, t))

        folium.CircleMarker(location=[x, y],
                            radius=30,
                            fill_color=color,
                            # No border: switch the stroke off explicitly
                            # rather than passing a non-existent colour name.
                            stroke=False,
                            tooltip=int(np.round(t)),
                            fill_opacity=0.5).add_to(fg1)

    #####################################
    # Tube lines
    #####################################
    # line code -> (display name, RGB colour, stroke weight)
    lines_cfg = {'BAK': ('Bakerloo', (0.6, 0.2, 0), 3),
                 'CEN': ('Central', (1, 0, 0), 3),
                 'CIR': ('Circle', (1, 1, 0), 3),
                 'DIS': ('District', (0, 0.6, 0.2), 15),
                 'HAM': ('Hammersmith & City', (1, 0.6, 0.8), 9),
                 'JUB': ('Jubilee', (0.5, 0.5, 0.5), 3),
                 'MET': ('Metropolitan', (0.6, 0, 0.2), 15),
                 'NOR': ('Northern', (0, 0 ,0), 6),
                 'PIC': ('Piccadilly', (0, 0, 0.6), 9),
                 'VIC': ('Victoria', (0.2, 0.6, 1), 3),
                 'WAT': ('Waterloo & City', (0.2, 0.8, 0.8), 3),
                 'OGD': ('Overground', (1, 0.6, 0), 3),
                 'DLR': ('DLR', (0, 0.4, 0.4), 3),
                 }

    for k, line_segments in lines.items():
        (name, color, w) = lines_cfg[k]
        color = matplotlib.colors.to_hex(color)
        for segment in line_segments:
            folium.vector_layers.PolyLine(segment,
                                          tooltip=f'{name} line',
                                          color=color,
                                          weight=w,
                                          opacity=0.6).add_to(fg2)

    return m, travel_time

In [35]:
# Build the journey-time map for Piccadilly Circus. This also rebinds the
# global `travel_time` to the Piccadilly Circus lookup.
m, travel_time = get_journey_time_graph('Piccadilly Circus')

In [36]:
# Display the map inline.
m

In [38]:
# Create the output folder if it is missing so that saving works on a fresh
# checkout.
os.makedirs(out_dir, exist_ok=True)

outfile = os.path.join(out_dir, 'journey_time_map.html')
m.save(outfile)

# Open on browser:
# webbrowser.open(f'file://{os.path.abspath(outfile)}')

In [37]:
# For sanitary checks:

# For sanity checks: print the rounded journey time of each test station.
# `travel_time` is whichever lookup was assigned most recently (the one
# returned by get_journey_time_graph when the cells are run top to bottom).
infile = os.path.join(in_dir, 'Test_stations.csv')
test_df = pd.read_csv(infile)

# A station name missing from `travel_time` raises KeyError here; the
# commented-out variant below tolerates missing names instead.
for station in test_df.Stations.values:
    print(np.round(travel_time[station]))
#     if station in travel_time:
#         print(station, np.round(travel_time[station]))
# #         print(station)
#     else:
#         print(station)

55.0
77.0
63.0
43.0
26.0
58.0
43.0
37.0
19.0
49.0
23.0
19.0
9.0
24.0
43.0
80.0
46.0
41.0
19.0
16.0
54.0
35.0
19.0
34.0
34.0
39.0
38.0
25.0
53.0
28.0
22.0
9.0
27.0
51.0
39.0
25.0
30.0
65.0
49.0
58.0
36.0
31.0
77.0
55.0
47.0
49.0
52.0
44.0
65.0
61.0
43.0
63.0
49.0
31.0
54.0
