In [1]:
import networkx as nx
import pandas as pd
import numpy as np


In [2]:
# Load the data
data = pd.read_csv("london_flows.csv")

In [3]:
# Create graph object
G = nx.from_pandas_edgelist(data, 'station_origin', 'station_destination')

### II.1

#### Weighted betweenness centrality

In [4]:
# Compute betweenness centrality with edge weights
bet_london_w = nx.betweenness_centrality(G, weight='flows',normalized=True)
# We can add these values to the nodes attributes:
nx.set_node_attributes(G,bet_london_w,'betweenness_w')

In [5]:
# To dataframe using the nodes as the index
df = pd.DataFrame(index=G.nodes())
df['betweenness_w'] = pd.Series(nx.get_node_attributes(G, 'betweenness_w'))

df_sorted = df.sort_values(["betweenness_w"], ascending=False)
df_sorted[0:10].head(10)

Unnamed: 0,betweenness_w
Stratford,0.098553
Liverpool Street,0.034307
Canary Wharf,0.027956
Bank and Monument,0.027956
Canning Town,0.027757
West Ham,0.024551
Highbury & Islington,0.023023
Whitechapel,0.019682
Canada Water,0.017898
Shadwell,0.01707


#### Closeness Centrality:

In [6]:
# Calculate the shortest path lengths for all pairs of nodes in the weighted graph
shortest_path_lengths = dict(nx.all_pairs_dijkstra_path_length(G, weight='flows'))

# Compute the weighted closeness centrality for each node
closeness_w = {}
for node, path_lengths in shortest_path_lengths.items():
    total_path_length = sum(path_lengths.values())
    closeness_w[node] = 1 / total_path_length if total_path_length > 0 else 0

# Add the weighted closeness centrality values to the graph nodes as attributes
nx.set_node_attributes(G, closeness_w, 'closeness_w')

# To ataframe using the nodes as the index
df = pd.DataFrame(index=G.nodes())
df['closeness_w'] = pd.Series(nx.get_node_attributes(G, 'closeness_w'))

df_sorted = df.sort_values(["closeness_w"], ascending=False)
df_sorted[0:10]

Unnamed: 0,closeness_w
Stratford,0.002331
Highbury & Islington,0.002101
Whitechapel,0.002062
West Brompton,0.002053
Canada Water,0.002045
Richmond,0.002037
Canary Wharf,0.002037
Bank and Monument,0.002037
Liverpool Street,0.002033
Canning Town,0.002033


#### II.2.

In [14]:
# Create a weighted graph
G_weighted = nx.DiGraph()

In [15]:
# Iterate through the rows in the DataFrame and add edges with weights (passenger flows)
for _, row in data.iterrows():
    origin = row['station_origin']
    destination = row['station_destination']
    flow = row['flows']
    G_weighted.add_edge(origin, destination, weight=1 / (flow if flow > 0 else 1))

In [16]:
# Function to calculate the total passenger weighted shortest path length
def total_passenger_weighted_shortest_path_length(G):
    total = 0
    all_path_lengths = nx.all_pairs_dijkstra_path_length(G, weight='weight')
    
    for source, path_lengths in all_path_lengths:
        for target, length in path_lengths.items():
            if source != target:
                total += length
    
    return total

In [17]:
# Remove self-loops from the graph
self_loops = list(nx.selfloop_edges(G_weighted))
G_weighted.remove_edges_from(self_loops)

In [18]:
# Calculate the original total passenger weighted shortest path length
original_total_weighted_spl = total_passenger_weighted_shortest_path_length(G_weighted)

In [19]:
# Calculate the impact of node removal for the top 10 nodes from the previous step
impact_list = []

for node in df_sorted.index[:10]:
    G_removed = G_weighted.copy()
    G_removed.remove_node(node)
    new_total_weighted_spl = total_passenger_weighted_shortest_path_length(G_removed)
    impact = (new_total_weighted_spl - original_total_weighted_spl) / original_total_weighted_spl
    impact_list.append((node, impact))

In [20]:
# Create a DataFrame to display the results
impact_df = pd.DataFrame(impact_list, columns=['Node', 'Impact'])
impact_df.sort_values('Impact', ascending=False, inplace=True)
impact_df.reset_index(drop=True, inplace=True)
impact_df

Unnamed: 0,Node,Impact
0,Stratford,0.133112
1,Canning Town,0.040794
2,Bank and Monument,0.02704
3,Canada Water,0.019615
4,Liverpool Street,0.016624
5,Whitechapel,0.008734
6,Canary Wharf,0.004836
7,Highbury & Islington,0.002704
8,Richmond,0.001005
9,West Brompton,-0.003374


### II.3

In [21]:
# Top 3 nodes from betweenness centrality
top_3_betweenness = ['Stratford', 'Liverpool Street', 'Canary Wharf']

In [22]:
# Function to calculate the total weighted shortest path length and global efficiency
def total_weighted_shortest_path_length_and_global_efficiency(G):
    total_spl = 0
    total_inv_spl = 0
    num_nodes = len(G.nodes())
    all_path_lengths = nx.all_pairs_dijkstra_path_length(G, weight='weight')
    
    for source, path_lengths in all_path_lengths:
        for target, length in path_lengths.items():
            if source != target:
                total_spl += length
                total_inv_spl += 1 / length
    
    global_efficiency = total_inv_spl / (num_nodes * (num_nodes - 1))
    return total_spl, global_efficiency

In [23]:
# Calculate the original total weighted shortest path length and global efficiency
original_spl, original_ge = total_weighted_shortest_path_length_and_global_efficiency(G_weighted)


In [24]:
# Calculate the impact of node removal for the top 3 nodes from betweenness centrality
impact_list = []

for node in top_3_betweenness:
    G_removed = G_weighted.copy()
    G_removed.remove_node(node)
    new_spl, new_ge = total_weighted_shortest_path_length_and_global_efficiency(G_removed)
    spl_impact = (original_spl - new_spl) / original_spl
    ge_impact = (original_ge - new_ge) / original_ge
    impact_list.append((node, spl_impact, ge_impact))

In [25]:
# Create a DataFrame to display the results
impact_df = pd.DataFrame(impact_list, columns=['Node', 'SPL_Impact', 'Global_Efficiency_Impact'])
impact_df.sort_values('SPL_Impact', ascending=False, inplace=True)
impact_df.reset_index(drop=True, inplace=True)
impact_df

Unnamed: 0,Node,SPL_Impact,Global_Efficiency_Impact
0,Canary Wharf,-0.004836,0.021315
1,Liverpool Street,-0.016624,0.037101
2,Stratford,-0.133112,0.126392


According to the Shortest Path Length (SPL) Impact, the greatest impact on passengers would be caused by the removal of Stratford station, with an impact of -0.133112. This means that the removal of Stratford station would result in a significant increase in the total weighted shortest path length. This suggests that the total travel time for passengers would increase significantly if Stratford station were to close.

On the other hand, the global efficiency impact measure indicates that the removal of Stratford station would also result in the largest increase in global efficiency, with an impact of 0.126392. This suggests that the removal of Stratford station may improve the overall connectivity and efficiency of the network, despite the increase in the total weighted shortest path length.

In summary, the closure of Stratford station is likely to have the greatest impact on passengers under both measures, but the impact of these measures is not consistent. While the closure would lead to an overall increase in travel time, it may also lead to a more efficient and connected network. Policy makers should carefully consider the trade-offs and potential consequences of station closures before taking any action.