In [1]:
import networkx as nx
import matplotlib.pyplot as plt

import pandas as pd
from operator import itemgetter

In [2]:
# Load the station-to-station flow table (one row per origin/destination pair)
data = pd.read_csv("london_flows.csv")

In [3]:
# Preview the first five rows to check the columns loaded as expected
data.head()

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097
1,Abbey Road,Beckton,1,599,442,8510.121774
2,Abbey Road,Blackwall,3,599,665,3775.448872
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422
4,Abbey Road,Canning Town,37,599,15428,2228.923167


## Part 1: London’s underground resilience

### I. Topological network

#### I.1. Centrality measures

In [4]:
# Build the graph with one edge per (station_origin, station_destination) row.
# NOTE(review): the rows look like origin-destination flow records (zero-flow
# pairs included) rather than physical track links, so this graph may be far
# denser than the real topology -- confirm this is the intended network.
G = nx.from_pandas_edgelist(
    data,
    source='station_origin',
    target='station_destination',
)

#### Degree centrality

In [5]:
# Compute the degree centrality of every station with networkx and store it
# on the graph as the 'degree' node attribute.
deg_london = nx.degree_centrality(G)
nx.set_node_attributes(G, values=deg_london, name='degree')

In [6]:
# Collect the 'degree' node attribute into a DataFrame indexed by node (station)
df = pd.DataFrame(index=G.nodes())
df['degree'] = pd.Series(nx.get_node_attributes(G, 'degree'))

# Rank stations from highest to lowest degree centrality and show the top ten
df_sorted = df.sort_values(by="degree", ascending=False)
df_sorted.head(10)

Unnamed: 0,degree
Stratford,0.922111
Highbury & Islington,0.806533
Whitechapel,0.781407
West Brompton,0.776382
Canada Water,0.771357
Canary Wharf,0.771357
Liverpool Street,0.768844
Bank and Monument,0.766332
Richmond,0.766332
Canning Town,0.763819


#### Betweenness centrality

In [7]:
# Compute the betweenness centrality of every station with networkx and store
# it on the graph as the 'betweenness' node attribute.
bet_london = nx.betweenness_centrality(G)
nx.set_node_attributes(G, values=bet_london, name='betweenness')

In [8]:
# Collect the 'betweenness' node attribute into a DataFrame indexed by node (station)
df = pd.DataFrame(index=G.nodes())
df['betweenness'] = pd.Series(nx.get_node_attributes(G, 'betweenness'))

# Rank stations from highest to lowest betweenness centrality and show the top ten
df_sorted = df.sort_values(by="betweenness", ascending=False)
df_sorted.head(10)

Unnamed: 0,betweenness
Stratford,0.098553
Liverpool Street,0.034307
Canary Wharf,0.027956
Bank and Monument,0.027956
Canning Town,0.027757
West Ham,0.024551
Highbury & Islington,0.023023
Whitechapel,0.019682
Canada Water,0.017898
Shadwell,0.01707


#### Closeness centrality

In [9]:
# Compute the closeness centrality of every station with networkx and store
# it on the graph as the 'closeness' node attribute.
clos_london = nx.closeness_centrality(G)
nx.set_node_attributes(G, values=clos_london, name='closeness')

In [10]:
# Collect the 'closeness' node attribute into a DataFrame indexed by node (station)
df = pd.DataFrame(index=G.nodes())
df['closeness'] = pd.Series(nx.get_node_attributes(G, 'closeness'))

# Rank stations from highest to lowest closeness centrality and show the top ten
df_sorted = df.sort_values(by="closeness", ascending=False)
df_sorted.head(10)

Unnamed: 0,closeness
Stratford,0.927739
Highbury & Islington,0.836134
Whitechapel,0.820619
West Brompton,0.817248
Canada Water,0.813906
Richmond,0.810591
Canary Wharf,0.810591
Bank and Monument,0.810591
Liverpool Street,0.808943
Canning Town,0.808943


#### I.2. Impact measures

In [11]:
import networkx as nx

In [12]:
def network_properties(G):
    """Summarise the connectivity and efficiency of a graph.

    Parameters
    ----------
    G : networkx graph
        The graph to measure.

    Returns
    -------
    tuple
        ``(connected, avg_shortest_path, efficiency)`` where ``connected`` is
        the result of ``nx.is_connected(G)``, ``avg_shortest_path`` is
        ``nx.average_shortest_path_length(G)`` when the graph is connected and
        ``None`` otherwise, and ``efficiency`` is ``nx.global_efficiency(G)``.
    """
    is_single_component = nx.is_connected(G)

    # The average shortest path length is only requested for a connected
    # graph; a disconnected graph reports None instead.
    if is_single_component:
        mean_path_length = nx.average_shortest_path_length(G)
    else:
        mean_path_length = None

    return is_single_component, mean_path_length, nx.global_efficiency(G)

In [13]:
# Baseline metrics for the intact network
connected, avg_shortest_path, efficiency = network_properties(G)

# avg_shortest_path is computed above but is not included in the report
print(
    "Initial Network Properties:",
    f"Connected: {connected}",
    f"Global Efficiency: {efficiency}",
    sep="\n",
)

Initial Network Properties:
Connected: True
Global Efficiency: 0.697696922372896


In [14]:
# Analyze the impact of removing a station
station_to_remove = 'Stratford'  # Replace with the station you want to remove
# Work on a copy so the original graph G is left untouched
G_removed = G.copy()
G_removed.remove_node(station_to_remove)

In [15]:
# Same metrics, measured on the copy with the station removed
connected_removed, avg_shortest_path_removed, efficiency_removed = network_properties(G_removed)

# avg_shortest_path_removed is computed above but is not included in the report
print(
    "\nNetwork Properties after removing the station:",
    f"Connected: {connected_removed}",
    f"Global Efficiency: {efficiency_removed}",
    sep="\n",
)


Network Properties after removing the station:
Connected: True
Global Efficiency: 0.6922057812149586


In [16]:
# Compare the metrics before and after the removal.
# Note: the "Connected" line reports whether the connectivity status is the
# same before and after (True = unchanged), not whether the reduced graph is
# connected. The efficiency line is the signed change (after minus before).
print(
    "\nImpact of removing the station:",
    f"Connected: {connected_removed == connected}",
    f"Global Efficiency: {efficiency_removed - efficiency}",
    sep="\n",
)


Impact of removing the station:
Connected: True
Global Efficiency: -0.005491141157937474


Explanation of results:

Robustness:
Connected (True/False) - This indicator represents the connectivity of the network after removing a station. If the network remains connected (True), it suggests that the network is robust, as it can maintain its connectivity despite the removal of a node.

Efficiency:
Average Shortest Path Length - This metric indicates the average shortest path length between all pairs of stations in the network. A shorter average path length typically corresponds to a more efficient network, as it implies that stations can be reached more quickly on average.
Global Efficiency - This measure represents the average inverse shortest path length. A higher global efficiency value indicates a more efficient network, as it means that the network is better at facilitating traffic flow between stations.

Reasons why these measures could also be used to evaluate the resilience of any other network:

Connectivity (Robustness):
Connectivity is a global measure because it evaluates the network as a whole, assessing whether there is a path between every pair of nodes in the network. When the network remains connected after removing a node, it indicates that the network can still maintain its overall structure and function. This measure is not specific to any particular node or link but rather describes the property of the entire network.

Average Shortest Path Length (Efficiency):
This measure is a global metric because it computes the average of the shortest paths between all pairs of nodes in the network. It does not focus on a single node or link but considers the overall efficiency of the network in terms of how quickly nodes can be reached from one another.

Global Efficiency (Efficiency):
Global efficiency is the average of the inverse shortest path lengths between all pairs of nodes in the network. It is a global measure because it considers the entire network's ability to facilitate traffic flow betw