In [1]:
import preprocessing.Preprocessing as pp
import classes.transportnetwork as tn
from visualisation.visualisation import *
from characterisation.degree import *
from characterisation.centrality import *
from characterisation.distance import *
from robustness_analysis.robustness import *
from ML.embedding import *
from clustering.cluster import *
from GNN.data import *
from GNN.model import *
from GNN.run import *

### Create the network from a GTFS file

In [2]:
# Build the network graph from the GTFS feed in 'data/gtfs_3'
# (path is relative to the notebook's working directory).
G = pp.create_network_from_GTFS('data/gtfs_3')

Network creation: 


100%|██████████| 144634/144634 [00:30<00:00, 4765.46it/s]


### Create the transport network object

In [3]:
# Wrap the graph in a TransportNetwork, naming the node attributes used as
# position ("lon", "lat") and the edge attributes used as times
# ("departure_time", "arrival_time").
TN = tn.TransportNetwork(G, pos_argument=["lon", "lat"], time_arguments=["departure_time", "arrival_time"])

In [4]:
# Print the network summary: for each graph variant it lists the graph type,
# node/edge counts and the attribute names (see the output below).
print(TN)

Graph type: <class 'networkx.classes.graph.Graph'>
- Number of nodes: 613
 |--- lon
 |--- lat
- Number of edges: 776
 |--- distance
 |--- route_id
 |--- trip_id
 |--- arrival_time
 |--- departure_time
 |--- euclidian_distance
Graph type: <class 'networkx.classes.digraph.DiGraph'>
- Number of nodes: 613
 |--- lon
 |--- lat
- Number of edges: 1333
 |--- distance
 |--- route_id
 |--- trip_id
 |--- arrival_time
 |--- departure_time
 |--- euclidian_distance
Graph type: <class 'networkx.classes.multigraph.MultiGraph'>
- Number of nodes: 613
 |--- lon
 |--- lat
- Number of edges: 81868
 |--- distance
 |--- route_id
 |--- trip_id
 |--- arrival_time
 |--- departure_time
 |--- euclidian_distance
Graph type: <class 'networkx.classes.multidigraph.MultiDiGraph'>
- Number of nodes: 613
 |--- lon
 |--- lat
- Number of edges: 140098
 |--- distance
 |--- route_id
 |--- trip_id
 |--- arrival_time
 |--- departure_time
 |--- euclidian_distance



### Visualize the network

In [None]:
# Draw the transport network on a map.
map_network(TN)

In [None]:
# Dynamic (time-aware) view of the network.
# NOTE(review): the unit/meaning of `step` is defined by map_dynamic_network
# and is not visible here — confirm before tuning it.
map_dynamic_network(TN, step=100)

### Characteristics of the network

#### Degree analysis

In [None]:
# Node-degree analysis with data=False; the result is displayed as the cell output.
# NOTE(review): the effect of `data` is defined in the imported project code and
# is not visible here — confirm how it differs from the data=True call.
degree_analysis = compute_node_degree_analysis(TN, data=False)
degree_analysis

In [None]:
# Node-degree analysis with data=True. This rebinds `degree_analysis`, so any
# previous result stored under that name is replaced.
degree_analysis = compute_node_degree_analysis(TN, data=True)
degree_analysis

In [None]:
# Plot the degree distribution of the network.
plot_distribution_degree_analysis(TN)

#### Centrality analysis

In [None]:
# Plot the centrality analysis of the network.
plot_centrality_analysis(TN)

In [None]:
# Show the centrality analysis on a map.
map_centrality_analysis(TN)

### Robustness analysis

In [None]:
# Plot the robustness analysis of the network.
# NOTE(review): `precision=0.01` is forwarded to plot_robustness_analysis; its
# exact meaning is not visible here — confirm before changing it.
plot_robustness_analysis(TN, precision=0.01)

In [None]:
# Show the robustness analysis on a map.
map_robustness_analysis(TN)

### Machine learning pipeline to cluster the network

In [None]:
# GraphWave embedding of the graph returned by TN.get_higher_complexity().
# NOTE(review): presumably the most detailed graph variant held by TN — confirm.
gw = GraphWave()

emb_df = gw.get_embedding_df(TN.get_higher_complexity())

# Cluster the embedding computed above. The original recomputed the same
# embedding inline as the first argument, doing the work twice; reusing
# `emb_df` keeps both arguments pointing at one embedding.
clusters_dct = get_clusters(emb_df, type='kmeans', embedding=emb_df, k=4)

# Visualise the clusters in embedding space and on the map.
plot_clusters_embedding(emb_df, clusters_dct)
map_clusters(TN, clusters_dct)

### GNN pipeline to cluster the network

In [None]:
# Settings for the GNN pipeline, grouped by concern.
# NOTE(review): `args` is rebound by the later cell that builds a GNNConfig, so
# this configuration appears to be superseded when the notebook runs top to
# bottom — confirm whether this cell is still needed.
args = dict(
    # --- features and data split ---
    # node_features choices: ["degree_one_hot", "one_hot", "constant", "pagerank",
    #   "degree", "betweenness", "closeness", "eigenvector", "clustering",
    #   "position", "distance"]
    node_features=["one_hot"],
    node_attrs=None,
    # edge_attrs choices: ["distance", "dep_time", "arr_time"]
    edge_attrs=["dep_time"],
    train_ratio=0.8,
    val_ratio=0.1,

    # --- model ---
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    # model choices: ["gcn", "gin", "gat", "sage"]
    model="gat",
    layers=2,
    hidden_channels=128,
    dim_embedding=64,
    save="ssl_model.pth",

    # --- optimisation ---
    lr=0.001,
    epochs=200,
    num_workers=4,

    # --- loss and augmentations ---
    loss="infonce",
    augment_list=["edge_perturbation", "node_dropping"],
)

# Wrap the plain dict in the project's AttributeDict.
args = AttributeDict(args)

#### Create the dataset

In [None]:
# Settings handed to GNNConfig; only the keys listed here are set explicitly.
args = dict(
    # --- features and data split ---
    # node_features choices: ["degree_one_hot", "one_hot", "constant", "pagerank",
    #   "degree", "betweenness", "closeness", "eigenvector", "clustering",
    #   "position", "distance"]
    node_features=["position"],
    # edge_attrs choices (as listed originally): ["distance", "dep_time", "arr_time"]
    # NOTE(review): "departure_time" is not in that list, although it matches the
    # edge attribute name passed to TransportNetwork — confirm which spelling
    # the data builder expects.
    edge_attrs=["departure_time"],
    train_ratio=0.8,
    val_ratio=0.2,

    # --- model ---
    layers=2,
    # model choices: ["gcn", "gin", "gat", "sage"]
    model="gat",

    # --- optimisation ---
    lr=0.001,
    epochs=200,

    # --- loss and augmentations ---
    loss="infonce",
    augment_list=["edge_perturbation", "node_dropping"],
)

# Rebind `args` to the config object used by the cells below
# (they read args.device and args.lr from it).
args = GNNConfig(args)

In [None]:
# Seed torch's RNG before anything stochastic runs, so that torch-driven
# randomness (e.g. weight initialisation) is repeatable across
# Restart & Run All.
# NOTE(review): code that draws from numpy or `random` (e.g. augmentations or
# the data split) would need its own seed — confirm against the GNN modules.
SEED = 42
torch.manual_seed(SEED)

# Create data
data = create_data_from_transport_network(TN.graph, TN, args)

# Build the model from the number of node features and put it on the
# configured device.
ssl_model = SSL_GNN(data.num_node_features, args).to(args.device)

# Move data to device
data = data.to(args.device)

# Create the optimizer
optimizer = torch.optim.Adam(ssl_model.parameters(), lr=args.lr)

# Train model
train_self_supervised(data, ssl_model, optimizer, args)

In [None]:
# Embed the nodes with the trained model.
emb = get_graph_embedding(data, ssl_model)

# K-means clustering of the embedding with k=22.
comm_dct = get_clusters(emb, type="kmeans", embedding=emb, k=22)

# Show the clusters in a t-SNE plot and on the map.
plot_tsne_embedding(emb, node_cluster=comm_dct)
# The keyword names below (`custom_node_weigth`, `edge_weigth`) are spelled
# exactly as the call requires; do not "correct" them here.
map_weighted_network(
    TN,
    custom_node_weigth=comm_dct,
    edge_weigth=False,
    scale=2,
    node_size=5,
    discrete_color=True,
)