In [1]:
from preprocessing import Preprocessing as pp
from classes import transportnetwork as tn
from data import *
from model import *
from run import *
from visualisation.visualisation import *
from characterisation.page_rank import *
from networkx import NetworkXNoPath
from tqdm import tqdm
import multiprocessing as mp
import torch.optim as optim
from sklearn.metrics import roc_auc_score
import pandas as pd


In [2]:
G = pp.create_network_from_trailway("../data/Railway Data_JL.xlsx")
TN = tn.TransportNetwork(G, pos_argument=['lon', 'lat'], time_arguments=['dep_time', 'arr_time'], distance_argument='distance')

graph=TN.get_higher_complexity()

Network creation: 


100%|██████████| 69638/69638 [00:16<00:00, 4238.07it/s]


In [3]:
args = {
    "node_features" : ["degree_one_hot"], # choices are ["degree_one_hot", "one_hot", "constant", "pagerank", "degree", "betweenness", "closeness", "eigenvector", "clustering", "position", "distance"]
    "node_attrs" : None,
    "edge_attrs" : None, # choices are ["distance", "dep_time", "arr_time"]
    "train_ratio" : 0.8,
    "val_ratio" : 0.1,

    "device" : torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    "model" : "gat", # choices are ["gcn", "gin", "gat", "sage"]
    "layers" : 2,
    "hidden_channels" : 128,
    "dim_embedding" : 64,
    "save" : "ssl_model.pth",

    "lr" : 0.001,
    "epochs" : 200,
    "num_workers" : 4,

    "loss" : "infonce",
    "augment_list" : ["edge_perturbation", "node_dropping"],
}

In [4]:
class AttributeDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttributeDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args = AttributeDict(args)

data = create_data_from_transport_network(graph, TN, node_features=args.node_features, edge_attrs=args.edge_attrs, train_ratio=args.train_ratio, val_ratio=args.val_ratio, num_workers=args.num_workers)

# Open a csv in dataframe
df = pd.read_csv('../playground/charviz/robustness.csv')

# Create a tensor from the dataframe
tensor = torch.tensor(df.values).float()
# Keep only the first column
tensor = tensor[:,0]

data.y = tensor

print(tensor)


import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GNN(torch.nn.Module):
    def __init__(self, num_features, hidden_channels, num_classes):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GNN(data.num_node_features, 64, 1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()


def train(train_data):
    model.train()
    loss_all = 0
    train_data = train_data.to(device)
    optimizer.zero_grad()
    output = model(train_data.x, train_data.edge_index)
    loss = criterion(output[train_data.train_mask], train_data.y[train_data.train_mask])
    loss.backward()
    optimizer.step()
    loss_all += loss.item()
    return loss_all / len(train_data.y)

def validate(val_data):
    model.eval()
    loss_all = 0
    val_data = val_data.to(device)
    output = model(val_data.x, val_data.edge_index)
    loss = criterion(output[val_data.val_mask], val_data.y[val_data.val_mask])
    loss_all += loss.item()
    return loss_all / len(val_data.y)


train_data = data.subgraph(data.train_mask)
print(train_data)
print(train_data.num_nodes)

val_data = data.subgraph(data.val_mask)
print(val_data)
print(val_data.num_nodes)

# Training loop
for epoch in range(400):
    train_loss = train(train_data)
    val_loss = validate(val_data)
    print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

tensor([1512.,  692., 2335.,  ..., 2682., 2657., 2684.])
Data(edge_index=[2, 42082], lon=[2175], lat=[2175], dep_time=[42082], arr_time=[42082], train=[64155], train_max_speed=[64155], day=[64155], distance=[42082], euclidian_distance=[42082], num_nodes=2175, x=[2175, 481], train_mask=[2175], val_mask=[2175], y=[2175])
2175
Data(edge_index=[2, 784], lon=[271], lat=[271], dep_time=[784], arr_time=[784], train=[64155], train_max_speed=[64155], day=[64155], distance=[784], euclidian_distance=[784], num_nodes=271, x=[271, 481], train_mask=[271], val_mask=[271], y=[271])
271


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 1, Train Loss: 1220.7713, Validation Loss: 9252.6541
Epoch: 2, Train Loss: 1220.6299, Validation Loss: 9251.4437
Epoch: 3, Train Loss: 1220.4936, Validation Loss: 9250.1863
Epoch: 4, Train Loss: 1220.3433, Validation Loss: 9248.8293
Epoch: 5, Train Loss: 1220.1801, Validation Loss: 9247.3293
Epoch: 6, Train Loss: 1219.9959, Validation Loss: 9245.6356
Epoch: 7, Train Loss: 1219.8030, Validation Loss: 9243.8081
Epoch: 8, Train Loss: 1219.5852, Validation Loss: 9241.8367
Epoch: 9, Train Loss: 1219.3493, Validation Loss: 9239.7168
Epoch: 10, Train Loss: 1219.1044, Validation Loss: 9237.4419
Epoch: 11, Train Loss: 1218.8230, Validation Loss: 9235.0065
Epoch: 12, Train Loss: 1218.5266, Validation Loss: 9232.4077
Epoch: 13, Train Loss: 1218.2448, Validation Loss: 9229.6402
Epoch: 14, Train Loss: 1217.9239, Validation Loss: 9226.6993
Epoch: 15, Train Loss: 1217.5711, Validation Loss: 9223.5793
Epoch: 16, Train Loss: 1217.1910, Validation Loss: 9220.2758
Epoch: 17, Train Loss: 1216.8011,

In [5]:
def predict(new_graph):
    model.eval()  # Set the model to evaluation mode
    new_graph = new_graph.to(device)  # Move the new graph to the device (GPU or CPU)
    with torch.no_grad():  # Temporarily disable gradient calculation
        output = model(new_graph.x, new_graph.edge_index)  # Forward pass through the model
    return output.detach().cpu().numpy()  # Convert the output tensor to a NumPy array

G = pp.create_network_from_GTFS("../data/gtfs_3")
TN = tn.TransportNetwork(G, pos_argument=['lon', 'lat'], time_arguments=['dep_time', 'arr_time'])

graph=TN.get_higher_complexity()

data = create_data_from_transport_network(graph, TN, node_features=args.node_features, edge_attrs=args.edge_attrs, train_ratio=args.train_ratio, val_ratio=args.val_ratio, num_workers=args.num_workers)

predictions = predict(data)

Network creation: 


100%|██████████| 144634/144634 [00:23<00:00, 6220.27it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (613x2171 and 481x64)