In [1]:

import argparse
import json
import math
import multiprocessing
import os
import time
from contextlib import contextmanager
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parallel import DistributedDataParallel as DDP
from torch_geometric.data import Data
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import GCNConv, GraphConv, GATConv

In [3]:
# Set random seed for reproducibility
torch.manual_seed(1)
#print available GPUs
print(torch.cuda.device_count())

data_folder_path = ""
nodes = pd.read_csv(os.path.join(data_folder_path, "nodes.csv"))
edges = pd.read_csv(os.path.join(data_folder_path, "edges.csv"))
edge_attributes = pd.read_csv(os.path.join(data_folder_path, "edges_attributes.csv"))
node_split = pd.read_csv(os.path.join(data_folder_path, "splits.csv"))
pos = pd.read_csv(os.path.join(data_folder_path, "pos.csv"))

# edges features were not used.
# pos includes the x and y coordinates of the nodes
edge_attributes["lanes"] = edge_attributes["lanes"].astype(float)
edge_attributes["oneway"] = edge_attributes["oneway"].astype(float)
# fill maxspeed nan values with 0
edge_attributes["maxspeed"] = edge_attributes["maxspeed"].fillna(0).astype(float)

# Min-Max normalize maxspeed
edge_attributes["maxspeed"] = (edge_attributes["maxspeed"] - edge_attributes["maxspeed"].min()) / (edge_attributes["maxspeed"].max() - edge_attributes["maxspeed"].min())
# Min-Max normalize the attributes length and lanes
edge_attributes["length"] = (edge_attributes["length"] - edge_attributes["length"].min()) / (edge_attributes["length"].max() - edge_attributes["length"].min())
edge_attributes["lanes"] = (edge_attributes["lanes"] - edge_attributes["lanes"].min()) / (edge_attributes["lanes"].max() - edge_attributes["lanes"].min())


attributes = nodes.drop(columns=["accident_score"])
target = nodes["accident_score"]

x = torch.tensor(attributes.values, dtype=torch.float)
y = torch.tensor(target.values, dtype=torch.float)
edges = torch.tensor(edges.values, dtype=torch.long).t().contiguous()
edge_attr = torch.tensor(edge_attributes.values, dtype=torch.float)

pos = torch.tensor(pos.values, dtype=torch.float)  # assuming pos["x"] and pos["y"] are the columns in pos DataFrame

dataset = Data(
    x=x, y=y, edge_index=edges, edge_attr=edge_attr, pos=pos,
    test_mask=torch.tensor(node_split["test"].values, dtype=torch.bool),
    val_mask=torch.tensor(node_split["validation"].values, dtype=torch.bool),
    train_mask=torch.tensor(node_split["train"].values, dtype=torch.bool)
)

batch_size = 2024*16  # Adjust based on your GPU memory

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class GNNModel(nn.Module):
    def __init__(self, n_layers, n_units_l0, dropout_l0, n_units_l1, dropout_l1):
        super(GNNModel, self).__init__()
        self.layers = nn.ModuleList()
        in_features = dataset.num_node_features
        for i in range(n_layers):
            if i == 0:
                out_features = n_units_l0
                dropout = dropout_l0
            else:
                out_features = n_units_l1
                dropout = dropout_l1
            self.layers += [GraphConv(in_features, out_features), nn.ReLU(), nn.Dropout(dropout)]
            in_features = out_features
            

        self.layers.append(GraphConv(in_features, 1))

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        for layer in self.layers:
            if isinstance(layer, GraphConv):
                x = layer(x, edge_index)
            else:
                x = layer(x)
        return x.squeeze()
    

# Use the provided parameters
params = {'n_layers': 2,
 'n_units_l0': 110,
 'dropout_l0': 0.26069098480733804,
 'n_units_l1': 53,
 'dropout_l1': 0.26847623784478303,
 'optimizer': 'AdamW',
 'lr': 0.008557674940641,
 'weight_decay': 0.04690895678009,
 'epochs': 53}


model = GNNModel(params['n_layers'], params['n_units_l0'], params['dropout_l0'], params['n_units_l1'], params['dropout_l1'])
model = model.to(device)

optimizer_name = params['optimizer']
lr = params['lr']
weight_decay = params['weight_decay']

optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

train_loader = NeighborLoader(dataset, input_nodes=dataset.train_mask, num_neighbors=[10]*params['n_layers'], batch_size=batch_size, shuffle=True)
val_loader = NeighborLoader(dataset,input_nodes=dataset.val_mask, num_neighbors=[10]*params['n_layers'], batch_size=batch_size, shuffle=False)
test_loader = NeighborLoader(dataset, input_nodes=dataset.test_mask, num_neighbors=[10]*params['n_layers'], batch_size=batch_size, shuffle=False)



for epoch in range(params['epochs']):
    model.train()
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.mse_loss(output, data.y)
        loss.backward()
        optimizer.step()

# Validation
model.eval()
total_loss = 0
with torch.no_grad():
    for data in val_loader:
        data = data.to(device)
        output = model(data)
        loss = F.mse_loss(output, data.y)
        total_loss += loss.item()
print(f"Epoch: {epoch+1}, Validation Loss: {total_loss/len(val_loader)}")

# Testing
model.eval()
total_loss = 0
with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        loss = F.mse_loss(output, data.y)
        total_loss += loss.item()
print(f"Test Loss: {total_loss/len(test_loader)}")


# Evaluate the model on test data
# Evaluate the model on test data
model.eval()
true_values = []
prediction_values = []
positions_x = []
positions_y = []
addt_values = []
with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        true_values.extend(data.y.tolist())
        prediction_values.extend(output.tolist())
        positions_x.extend(data.pos[:, 0].tolist())
        positions_y.extend(data.pos[:, 1].tolist())
        addt_values.extend(data.x[:, 0].tolist())

        
# Create a dataframe with the true values, predicted values, and positions
df = pd.DataFrame({'true_values': true_values, 'predicted_values': prediction_values, 'pos_x': positions_x, 'pos_y': positions_y, 'addt_values': addt_values})



1
Epoch: 53, Validation Loss: 0.0003211244866179186
Test Loss: 0.0006613911486615856
