In [1]:
import wandb
import math
import random
import torch, torchvision
import torch.nn as nn
import torchvision.transforms as T
import pickle
import pandas as pd
import geopandas as gpd

import gnn_io

In [2]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Load the dictionary
with open('../results/result_dic.pkl', 'rb') as f:
    results_dict = pickle.load(f)

datasets = []
for key, df in results_dict.items():
    print(f"Policy: {key}")
    
    if isinstance(df, pd.DataFrame):
        gdf = gpd.GeoDataFrame(df, geometry='geometry')
        gdf.crs = "EPSG:2154"  # Assuming the original CRS is EPSG:2154
        gdf.to_crs("EPSG:4326", inplace=True)
        edge_index, car_volume_tensor, policy_tensor, nodes = gnn_io.create_edge_index_and_tensors(gdf)
        datasets.append((policy_tensor, car_volume_tensor))
    else:
        print(f"The value for key '{key}' is not a GeoDataFrame.")

Policy: policy introduced in Arrondissement(s) 5, 6
Policy: policy introduced in Arrondissement(s) 1, 2, 3
Policy: policy introduced in Arrondissement(s) 3, 4
Policy: policy introduced in Arrondissement(s) 16, 17, 18
Policy: policy introduced in Arrondissement(s) 13, 14, 15
Policy: policy introduced in Arrondissement(s) 8, 9, 10, 11
Policy: policy introduced in Arrondissement(s) 12, 13
Policy: policy introduced in Arrondissement(s) 2, 3, 4, 5, 6, 7
Policy: policy introduced in Arrondissement(s) 9, 10, 11
Policy: policy introduced in Arrondissement(s) 15, 16, 17, 18, 19
Policy: policy introduced in Arrondissement(s) 7, 8, 9, 10, 11, 12
Policy: policy introduced in Arrondissement(s) 13, 14
Policy: policy introduced in Arrondissement(s) 2, 3, 4, 5, 6
Policy: policy introduced in Arrondissement(s) 5
Policy: policy introduced in Arrondissement(s) 2
Policy: policy introduced in Arrondissement(s) 11, 12, 13, 14
Policy: policy introduced in Arrondissement(s) 12, 13, 14, 15
Policy: policy intro

In [4]:
from two_channel_edge_gnn import TwoChannelEdgeGNN

policy_input_dim = 3  # Dimensionality of policy features: capacity, freespeed flow, modes
traffic_input_dim = 1  # Dimensionality of traffic flow features
hidden_dim = 32  # Dimensionality of hidden representations
num_nodes = max(max(edge_index[0]), max(edge_index[1])) + 1  # Number of nodes in the graph

device = "cuda:0" if torch.cuda.is_available() else "cpu"

def get_dataloader(is_train, batch_size, slice=5):
    "Get a training dataloader"
    # full_dataset = torchvision.datasets.MNIST(root=".", train=is_train, transform=T.ToTensor(), download=True)
    # sub_dataset = torch.utils.data.Subset(full_dataset, indices=range(0, len(full_dataset), slice))
    loader = torch.utils.data.DataLoader(dataset=datasets, 
                                         batch_size=batch_size, 
                                         shuffle=True if is_train else False, 
                                         pin_memory=True, num_workers=2)
    return loader

def get_model(policy_input_d, traffic_input_d, hidden_d):
    model = TwoChannelEdgeGNN(policy_input_d, traffic_input_d, hidden_d)
    return model

def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):
    "Compute performance of the model on the validation dataset and log a wandb.Table"
    model.eval()
    val_loss = 0.
    with torch.inference_mode():
        correct = 0
        for i, (policy_features, flow_targets) in enumerate(valid_dl):
            data, labels = policy_features.float(), flow_targets.float().unsqueeze(1)
            num_edges = edge_index.shape[1]
            adjacency_matrix = gnn_io.create_edge_adjacency_matrix(edge_index, num_edges)

            # Forward pass ➡
            outputs = model(data, labels, adjacency_matrix)
            val_loss += loss_func(outputs, labels)*labels.size(0)

            # Compute accuracy and accumulate
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()

            # Log one batch of images to the dashboard, always same batch_idx.
            if i==batch_idx and log_images:
                log_image_table(data, predicted, labels, outputs.softmax(dim=1))
    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)

def log_image_table(images, predicted, labels, probs):
    "Log a wandb.Table with (img, pred, target, scores)"
    # 🐝 Create a wandb Table to log images, labels and predictions to
    table = wandb.Table(columns=["image", "pred", "target"]+[f"score_{i}" for i in range(10)])
    for img, pred, targ, prob in zip(images.to("cpu"), predicted.to("cpu"), labels.to("cpu"), probs.to("cpu")):
        table.add_data(wandb.Image(img[0].numpy()*255), pred, targ, *prob.numpy())
    wandb.log({"predictions_table":table}, commit=False)

In [5]:
policy_input_dim = 3  # Dimensionality of policy features: capacity, freespeed flow, modes
traffic_input_dim = 1  # Dimensionality of traffic flow features
hidden_dim = 32  # Dimensionality of hidden representations
num_nodes = max(max(edge_index[0]), max(edge_index[1])) + 1  # Number of nodes in the graph


def normalize(tensor):
    mean = tensor.mean(dim=0, keepdim=True)
    std = tensor.std(dim=0, keepdim=True) + 1e-6  # Add a small epsilon to avoid division by zero
    return (tensor - mean) / std

# Launch 5 experiments, trying different dropout rates
for _ in range(5):
    # 🐝 initialise a wandb run
    wandb.init(
        project="my_project",
        config={
            "epochs": 10,
            "batch_size": 20,
            "lr": 1e-3,
            "dropout": random.uniform(0.01, 0.80),
            })
    
    # Copy your config 
    config = wandb.config

    # Get the data
    train_dl = get_dataloader(is_train=True, batch_size=config.batch_size)
    valid_dl = get_dataloader(is_train=False, batch_size=2*config.batch_size)
    n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)
    
    # A simple GNN model
    # model = get_model(policy_input_dim, traffic_input_dim, hidden_dim)
    model = TwoChannelEdgeGNN(policy_input_dim, traffic_input_dim, hidden_dim)

    # Make the loss and optimizer
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

   # Training
    example_ct = 0
    step_ct = 0
    for epoch in range(config.epochs):
        model.train()
        for step, (policy_features, flow_targets) in enumerate(train_dl):
            data, labels = normalize(policy_features.float()), normalize(flow_targets.float().unsqueeze(2))
            print(data.shape)
            print(labels.shape)
            
            num_edges = edge_index.shape[1]
            
            adjacency_matrix = gnn_io.create_edge_adjacency_matrix(edge_index, num_edges)
            
            outputs = model(data, labels, adjacency_matrix)

            train_loss = loss_func(outputs, labels)
            
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            
            example_ct += len(data)
            metrics = {"train/train_loss": train_loss, 
                       "train/epoch": (step + 1 + (n_steps_per_epoch * epoch)) / n_steps_per_epoch, 
                       "train/example_ct": example_ct}
            
            if step + 1 < n_steps_per_epoch:
                # 🐝 Log train metrics to wandb 
                wandb.log(metrics)
                
            step_ct += 1

        val_loss, accuracy = validate_model(model, valid_dl, loss_func, log_images=(epoch==(config.epochs-1)))

        # 🐝 Log train and validation metrics to wandb
        val_metrics = {"val/val_loss": val_loss, 
                       "val/val_accuracy": accuracy}
        wandb.log({**metrics, **val_metrics})
        
        print(f"Train Loss: {train_loss:.3f}, Valid Loss: {val_loss:3f}, Accuracy: {accuracy:.2f}")

    # If you had a test set, this is how you could log it as a Summary metric
    wandb.summary['test_accuracy'] = 0.8

    # 🐝 Close your wandb run 
    wandb.finish()

torch.Size([20, 31216, 3])
torch.Size([20, 31216, 1])
policy_hidden shape: torch.Size([20, 31216, 32])
traffic_hidden shape: torch.Size([20, 31216, 32])


  adj_matrix = torch.sparse.FloatTensor(indices, values, torch.Size([num_edges, num_edges]))


torch.Size([20, 31216, 3])
torch.Size([20, 31216, 1])
policy_hidden shape: torch.Size([20, 31216, 32])
traffic_hidden shape: torch.Size([20, 31216, 32])
torch.Size([20, 31216, 3])
torch.Size([20, 31216, 1])
policy_hidden shape: torch.Size([20, 31216, 32])
traffic_hidden shape: torch.Size([20, 31216, 32])
torch.Size([20, 31216, 3])
torch.Size([20, 31216, 1])
policy_hidden shape: torch.Size([20, 31216, 32])
traffic_hidden shape: torch.Size([20, 31216, 32])
torch.Size([20, 31216, 3])
torch.Size([20, 31216, 1])
policy_hidden shape: torch.Size([20, 31216, 32])
traffic_hidden shape: torch.Size([20, 31216, 32])
torch.Size([5, 31216, 3])
torch.Size([5, 31216, 1])
policy_hidden shape: torch.Size([5, 31216, 32])
traffic_hidden shape: torch.Size([5, 31216, 32])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (40x31216 and 1x32)