In [1]:
import wandb
import math
import random
import torch, torchvision
import torch.nn as nn
import torchvision.transforms as T
import pickle
import pandas as pd
import geopandas as gpd

import gnn_io

import torch_geometric
from torch_geometric.data import Data
from torch_geometric.transforms import LineGraph

from shapely.geometry import LineString

# Abstract

This is the current working version.
The steps are the following:

1. Load graph
2. How to model everything?
3. Compute the LineGraph
4. Split into train and testdata
5. Pick loss function
6. Training loop

In [2]:
with open('../results/results_pop_1pct_toy_example.pkl', 'rb') as f:
    results_dict = pickle.load(f)

In [31]:
# Assuming 'results_dict' is already defined
for key, df in results_dict.items():
    if isinstance(df, pd.DataFrame):
        gdf = gpd.GeoDataFrame(df, geometry='geometry')
        gdf.crs = "EPSG:2154"  # Assuming the original CRS is EPSG:2154
        gdf.to_crs("EPSG:4326", inplace=True)
        
        nodes = []
        edges = []
        edge_attrs = []
        node_to_idx = {}

        # Iterate through the rows of the GeoDataFrame
        for idx, row in gdf.iterrows():
            from_node = row['from_node']
            to_node = row['to_node']
            car_volume = row['vol_car']
            capacity = row['capacity']
            
            # Assign unique indices to nodes
            if from_node not in node_to_idx:
                node_to_idx[from_node] = len(nodes)
                nodes.append(from_node)
            if to_node not in node_to_idx:
                node_to_idx[to_node] = len(nodes)
                nodes.append(to_node)
            
            # Append edge index and attributes
            edge = (node_to_idx[from_node], node_to_idx[to_node])
            if edge not in edges:
                edges.append(edge)
                edge_attrs.append([car_volume, capacity])

        # Convert lists to tensors
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        edge_attr = torch.tensor(edge_attrs, dtype=torch.float)
        x = torch.zeros((len(nodes), 1))

        # Create Data object
        data = Data(edge_index=edge_index, edge_attr=edge_attr, x=x)
        
        print("Original graph data: ")
        print(data)
        
        # Transform to line graph
        linegraph_transformation = LineGraph()
        linegraph_data = linegraph_transformation(data)

        print("Line graph data: ")
        print(linegraph_data)

        # Print details to verify the transformation
        print("Linegraph edge index:")
        print(linegraph_data.edge_index)   
        
        print("Linegraph edge attr:")
        print(linegraph_data.edge_attr)  # This should be None as LineGraph doesn't preserve edge_attr directly
        
        print("Number of nodes in original graph:")
        print(data.num_nodes)  # should be len(nodes)
        
        print("Number of edges in original graph:")
        print(data.num_edges)  # should be len(edges)
        
        print("Number of nodes in line graph:")
        print(linegraph_data.num_nodes)  # should be len(edges)

        print("Linegraph x:")
        print(linegraph_data.x)  # x should be edge_attr from original data if set

        break

Original graph data: 
Data(x=[18425, 1], edge_index=[2, 31559], edge_attr=[31559, 2])
Line graph data: 
Data(x=[31559, 2], edge_index=[2, 59851], num_nodes=31559)
Linegraph edge index:
tensor([[    0,     1,     1,  ..., 31557, 31558, 31558],
        [    2, 13470, 13471,  ..., 31555, 31557, 31558]])
Linegraph edge attr:
None
Number of nodes in original graph:
18425
Number of edges in original graph:
31559
Number of nodes in line graph:
31559
Linegraph x:
tensor([[  56.0000,  480.0000],
        [  54.0000,  480.0000],
        [  27.0000,  960.0000],
        ...,
        [   0.0000, 7999.2002],
        [   0.0000, 7999.2002],
        [   0.0000, 7999.2002]])


In [3]:
len(results_dict.keys())

9

In [7]:
# X: Policy vector
# y: Flow targets

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, X, y):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            X = torch.from_numpy(X)
            y = torch.from_numpy(y)
        self.X = X
        self.y = y
            
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, i):
        return self.X[i], self.y[i]
    
dataset = MyDataset(final_policy_tensor, final_car_volume_tensor)

In [9]:
trainloader = torch.utils.data.DataLoader(dataset, batch_size = 10, shuffle = True, num_workers = 1) 

In [11]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [12]:
class GnnModel(nn.Module):
    def __init__(self):
        super(GnnModel, self).__init__()
        self.layers = nn.Sequential(
        nn.Linear(3, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, 1)
        )
        
    def forward(self, x):
        return self.layers(x)
    
model = GnnModel()

In [13]:
# Define the loss function and optimizer
loss_function = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [14]:
import tqdm 
import torch.nn.functional as F

def train(model, train_loader, optimizer, epoch):
    model.train()
    total_loss = 0
    for data, target in tqdm.tqdm(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        
        loss.backward()
        total_loss += loss.item()
        
        optimizer.step()
    
    print("Train Epoch: {}\t Loss: {:.6f}".format(epoch, total_loss / len(train_loader)))

In [15]:
# import tqdm
wandb.login()
for epoch in range(5):
    train(model= model, train_loader=trainloader, optimizer=optimizer, epoch = epoch)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin
  0%|          | 0/28472 [00:00<?, ?it/s]Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/elenanatterer/anaconda3/envs/Paris_Analysis/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/elenanatterer/anaconda3/envs/Paris_Analysis/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'MyDataset' on <module '__main__' (built-in)>
  0%|          | 0/28472 [00:01<?, ?it/s]


RuntimeError: DataLoader worker (pid(s) 5118) exited unexpectedly