In [1]:
import wandb
import math
import random
import torch, torchvision
import torch.nn as nn
import torchvision.transforms as T
import pickle
import pandas as pd
import geopandas as gpd

import gnn_io

import torch_geometric
from torch_geometric.data import Data
from torch.utils.data import DataLoader
from torch_geometric.transforms import LineGraph

from shapely.geometry import LineString
import tqdm 
import torch.nn.functional as F

from torch_geometric.data import Batch

def collate_fn(data_list):
    return Batch.from_data_list(data_list)


# Abstract

This is the current working version.
The steps are the following:

1. Load data
2. Pick a loss function
3. Split into train and test data
4. Training loop

## 1. Load data and create the dataset

In [2]:
with open('../results/results_pop_1pct_toy_example.pkl', 'rb') as f:
    results_dict = pickle.load(f)

In [3]:
class TrafficPoliciesDataset(torch.utils.data.Dataset):
    ''' Prepare the dataset for regression task
    '''
    
    def __init__(self, data_list):
        self.data_list = data_list
            
    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, i):
        return self.data_list[i]

In [4]:
datalist = []
for key, df in results_dict.items():
    if isinstance(df, pd.DataFrame):
        gdf = gpd.GeoDataFrame(df, geometry='geometry')
        gdf.crs = "EPSG:2154"  # Assuming the original CRS is EPSG:2154
        gdf.to_crs("EPSG:4326", inplace=True)
        
        nodes = []
        edges = []
        edge_attrs = []
        node_to_idx = {}
        edge_positions = []

        # Iterate through the rows of the GeoDataFrame
        for idx, row in gdf.iterrows():
            from_node = row['from_node']
            to_node = row['to_node']
            car_volume = row['vol_car']
            capacity = row['capacity']
            
            # Get coordinates from the LINESTRING geometry
            coords = row.geometry.coords
            from_position = coords[0]
            to_position = coords[-1]
            
            # Assign unique indices to nodes
            if from_node not in node_to_idx:
                node_to_idx[from_node] = len(nodes)
                nodes.append(from_node)
            if to_node not in node_to_idx:
                node_to_idx[to_node] = len(nodes)
                nodes.append(to_node)
            
            # Append edge index and attributes
            edge = (node_to_idx[from_node], node_to_idx[to_node])
            if edge not in edges:
                edges.append(edge)
                edge_attrs.append([car_volume, capacity])
                
                # Compute edge position (e.g., midpoint)
                edge_position = ((from_position[0] + to_position[0]) / 2, (from_position[1] + to_position[1]) / 2)
                edge_positions.append(edge_position)

        # Convert lists to tensors
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        edge_attr = torch.tensor(edge_attrs, dtype=torch.float)
        edge_positions_tensor = torch.tensor(edge_positions, dtype=torch.float)
        x = torch.zeros((len(nodes), 1))

        # Create Data object
        data = Data(edge_index=edge_index, edge_attr=edge_attr, x=x)
        
        # Transform to line graph
        linegraph_transformation = LineGraph()
        linegraph_data = linegraph_transformation(data)
        linegraph_data.pos = edge_positions_tensor
        linegraph_data.num_node_features = 2
        linegraph_data.y 
        
        if (linegraph_data.validate(raise_on_error=True)):
            datalist.append(linegraph_data)
        else:
            print("Invalid line graph data")

['24972409', '24972408', '5904976363', '24983651', '664205947', '24972376', '24972375', '324579210', '4964831516', '24972333', '24972382', '4964831514', '24972143', '24972144', '24972325', '24972327', '24972373', '24972410', '24972324', '24972308', '24972326', '24984240', '541421449', '541421454', '24984242', '24958023', '25183615', '94267105', '159935928', '159894354', '159893055', '125496888', '116126870', '267486763', '267486796', '271428246', '267486841', '8116443108', '8116443106', '8116443103', '967224607', '8179861622', '808532783', '94181728', '3224123114', '268234194', '1343567797', '6197561108', '320153255', '303069549', '303069550', '303069519', '27235522', '27235519', '27235518', '27235565', '6435395764', '2705349267', '356301571', '356300952', '27181019', '27181011', '356301570', '27181022', '622024413', '622024412', '6480734151', '94270046', '151351827', '151351977', '94258428', '94269206', '141789927', '94258426', '14798302', '5871094687', '137705346', '96145095', '25804

In [5]:
dataset = TrafficPoliciesDataset(data_list=datalist)

dataloader = DataLoader(dataset, batch_size = 1, shuffle = True, collate_fn=collate_fn)


tensor([[ 2.3386, 48.8518],
        [ 2.3387, 48.8524],
        [ 2.3387, 48.8524],
        ...,
        [ 2.3143, 48.8912],
        [ 2.2712, 48.8380],
        [ 2.2750, 48.8370]])
tensor([[ 2.3386, 48.8518],
        [ 2.3387, 48.8524],
        [ 2.3387, 48.8524],
        ...,
        [ 2.3143, 48.8912],
        [ 2.2712, 48.8380],
        [ 2.2750, 48.8370]])
tensor([[ 2.3386, 48.8518],
        [ 2.3387, 48.8524],
        [ 2.3387, 48.8524],
        ...,
        [ 2.3143, 48.8912],
        [ 2.2712, 48.8380],
        [ 2.2750, 48.8370]])
tensor([[ 2.3386, 48.8518],
        [ 2.3387, 48.8524],
        [ 2.3387, 48.8524],
        ...,
        [ 2.3143, 48.8912],
        [ 2.2712, 48.8380],
        [ 2.2750, 48.8370]])
tensor([[ 2.3386, 48.8518],
        [ 2.3387, 48.8524],
        [ 2.3387, 48.8524],
        ...,
        [ 2.3143, 48.8912],
        [ 2.2712, 48.8380],
        [ 2.2750, 48.8370]])
tensor([[ 2.3386, 48.8518],
        [ 2.3387, 48.8524],
        [ 2.3387, 48.8524],
      

In [19]:
for data in dataloader:
    print(data.x.shape)

torch.Size([31559, 2])
torch.Size([31559, 2])
torch.Size([31559, 2])
torch.Size([31559, 2])
torch.Size([31559, 2])
torch.Size([31559, 2])
torch.Size([31559, 2])
torch.Size([31559, 2])
torch.Size([31559, 2])


## Define the model

In [13]:
class GnnModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch_geometric.nn.GCNConv(2, 16)
        self.conv2 = torch_geometric.nn.GCNConv(16, 1)
        # self.layers = nn.Sequential(
        # nn.Linear(3, 64),
        # nn.ReLU(),
        # nn.Linear(64, 32),
        # nn.ReLU(),
        # nn.Linear(32, 1)
        # )
        
    def forward(self, x):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
        # return self.layers(x)

## Pick a loss function

In [14]:
# Initalize the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GnnModel().to(device)
# data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(10):
    for data in dataloader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

TypeError: nll_loss_nd(): argument 'target' (position 2) must be Tensor, not NoneType

In [8]:
def train(model, train_loader, optimizer, epoch):
    model.train()
    total_loss = 0
    for data, target in tqdm.tqdm(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        
        loss.backward()
        total_loss += loss.item()
        
        optimizer.step()
    
    print("Train Epoch: {}\t Loss: {:.6f}".format(epoch, total_loss / len(train_loader)))

In [9]:
# # import tqdm
# wandb.login()
for epoch in range(1):
    train(model= model, train_loader=, optimizer=optimizer, epoch = epoch)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin


NameError: name 'trainloader' is not defined