In [None]:
# ===============================
# Setup
# ===============================
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.neighbors import NearestNeighbors

# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)


Device: cpu


In [None]:
# ===============================
# Load Dataset
# ===============================
# Absolute path of the CSV file to load.
CSV_PATH = "/content/urban_traffic_flow_randomized (1).csv"
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

# Quick sanity check of what was read: dimensions, column names, first five rows.
print("Dataset shape:", df.shape)
print("Columns:", list(df.columns))
print(df.head(n=5))


Dataset shape: (200, 24)
Columns: ['Timestamp', 'Location', 'Vehicle_Count', 'Vehicle_Speed', 'Congestion_Level', 'Peak_Off_Peak', 'Target_Vehicle_Count', 'Temperature', 'Rainfall_mm', 'Visibility_km', 'Wind_Speed', 'Road_Type', 'Num_Lanes', 'Traffic_Signal', 'Accident_Flag', 'Day_of_Week', 'Hour_of_Day', 'Season', 'Nearby_PT_Stop', 'PT_Frequency', 'Pollution_AQI', 'Noise_Level_dB', 'Vehicle_Density', 'Speed_Variance']
             Timestamp   Location  Vehicle_Count  Vehicle_Speed  \
0  2024-01-01 00:00:00  Sensor_02             63      55.680777   
1  2024-01-01 00:15:00  Sensor_04             50      76.680379   
2  2024-01-01 00:30:00  Sensor_02             42      48.598038   
3  2024-01-01 00:45:00  Sensor_02             55      59.796931   
4  2024-01-01 01:00:00  Sensor_01             46      48.094884   

   Congestion_Level Peak_Off_Peak  Target_Vehicle_Count  Temperature  \
0                 1      Off-Peak                    50           21   
1                 0      Off-P

In [None]:
# ===============================
#Detect node/flow columns
# ===============================
# Lower-cased column names, index-aligned with df.columns, so candidates can be
# matched case-insensitively and mapped back to the original spelling.
cols = [c.lower() for c in df.columns]


def _first_matching_column(columns, lowered, candidates):
    """Return the original name of the first candidate found in the columns.

    Args:
        columns: original column labels (e.g. ``df.columns``).
        lowered: the same labels lower-cased, in the same order as ``columns``.
        candidates: lower-case names to look for, in priority order.

    Returns:
        The matching label from ``columns``, or ``None`` when no candidate matches.
    """
    for cand in candidates:
        if cand in lowered:
            return columns[lowered.index(cand)]
    return None


sensor_col = 'Location'  # node identifier column; fixed rather than auto-detected

# Every lookup uses "first candidate wins", so the priority order written in
# each list is what actually decides the result.
flow_col = _first_matching_column(
    df.columns, cols, ['flow', 'traffic_flow', 'vehicle_count', 'count', 'volume']
)
lat_col = _first_matching_column(df.columns, cols, ['lat', 'latitude'])
lon_col = _first_matching_column(df.columns, cols, ['lon', 'lng', 'longitude'])

print("Detected:", sensor_col, flow_col, lat_col, lon_col)

# Fail here with a readable message instead of letting a missing column surface
# later as an opaque groupby error. Coordinates stay optional (None is allowed).
if sensor_col not in df.columns:
    raise KeyError(
        f"Sensor column {sensor_col!r} not found; available columns: {df.columns.tolist()}"
    )
if flow_col is None:
    raise ValueError(
        "No flow column detected; expected one of "
        "'flow', 'traffic_flow', 'vehicle_count', 'count', 'volume' (case-insensitive)."
    )

Detected: Location Vehicle_Count None None


In [None]:
# ===============================
# Build node features
# ===============================
# One node per distinct sensor id. The sorted order fixes the node index that
# every per-node array must follow (the coordinate lookup in the edge-building
# cell indexes by unique_sensors as well). Missing ids are dropped because
# groupby ignores them too, so they could never receive a feature row.
unique_sensors = sorted(df[sensor_col].dropna().unique())
N = len(unique_sensors)
print("Num sensors (nodes):", N)

# Node feature = mean flow per sensor. Select rows by unique_sensors explicitly
# so row i always belongs to node i, rather than relying on groupby's default
# key ordering happening to match.
mean_flow = df.groupby(sensor_col)[flow_col].mean().loc[unique_sensors]
features = mean_flow.to_numpy(dtype=np.float32).reshape(-1, 1)  # (num_nodes, feat_dim)
assert features.shape == (N, 1), "expected exactly one feature row per sensor"

x = torch.tensor(features, dtype=torch.float32).to(device)  # node features

Num sensors (nodes): 5


In [None]:
# ===============================
#Build edges (k-NN on coords if available)
# ===============================
# Node positions for the k-NN search, one row per entry of unique_sensors.
if lat_col and lon_col:
    coords_df = df.groupby(sensor_col)[[lat_col, lon_col]].first().loc[unique_sensors].values
else:
    # No coordinate columns were detected: use the node index as a 1-D position,
    # so "nearest" means closest in sorted sensor order.
    coords_df = np.arange(N).reshape(-1, 1)

# Each node links to at most 5 neighbours; fewer when there are not enough nodes.
# (NearestNeighbors is already imported in the setup cell.)
k = min(5, N - 1) if N > 1 else 0

# Collect directed edges in a set. When i and j are in each other's neighbour
# lists, the pair (i, j)/(j, i) would otherwise be appended twice, and the
# sum aggregation in MPNNLayer would count those messages twice.
edge_set = set()
if k > 0:
    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='auto').fit(coords_df)
    distances, indices = nbrs.kneighbors(coords_df)
    for i in range(N):
        # k + 1 neighbours were requested so the node itself can be discarded.
        # Filter it out by value instead of assuming it sits in column 0: with
        # duplicate coordinates another node may be returned first.
        neighbours = [int(j) for j in indices[i] if int(j) != i][:k]
        for j in neighbours:
            edge_set.add((i, j))
            edge_set.add((j, i))

# Sorted so the edge order is the same on every run.
edges = sorted(edge_set)

# reshape(-1, 2) keeps the result at shape (2, 0) when there are no edges, so
# code that unpacks `src, dst = edge_index` still works for a single-node graph.
edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous().to(device)
print("Built edge_index shape:", edge_index.shape)

Built edge_index shape: torch.Size([2, 40])


In [None]:
#------------------
# Define MPNNs
#------------------
import torch.nn as nn
import torch.nn.functional as F

class Graph:
    """Plain container pairing a graph's connectivity with its node features.

    Attributes:
        x: node feature tensor, stored exactly as passed in.
        edge_index: edge tensor, stored exactly as passed in.
    """

    def __init__(self, edge_index: torch.LongTensor, x: torch.FloatTensor):
        # Both tensors are kept by reference; nothing is copied or validated.
        self.x = x
        self.edge_index = edge_index

class MPNNLayer(nn.Module):
    """One round of message passing with a GRU-style node update.

    For every edge, a two-layer MLP turns the concatenated features of the two
    endpoint nodes into a message. Messages are summed at the edge's second
    endpoint, and a GRUCell then updates each node, taking the summed message
    as its input and the node's current features as its hidden state.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        pair_dim = 2 * hidden_dim  # width of a concatenated (first, second) endpoint pair
        self.msg = nn.Sequential(
            nn.Linear(pair_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )
        self.update = nn.GRUCell(hidden_dim, hidden_dim)

    def forward(self, x, edge_index):
        """Run one message-passing step.

        Args:
            x: node features, expected to be (num_nodes, hidden_dim).
            edge_index: two rows of node indices; row 0 holds each edge's
                sender and row 1 its receiver.

        Returns:
            Updated node features with the same shape as ``x``.
        """
        senders, receivers = edge_index
        pair_features = torch.cat((x[senders], x[receivers]), dim=-1)
        messages = self.msg(pair_features)
        # Sum the incoming messages per receiving node; nodes with no incoming
        # edge keep an all-zero aggregate.
        inbox = x.new_zeros(x.shape).index_add(0, receivers, messages)
        return self.update(inbox, x)

class MPNN(nn.Module):
    """Stack of MPNNLayer blocks between a linear embedding and a linear head.

    Args:
        in_dim: size of each node's input feature vector.
        hidden_dim: width used by the embedding and every message-passing layer.
        out_dim: size of the per-node output produced by the head.
        num_layers: how many MPNNLayer blocks to apply in sequence.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, num_layers=2):
        super().__init__()
        self.embed = nn.Linear(in_dim, hidden_dim)
        message_layers = [MPNNLayer(hidden_dim) for _ in range(num_layers)]
        self.layers = nn.ModuleList(message_layers)
        self.head = nn.Linear(hidden_dim, out_dim)

    def forward(self, g: Graph):
        """Embed ``g.x``, apply each layer followed by ReLU, then the output head."""
        hidden = self.embed(g.x)
        connectivity = g.edge_index
        for mp_layer in self.layers:
            hidden = mp_layer(hidden, connectivity).relu()
        return self.head(hidden)


In [None]:
# Example forward pass
# Seed PyTorch before building the model: the weights are randomly initialised,
# so without a seed the printed output changes on every run.
torch.manual_seed(0)

g = Graph(edge_index=edge_index, x=x)   # x is the node feature tensor
# Read in_dim from the feature matrix instead of hard-coding 1, so the cell
# keeps working if more per-node features are added.
model = MPNN(in_dim=x.shape[1], hidden_dim=32, out_dim=1, num_layers=2).to(device)
model.eval()  # inference only; no training happens in this cell

with torch.no_grad():
    out = model(g)

# Show the first five nodes: raw input feature next to the model's per-node output.
print("Input features:", x[:5].cpu().numpy().ravel())
print("Output embeddings:", out[:5].cpu().numpy().ravel())


Input features: [49.65     50.65306  48.78788  50.975    50.605263]
Output embeddings: [2.531786  2.547937  2.5184045 2.5536969 2.5471082]
