In [1]:
# =============================================================
# 1Ô∏è‚É£ IMPORTS
# =============================================================
import numpy as np
import pandas as pd
import pickle
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)


Using device: cuda


In [3]:

# =============================================================
# 2Ô∏è‚É£ FILE PATHS  ‚≠ê (CONFIRMED FROM YOUR FOLDER)
# =============================================================
csv_path = "pems_bay_final_with_extra_features.csv"
adj_path = "adj_mx_PEMS-BAY.pkl"


In [5]:
# =============================================================
# 3Ô∏è‚É£ LOAD CSV
# =============================================================
print("\nLoading CSV...")
df = pd.read_csv(csv_path, index_col="timestamp", parse_dates=True)
print("Full dataset shape:", df.shape)


Loading CSV...
Full dataset shape: (52116, 338)


In [7]:
# =============================================================
# 4Ô∏è‚É£ SELECT COLUMNS
# =============================================================

# Sensors (graph nodes)
sensor_cols = [c for c in df.columns if c.isdigit()]

# Time/Calendar features (extra features)
time_cols = [
    "hour_sin", "hour_cos",
    "dow_sin", "dow_cos",
    "weekend", "holiday"
]

traffic = df[sensor_cols].values
time_feat = df[time_cols].values

print("Traffic shape:", traffic.shape)
print("Time features shape:", time_feat.shape)

Traffic shape: (52116, 325)
Time features shape: (52116, 6)


In [9]:
# =============================================================
# 5Ô∏è‚É£ NORMALIZE TRAFFIC ONLY (IMPORTANT)
# =============================================================
mean = traffic.mean()
std = traffic.std()

traffic = (traffic - mean) / std

In [11]:
# =============================================================
# 6Ô∏è‚É£ LOAD ADJACENCY MATRIX
# =============================================================
print("\nLoading adjacency...")

with open(adj_path, "rb") as f:
    adj_data = pickle.load(f, encoding="latin1")

adj_mx = adj_data[2]  # we confirmed earlier
adj_mx = torch.tensor(adj_mx, dtype=torch.float32).to(device)

print("Adjacency shape:", adj_mx.shape)


Loading adjacency...
Adjacency shape: torch.Size([325, 325])


In [13]:
# =============================================================
# 7Ô∏è‚É£ ADD TIME FEATURES TO EVERY NODE
# =============================================================
N = traffic.shape[1]

# copy same time features to all sensors
time_feat = np.repeat(time_feat[:, None, :], N, axis=1)

traffic = traffic[..., None]

data = np.concatenate([traffic, time_feat], axis=2)

print("Combined data shape:", data.shape)
print("Features per node:", data.shape[2])

Combined data shape: (52116, 325, 7)
Features per node: 7


In [17]:
# IMPORTANT: reduce memory usage
data = data.astype(np.float32)


In [19]:
split = int(len(data) * 0.8)

train_data = data[:split]
test_data  = data[split:]

print("Train data:", train_data.shape)
print("Test data :", test_data.shape)


Train data: (41692, 325, 7)
Test data : (10424, 325, 7)


In [21]:
import torch
from torch.utils.data import Dataset, DataLoader

SEQ_LEN = 12
PRED_LEN = 3


class TrafficDataset(Dataset):

    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        return len(self.data) - SEQ_LEN - PRED_LEN

    def __getitem__(self, idx):

        # input window
        x = self.data[idx : idx+SEQ_LEN]                # (T,N,F)

        # target (flow only)
        y = self.data[idx+SEQ_LEN : idx+SEQ_LEN+PRED_LEN, :, 0]

        # reshape for GNN ‚Üí (F,N,T)
        x = x.permute(2,1,0)

        return x, y


In [23]:
train_loader = DataLoader(
    TrafficDataset(train_data),
    batch_size=32,
    shuffle=True
)

test_loader = DataLoader(
    TrafficDataset(test_data),
    batch_size=32
)


In [25]:
import torch.nn as nn

class GraphWaveNet(nn.Module):

    def __init__(self, in_channels, nodes, pred_len, adj):
        super().__init__()

        self.A = adj

        self.temporal = nn.Conv2d(in_channels, 32, kernel_size=(1,3), padding=(0,1))
        self.relu = nn.ReLU()
        self.out = nn.Conv2d(32, pred_len, kernel_size=(1,1))

    def forward(self, x):

        x = self.temporal(x)                    # temporal conv
        x = torch.einsum("bfnt,nm->bfmt", x, self.A)  # graph conv
        x = self.relu(x)
        x = self.out(x)

        x = x.mean(dim=-1)

        return x


In [27]:
device = "cuda" if torch.cuda.is_available() else "cpu"

adj_tensor = torch.tensor(adj_mx, dtype=torch.float32).to(device)

model = GraphWaveNet(
    in_channels=data.shape[2],   # 7 features
    nodes=N,
    pred_len=3,
    adj=adj_tensor
).to(device)

print("Model ready ‚úì")


Model ready ‚úì


  adj_tensor = torch.tensor(adj_mx, dtype=torch.float32).to(device)


In [29]:
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

EPOCHS = 10

for epoch in range(EPOCHS):

    model.train()
    total_loss = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        pred = model(x)
        loss = criterion(pred, y)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS}  Loss: {total_loss/len(train_loader):.4f}")


Epoch 1/10  Loss: 0.3871
Epoch 2/10  Loss: 0.3569
Epoch 3/10  Loss: 0.3519
Epoch 4/10  Loss: 0.3489
Epoch 5/10  Loss: 0.3465
Epoch 6/10  Loss: 0.3447
Epoch 7/10  Loss: 0.3441
Epoch 8/10  Loss: 0.3436
Epoch 9/10  Loss: 0.3432
Epoch 10/10  Loss: 0.3430


In [31]:
import numpy as np

model.eval()

mae, mse, count = 0, 0, 0

with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)

        pred = model(x)

        mae += torch.abs(pred-y).sum().item()
        mse += ((pred-y)**2).sum().item()
        count += y.numel()

mae /= count
rmse = np.sqrt(mse/count)

print("\nFINAL RESULTS")
print("MAE :", mae)
print("RMSE:", rmse)



FINAL RESULTS
MAE : 0.3477514338293369
RMSE: 0.622957692858842


In [33]:
real_mae  = 0.3477514338293369 * std
real_rmse = 0.622957692858842 * std

print("Real MAE :", real_mae)
print("Real RMSE:", real_rmse)


Real MAE : 3.336456300452965
Real RMSE: 5.976887273668469


In [4]:
# =============================================================
# 1Ô∏è‚É£ IMPORTS
# =============================================================
import numpy as np
import pandas as pd
import pickle
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import r2_score

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# =============================================================
# 2Ô∏è‚É£ FILE PATHS
# =============================================================
csv_path = "pems_bay_final_with_extra_features.csv"
adj_path = "adj_mx_PEMS-BAY.pkl"

# =============================================================
# 3Ô∏è‚É£ LOAD DATA
# =============================================================
print("\nLoading CSV...")
df = pd.read_csv(csv_path, index_col="timestamp", parse_dates=True)
print("Dataset shape:", df.shape)

# Sensor columns
sensor_cols = [c for c in df.columns if c.isdigit()]

# Extra time features
time_cols = [
    "hour_sin", "hour_cos",
    "dow_sin", "dow_cos",
    "weekend", "holiday"
]

traffic = df[sensor_cols].values
time_feat = df[time_cols].values

print("Traffic shape:", traffic.shape)
print("Time feature shape:", time_feat.shape)

# =============================================================
# 4Ô∏è‚É£ NORMALIZE TRAFFIC ONLY
# =============================================================
mean = traffic.mean()
std = traffic.std()
traffic = (traffic - mean) / std

# =============================================================
# 5Ô∏è‚É£ LOAD ADJACENCY MATRIX
# =============================================================
print("\nLoading adjacency matrix...")
with open(adj_path, "rb") as f:
    adj_data = pickle.load(f, encoding="latin1")

adj_mx = adj_data[2]
adj_mx = torch.tensor(adj_mx, dtype=torch.float32).to(device)

print("Adjacency shape:", adj_mx.shape)

# =============================================================
# 6Ô∏è‚É£ ADD TIME FEATURES TO EACH NODE
# =============================================================
N = traffic.shape[1]

time_feat = np.repeat(time_feat[:, None, :], N, axis=1)
traffic = traffic[..., None]

data = np.concatenate([traffic, time_feat], axis=2)
data = data.astype(np.float32)

print("Combined data shape:", data.shape)

# =============================================================
# 7Ô∏è‚É£ TRAIN / TEST SPLIT
# =============================================================
split = int(len(data) * 0.8)
train_data = data[:split]
test_data  = data[split:]

print("Train:", train_data.shape)
print("Test :", test_data.shape)

# =============================================================
# 8Ô∏è‚É£ DATASET CLASS
# =============================================================
SEQ_LEN = 12
PRED_LEN = 3

class TrafficDataset(Dataset):

    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        return len(self.data) - SEQ_LEN - PRED_LEN

    def __getitem__(self, idx):
        x = self.data[idx : idx+SEQ_LEN]                 # (T,N,F)
        y = self.data[idx+SEQ_LEN : idx+SEQ_LEN+PRED_LEN, :, 0]

        x = x.permute(2,1,0)                             # (F,N,T)
        return x, y

train_loader = DataLoader(TrafficDataset(train_data), batch_size=32, shuffle=True)
test_loader  = DataLoader(TrafficDataset(test_data), batch_size=32)

# =============================================================
# 9Ô∏è‚É£ LIGHT STGNN MODEL
# =============================================================
class LightSTGNN(nn.Module):

    def __init__(self, in_channels, pred_len, adj):
        super().__init__()

        self.A = adj

        # Temporal Convolution
        self.temporal = nn.Conv2d(in_channels, 32, kernel_size=(1,3), padding=(0,1))

        # Activation
        self.relu = nn.ReLU()

        # Output Projection
        self.out = nn.Conv2d(32, pred_len, kernel_size=(1,1))

    def forward(self, x):

        # Temporal modeling
        x = self.temporal(x)

        # Graph propagation
        x = torch.einsum("bfnt,nm->bfmt", x, self.A)

        # Non-linearity
        x = self.relu(x)

        # Map to prediction
        x = self.out(x)

        # Aggregate time dimension
        x = x.mean(dim=-1)

        return x

model = LightSTGNN(
    in_channels=data.shape[2],
    pred_len=PRED_LEN,
    adj=adj_mx
).to(device)

print("Light STGNN Model Ready ‚úì")

# =============================================================
# üîü TRAINING
# =============================================================
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

EPOCHS = 10

for epoch in range(EPOCHS):

    model.train()
    total_loss = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        pred = model(x)
        loss = criterion(pred, y)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS}  Loss: {total_loss/len(train_loader):.4f}")

# Save model
torch.save(model.state_dict(), "light_stgnn_model.pth")

# =============================================================
# 1Ô∏è‚É£1Ô∏è‚É£ EVALUATION
# =============================================================
model.eval()

all_preds = []
all_true  = []

with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        pred = model(x)

        all_preds.append(pred.cpu().numpy())
        all_true.append(y.cpu().numpy())

y_pred = np.concatenate(all_preds, axis=0)
y_true = np.concatenate(all_true, axis=0)

y_pred = y_pred.reshape(-1)
y_true = y_true.reshape(-1)

# Metrics
mae  = np.mean(np.abs(y_pred - y_true))
rmse = np.sqrt(np.mean((y_pred - y_true)**2))

mask = y_true != 0
mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

r2 = r2_score(y_true, y_pred)

print("\nüìä NORMALIZED RESULTS")
print("MAE  :", mae)
print("RMSE :", rmse)
print("MAPE :", mape)
print("R2   :", r2)

# Real-scale results
real_mae  = mae * std
real_rmse = rmse * std

print("\nüìä REAL SCALE RESULTS")
print("Real MAE  :", real_mae)
print("Real RMSE :", real_rmse)


Using device: cuda

Loading CSV...
Dataset shape: (52116, 338)
Traffic shape: (52116, 325)
Time feature shape: (52116, 6)

Loading adjacency matrix...
Adjacency shape: torch.Size([325, 325])
Combined data shape: (52116, 325, 7)
Train: (41692, 325, 7)
Test : (10424, 325, 7)
Light STGNN Model Ready ‚úì
Epoch 1/10  Loss: 0.3876
Epoch 2/10  Loss: 0.3585
Epoch 3/10  Loss: 0.3531
Epoch 4/10  Loss: 0.3509
Epoch 5/10  Loss: 0.3491
Epoch 6/10  Loss: 0.3477
Epoch 7/10  Loss: 0.3467
Epoch 8/10  Loss: 0.3460
Epoch 9/10  Loss: 0.3454
Epoch 10/10  Loss: 0.3448

üìä NORMALIZED RESULTS
MAE  : 0.35102132
RMSE : 0.63656336
MAPE : 213.11461925506592
R2   : 0.6001818776130676

üìä REAL SCALE RESULTS
Real MAE  : 3.367828798198785
Real RMSE : 6.107425099669173
