In [1]:
import os
# if os.getcwd().split('/')[-1] != 'scripts':
#     %cd scripts
#     print('Changed directory to scripts')

import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping

#from model import Model
from at_model import Model
#from multi_at_model import Model
from train_model import TrainModel
from data_module import DataModule

# Set seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [2]:
# Load adjacency matrix
adj_path = "./data/los_adj.csv"
adj_df = pd.read_csv(adj_path, header=None)
print("Adjacency Matrix Loaded:", adj_df.shape)

# Load feature matrix
feat_path = "./data/los_speed.csv"
feat_df = pd.read_csv(feat_path, header=None)
print("Feature Matrix Loaded:", feat_df.shape)

Adjacency Matrix Loaded: (207, 207)
Feature Matrix Loaded: (2017, 207)


In [3]:
def train_model_with_pre_len(pre_len, feat_path, adj_path):
    print(f"Training model with pre_len = {pre_len}...")

    # Initialize DataModule
    data_module = DataModule(
        feat_path=feat_path,
        adj_path=adj_path,
        batch_size=32,
        seq_len=12,
        pre_len=pre_len,
        scaling_method="minmax"
    )

    # Model parameters
    input_dim = data_module.feat.shape[1]
    hidden_dim = 24
    output_dim = pre_len
    num_nodes = data_module.adj.shape[0]
    adj = torch.from_numpy(data_module.adj).to('cpu')

    # Initialize model and training wrapper
    model = Model(
        adj=adj,
        seq_len=data_module.seq_len,
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        output_dim=output_dim,
        num_nodes=num_nodes,
        num_stacks=1,
        num_layers=1,
        num_heads=1,
    )
    train_model = TrainModel(
        model=model,
        pre_len=pre_len,
        feat_max_val=data_module.feat_max_val,
        feat_min_val=data_module._feat_min_val,
        learning_rate= 7e-4,
        scaling_method="minmax",
    )

    early_stopping = EarlyStopping(
        monitor="val_loss",
        patience=5,  # Increased from 5
        mode="min",
        verbose=True
    )

    trainer = pl.Trainer(
        max_epochs=500,
        callbacks=[early_stopping, pl.callbacks.ModelCheckpoint(monitor="val_loss")],
        logger=True,
        gradient_clip_val=2,
        accelerator="cpu",
        accumulate_grad_batches=4,  # Larger effective batch size
        min_epochs=15,
    )

    # Train the model
    trainer.fit(train_model, data_module)

    # Log metrics
    final_metrics = trainer.logged_metrics
    print(final_metrics)
    rmse = final_metrics.get("val_rmse", torch.tensor(float("nan"))).item()
    mae = final_metrics.get("val_mae", torch.tensor(float("nan"))).item()

    train_loss = trainer.callback_metrics['train_loss'].item()
    val_loss = trainer.callback_metrics['val_loss'].item()

    print(f"Completed training for pre_len = {pre_len}: RMSE = {rmse:.4f}, MAE = {mae:.4f}")
    print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    return data_module, train_model, model, trainer

In [None]:
def generate_predictions_and_plots(data_module, train_model, model, pre_len):
    print("Generating Predictions...")

    val_loader = data_module.val_dataloader()
    actual_values, predicted_values = [], []

    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            x, y = batch
            x, y = x.to(train_model.device), y.to(train_model.device)
            y_pred = train_model(x)
            actual_values.append(y.cpu().numpy())
            predicted_values.append(y_pred.cpu().numpy())

    actual_values = np.concatenate(actual_values, axis=0)
    predicted_values = np.concatenate(predicted_values, axis=0)
    predicted_values = predicted_values[:, :actual_values.shape[1], :]

    scaling_method = data_module.scaling_method
    if scaling_method == 'minmax':
        min_val, max_val = data_module._feat_min_val, data_module._feat_max_val
        def inverse_scale(data, min_val, max_val):
            return data * (max_val - min_val) + min_val
        actual_values = inverse_scale(actual_values, min_val, max_val)
        predicted_values = inverse_scale(predicted_values, min_val, max_val)
    elif scaling_method == 'std':
        scaler = data_module.scaler
        actual_values = scaler.inverse_transform(actual_values.reshape(-1, actual_values.shape[-1])).reshape(actual_values.shape)
        predicted_values = scaler.inverse_transform(predicted_values.reshape(-1, predicted_values.shape[-1])).reshape(predicted_values.shape)
    else:
        raise ValueError("Invalid scaling method. Choose 'minmax' or 'std'.")

    if actual_values.shape != predicted_values.shape:
        raise ValueError(f"Shapes mismatch: {actual_values.shape} vs {predicted_values.shape}")

    timestamps = np.arange(actual_values.shape[0])
    node_indices_to_plot = [11, 44, 112, 32]
    save_directory = "./prediction_plots"
    os.makedirs(save_directory, exist_ok=True)

    print("Generating and saving plots...")
    for node_index in node_indices_to_plot:
        plt.figure()
        plt.plot(timestamps, actual_values[:, 0, node_index], label='Actual')
        plt.plot(timestamps, predicted_values[:, 0, node_index], label='Predicted')
        plt.xlabel('Time')
        plt.ylabel('Value')
        plt.title(f'Node {node_index} - Actual vs Predicted')
        plt.legend()
        plt.savefig(os.path.join(save_directory, f'node_{node_index}_comparison.png'))
        plt.close()
    print(f"Plots saved to: {save_directory}")

for pre_len in [3]:
    data_module, train_model, model, trainer = train_model_with_pre_len(pre_len, feat_path, adj_path)
    generate_predictions_and_plots(data_module, train_model, model, pre_len)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/akshat/Developer/ML_WORK/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/akshat/Developer/ML_WORK/.venv/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name        | Type              | Params | Mode 
----------------------------------------------------------
0 | model       | Model          

Training model with pre_len = 3...
Feature matrix shape: (2016, 207)
Adjacency matrix shape: (207, 207)
Original feature range: 1.0 70.0
Scaled feature range: 0.0, 1.0
Shape of tensor 0 in train dataset: torch.Size([1597, 12, 207])
Shape of tensor 1 in train dataset: torch.Size([1597, 3, 207])
Shape of tensor 0 in validation dataset: torch.Size([389, 12, 207])
Shape of tensor 1 in validation dataset: torch.Size([389, 3, 207])
Epoch 0: 100%|██████████| 50/50 [00:07<00:00,  6.53it/s, v_num=26, train_loss_step=0.918, val_loss=38.10, val_rmse=44.30, val_mae=35.20, train_loss_epoch=0.910]

Metric val_loss improved. New best score: 38.133


Epoch 2: 100%|██████████| 50/50 [00:06<00:00,  7.66it/s, v_num=26, train_loss_step=0.927, val_loss=36.80, val_rmse=42.60, val_mae=33.90, train_loss_epoch=0.837]

Metric val_loss improved by 1.326 >= min_delta = 0.0. New best score: 36.807


Epoch 4: 100%|██████████| 50/50 [00:06<00:00,  7.70it/s, v_num=26, train_loss_step=0.827, val_loss=34.70, val_rmse=41.20, val_mae=31.30, train_loss_epoch=0.756]

Metric val_loss improved by 2.140 >= min_delta = 0.0. New best score: 34.667


Epoch 5: 100%|██████████| 50/50 [00:06<00:00,  7.57it/s, v_num=26, train_loss_step=0.607, val_loss=28.90, val_rmse=33.70, val_mae=26.30, train_loss_epoch=0.752]

Metric val_loss improved by 5.763 >= min_delta = 0.0. New best score: 28.904


Epoch 7: 100%|██████████| 50/50 [00:06<00:00,  7.25it/s, v_num=26, train_loss_step=0.612, val_loss=24.70, val_rmse=28.30, val_mae=22.80, train_loss_epoch=0.698]

Metric val_loss improved by 4.198 >= min_delta = 0.0. New best score: 24.706


Epoch 10: 100%|██████████| 50/50 [00:06<00:00,  7.90it/s, v_num=26, train_loss_step=0.813, val_loss=17.90, val_rmse=20.90, val_mae=16.60, train_loss_epoch=0.620]

Metric val_loss improved by 6.812 >= min_delta = 0.0. New best score: 17.894


Epoch 12: 100%|██████████| 50/50 [00:06<00:00,  7.27it/s, v_num=26, train_loss_step=0.573, val_loss=17.60, val_rmse=19.40, val_mae=16.50, train_loss_epoch=0.584]

Metric val_loss improved by 0.291 >= min_delta = 0.0. New best score: 17.602


Epoch 13: 100%|██████████| 50/50 [00:06<00:00,  7.52it/s, v_num=26, train_loss_step=0.559, val_loss=16.60, val_rmse=19.70, val_mae=15.50, train_loss_epoch=0.579]

Metric val_loss improved by 0.976 >= min_delta = 0.0. New best score: 16.626


Epoch 17: 100%|██████████| 50/50 [00:06<00:00,  7.38it/s, v_num=26, train_loss_step=0.540, val_loss=16.30, val_rmse=19.00, val_mae=15.10, train_loss_epoch=0.536]

Metric val_loss improved by 0.318 >= min_delta = 0.0. New best score: 16.309


Epoch 22:  52%|█████▏    | 26/50 [00:03<00:03,  7.84it/s, v_num=26, train_loss_step=0.531, val_loss=16.70, val_rmse=18.60, val_mae=15.60, train_loss_epoch=0.548]