In [49]:
import models
from models.model import Informer
import numpy as np


### Define ParamSpace 

In [59]:
###############################################################################
# STAGE 1: DEFINITIONS AND HYPERPARAMETER SPACE
###############################################################################

import optuna

# Hyperparameter Search Space
def get_hyperparameter_space(trial):
    return {
        'd_model': trial.suggest_categorical('d_model', [128, 256, 512]),
        'n_heads': trial.suggest_categorical('n_heads', [2, 4, 8]),
        'e_layers': trial.suggest_int('e_layers', 1, 3),
        'd_layers': trial.suggest_int('d_layers', 1, 2),
        'factor': trial.suggest_categorical('factor', [3, 5, 7]),
        
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64]),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-2),
        'dropout': trial.suggest_uniform('dropout', 0.05, 0.5),
        'weight_decay': trial.suggest_loguniform('weight_decay', 1e-6, 1e-3),
        
        'seq_len': trial.suggest_categorical('seq_len', [48, 96, 168]),
        'label_len': trial.suggest_categorical('label_len', [1,24, 48]),
        'pred_len': trial.suggest_categorical('pred_len', [1])
    }


### Define the Fine_tune function 

In [60]:
import torch
import torch.nn as nn
import torch.optim as optim
from models.model import Informer

In [61]:
###############################################################################
# STAGE 2: Fine-Tuning Function 
###############################################################################
def objective(trial, train_data, val_data):
    # Unpack the training and validation data
    (X_train, temporal_train, y_train), (X_val, temporal_val, y_val) = train_data, val_data

    params = get_hyperparameter_space(trial)

    # Model Initialization
    model = Informer(
        enc_in=X_train.shape[2],  
        dec_in=X_train.shape[2],
        c_out=1,
        seq_len=params['seq_len'],
        label_len=params['label_len'],
        out_len=params['pred_len'],
        factor=params['factor'],
        d_model=params['d_model'],
        n_heads=params['n_heads'],
        e_layers=params['e_layers'],
        d_layers=params['d_layers'],
        dropout=params['dropout'],
        distil=True
    )

    # Loss & Optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'], weight_decay=params['weight_decay'])

    # Training Loop
    model.train()
    batch_size = params['batch_size']
    num_epochs = 5  

    for epoch in range(num_epochs):
        permutation = torch.randperm(X_train.size(0))

        for i in range(0, X_train.size(0), batch_size):
            indices = permutation[i:i + batch_size]
            batch_x, batch_y = X_train[indices], y_train[indices]
            batch_x_mark = temporal_train[indices]  # Temporal features

            optimizer.zero_grad()
            output = model(batch_x, batch_x_mark, batch_x, batch_x_mark)  # Forward pass
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluation on Validation Set
    model.eval()
    with torch.no_grad():
        val_output = model(X_val, temporal_val, X_val, temporal_val)
        val_loss = criterion(val_output, y_val)

    return val_loss.item()


### Data Preprocessing

In [62]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch

In [63]:
###############################################################################
# STAGE 3: Data Pre-Processing 
###############################################################################

# Load the dataset
final_df = pd.read_csv("/Users/arthur/Documents/STUDY/Imperial/rough paths /salvi notebook/Sig_global_local_level_by_level_depth_{max_depths}.csv")

# Split features and target
X = final_df.drop("y", axis=1).values
y = final_df["y"].values

# Extract timestamps (assuming first column contains timestamps)
timestamps = X[:, 0]
features = X[:, 1:]  # Remaining features

# Standardize the features (excluding timestamps)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# Reshape for Informer input (similar to LSTM)
X_scaled = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], 1))

# Convert timestamps to datetime
timestamps_dt = pd.to_datetime(pd.Series(timestamps), unit='s', errors='coerce')

# Extract temporal features and clip to valid ranges
temporal_features = pd.DataFrame({
    'month': timestamps_dt.dt.month.fillna(1) - 1,          # 0-11 (shift from 1-12)
    'day_of_month': timestamps_dt.dt.day.fillna(1) - 1,     # 0-30 (shift from 1-31)
    'day_of_week': timestamps_dt.dt.dayofweek.fillna(0),    # 0-6 (already fine)
    'hour': timestamps_dt.dt.hour.fillna(0),                # 0-23 (already fine)
    'minute': (timestamps_dt.dt.minute.fillna(0) // 10)     # 0-5 (binning into 6 categories)
})

# Ensure the values are within the correct bounds to prevent IndexErrors
temporal_features['month'] = temporal_features['month'].clip(0, 11)
temporal_features['day_of_month'] = temporal_features['day_of_month'].clip(0, 30)
temporal_features['day_of_week'] = temporal_features['day_of_week'].clip(0, 6)
temporal_features['hour'] = temporal_features['hour'].clip(0, 23)
temporal_features['minute'] = temporal_features['minute'].clip(0, 5)

# Reshape temporal features to match the sequence shape
temporal_features = temporal_features.values.reshape((X_scaled.shape[0], 1, -1))


# Split data into training and validation sets
from sklearn.model_selection import train_test_split

X_train, X_val, temporal_train, temporal_val, y_train, y_val = train_test_split(
    X_scaled, temporal_features, y, test_size=0.2, random_state=42
)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)

# Convert temporal features to 'long' for embedding compatibility
temporal_train = torch.tensor(temporal_train, dtype=torch.long)
temporal_val = torch.tensor(temporal_val, dtype=torch.long)

# Target variables
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(-1)
y_val = torch.tensor(y_val, dtype=torch.float32).unsqueeze(-1)


In [64]:
print("Temporal Features Shape:", temporal_features.shape)
print("X_train Shape:", X_train.shape)
print("Temporal_train Shape:", temporal_train.shape)
print("y_train Shape:", y_train.shape)
print("NaNs in Temporal Features:", np.isnan(temporal_features).sum())

Temporal Features Shape: (100, 1, 5)
X_train Shape: torch.Size([80, 1260, 1])
Temporal_train Shape: torch.Size([80, 1, 5])
y_train Shape: torch.Size([80, 1])
NaNs in Temporal Features: 0


In [65]:
print(timestamps_dt.head())

0   1970-01-01 01:55:49
1   1970-01-01 02:20:47
2   1970-01-01 02:53:14
3   1970-01-01 04:04:52
4   1970-01-01 04:24:29
dtype: datetime64[ns]


In [66]:
# Check compatibility
print("Sample X:", X_train[0].shape)         # Should be (1260, 1)
print("Sample Temporal Features:", temporal_train[0].shape)  # Should be (1, 5)


Sample X: torch.Size([1260, 1])
Sample Temporal Features: torch.Size([1, 5])


## Hyperparameters search using Optuna 

In [58]:
###############################################################################
# STAGE 4: Hyperparameters Search
###############################################################################
import optuna
from optuna.pruners import MedianPruner

# Hyperparameter Optimization
pruner = MedianPruner()
study = optuna.create_study(direction='minimize', pruner=pruner)
study.optimize(
    lambda trial: objective(trial, (X_train, temporal_train, y_train), (X_val, temporal_val, y_val)),
    n_trials=50
)

# Display Best Hyperparameters
print("Best Trial:")
trial = study.best_trial
print(f"Validation Loss: {trial.value}")
for key, value in trial.params.items():
    print(f"{key}: {value}")


[32m[I 2025-02-03 19:38:27,734][0m A new study created in memory with name: no-name-aff11cad-8ea7-4c0e-bbb4-a2eb450ca03a[0m
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
[32m[I 2025-02-03 19:43:49,998][0m Trial 0 finished with value: 0.3632199764251709 and parameters: {'d_model': 256, 'n_heads': 2, 'e_layers': 3, 'd_layers': 1, 'factor': 7, 'batch_size': 16, 'learning_rate': 0.0017840984509659957, 'dropout': 0.39543284486153923, 'weight_decay': 2.0980701910994343e-06, 'seq_len': 48, 'label_len': 1, 'pred_len': 1}. Best is trial 0 with value: 0.3632199764251709.[0m
[32m[I 2025-02-03 19:52:11,747][0m Trial 1 finished with value: 0.29379910230636597 and parameters: {'d_model': 256, 'n_heads': 8, 'e_layers': 1, 'd_layers': 2, 'factor': 3, 'batch_size': 16, 'learning_rate': 0.0007380398380125416, 'dropout': 0.2044713871292106, 'weight_decay': 2.0278925123447975e-06, 'seq_len': 96, 'label_len': 24, 'pred_len'

### train the model 

In [48]:
###############################################################################
# STAGE 5: TRAINING USING BEST HYPERPARAMETERS
###############################################################################
import torch
import torch.nn as nn
import torch.optim as optim
from models.model import Informer

# Training Function
def train_model(best_params, X_train, y_train, X_val, y_val, num_epochs=50):
    # Initialize model with best hyperparameters
    model = Informer(
        enc_in=X_train.shape[2],
        dec_in=X_train.shape[2],
        c_out=1,
        seq_len=best_params['seq_len'],
        label_len=best_params['label_len'],
        out_len=best_params['pred_len'],
        factor=best_params['factor'],
        d_model=best_params['d_model'],
        n_heads=best_params['n_heads'],
        e_layers=best_params['e_layers'],
        d_layers=best_params['d_layers'],
        dropout=best_params['dropout'],
        distil=True
    )

    # Loss & Optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'])

    batch_size = best_params['batch_size']
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        permutation = torch.randperm(X_train.size(0))

        epoch_loss = 0
        for i in range(0, X_train.size(0), batch_size):
            indices = permutation[i:i + batch_size]
            batch_x, batch_y = X_train[indices], y_train[indices]
            batch_x_mark = temporal_train[indices]  # Add temporal data for training

            optimizer.zero_grad()
            output = model(batch_x, batch_x_mark, batch_x, batch_x_mark)  # Pass temporal features
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Validation Loss Calculation
        model.eval()
        with torch.no_grad():
            val_output = model(X_val, X_val, None)
            val_loss = criterion(val_output, y_val)

        train_losses.append(epoch_loss / (X_train.size(0) // batch_size))
        val_losses.append(val_loss.item())

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}")

    return model, train_losses, val_losses


### Evaluate the model

In [None]:
###############################################################################
# STAGE 6: EVALUATE THE MODEL 
###############################################################################
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluation Function
def evaluate_model(model, X_val, y_val):
    model.eval()
    with torch.no_grad():
        predictions = model(X_val, X_val, None)
    
    predictions = predictions.squeeze().cpu().numpy()
    y_true = y_val.squeeze().cpu().numpy()

    # Evaluation Metrics
    mse = mean_squared_error(y_true, predictions)
    mae = mean_absolute_error(y_true, predictions)
    r2 = r2_score(y_true, predictions)

    print(f"Evaluation Metrics:")
    print(f" - MSE: {mse:.4f}")
    print(f" - MAE: {mae:.4f}")
    print(f" - R² Score: {r2:.4f}")

    # Plotting Predictions vs Actual Values
    plt.figure(figsize=(10, 6))
    plt.plot(y_true[:100], label='Actual')
    plt.plot(predictions[:100], label='Predicted')
    plt.xlabel('Samples')
    plt.ylabel('Target Value')
    plt.title('Actual vs Predicted Values')
    plt.legend()
    plt.show()

    return mse, mae, r2


### Main 

In [None]:
###############################################################################
# STAGE 7: MAIN
###############################################################################

# Best hyperparameters from Optuna
best_params = study.best_trial.params

# Train the model
model, train_losses, val_losses = train_model(best_params, X_train, y_train, X_val, y_val, num_epochs=50)

# We now Evaluate the model 
mse, mae, r2 = evaluate_model(model, X_val, y_val)
