<a href="https://colab.research.google.com/github/basugautam/Reproducibility-Challenge-Project/blob/Architecture-Files/Complete_project_codes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 📌 Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 📌 Step 2: Import necessary libraries
import pandas as pd

# 📌 Step 3: Define original and new paths
original_path = '/content/drive/My Drive/timeseries_data.csv.csv'  # Update this if needed
cleaned_path = '/content/drive/My Drive/timeseries_data_cleaned.csv'  # This will be our new clean CSV

# 📌 Step 4: Load the raw file without assuming header
df_raw = pd.read_csv(original_path, header=None)
print("🔍 Preview raw file structure:")
print(df_raw.head(10))  # View first 10 rows to understand structure

# 📌 Step 5: Extract real header row and data
# We assume row 0 is column names and actual data starts from row 1
df_cleaned = pd.read_csv(original_path, skiprows=1)

# 📌 Step 6: Rename first column as 'Date' if it's a timestamp
df_cleaned.rename(columns={df_cleaned.columns[0]: 'Date'}, inplace=True)

# 📌 Step 7: Convert 'Date' column to datetime format
df_cleaned['Date'] = pd.to_datetime(df_cleaned['Date'], errors='coerce')

# 📌 Step 8: Drop rows where Date is NaT (invalid timestamp)
df_cleaned = df_cleaned.dropna(subset=['Date'])

# 📌 Step 9: Save the cleaned data to a new file
df_cleaned.to_csv(cleaned_path, index=False)
print(f"✅ Cleaned CSV saved here:\n{cleaned_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
🔍 Preview raw file structure:
         0     1     2                 3            4   \
0       NaN   NaN   NaN               NaN          NaN   
1      Name  GENC  Year  Total Population  Growth Rate   
2   -> 2024   NaN   NaN                --           --   
3    Canada    CA  2024        38,904,514         0.72   
4   -> 2025   NaN   NaN                --           --   
5    Canada    CA  2025        39,187,155         0.73   
6   -> 2026   NaN   NaN                --           --   
7    Canada    CA  2026        39,465,520         0.69   
8   -> 2027   NaN   NaN                --           --   
9    Canada    CA  2027        39,730,162         0.65   

                               5                     6   \
0                             NaN                   NaN   
1  Population Density (per sq km)  Total Fertility Rate   
2                        

  df_cleaned['Date'] = pd.to_datetime(df_cleaned['Date'], errors='coerce')


In [2]:
# 📌 Step 1: Load cleaned CSV from Google Drive
import pandas as pd
import numpy as np
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Load CSV
file_path = '/content/drive/My Drive/timeseries_data_cleaned.csv'
df = pd.read_csv(file_path)

# Step 2: Inspect and rename columns if needed
print("✅ Column Names:", df.columns.tolist())

# Try to find the correct datetime column
for col in df.columns:
    if df[col].dtype == 'object' and df[col].str.contains(r'\d{4}', na=False).any():
        df.rename(columns={col: 'Date'}, inplace=True)
        break

# Step 3: Convert 'Date' column to datetime and set index
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df = df.set_index('Date')
df = df.sort_index()
df = df.dropna()

# Step 4: Resample yearly (if needed)
df_resampled = df.resample('YE').mean().interpolate()

# Step 5: Create sliding windows
input_window = 5       # past 5 years
forecast_horizon = 3   # next 3 years

X, Y = [], []
values = df_resampled.values

for i in range(len(values) - input_window - forecast_horizon):
    x_seq = values[i : i + input_window]
    y_seq = values[i + input_window : i + input_window + forecast_horizon]
    X.append(x_seq)
    Y.append(y_seq)

X = np.array(X)
Y = np.array(Y)

# Step 6: Save windows to Google Drive (optional)
np.save('/content/drive/My Drive/X_windows.npy', X)
np.save('/content/drive/My Drive/Y_windows.npy', Y)

# ✅ Output shapes
print("✅ Data ready for modeling!")
print(f"X shape (samples, {input_window}, features):", X.shape)
print(f"Y shape (samples, {forecast_horizon}, features):", Y.shape)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Column Names: ['Date', 'GENC', 'Year', 'Total Population', 'Growth Rate', 'Population Density (per sq km)', 'Total Fertility Rate', 'Life Expectancy at Birth', 'Under-5 Mortality Rate', 'Sex Ratio of the Population', 'Youth and Old Age (0-14 and 65+)', 'Youth (0-14)', 'Old Age (65+)', 'Both Sexes', 'Male', 'Female']
✅ Data ready for modeling!
X shape (samples, 5, features): (0,)
Y shape (samples, 3, features): (0,)


In [3]:
# Define custom forecast horizons (in years)
forecast_horizons = [1, 5, 10, 20]
input_window = 5

X_multi, Y_multi = [], []

for i in range(len(df_resampled) - max(forecast_horizons) - input_window):
    x_seq = df_resampled.iloc[i : i + input_window].values
    y_seq = []
    for h in forecast_horizons:
        y = df_resampled.iloc[i + input_window + h - 1].values  # get the year at t+h
        y_seq.append(y)
    X_multi.append(x_seq)
    Y_multi.append(y_seq)

X_multi = np.array(X_multi)
Y_multi = np.array(Y_multi)

# Save these for modeling
np.save('/content/drive/My Drive/X_multi.npy', X_multi)
np.save('/content/drive/My Drive/Y_multi.npy', Y_multi)

print("✅ Multi-horizon windows ready!")
print(f"X_multi shape: {X_multi.shape} → (samples, {input_window}, features)")
print(f"Y_multi shape: {Y_multi.shape} → (samples, {len(forecast_horizons)}, features)")


✅ Multi-horizon windows ready!
X_multi shape: (0,) → (samples, 5, features)
Y_multi shape: (0,) → (samples, 4, features)


In [4]:
# 🟦 Step 1: Import necessary PyTorch libraries
# 🔵 (a#) We are importing these to build neural networks and define loss functions
# 🔵 (b#) PyTorch's torch.nn module provides tools to build models and custom loss functions
# 🔵 (c#) `nn` is a shorthand alias for `torch.nn` to keep code cleaner
# 🔵 (d#) With these, we can define and use custom regularized loss for multi-horizon consistency

import torch
import torch.nn as nn

# 🟨 Step 2: Define a custom Weighted MSE Loss function
# 🔵 (a#) We want different penalties for errors at different forecast horizons
# 🔵 (b#) Weighted loss lets us emphasize closer or farther predictions
# 🔵 (c#) This subclass overrides `forward()` to compute weighted MSE
# 🔵 (d#) This allows training the model with greater control over forecasting behavior

class WeightedMSELoss(nn.Module):
    def __init__(self, weights):
        super(WeightedMSELoss, self).__init__()
        self.weights = torch.tensor(weights, dtype=torch.float32).view(1, -1)  # Reshape to broadcast

    def forward(self, predictions, targets):
        # Ensure input shapes are compatible
        assert predictions.shape == targets.shape, "Prediction and target must match shape"

        # Compute squared errors
        squared_error = (predictions - targets) ** 2

        # Apply weights
        weighted_error = squared_error * self.weights

        # Return mean of weighted error
        return weighted_error.mean()


In [5]:
# 🟦 Step 1: Import necessary libraries
import pandas as pd
import numpy as np

# 🟥 Step 2: Upload file manually if not using Google Drive
from google.colab import files
uploaded = files.upload()

# 🟩 Step 3: Load the uploaded file
# 🔵 (a#) We are loading the dataset to extract the time series for modeling
# 🔵 (b#) This will allow us to apply forecasting techniques using the data
# 🔵 (c#) `pd.read_csv()` loads CSV data into a DataFrame, which is a table-like data structure
# 🔵 (d#) This gives us access to the clean, structured input to process for deep learning
df = pd.read_csv(list(uploaded.keys())[0])
df.head()


Saving timeseries_data.csv.csv to timeseries_data.csv (1).csv


Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Dependency Ratio,Unnamed: 11,Unnamed: 12,Median Age,Unnamed: 14,Unnamed: 15
0,Name,GENC,Year,Total Population,Growth Rate,Population Density (per sq km),Total Fertility Rate,Life Expectancy at Birth,Under-5 Mortality Rate,Sex Ratio of the Population,Youth and Old Age (0-14 and 65+),Youth (0-14),Old Age (65+),Both Sexes,Male,Female
1,-> 2024,,,--,--,--,--,--,--,--,--,--,--,--,--,--
2,Canada,CA,2024,38904514,0.72,4.3,1.44,83.9,4.8,0.99,56.8,23.9,32.9,42.5,43.9,41.2
3,-> 2025,,,--,--,--,--,--,--,--,--,--,--,--,--,--
4,Canada,CA,2025,39187155,0.73,4.3,1.43,84.8,4.4,0.99,57.7,23.8,33.9,42.8,44.1,41.4


In [6]:
# 🟦 Print all available columns so we know what to use
print("Available columns in dataset:", df.columns)

# 🔵 Please replace 'your_column_name' below with the actual column name from the output above
# 🟧 For example, if the column is 'population' or 'value', use that instead of 'your_column_name'
data = df['your_column_name'].values.astype(np.float32)  # 🔁 Replace this


Available columns in dataset: Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9',
       'Dependency Ratio', 'Unnamed: 11', 'Unnamed: 12', 'Median Age',
       'Unnamed: 14', 'Unnamed: 15'],
      dtype='object')


KeyError: 'your_column_name'

In [None]:
# 🟨 Step 4: Create rolling windows of inputs and corresponding multi-step outputs
# 🔵 (a#) We need to create input-output pairs for training the model
# 🔵 (b#) Using sliding windows, we generate training samples from historical data
# 🔵 (c#) Each input is a sequence of time steps (e.g., 20), and each output is a future value at multiple horizons
# 🔵 (d#) This allows the model to learn from past data and make multi-horizon predictions

input_window = 20
forecast_steps = [1, 5, 10, 20]  # Forecast horizons: t+1, t+5, t+10, t+20

X_multi = []
Y_multi = []

for i in range(len(data) - input_window - max(forecast_steps)):
    x = data[i : i + input_window]
    y = [data[i + input_window + h - 1] for h in forecast_steps]
    X_multi.append(x)
    Y_multi.append(y)

X_multi = np.array(X_multi).reshape(-1, input_window, 1)
Y_multi = np.array(Y_multi)

# 🟪 Step 5: Print and verify the shape of processed data
# 🔵 (a#) This confirms the model input and output dimensions are correctly prepared
# 🔵 (b#) Avoids runtime errors when passed into the model
# 🔵 (c#) `.shape` shows the dimensions: (samples, time steps, features)
# 🔵 (d#) Now the model can process sequences and predict multi-horizon values

print(f"✅ Shape of X_multi: {X_multi.shape}")  # Expected: (samples, 20, 1)
print(f"✅ Shape of Y_multi: {Y_multi.shape}")  # Expected: (samples, 4)

# ✅ Safe to access shape[1] now
forecast_horizons = Y_multi.shape[1]


In [None]:
import numpy as np

# Assume df_resampled is your cleaned and resampled dataframe
# Define the number of time steps for the input sequence and the number of steps for the forecast horizon
sequence_length = 10  # Number of time steps for each input sequence
forecast_horizons = 5  # Forecast for the next 5 time steps

# Initialize X_multi (input) and Y_multi (output/forecast) arrays
X_multi = []
Y_multi = []

# Loop through the dataset and create windows
for i in range(len(df_resampled) - sequence_length - forecast_horizons + 1):
    # Input sequence (X) - previous 'sequence_length' time steps
    X = df_resampled.iloc[i:i+sequence_length].values

    # Output sequence (Y) - forecast for the next 'forecast_horizons' time steps
    Y = df_resampled.iloc[i+sequence_length:i+sequence_length+forecast_horizons].values

    X_multi.append(X)
    Y_multi.append(Y)

# Convert to numpy arrays
X_multi = np.array(X_multi)
Y_multi = np.array(Y_multi)

# Check the shapes to confirm data preparation
print(f"Shape of X_multi: {X_multi.shape}")
print(f"Shape of Y_multi: {Y_multi.shape}")


In [None]:
# Prepare your data (convert to PyTorch tensors)
X_train = torch.tensor(X_multi, dtype=torch.float32)
Y_train = torch.tensor(Y_multi, dtype=torch.float32)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100

for epoch in range(num_epochs):
    model.train()

    optimizer.zero_grad()

    # Forward pass
    y_pred = model(X_train)  # Predicted horizons: (batch_size, forecast_horizons)

    # Compute loss
    loss = loss_fn(y_pred, Y_train)

    # Backward pass
    loss.backward()

    # Update weights
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")


In [None]:
model.eval()
with torch.no_grad():
    # Example: Forecast the next 20 years based on the latest input window
    x_input = torch.tensor(X_multi[-1:], dtype=torch.float32)  # Last input
    forecast = model(x_input)

    # Print forecasted values for t+1, t+5, t+10, t+20
    print("Forecasted values:", forecast)


In [None]:
# Save the trained model
torch.save(model.state_dict(), '/content/drive/My Drive/time_series_transformer.pth')

# Load the model for future predictions
model = TimeSeriesTransformer(input_size, hidden_size, num_heads, num_layers, forecast_horizons)
model.load_state_dict(torch.load('/content/drive/My Drive/time_series_transformer.pth'))
model.eval()


In [None]:
# Importing necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

# Load the previously trained model from Google Drive
model = TimeSeriesTransformer(input_size, hidden_size, num_heads, num_layers, forecast_horizons)
model.load_state_dict(torch.load('/content/drive/My Drive/time_series_transformer.pth'))
model.eval()  # Switch model to evaluation mode

# Assume df_resampled is your preprocessed data
# If df_resampled is a Pandas DataFrame, ensure it's in the appropriate format
# Example: df_resampled has shape (num_samples, num_features)


In [None]:
# a) In this step, we load the trained model to continue training or evaluate it on new data.
# b) This code uses the 'load_state_dict' function to restore the model's parameters from a saved file.
# c) The 'eval' function sets the model to evaluation mode, which ensures that operations like dropout are disabled.
# d) The purpose of this code is to load the pre-trained model and prepare it for inference or further training.


In [None]:
# Define the Temporal Discounting Loss function
class TemporalDiscountingLoss(nn.Module):
    def __init__(self, weights):
        super(TemporalDiscountingLoss, self).__init__()
        self.weights = torch.tensor(weights, dtype=torch.float32)  # Decaying weights for different forecast horizons

    def forward(self, predictions, targets):
        # predictions: (batch_size, forecast_horizons)
        # targets: (batch_size, forecast_horizons)

        # Compute the individual losses for each forecast horizon (e.g., t+1, t+5, t+10, t+20)
        losses = (predictions - targets) ** 2  # Mean squared error for each horizon

        # Apply the weights to each loss
        weighted_losses = losses * self.weights

        # Return the sum of weighted losses
        return weighted_losses.sum()


In [None]:
# a) We are using Temporal Discounting Loss to ensure the model prioritizes long-term forecasting accuracy.
#    By weighting the losses differently for different forecast horizons, we can emphasize the importance of predicting long-term values.
# b) The TemporalDiscountingLoss class calculates the squared error loss for each forecast horizon and then applies the weight vector to each.
#    This ensures that we penalize the model more for errors in long-term predictions (e.g., t+10, t+20).
# c) 'weights' is the vector that defines the temporal discounting at each forecast horizon, and 'losses' are the squared differences between predictions and targets.
# d) This operation will return the sum of weighted losses, which is used to guide the model during training towards better long-term forecasting.


In [None]:
# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define the loss function with the weights for temporal discounting
loss_weights = [0.1, 0.2, 0.4, 0.8]  # Example weights for t+1, t+5, t+10, t+20
criterion = TemporalDiscountingLoss(loss_weights)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Assuming X_train and Y_train are your training data (already windowed)
    # Convert them to PyTorch tensors
    X_train_tensor = torch.tensor(X_multi, dtype=torch.float32)
    Y_train_tensor = torch.tensor(Y_multi, dtype=torch.float32)

    # Forward pass
    predictions = model(X_train_tensor)

    # Compute the loss
    loss = criterion(predictions, Y_train_tensor)

    # Backward pass
    loss.backward()
    optimizer.step()

    # Print the loss for each epoch
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")


In [None]:
# a) In this code, we are training the model with the Temporal Discounting Loss to focus on long-term forecasting.
#    By using weighted losses, we enforce that the model learns better long-term predictions (e.g., t+10, t+20).
# b) The optimizer (Adam) is used to adjust the model parameters based on the gradients of the loss function.
#    We use 'loss.backward()' to compute these gradients and 'optimizer.step()' to update the parameters.
# c) 'X_train_tensor' and 'Y_train_tensor' are the training data (input and target). The 'predictions' are the model's outputs.
# d) The result of this operation is a model that learns with temporal discounting loss, improving long-term forecasting accuracy.


In [None]:
# Evaluate the model on a test set (using the same windowing approach for X_test and Y_test)
model.eval()  # Switch to evaluation mode

# Assuming X_test and Y_test are your test data (already windowed)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)

# Get the predictions
with torch.no_grad():  # Disable gradient computation for evaluation
    predictions = model(X_test_tensor)

# Compute the loss on the test set
test_loss = criterion(predictions, Y_test_tensor)
print(f"Test Loss: {test_loss.item()}")


In [None]:
# a) After training, we need to evaluate the model to see how well it generalizes to unseen data (test set).
#    The model is switched to evaluation mode using 'model.eval()', which disables certain layers like dropout.
# b) The 'torch.no_grad()' context ensures that gradients are not computed during evaluation, saving memory and computation.
#    We then compute the loss for the test set using the Temporal Discounting Loss function.
# c) 'X_test_tensor' and 'Y_test_tensor' are the input and target data for the test set. 'predictions' are the model's outputs for this set.
# d) The result is a 'test_loss' that gives an indication of how well the model performs on unseen data using the weighted loss strategy.


In [None]:
import matplotlib.pyplot as plt

# Convert predictions and true values to numpy arrays for plotting
predictions_np = predictions.numpy()
Y_test_np = Y_test_tensor.numpy()

# Plot the results for a few forecast horizons (e.g., t+1, t+5, t+10)
horizons = ['t+1', 't+5', 't+10', 't+20']

# Plot predictions vs actual for each horizon
plt.figure(figsize=(12, 6))
for i, horizon in enumerate(horizons):
    plt.subplot(2, 2, i+1)
    plt.plot(Y_test_np[:, i], label='True Values')
    plt.plot(predictions_np[:, i], label='Predictions')
    plt.title(f"Forecast Horizon: {horizon}")
    plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# a) We use visualization to compare the true values and predictions from the model, helping us understand its performance at different forecast horizons.
# b) The plot allows us to visually assess how well the model is performing at various forecast horizons (e.g., t+1, t+5, t+10, t+20).
# c) 'plt.plot()' creates a line plot for true vs. predicted values. 'horizons' is a list of forecast time steps (e.g., t+1, t+5, t+10, t+20).
# d) The result is a set of plots showing the predictions compared to the actual values for each forecast horizon.


In [None]:
# Save the trained model
torch.save(model.state_dict(), '/content/drive/My Drive/time_series_transformer_trained.pth')

# To load the model later for prediction
model.load_state_dict(torch.load('/content/drive/My Drive/time_series_transformer_trained.pth'))
model.eval()


In [None]:
# a) After training and evaluating the model, we save the model to Google Drive for future use.
#    Saving the model allows us to avoid retraining it each time we want to use it for predictions.
# b) 'torch.save()' is used to store the model's parameters, while 'load_state_dict()' is used to restore these parameters later.
# c) 'state_dict' refers to the dictionary containing the model's parameters (weights). The model is restored by loading this state.
# d) This ensures that the model is available for future use, either for making predictions or fine-tuning it further.


In [None]:
import torch

# Define a function to compute the penalty for violating monotonicity constraint
def monotonicity_penalty(predictions):
    """
    This function computes the penalty for violating the monotonicity constraint.
    The monotonicity constraint enforces that the uncertainty (or variance) increases with time.
    """
    # Calculate the difference between consecutive predictions (for each forecast horizon)
    # The assumption here is that the forecasted uncertainty should increase with time
    diff = torch.diff(predictions, dim=1)

    # Penalize negative differences, as we want the variance to increase (positive diff)
    penalty = torch.sum(torch.relu(-diff))  # Only penalize if the difference is negative

    return penalty

# Example: assuming predictions from the model
# predictions: (batch_size, forecast_horizons)
predictions = torch.tensor([[0.1, 0.15, 0.2, 0.25],  # Sample prediction for one batch
                            [0.2, 0.3, 0.4, 0.5]])  # Sample prediction for another batch

# Compute the monotonicity penalty for the predictions
penalty = monotonicity_penalty(predictions)
print(f"Monotonicity penalty: {penalty.item()}")


In [None]:
# a) We are using the monotonicity penalty to enforce that the uncertainty (or variance) increases over time. This helps in making long-term forecasts more reliable and consistent.
# b) The function 'monotonicity_penalty' calculates the difference between consecutive time steps (horizons) in the predictions.
#    It then penalizes negative differences using the 'torch.relu' function, which sets any negative differences to zero, ensuring that the penalty is applied when the model's uncertainty decreases over time.
# c) 'torch.diff(predictions, dim=1)' computes the difference between consecutive forecast horizons. The 'torch.relu' function is used to apply the penalty only when the difference is negative (i.e., the uncertainty decreases).
# d) The result of this operation is a penalty value that will be added to the model's total loss, encouraging the model to increase uncertainty over time.


In [None]:
# Modify the loss function to include the monotonicity penalty
class TemporalDiscountingLossWithMonotonicity(nn.Module):
    def __init__(self, weights, monotonicity_weight=0.1):
        super(TemporalDiscountingLossWithMonotonicity, self).__init__()
        self.weights = torch.tensor(weights, dtype=torch.float32)
        self.monotonicity_weight = monotonicity_weight  # Weight of the monotonicity penalty in the total loss

    def forward(self, predictions, targets):
        # Compute the standard temporal discounting loss
        losses = (predictions - targets) ** 2  # Mean squared error for each horizon
        weighted_losses = losses * self.weights

        # Compute the monotonicity penalty
        monotonicity_penalty_value = monotonicity_penalty(predictions)

        # Return the total loss, which is the sum of weighted losses and the monotonicity penalty
        total_loss = weighted_losses.sum() + self.monotonicity_weight * monotonicity_penalty_value
        return total_loss

# Example usage in training loop
# Assuming X_train and Y_train are our training data (already windowed)
X_train_tensor = torch.tensor(X_multi, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_multi, dtype=torch.float32)

# Define the loss function with monotonicity penalty
loss_weights = [0.1, 0.2, 0.4, 0.8]  # Example weights for t+1, t+5, t+10, t+20
criterion = TemporalDiscountingLossWithMonotonicity(loss_weights)

# Example model training loop with monotonicity constraints
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(50):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    predictions = model(X_train_tensor)

    # Compute the loss with the monotonicity penalty
    loss = criterion(predictions, Y_train_tensor)

    # Backward pass
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item()}")


In [None]:
# a) By including the monotonicity penalty in the loss function, we are directly enforcing the rule that uncertainty should increase over time.
# b) The modified loss function, 'TemporalDiscountingLossWithMonotonicity', combines the traditional temporal discounting loss with the monotonicity penalty.
#    This ensures that the model is penalized not only for making incorrect predictions but also for violating the monotonicity constraint.
# c) The 'monotonicity_weight' controls how much importance we give to the monotonicity penalty compared to the regular forecasting loss. A higher value would place more emphasis on monotonicity.
# d) This operation results in a total loss that guides the model to follow both the temporal discounting and the monotonicity rules, improving the long-term forecasting behavior.


In [None]:
# Evaluate the model on a test set
model.eval()

# Assuming X_test and Y_test are your test data (already windowed)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)

# Get the predictions from the model
with torch.no_grad():
    predictions = model(X_test_tensor)

# Compute the test loss including the monotonicity penalty
test_loss = criterion(predictions, Y_test_tensor)
print(f"Test Loss (with Monotonicity Penalty): {test_loss.item()}")


In [None]:
# a) In this step, we evaluate the model on a test set to understand its generalization ability with the monotonicity constraints applied.
# b) By calling 'model.eval()', we ensure the model is in evaluation mode, which disables dropout and other training-specific behaviors.
#    We then compute the loss for the test set by using the same temporal discounting and monotonicity penalty.
# c) 'X_test_tensor' and 'Y_test_tensor' are the test data in tensor format. The 'predictions' are the outputs of the model when evaluated on this data.
# d) This operation will provide the test loss, which includes both the temporal discounting loss and the penalty for monotonicity violations. This helps in determining how well the model adheres to the monotonicity rule on unseen data.


In [None]:
import matplotlib.pyplot as plt

# Convert predictions and true values to numpy arrays for plotting
predictions_np = predictions.numpy()
Y_test_np = Y_test_tensor.numpy()

# Plot predictions vs actual for a few forecast horizons (e.g., t+1, t+5, t+10)
horizons = ['t+1', 't+5', 't+10', 't+20']

# Plot predictions vs actual for each horizon
plt.figure(figsize=(12, 6))
for i, horizon in enumerate(horizons):
    plt.subplot(2, 2, i+1)
    plt.plot(Y_test_np[:, i], label='True Values')
    plt.plot(predictions_np[:, i], label='Predictions')
    plt.title(f"Forecast Horizon: {horizon}")
    plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# a) Visualization helps in comparing the model's predictions against the true values to understand its performance, particularly at different forecast horizons.
# b) By plotting the true and predicted values for each forecast horizon, we can visually inspect how well the model is adhering to the monotonicity constraints.
# c) 'plt.plot()' creates a line plot for true vs. predicted values. 'horizons' is a list of forecast time steps (e.g., t+1, t+5, t+10, t+20).
# d) The result will be a set of plots showing the predictions compared to the actual values for each forecast horizon, which will help us understand how well the model is performing under the monotonicity constraints.


In [None]:
import torch
import numpy as np

# Define a function to compute the Fourier Transform of a time series
def compute_fft(x):
    """
    Compute the Fast Fourier Transform (FFT) of the input time series.
    FFT helps in analyzing the frequency components of the signal.
    """
    # Apply FFT on the input tensor (time series)
    fft_result = torch.fft.fft(x)

    # Get the magnitude of the complex FFT result (frequency domain representation)
    fft_magnitude = torch.abs(fft_result)

    return fft_magnitude

# Define a function to compute the frequency-aware loss
def frequency_aware_loss(predictions, targets):
    """
    Frequency-Aware Loss that compares the predicted signal and the target signal in the frequency domain.
    """
    # Apply FFT on the predictions and targets
    prediction_fft = compute_fft(predictions)
    target_fft = compute_fft(targets)

    # Compute the Mean Squared Error (MSE) in the frequency domain
    mse_freq = torch.mean((prediction_fft - target_fft) ** 2)

    return mse_freq


In [None]:
# a) We are using FFT to transform the time series data into the frequency domain, allowing us to capture long-term periodicities while avoiding short-term noise.
# b) The function 'compute_fft' performs the Fast Fourier Transform on the time series data, converting it into the frequency domain.
#    The 'frequency_aware_loss' function then compares the magnitude of the FFT of the predicted signal with the true signal, ensuring that the model adheres to the frequency patterns.
# c) 'torch.fft.fft' computes the FFT of a tensor. The result is a complex number array, from which we compute the magnitude using 'torch.abs' to get the frequency components. The MSE is computed in the frequency domain to capture periodicity differences.
# d) This operation helps the model focus on long-term periodic patterns in the data by penalizing discrepancies in the frequency domain, thereby reducing the risk of overfitting short-term noise.


In [None]:
# Example model and training loop with Frequency-Aware Loss
class TimeSeriesTransformerWithFreqLoss(nn.Module):
    def __init__(self, input_size, hidden_size, num_heads, num_layers, forecast_horizons, dropout=0.1):
        super(TimeSeriesTransformerWithFreqLoss, self).__init__()

        # Define embedding layer
        self.embedding = nn.Linear(input_size, hidden_size)

        # Transformer Encoder and Decoder
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=2048,
            dropout=dropout
        )

        # Output layer
        self.output_layer = nn.Linear(hidden_size, forecast_horizons)

    def forward(self, x):
        # x: (batch_size, sequence_length, input_size)
        x = self.embedding(x)  # (batch_size, sequence_length, hidden_size)
        x = x.permute(1, 0, 2)  # (sequence_length, batch_size, hidden_size)

        # Transformer expects the target sequence as well (for now, using x as both)
        transformer_output = self.transformer(x, x)  # (sequence_length, batch_size, hidden_size)

        # Get the last time step's output for forecasting
        forecast = transformer_output[-1, :, :]  # (batch_size, hidden_size)

        # Final output layer to predict multiple horizons
        output = self.output_layer(forecast)  # (batch_size, forecast_horizons)
        return output

# Example model parameters (adjust based on your data)
input_size = X_multi.shape[2]  # Number of features in input
hidden_size = 64
num_heads = 4
num_layers = 2
forecast_horizons = Y_multi.shape[1]  # forecast horizons

# Instantiate the model
model_with_freq_loss = TimeSeriesTransformerWithFreqLoss(input_size, hidden_size, num_heads, num_layers, forecast_horizons)

# Define the optimizer and the frequency-aware loss criterion
optimizer = optim.Adam(model_with_freq_loss.parameters(), lr=0.001)

# Example training loop with frequency-aware loss
for epoch in range(50):
    model_with_freq_loss.train()
    optimizer.zero_grad()

    # Forward pass
    predictions = model_with_freq_loss(X_train_tensor)

    # Compute the frequency-aware loss
    loss = frequency_aware_loss(predictions, Y_train_tensor)

    # Backward pass
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item()}")


In [None]:
# a) We are integrating the Frequency-Aware Loss into the training loop to ensure that the model captures long-term periodic patterns while avoiding overfitting to short-term noise.
# b) The model 'TimeSeriesTransformerWithFreqLoss' is trained using the same architecture as before, but with an additional frequency-aware loss term.
#    The 'frequency_aware_loss' function is called during training, and it computes the discrepancy between the predicted and true signals in the frequency domain, encouraging the model to focus on periodicity rather than noise.
# c) The optimizer (Adam) is used to minimize the total loss, which now includes both the standard loss and the frequency-aware loss.
# d) This operation ensures that the model will be regularized with respect to frequency patterns, improving its ability to generalize over long-term periodicities.


In [None]:
# Evaluate the model on a test set with frequency-aware loss
model_with_freq_loss.eval()

# Assuming X_test and Y_test are your test data (already windowed)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)

# Get the predictions from the model
with torch.no_grad():
    predictions = model_with_freq_loss(X_test_tensor)

# Compute the test loss including the frequency-aware penalty
test_loss = frequency_aware_loss(predictions, Y_test_tensor)
print(f"Test Loss (with Frequency-Aware Loss): {test_loss.item()}")


In [None]:
# a) Evaluating the model with frequency-aware loss ensures that the model is generalizing well to unseen data while capturing long-term periodicities.
# b) By calling 'model.eval()', we disable training-specific behaviors (like dropout) to evaluate the model's performance on the test set.
#    The frequency-aware loss is computed on the test set, ensuring that the model has learned to focus on periodicity in the predictions.
# c) The 'predictions' are the model outputs, and 'test_loss' is the loss calculated using the frequency-aware criterion, which penalizes discrepancies in the frequency domain.
# d) This operation provides an evaluation of how well the model has learned to capture periodic patterns and avoid overfitting to noise in unseen data.


In [None]:
import matplotlib.pyplot as plt

# Convert predictions and true values to numpy arrays for plotting
predictions_np = predictions.numpy()
Y_test_np = Y_test_tensor.numpy()

# Plot predictions vs actual for a few forecast horizons (e.g., t+1, t+5, t+10)
horizons = ['t+1', 't+5', 't+10', 't+20']

# Plot predictions vs actual for each horizon
plt.figure(figsize=(12, 6))
for i, horizon in enumerate(horizons):
    plt.subplot(2, 2, i+1)
    plt.plot(Y_test_np[:, i], label='True Values')
    plt.plot(predictions_np[:, i], label='Predictions')
    plt.title(f"Forecast Horizon: {horizon}")
    plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# a) Visualization helps in comparing the model's predictions against the true values and analyzing how well it captures the long-term periodicities.
# b) By plotting the true and predicted values for each forecast horizon, we can visually inspect how the model is performing in both time and frequency domains.
# c) 'plt.plot()' creates line plots for true vs. predicted values. The 'horizons' list corresponds to the forecasted time steps (e.g., t+1, t+5, t+10, t+20).
# d) The result is a set of plots that will show the predictions in comparison with actual values, helping to identify if the model is adhering to periodicity and avoiding short-term noise.


In [None]:
import torch

# Define a function to compute the consistency loss
def consistency_loss(predictions):
    """
    Ensures that predictions across consecutive time steps (e.g., t+5, t+6, t+7) are consistent.
    Adds a penalty if the change between consecutive time steps is too large.
    """
    # Calculate the absolute differences between consecutive time steps
    diff = torch.abs(predictions[:, 1:] - predictions[:, :-1])  # |y_{t+5} - y_{t+6}|

    # Calculate the mean absolute difference (MAD) for consistency penalty
    consistency_penalty = torch.mean(diff)

    return consistency_penalty


In [None]:
# a) We are using the Consistency Loss to penalize large jumps or oscillations between consecutive time steps (like t+5, t+6, t+7).
# b) The function 'consistency_loss' computes the absolute differences between consecutive predictions in the forecast horizon. This difference is then averaged to calculate the consistency penalty.
# c) The input 'predictions' is a tensor of shape (batch_size, forecast_horizons), where each column corresponds to a time step (e.g., t+1, t+5, t+6). The difference between consecutive predictions is computed using 'torch.abs' to get the absolute value of the change.
# d) This penalty ensures that the model predictions are smooth and consistent, preventing large, unrealistic jumps between neighboring forecast horizons (e.g., t+5, t+6, t+7).


In [None]:
# Define a model class with consistency constraint added
class TimeSeriesTransformerWithConsistency(nn.Module):
    def __init__(self, input_size, hidden_size, num_heads, num_layers, forecast_horizons, dropout=0.1):
        super(TimeSeriesTransformerWithConsistency, self).__init__()

        # Define embedding layer
        self.embedding = nn.Linear(input_size, hidden_size)

        # Transformer Encoder and Decoder
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=2048,
            dropout=dropout
        )

        # Output layer
        self.output_layer = nn.Linear(hidden_size, forecast_horizons)

    def forward(self, x):
        # x: (batch_size, sequence_length, input_size)
        x = self.embedding(x)  # (batch_size, sequence_length, hidden_size)
        x = x.permute(1, 0, 2)  # (sequence_length, batch_size, hidden_size)

        # Transformer expects the target sequence as well (for now, using x as both)
        transformer_output = self.transformer(x, x)  # (sequence_length, batch_size, hidden_size)

        # Get the last time step's output for forecasting
        forecast = transformer_output[-1, :, :]  # (batch_size, hidden_size)

        # Final output layer to predict multiple horizons
        output = self.output_layer(forecast)  # (batch_size, forecast_horizons)
        return output

# Example model parameters (adjust based on your data)
input_size = X_multi.shape[2]  # Number of features in input
hidden_size = 64
num_heads = 4
num_layers = 2
forecast_horizons = Y_multi.shape[1]  # forecast horizons

# Instantiate the model
model_with_consistency = TimeSeriesTransformerWithConsistency(input_size, hidden_size, num_heads, num_layers, forecast_horizons)

# Define the optimizer and the loss function
optimizer = optim.Adam(model_with_consistency.parameters(), lr=0.001)

# Example training loop with consistency loss
for epoch in range(50):
    model_with_consistency.train()
    optimizer.zero_grad()

    # Forward pass
    predictions = model_with_consistency(X_train_tensor)

    # Compute the consistency loss
    loss = consistency_loss(predictions)  # Consistency penalty

    # Backward pass
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Consistency Loss: {loss.item()}")


In [None]:
# a) The model is trained to minimize both the prediction error and the consistency loss, ensuring smooth transitions between consecutive forecast horizons.
# b) The 'TimeSeriesTransformerWithConsistency' class is designed with the same architecture as before, but with an added consistency loss term.
#    The 'consistency_loss' function is called during training to penalize large jumps or oscillations between consecutive time steps, such as t+5, t+6, t+7.
# c) The optimizer (Adam) is used to minimize the total loss, which includes both the prediction loss and the consistency penalty.
# d) This operation ensures that the model does not produce erratic or unrealistic predictions between consecutive time steps and the transitions between forecast horizons remain smooth.


In [None]:
# Evaluate the model on a test set with consistency loss
model_with_consistency.eval()

# Assuming X_test and Y_test are your test data (already windowed)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)

# Get the predictions from the model
with torch.no_grad():
    predictions = model_with_consistency(X_test_tensor)

# Compute the test consistency loss
test_loss = consistency_loss(predictions)
print(f"Test Loss (with Consistency Loss): {test_loss.item()}")


In [None]:
# a) Evaluating the model with consistency loss ensures that the model is making smooth and consistent predictions across forecast horizons.
# b) By calling 'model.eval()', we disable training-specific behaviors (like dropout) and evaluate the model on the test data.
#    The 'consistency_loss' function is then applied to the predictions on the test set, penalizing any large jumps between consecutive time steps.
# c) The 'predictions' are the model's output, and 'test_loss' is the calculated consistency loss for the test set.
# d) This operation provides an evaluation of the model's ability to maintain smooth transitions and consistency in predictions across neighboring forecast horizons.


In [None]:
import matplotlib.pyplot as plt

# Convert predictions and true values to numpy arrays for plotting
predictions_np = predictions.numpy()
Y_test_np = Y_test_tensor.numpy()

# Plot predictions vs actual for a few forecast horizons (e.g., t+5, t+6, t+7)
horizons = ['t+5', 't+6', 't+7']

# Plot predictions vs actual for each horizon
plt.figure(figsize=(12, 6))
for i, horizon in enumerate(horizons):
    plt.subplot(2, 2, i+1)
    plt.plot(Y_test_np[:, i], label='True Values')
    plt.plot(predictions_np[:, i], label='Predictions')
    plt.title(f"Forecast Horizon: {horizon}")
    plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# a) Visualization helps in comparing the model's predictions against the true values and analyzing the consistency of the predictions across time steps.
# b) By plotting the true and predicted values for each forecast horizon, we can visually inspect how consistent the model is at making predictions for nearby horizons.
# c) 'plt.plot()' creates line plots for true vs. predicted values. The 'horizons' list corresponds to the forecasted time steps (e.g., t+5, t+6, t+7).
# d) The result is a set of plots that show how consistent the predictions are for consecutive time steps, helping us evaluate the smoothness of the forecast and whether large jumps are avoided.


In [None]:
import torch

# Define a function to compute the consistency loss
def consistency_loss(predictions):
    """
    Ensures that predictions across consecutive time steps (e.g., t+5, t+6, t+7) are consistent.
    Adds a penalty if the change between consecutive time steps is too large.
    """
    # Calculate the absolute differences between consecutive time steps
    diff = torch.abs(predictions[:, 1:] - predictions[:, :-1])  # |y_{t+5} - y_{t+6}|

    # Calculate the mean absolute difference (MAD) for consistency penalty
    consistency_penalty = torch.mean(diff)

    return consistency_penalty


In [None]:
# a) We are using the Consistency Loss to penalize large jumps or oscillations between consecutive time steps (like t+5, t+6, t+7).
# b) The function 'consistency_loss' computes the absolute differences between consecutive predictions in the forecast horizon. This difference is then averaged to calculate the consistency penalty.
# c) The input 'predictions' is a tensor of shape (batch_size, forecast_horizons), where each column corresponds to a time step (e.g., t+1, t+5, t+6). The difference between consecutive predictions is computed using 'torch.abs' to get the absolute value of the change.
# d) This penalty ensures that the model predictions are smooth and consistent, preventing large, unrealistic jumps between neighboring forecast horizons (e.g., t+5, t+6, t+7).


In [None]:
# Define a model class with consistency constraint added
class TimeSeriesTransformerWithConsistency(nn.Module):
    def __init__(self, input_size, hidden_size, num_heads, num_layers, forecast_horizons, dropout=0.1):
        super(TimeSeriesTransformerWithConsistency, self).__init__()

        # Define embedding layer
        self.embedding = nn.Linear(input_size, hidden_size)

        # Transformer Encoder and Decoder
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=2048,
            dropout=dropout
        )

        # Output layer
        self.output_layer = nn.Linear(hidden_size, forecast_horizons)

    def forward(self, x):
        # x: (batch_size, sequence_length, input_size)
        x = self.embedding(x)  # (batch_size, sequence_length, hidden_size)
        x = x.permute(1, 0, 2)  # (sequence_length, batch_size, hidden_size)

        # Transformer expects the target sequence as well (for now, using x as both)
        transformer_output = self.transformer(x, x)  # (sequence_length, batch_size, hidden_size)

        # Get the last time step's output for forecasting
        forecast = transformer_output[-1, :, :]  # (batch_size, hidden_size)

        # Final output layer to predict multiple horizons
        output = self.output_layer(forecast)  # (batch_size, forecast_horizons)
        return output

# Example model parameters (adjust based on your data)
input_size = X_multi.shape[2]  # Number of features in input
hidden_size = 64
num_heads = 4
num_layers = 2
forecast_horizons = Y_multi.shape[1]  # forecast horizons

# Instantiate the model
model_with_consistency = TimeSeriesTransformerWithConsistency(input_size, hidden_size, num_heads, num_layers, forecast_horizons)

# Define the optimizer and the loss function
optimizer = optim.Adam(model_with_consistency.parameters(), lr=0.001)

# Example training loop with consistency loss
for epoch in range(50):
    model_with_consistency.train()
    optimizer.zero_grad()

    # Forward pass
    predictions = model_with_consistency(X_train_tensor)

    # Compute the consistency loss
    loss = consistency_loss(predictions)  # Consistency penalty

    # Backward pass
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Consistency Loss: {loss.item()}")


In [None]:
# a) The model is trained to minimize both the prediction error and the consistency loss, ensuring smooth transitions between consecutive forecast horizons.
# b) The 'TimeSeriesTransformerWithConsistency' class is designed with the same architecture as before, but with an added consistency loss term.
#    The 'consistency_loss' function is called during training to penalize large jumps or oscillations between consecutive time steps, such as t+5, t+6, t+7.
# c) The optimizer (Adam) is used to minimize the total loss, which includes both the prediction loss and the consistency penalty.
# d) This operation ensures that the model does not produce erratic or unrealistic predictions between consecutive time steps and the transitions between forecast horizons remain smooth.


In [None]:
# Evaluate the model on a test set with consistency loss
model_with_consistency.eval()

# Assuming X_test and Y_test are your test data (already windowed)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)

# Get the predictions from the model
with torch.no_grad():
    predictions = model_with_consistency(X_test_tensor)

# Compute the test consistency loss
test_loss = consistency_loss(predictions)
print(f"Test Loss (with Consistency Loss): {test_loss.item()}")


In [None]:
# a) Evaluating the model with consistency loss ensures that the model is making smooth and consistent predictions across forecast horizons.
# b) By calling 'model.eval()', we disable training-specific behaviors (like dropout) and evaluate the model on the test data.
#    The 'consistency_loss' function is then applied to the predictions on the test set, penalizing any large jumps between consecutive time steps.
# c) The 'predictions' are the model's output, and 'test_loss' is the calculated consistency loss for the test set.
# d) This operation provides an evaluation of the model's ability to maintain smooth transitions and consistency in predictions across neighboring forecast horizons.


In [None]:
import matplotlib.pyplot as plt

# Convert predictions and true values to numpy arrays for plotting
predictions_np = predictions.numpy()
Y_test_np = Y_test_tensor.numpy()

# Plot predictions vs actual for a few forecast horizons (e.g., t+5, t+6, t+7)
horizons = ['t+5', 't+6', 't+7']

# Plot predictions vs actual for each horizon
plt.figure(figsize=(12, 6))
for i, horizon in enumerate(horizons):
    plt.subplot(2, 2, i+1)
    plt.plot(Y_test_np[:, i], label='True Values')
    plt.plot(predictions_np[:, i], label='Predictions')
    plt.title(f"Forecast Horizon: {horizon}")
    plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# a) Visualization helps in comparing the model's predictions against the true values and analyzing the consistency of the predictions across time steps.
# b) By plotting the true and predicted values for each forecast horizon, we can visually inspect how consistent the model is at making predictions for nearby horizons.
# c) 'plt.plot()' creates line plots for true vs. predicted values. The 'horizons' list corresponds to the forecasted time steps (e.g., t+5, t+6, t+7).
# d) The result is a set of plots that show how consistent the predictions are for consecutive time steps, helping us evaluate the smoothness of the forecast and whether large jumps are avoided.


In [None]:
# Define a function to compute horizon-specific MSE
def horizon_specific_mse(predictions, target, horizons=[1, 5, 10]):
    """
    Compute the MSE for specific forecast horizons, such as t+1, t+5, t+10.
    This allows us to track the performance of the model at different time steps.
    """
    mse_results = {}

    # Iterate through the specified horizons
    for horizon in horizons:
        # Get the prediction and target for the specific horizon
        pred_at_horizon = predictions[:, horizon-1]  # Indexing for 0-based
        target_at_horizon = target[:, horizon-1]

        # Compute the MSE for that horizon
        mse = torch.mean((pred_at_horizon - target_at_horizon) ** 2)
        mse_results[f"t+{horizon}"] = mse.item()

    return mse_results


In [None]:
# a) Why we are using this strategy: Horizon-specific MSE allows us to evaluate the model’s performance at key forecast horizons (t+1, t+5, t+10). This is useful for understanding how well the model predicts both short-term and long-term future values.
# b) How these codes will solve the purpose: The function computes the MSE between the predicted and true values at specific horizons. By tracking the MSE at these important time steps, we can assess how well the model performs for different forecast windows.
# c) Explanation of terms used:
#    - 'predictions': A tensor representing the predicted forecast values.
#    - 'target': A tensor representing the ground truth values.
#    - 'mse_results': A dictionary containing the MSE values for each horizon (t+1, t+5, t+10).
# d) What we achieve: This operation provides insight into how the model performs at various forecast horizons, allowing us to identify if the model has any weaknesses at specific time steps.


In [None]:
import torch
import numpy as np

# Define a function to compute CRPS (for probabilistic models)
def crps_score(predictions, target, forecast_horizon=10):
    """
    Compute the Continuous Ranked Probability Score (CRPS) for probabilistic models.
    This is a metric used to evaluate probabilistic forecasts by comparing the predicted
    cumulative distribution function (CDF) to the actual CDF of the ground truth.
    """
    # Initialize an array to store the CRPS for each sample
    crps_values = []

    # For each prediction, compute the CRPS
    for i in range(predictions.size(0)):
        # Convert predictions to cumulative distribution (CDF) format
        sorted_preds = torch.sort(predictions[i, :]).values  # Sort predictions for CDF
        sorted_target = torch.sort(target[i, :]).values  # Sort ground truth for CDF

        # Compute CRPS as the area between the predicted CDF and the true CDF
        crps = torch.mean((sorted_preds - sorted_target) ** 2)  # Simplified CRPS calculation
        crps_values.append(crps.item())

    return np.mean(crps_values)


In [None]:
# a) Why we are using this strategy: CRPS is used to evaluate probabilistic forecasts by comparing the predicted CDF to the true CDF. It gives us an overall measure of how well the predicted distribution fits the observed distribution.
# b) How these codes will solve the purpose: The function calculates the CRPS for each sample in the batch by comparing the sorted predictions and ground truth. The CRPS score is then averaged over all samples.
# c) Explanation of terms used:
#    - 'sorted_preds': The sorted predicted values, representing the CDF of the predictions.
#    - 'sorted_target': The sorted ground truth values, representing the CDF of the true values.
#    - 'crps_values': A list containing the CRPS for each prediction in the batch.
# d) What we achieve: By computing CRPS, we can evaluate the probabilistic forecast and how well it matches the true distribution, providing a more comprehensive evaluation of the model.


In [None]:
# Define a function to compute frequency similarity between predictions and ground truth
def frequency_similarity(predictions, target):
    """
    Compute the similarity between the frequency components of the predictions and the target
    using FFT (Fast Fourier Transform).
    """
    # Apply FFT to both predictions and ground truth
    fft_preds = np.fft.fft(predictions.cpu().numpy(), axis=1)
    fft_target = np.fft.fft(target.cpu().numpy(), axis=1)

    # Compute the magnitude of the frequency components
    mag_preds = np.abs(fft_preds)
    mag_target = np.abs(fft_target)

    # Compute the similarity using the cosine similarity between frequency components
    similarity = np.mean(np.cos(np.angle(fft_preds) - np.angle(fft_target)), axis=1)

    return np.mean(similarity)


In [None]:
# a) Why we are using this strategy: Frequency similarity helps us evaluate whether the model has captured the periodic behavior present in the data, which is important for forecasting time series data with inherent periodicities.
# b) How these codes will solve the purpose: The function applies the Fast Fourier Transform (FFT) to both the predictions and the ground truth, calculates the frequency components, and then computes the cosine similarity between the frequency components of the predicted and true signals.
# c) Explanation of terms used:
#    - 'fft_preds' and 'fft_target': The FFT results of the predicted and true values, representing their frequency components.
#    - 'mag_preds' and 'mag_target': The magnitudes of the frequency components of the predicted and true values.
#    - 'similarity': The cosine similarity between the frequency components of the predictions and ground truth.
# d) What we achieve: This operation gives us a measure of how similar the frequency components of the predictions are to those of the true values, which is important for assessing the periodicity of the forecasts.


In [None]:
import matplotlib.pyplot as plt

# Define a function to plot long-term trends of predictions vs ground truth
def plot_long_term_trends(predictions, target):
    """
    Plot the long-term trends of the model's predictions against the true values.
    This visual inspection helps in assessing how well the model fits the data over time.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(target[:, 0].cpu().numpy(), label="Ground Truth", color='blue')
    plt.plot(predictions[:, 0].cpu().numpy(), label="Predictions", color='red')
    plt.title("Long-Term Trends: Predictions vs Ground Truth")
    plt.xlabel("Time Step")
    plt.ylabel("Value")
    plt.legend()
    plt.show()


In [None]:
# a) Why we are using this strategy: Visual inspection allows us to visually assess how well the model fits the long-term trends of the data, which is crucial for understanding the overall quality of the forecast.
# b) How these codes will solve the purpose: The function plots the predictions and the true values over time, allowing us to directly compare the model's long-term trend against the observed data.
# c) Explanation of terms used:
#    - 'predictions' and 'target': The predicted and true values, respectively.
# d) What we achieve: By visualizing the long-term trends, we can gain insights into the model’s performance over time, check for any significant deviations, and evaluate the overall fit.


In [None]:
# Sample evaluation call:
predictions = model(X_test_tensor)  # Get model predictions
target = Y_test_tensor  # Ground truth

# Horizon-specific error
horizon_mse = horizon_specific_mse(predictions, target)

# CRPS score
crps = crps_score(predictions, target)

# Frequency similarity
freq_similarity = frequency_similarity(predictions, target)

# Plot long-term trends
plot_long_term_trends(predictions, target)

# Output the evaluation metrics
print(f"Horizon-specific MSE: {horizon_mse}")
print(f"CRPS: {crps}")
print(f"Frequency Similarity: {freq_similarity}")


In [None]:
# a) Why we are using this strategy: We combine all the evaluation metrics to get a comprehensive assessment of the model's performance across different aspects, such as horizon-specific error, CRPS, frequency similarity, and visual inspection.
# b) How these codes will solve the purpose: The evaluation call computes the horizon-specific MSE, CRPS score, and frequency similarity, and it visualizes the long-term trends. We then print out these metrics for further analysis.
# c) Explanation of terms used:
#    - 'predictions' and 'target': The model's predictions and the true values.
# d) What we achieve: This final step aggregates the evaluation metrics and provides a clear picture of how well the model performs in different evaluation aspects, ensuring a thorough model assessment.
