In [None]:
!pip install iTransformer==0.4.4

In [None]:
import torch
from iTransformer import iTransformer

# Load dataset

In [None]:
!git clone https://github.com/AbdullahO/SAMoSSA.git

In [None]:
import numpy as np
# Read the electricity matrix from the cloned repository (absolute Colab path).
# The cells below unpack its shape as (hours, users).
dataset = np.load(
    '/content/SAMoSSA/datasets/electricity/electricity.npy',
    encoding='bytes',
)

# Training and Validation Stage

In [None]:
# Two consecutive 24-row windows of the dataset (rows are treated as hours below).
training_set = dataset[25824:25848]      # rows 25824..25847 (24 rows)
validation_set = dataset[25848:25872] # rows 25848..25871 (the next 24 rows)

In [None]:
import pandas as pd
import numpy as np

# Window dimensions: one row per hour, one column per user.
number_of_hours, num_users = training_set.shape

# Synthetic hourly timestamps, one per row of the window.
date_range = pd.date_range(start='01/01/2011 00:00', periods=number_of_hours, freq='H')

# Long-format records: user by user (IDs are 1-based), hours in order within a user.
data = [
    [timestamp, load, user_id]
    for user_id, user_column in enumerate(training_set.T, start=1)
    for timestamp, load in zip(date_range, user_column)
]

# One row per (hour, user) reading.
df_train = pd.DataFrame(data, columns=['Date', 'Load', 'UserID'])

In [None]:
import pandas as pd
import numpy as np

# Window dimensions: one row per hour, one column per user.
number_of_hours, num_users = validation_set.shape

# Synthetic hourly timestamps for the validation window.
# NOTE(review): the start date does not directly follow the 24-hour training window
# stamped from 01/01/2011; the Date column is not read by later cells — confirm.
date_range = pd.date_range(start='17/01/2011 00:00', periods=number_of_hours, freq='H')

# Long-format records, appended one user column at a time (user IDs are 1-based).
data = []
for column_index in range(num_users):
    user_loads = validation_set[:, column_index]
    data.extend(
        [date_range[row], user_loads[row], column_index + 1]
        for row in range(number_of_hours)
    )

# One row per (hour, user) reading.
df_valid = pd.DataFrame(data, columns=['Date', 'Load', 'UserID'])

In [None]:
# The 50 user IDs (1-based, matching the UserID column) kept for the validation stage.
sampled_user_ids = [
    58, 53, 84, 274, 164, 365, 340, 225, 281, 48,
    42, 298, 334, 63, 3, 229, 262, 104, 64, 27,
    133, 61, 245, 2, 67, 337, 127, 248, 218, 217,
    317, 280, 243, 76, 219, 250, 305, 75, 350, 49,
    95, 224, 162, 367, 73, 161, 238, 324, 29, 154,
]

In [None]:
# Restrict both splits to the rows that belong to the sampled users.
train_is_sampled = df_train['UserID'].isin(sampled_user_ids)
df_train = df_train[train_is_sampled]
valid_is_sampled = df_valid['UserID'].isin(sampled_user_ids)
df_valid = df_valid[valid_is_sampled]

In [None]:
# Collect each user's loads into one list; the result is a Series indexed by UserID.
load_per_user_train = df_train['Load'].groupby(df_train['UserID']).apply(list)
load_per_user_train

In [None]:
# Same per-user grouping for the validation split: one list of loads per UserID.
valid_loads_by_user = df_valid.groupby('UserID')['Load']
load_per_user_valid = valid_loads_by_user.apply(list)
load_per_user_valid

In [None]:
# Stack the per-user lists into a (users, hours) tensor, then add a leading batch axis.
per_user_loads = torch.tensor(load_per_user_train.tolist())
time_series = per_user_loads.unsqueeze(0)

(time_series.shape, time_series.dtype)

In [None]:
# Swap the last two axes so the layout is (batch, lookback len, variates).
transposed_time_series = time_series.permute(0, 2, 1)

(transposed_time_series.shape, transposed_time_series.dtype)

# Train iTransformer model

In [None]:
# One array of loads per user, in the order of the grouped Series.
array_list_valid = [np.array(lst) for lst in load_per_user_valid]
# Stack into a single (users, hours) ndarray before handing it to torch: building a
# tensor straight from a list of ndarrays is slow and triggers a PyTorch warning.
# The transpose then gives (hours, users).
combined_array = torch.tensor(np.stack(array_list_valid)).T

# Leading batch axis, matching the layout of the training tensor.
validation_tensor = combined_array.unsqueeze(0)

# Verify the shape
print(validation_tensor.shape)

In [None]:
# Re-check layout and dtype of the training tensor before normalising it.
(transposed_time_series.shape, transposed_time_series.dtype)

In [None]:
# Per-variate statistics over the batch and time axes of the training window;
# keepdim=True keeps them broadcastable against (batch, time, variates) tensors.
mean = transposed_time_series.mean(dim=(0, 1), keepdim=True)
std = transposed_time_series.std(dim=(0, 1), keepdim=True)

# A variate that is constant over the window has zero spread. Dividing by it would
# produce NaN/inf and poison training, so such variates get unit scale instead
# (they normalise to 0 and de-normalise back to their mean).
std[std == 0] = 1.0

# Normalize training data
normalized_train_tensor = (transposed_time_series - mean) / std

# Normalize validation data using training mean and std
validation_tensor_normalized = (validation_tensor - mean) / std

In [None]:
import torch
import torch.optim as optim
from iTransformer import iTransformer
from torch.utils.data import DataLoader
import copy
import torch.nn as nn
import torch.nn.init as init
import random

# Function to calculate SMAPE
def calculate_smape(predictions, ground_truth):
    """Return the mean of |p - g| / (|p| + |g|) as a Python float.

    Both tensors are cast to float32 and must have the same shape. Positions where
    the ground truth is exactly zero are left out of the average; if that leaves
    nothing, the mean of an empty tensor (NaN) is returned. The value is a
    fraction in [0, 1]; callers multiply by 100 for a percentage.
    """
    truth = ground_truth.float()
    keep = truth != 0
    forecast = predictions.float()[keep]
    truth = truth[keep]
    ratio = (forecast - truth).abs() / (forecast.abs() + truth.abs())
    return ratio.mean().item()

# Initialization methods
def xavier_init(m):
    """Give an ``nn.Linear`` layer Xavier-uniform weights and a zero bias.

    Written for ``model.apply``: modules that are not ``nn.Linear`` are left as-is.
    """
    if not isinstance(m, nn.Linear):
        return
    init.xavier_uniform_(m.weight)
    if m.bias is not None:
        init.zeros_(m.bias)

def kaiming_init(m):
    """Give an ``nn.Linear`` layer Kaiming-uniform (ReLU gain) weights and a zero bias.

    Written for ``model.apply``: modules that are not ``nn.Linear`` are left as-is.
    """
    if not isinstance(m, nn.Linear):
        return
    init.kaiming_uniform_(m.weight, nonlinearity='relu')
    if m.bias is not None:
        init.zeros_(m.bias)

def uniform_init(m):
    """Give an ``nn.Linear`` layer weights drawn from U(-0.1, 0.1) and a zero bias.

    Written for ``model.apply``: modules that are not ``nn.Linear`` are left as-is.
    """
    if not isinstance(m, nn.Linear):
        return
    init.uniform_(m.weight, -0.1, 0.1)
    if m.bias is not None:
        init.zeros_(m.bias)

# Pool of weight initialisers; one is drawn at random for every restart.
initialization_methods = [uniform_init]

# Constants
lookback_len = 23     # input steps fed to the model
num_variates = 50     # one variate per sampled user
pred_length = 23      # forecast horizon; also the key of the model's output dict
num_iterations = 30   # independent restarts
num_epochs = 1000     # epochs per restart, also the cosine-annealing period

# Best result seen so far across restarts.
best_smape_error = float('inf')
best_model_state = None
best_init_method = None

# None of these depend on the restart, so they are built once instead of on every
# iteration of the outer loop.
loss_function = torch.nn.MSELoss()
train_loader = DataLoader(normalized_train_tensor, batch_size=2, shuffle=False)
# Evaluation input: the first `lookback_len` steps of the normalised training window.
test_tensor_adjusted = normalized_train_tensor[:, :lookback_len, :]
test_tensor_adjusted_float = test_tensor_adjusted.float()

for iteration in range(num_iterations):
    # Randomly select an initialization method
    init_method = random.choice(initialization_methods)

    # Fresh model for this restart
    model = iTransformer(
        num_variates=num_variates,
        lookback_len=lookback_len,
        dim=32,
        depth=4,
        heads=8,
        dim_head=32,
        pred_length=pred_length,
        num_tokens_per_variate=2,
    )

    # Apply selected initialization to every nn.Linear inside the model
    model.apply(init_method)

    # Optimizer and learning-rate schedule (one scheduler step per epoch)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    # Training loop
    model.train()
    for epoch in range(num_epochs):
        for inputs in train_loader:
            x = inputs[:, :lookback_len, :]
            # Target is the same window shifted one step ahead; this needs
            # pred_length == lookback_len and a window of lookback_len + 1 steps.
            y = inputs[:, 1:lookback_len + 1, :]
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_function(outputs[pred_length], y)
            loss.backward()
            optimizer.step()
        scheduler.step()

    # Evaluation: forecast from the normalised training window, undo the
    # normalisation, and score against the first steps of the validation window.
    model.eval()
    with torch.no_grad():
        model_output = model(test_tensor_adjusted_float)
        if pred_length in model_output:
            predictions_normalized = model_output[pred_length]
            # Transform predictions back to original scale
            predictions_original_scale = (predictions_normalized * std) + mean
            smape_error = calculate_smape(
                predictions_original_scale,
                validation_tensor[:, :lookback_len, :],
            )
            if smape_error < best_smape_error:
                best_smape_error = smape_error
                best_model_state = copy.deepcopy(model.state_dict())
                best_init_method = init_method.__name__

                # Report only when a restart improves on the best score so far
                print(f"Best Initialization Method: {best_init_method}")
                print(f"Best SMAPE Error(%): {best_smape_error*100}")

  # Optionally, save the best model state
  # if best_model_state is not None:
  #     torch.save(best_model_state, 'best_model.pth')

In [None]:
import torch
import torch.optim as optim
from iTransformer import iTransformer
from torch.utils.data import DataLoader
import copy
import itertools

# Function to calculate SMAPE
def calculate_smape(predictions, ground_truth):
    """Return the mean of |p - g| / (|p| + |g|) as a Python float.

    Both tensors are cast to float32 and must have the same shape. Positions where
    the ground truth is exactly zero are left out of the average; if that leaves
    nothing, the mean of an empty tensor (NaN) is returned. The value is a
    fraction in [0, 1]; multiply by 100 for a percentage.
    """
    predictions = predictions.float()
    ground_truth = ground_truth.float()
    mask = ground_truth != 0
    masked_predictions = predictions[mask]
    masked_ground_truth = ground_truth[mask]
    numerator = torch.abs(masked_predictions - masked_ground_truth)
    # Sum of magnitudes, not magnitude of the sum: |p + g| shrinks (down to zero)
    # whenever a prediction has the opposite sign of its target, inflating the
    # score or producing inf.
    denominator = torch.abs(masked_predictions) + torch.abs(masked_ground_truth)
    smape = torch.mean(numerator / denominator)
    return smape.item()

# Constants
# Assuming the cells above were run in order, the training window holds 24 steps.
# A 23-step input leaves room for the one-step-shifted 23-step target built in the
# training loop; with a 24-step lookback the target would have 23 steps against a
# 24-step forecast and the loss would raise on the shape mismatch.
lookback_len = 23
num_variates = 50
pred_length = 23
num_epochs = 1000  # epochs per configuration, also the cosine-annealing period

# Define the hyperparameter grid
# learning_rates = [0.001, 0.01, 0.1]
# depths = [2, 4, 6]
# heads = [4, 8, 12]
learning_rates = [0.001]
depths = [4]
heads = [8]

# Create a list of all possible combinations of hyperparameters
hyperparameter_grid = list(itertools.product(learning_rates, depths, heads))

# Initialize variables to store the best hyperparameters and corresponding SMAPE
best_smape_error = float('inf')
best_hyperparameters = None
best_model_state = None

# DataLoader for training and validation data
# NOTE(review): this cell trains on the un-normalised tensor, unlike the previous
# training cell — confirm that is intended.
train_loader = DataLoader(transposed_time_series, batch_size=2, shuffle=False)
validation_loader = DataLoader(validation_tensor, batch_size=2, shuffle=False)  # not iterated below

# Evaluation input: the first `lookback_len` steps of the validation window.
val_tensor_adjusted = validation_tensor[:, :lookback_len, :]
val_tensor_adjusted_float = val_tensor_adjusted.float()

for lr, depth, head in hyperparameter_grid:

    # Initialize the model with the current set of hyperparameters
    model = iTransformer(
        num_variates=num_variates,
        lookback_len=lookback_len,
        dim=32,
        depth=depth,
        heads=head,
        dim_head=32,
        pred_length=pred_length,
        num_tokens_per_variate=2,
    )

    # Loss function and optimizer
    loss_function = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    # Training loop
    model.train()

    for epoch in range(num_epochs):
        for inputs in train_loader:
            x = inputs[:, :lookback_len, :]       # input sequence with lookback length
            y = inputs[:, 1:lookback_len + 1, :]  # same window shifted one step ahead
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_function(outputs[pred_length], y)
            loss.backward()
            optimizer.step()
        # The schedule was created but never advanced; step it once per epoch, as
        # the other training cells do.
        scheduler.step()

    # Evaluation
    # NOTE(review): the forecast is scored against the very window that was fed in,
    # not against later observations — confirm this is the intended target.
    model.eval()

    with torch.no_grad():
        model_output = model(val_tensor_adjusted_float)
        if pred_length in model_output:
            predictions = model_output[pred_length]
            smape_error = calculate_smape(predictions, val_tensor_adjusted_float)
            if smape_error < best_smape_error:
                best_smape_error = smape_error
                best_hyperparameters = (lr, depth, head)
                best_model_state = copy.deepcopy(model.state_dict())

                # Print the best hyperparameters and corresponding SMAPE
                print(f"Best Hyperparameters: Learning Rate: {best_hyperparameters[0]}, Depth: {best_hyperparameters[1]}, Heads: {best_hyperparameters[2]}")
                print(f"Best SMAPE Error: {best_smape_error}")


# Testing Stage

In [None]:
# 432 rows for fitting, followed directly by the 48 rows held out for testing.
test_start_row = 25872
train_val_set = dataset[25440:test_start_row]
testing_set = dataset[test_start_row:25920]

In [None]:
import pandas as pd
import numpy as np

# Window dimensions: one row per hour, one column per user.
number_of_hours, num_users = train_val_set.shape

# Synthetic hourly timestamps, one per row of the window.
date_range = pd.date_range(start='01/01/2011 00:00', periods=number_of_hours, freq='H')

# Long-format records: all hours of user 1, then user 2, ... (user IDs are 1-based).
data = [
    [date_range[hour], train_val_set[hour, column], column + 1]
    for column in range(num_users)
    for hour in range(number_of_hours)
]

# One row per (hour, user) reading.
df_train_val = pd.DataFrame(data, columns=['Date', 'Load', 'UserID'])

In [None]:
import pandas as pd
import numpy as np

# Window dimensions: one row per hour, one column per user.
number_of_hours, num_users = testing_set.shape

# Synthetic hourly timestamps for the held-out window.
date_range = pd.date_range(start='19/01/2011 00:00', periods=number_of_hours, freq='H')

# Long-format records, one user column at a time (user IDs are 1-based).
data = []
for user_id, user_column in enumerate(testing_set.T, start=1):
    for timestamp, load in zip(date_range, user_column):
        data.append([timestamp, load, user_id])

# One row per (hour, user) reading.
df_test = pd.DataFrame(data, columns=['Date', 'Load', 'UserID'])

In [None]:
# Rebind `load_per_user_train` to the per-user load lists of the train+validation
# frame (this frame is not filtered by sampled_user_ids).
train_val_loads_by_user = df_train_val.groupby('UserID')['Load']
load_per_user_train = train_val_loads_by_user.apply(list)
load_per_user_train

In [None]:
# Per-user load lists for the test frame, indexed by UserID.
load_per_user_test = df_test['Load'].groupby(df_test['UserID']).apply(list)
load_per_user_test

In [None]:
# (users, hours) tensor from the per-user lists, with a leading batch axis added.
time_series = torch.tensor(load_per_user_train.tolist())[None]

(time_series.shape, time_series.dtype)

In [None]:
# Swap the last two axes so the layout is (batch, lookback len, variates).
transposed_time_series = time_series.permute(0, 2, 1)

(transposed_time_series.shape, transposed_time_series.dtype)

In [None]:
# One array of loads per user, in the order of the grouped Series.
array_list_valid = [np.array(lst) for lst in load_per_user_test]
# Stack into a single (users, hours) ndarray before handing it to torch: building a
# tensor straight from a list of ndarrays is slow and triggers a PyTorch warning.
# The transpose then gives (hours, users).
combined_array = torch.tensor(np.stack(array_list_valid)).T

# Leading batch axis, matching the layout of the training tensor.
test_tensor = combined_array.unsqueeze(0)

# Verify the shape
print(test_tensor.shape)

In [None]:
# Debug aid: print every batch yielded by `train_loader`.
# NOTE(review): no loader is built from the test-stage tensors before this cell, so
# in a top-to-bottom run this iterates the loader left over from the previous
# training cell — confirm this is what should be inspected.
for inputs in train_loader:
  print(inputs)


In [None]:
import torch
import torch.optim as optim
from iTransformer import iTransformer
from torch.utils.data import DataLoader
import copy
import torch.nn as nn
import torch.nn.init as init
import random

# Function to calculate SMAPE
def calculate_smape(predictions, ground_truth):
    """Return the mean of |p - g| / (|p| + |g|) as a Python float.

    Both tensors are cast to float32 and must have the same shape. Positions where
    the ground truth is exactly zero are left out of the average; if that leaves
    nothing, the mean of an empty tensor (NaN) is returned. The value is a
    fraction in [0, 1]; callers multiply by 100 for a percentage.
    """
    predictions = predictions.float()
    ground_truth = ground_truth.float()
    mask = ground_truth != 0
    masked_predictions = predictions[mask]
    masked_ground_truth = ground_truth[mask]
    numerator = torch.abs(masked_predictions - masked_ground_truth)
    # Sum of magnitudes, not magnitude of the sum: |p + g| shrinks (down to zero)
    # whenever a prediction has the opposite sign of its target, inflating the
    # score or producing inf.
    denominator = torch.abs(masked_predictions) + torch.abs(masked_ground_truth)
    smape = torch.mean(numerator / denominator)
    return smape.item()

def uniform_init(m):
    """Give an ``nn.Linear`` layer weights drawn from U(-0.1, 0.1) and a zero bias.

    Written for ``model.apply``: modules that are not ``nn.Linear`` are left as-is.
    """
    is_linear = isinstance(m, nn.Linear)
    if is_linear:
        init.uniform_(m.weight, -0.1, 0.1)
    if is_linear and m.bias is not None:
        init.zeros_(m.bias)

# Constants
lookback_len = 47   # input steps fed to the model
num_variates = 370  # presumably every user column of the dataset — TODO confirm
pred_length = 47    # forecast horizon; also the key of the model's output dict


best_smape_error = float('inf')
best_model_state = None
best_init_method = None


# Model initialization
model = iTransformer(
    num_variates=num_variates,
    lookback_len=lookback_len,
    dim=256,
    depth=6,
    heads=8,
    dim_head=256,
    pred_length=pred_length,
    num_tokens_per_variate=2,
)

# Apply selected initialization
model.apply(uniform_init)

# Loss function and optimizer
loss_function = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# NOTE(review): T_max=1000 while only 100 epochs are run below, so just the first
# tenth of the cosine schedule is used — confirm intended.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=1000)

# DataLoader for training data
train_loader = DataLoader(transposed_time_series, batch_size=32, shuffle=False)

# Training loop
model.train()
for epoch in range(100):
    print(epoch)
    for inputs in train_loader:
        x = inputs[:, :lookback_len, :]
        # Target: the pred_length steps that directly follow the input window.
        y = inputs[:, lookback_len:lookback_len+pred_length, :]
        optimizer.zero_grad()
        outputs = model(x)
        loss = loss_function(outputs[pred_length], y)
        # Back-propagate before stepping: without this call no gradients exist and
        # optimizer.step() leaves every weight unchanged.
        loss.backward()
        optimizer.step()
        print(loss)
    scheduler.step()

# Evaluation
# NOTE(review): the forecast is scored against the same test window that is fed to
# the model, not against later observations — confirm this is the intended target.
test_tensor_adjusted = test_tensor[:, :lookback_len, :]
test_tensor_adjusted_float = test_tensor_adjusted.float()
model.eval()

with torch.no_grad():
    model_output = model(test_tensor_adjusted_float)
    if pred_length in model_output:
        predictions = model_output[pred_length]
        smape_error = calculate_smape(predictions, test_tensor_adjusted)
        if smape_error < best_smape_error:
            best_smape_error = smape_error
            best_model_state = copy.deepcopy(model.state_dict())

print(f"Best SMAPE Error(%): {best_smape_error*100}")

  # Optionally, save the best model state
  # if best_model_state is not None:
  #     torch.save(best_model_state, 'best_model.pth')

In [None]:
import torch
import torch.optim as optim
from iTransformer import iTransformer
from torch.utils.data import DataLoader
import copy
import torch.nn as nn
import torch.nn.init as init
import random

# Function to calculate SMAPE
def calculate_smape(predictions, ground_truth):
    """Return the mean of |p - g| / (|p| + |g|) as a Python float.

    Both tensors are cast to float32 and must have the same shape. Positions where
    the ground truth is exactly zero are left out of the average; if that leaves
    nothing, the mean of an empty tensor (NaN) is returned. The value is a
    fraction in [0, 1]; callers multiply by 100 for a percentage.
    """
    forecast = predictions.float()
    truth = ground_truth.float()
    nonzero_truth = truth != 0
    forecast, truth = forecast[nonzero_truth], truth[nonzero_truth]
    abs_error = torch.abs(forecast - truth)
    scale = torch.abs(forecast) + torch.abs(truth)
    return torch.mean(abs_error / scale).item()

# Initialization methods
def xavier_init(m):
    """Give an ``nn.Linear`` layer Xavier-uniform weights and a zero bias.

    Written for ``model.apply``: modules that are not ``nn.Linear`` are left as-is.
    """
    is_linear = isinstance(m, nn.Linear)
    if is_linear:
        init.xavier_uniform_(m.weight)
    if is_linear and m.bias is not None:
        init.zeros_(m.bias)

def kaiming_init(m):
    """Give an ``nn.Linear`` layer Kaiming-uniform (ReLU gain) weights and a zero bias.

    Written for ``model.apply``: modules that are not ``nn.Linear`` are left as-is.
    """
    is_linear = isinstance(m, nn.Linear)
    if is_linear:
        init.kaiming_uniform_(m.weight, nonlinearity='relu')
    if is_linear and m.bias is not None:
        init.zeros_(m.bias)

def uniform_init(m):
    """Give an ``nn.Linear`` layer weights drawn from U(-0.1, 0.1) and a zero bias.

    Written for ``model.apply``: modules that are not ``nn.Linear`` are left as-is.
    """
    if not isinstance(m, nn.Linear):
        return
    init.uniform_(m.weight, -0.1, 0.1)
    if m.bias is not None:
        init.zeros_(m.bias)

# Pool of weight initialisers; one is drawn at random for every restart.
initialization_methods = [xavier_init, kaiming_init, uniform_init]

# Constants
lookback_len = 47    # input steps fed to the model
num_variates = 370   # presumably every user column of the dataset — TODO confirm
pred_length = 47     # forecast horizon; also the key of the model's output dict
num_iterations = 5   # independent restarts
num_epochs = 1000    # epochs per restart, also the cosine-annealing period

# Best result seen so far across restarts.
best_smape_error = float('inf')
best_model_state = None
best_init_method = None

# None of these depend on the restart, so they are built once instead of on every
# iteration of the outer loop.
loss_function = torch.nn.MSELoss()
train_loader = DataLoader(transposed_time_series, batch_size=32, shuffle=False)
# NOTE(review): evaluation feeds the first `lookback_len` steps of the *training*
# series and scores the forecast against that same input; `test_tensor` is never
# used in this cell — confirm this is the intended test-stage evaluation.
test_tensor_adjusted = transposed_time_series[:, :lookback_len, :]
test_tensor_adjusted_float = test_tensor_adjusted.float()

for iteration in range(num_iterations):
    # Randomly select an initialization method
    init_method = random.choice(initialization_methods)

    # Fresh model for this restart
    model = iTransformer(
        num_variates=num_variates,
        lookback_len=lookback_len,
        dim=512,
        depth=4,
        heads=8,
        dim_head=512,
        pred_length=pred_length,
        num_tokens_per_variate=2,
    )

    # Apply selected initialization to every nn.Linear inside the model
    model.apply(init_method)

    # Optimizer and learning-rate schedule (one scheduler step per epoch)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    # Training loop
    model.train()
    for epoch in range(num_epochs):
        for inputs in train_loader:
            x = inputs[:, :lookback_len, :]
            # Target is the same window shifted one step ahead, so pred_length must
            # equal lookback_len for the shapes to agree.
            y = inputs[:, 1:lookback_len + 1, :]
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_function(outputs[pred_length], y)
            loss.backward()
            optimizer.step()
        scheduler.step()

    # Evaluation
    model.eval()
    with torch.no_grad():
        model_output = model(test_tensor_adjusted_float)
        if pred_length in model_output:
            predictions = model_output[pred_length]
            smape_error = calculate_smape(predictions, test_tensor_adjusted_float)
            if smape_error < best_smape_error:
                best_smape_error = smape_error
                best_model_state = copy.deepcopy(model.state_dict())
                best_init_method = init_method.__name__

                # Report only when a restart improves on the best score so far
                print(f"Best Initialization Method: {best_init_method}")
                print(f"Best SMAPE Error(%): {best_smape_error*100}")

  # Optionally, save the best model state
  # if best_model_state is not None:
  #     torch.save(best_model_state, 'best_model.pth')