In [None]:
!pip install pytorch-lightning==2.0.2 pytorch_forecasting==1.0.0


In [None]:
import pandas as pd
import numpy as np
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from torch.utils.data import DataLoader


In [None]:
# Fetch the SAMoSSA repository; the electricity array is read from its
# datasets/electricity folder below.
!git clone https://github.com/AbdullahO/SAMoSSA.git
import numpy as np
# Load the array from the cloned checkout. The absolute /content/... path assumes
# the clone landed under /content (presumably a Colab runtime — TODO confirm).
# Later cells unpack the shape as (time steps, users).
dataset = np.load('/content/SAMoSSA/datasets/electricity/electricity.npy', encoding='bytes')

In [None]:
training_set = dataset[25800:25848]      # rows 25800..25847: 48 consecutive time steps
validation_set = dataset[25848:25872] # rows 25848..25871: the 24 time steps that follow

In [None]:
# The 50 user ids kept for the experiment. Ids are 1-based column numbers of
# the dataset array (user id k refers to column k - 1).
sampled_user_ids = [
    58, 53, 84, 274, 164, 365, 340, 225, 281, 48,
    42, 298, 334, 63, 3, 229, 262, 104, 64, 27,
    133, 61, 245, 2, 67, 337, 127, 248, 218, 217,
    317, 280, 243, 76, 219, 250, 305, 75, 350, 49,
    95, 224, 162, 367, 73, 161, 238, 324, 29, 154,
]

In [None]:
import pandas as pd
import numpy as np

# The array is unpacked as (time steps, users).
number_of_hours, num_users = training_set.shape

# One timestamp per array row. Note the spacing is one calendar day per row
# (freq='D'), even though the row counter is called number_of_hours.
date_range = pd.date_range(start='01/01/2011 00:00', periods=number_of_hours, freq='D')

# Long format: one [timestamp, load, 1-based user id] record per reading,
# emitted user by user and, within a user, in row order.
data = [
    [date_range[hour], value, user_id]
    for user_id in range(1, num_users + 1)
    for hour, value in enumerate(training_set[:, user_id - 1])
]

# Assemble the long-format training frame.
df_train = pd.DataFrame(data, columns=['Date', 'Load', 'UserID'])

In [None]:
import pandas as pd
import numpy as np

# The array is unpacked as (time steps, users).
number_of_hours, num_users = validation_set.shape

# One timestamp per array row, one calendar day apart (freq='D').
# The start is written in ISO format: the previous '18/02/2011' literal is a
# day-first string that only resolves to 18 February because 18 cannot be a
# month, which relies on the parser's fallback and triggers a parsing warning.
date_range = pd.date_range(start='2011-02-18 00:00', periods=number_of_hours, freq='D')

# Long format: one [timestamp, load, 1-based user id] record per reading,
# emitted user by user and, within a user, in row order.
data = [
    [date_range[hour], value, user_id]
    for user_id in range(1, num_users + 1)
    for hour, value in enumerate(validation_set[:, user_id - 1])
]

# Assemble the long-format validation frame.
df_valid = pd.DataFrame(data, columns=['Date', 'Load', 'UserID'])

In [None]:
# Keep only the rows that belong to the sampled user IDs.
# .copy() makes each result an independent frame: later cells add and overwrite
# columns on df_train / df_valid, which on a boolean-mask slice raises
# SettingWithCopyWarning and is not guaranteed to write where expected.
df_train = df_train[df_train['UserID'].isin(sampled_user_ids)].copy()
df_valid = df_valid[df_valid['UserID'].isin(sampled_user_ids)].copy()

In [None]:
# Make sure 'Date' holds datetimes, then derive an integer step counter from it.
train_dates = pd.to_datetime(df_train['Date'])
df_train['Date'] = train_dates

# time_idx = whole days elapsed since the earliest timestamp in this frame.
df_train['time_idx'] = (train_dates - train_dates.min()).dt.days

# Store the user id as a string-valued categorical column.
df_train['UserID'] = df_train['UserID'].astype(str).astype('category')

In [None]:
# Make sure 'Date' holds datetimes, then derive an integer step counter from it.
valid_dates = pd.to_datetime(df_valid['Date'])
df_valid['Date'] = valid_dates

# time_idx = whole days elapsed since the earliest timestamp in this frame,
# i.e. the counter restarts at 0 for the validation window.
df_valid['time_idx'] = (valid_dates - valid_dates.min()).dt.days

# Store the user id as a string-valued categorical column.
df_valid['UserID'] = df_valid['UserID'].astype(str).astype('category')

In [None]:
import pandas as pd

# Min-max scale the Load values of one user to the [0, 1] range.
# (The function name is historical: the formula maps to [0, 1], not [-1, 1].)
def scale_to_neg_one_to_one(user_df):
    """Return a copy of ``user_df`` with 'Load' min-max scaled to [0, 1].

    Parameters
    ----------
    user_df : pandas.DataFrame
        Rows of a single user; must contain a numeric 'Load' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns; 'Load' is replaced by
        ``(Load - min) / (max - min)``. The input frame is left untouched,
        because mutating the group handed over by ``groupby.apply`` is not
        supported by pandas and could leak the scaling into the source frame.

    Notes
    -----
    A constant series (max == min) is mapped to 0.0 instead of dividing by
    zero, which would fill the column with NaN.
    """
    load = user_df['Load']
    X_min = load.min()
    X_max = load.max()
    value_range = X_max - X_min

    if value_range == 0:
        # Degenerate case: every value equals the minimum.
        scaled = (load - X_min).astype(float)
    else:
        scaled = (load - X_min) / value_range

    # assign() builds a new frame, so the caller's data is not modified.
    return user_df.assign(Load=scaled)

# Scale every user's series independently.
# group_keys=False keeps the original row index. Since pandas 2.0 the default
# (group_keys=True) prepends 'UserID' to the index even for like-indexed
# results, which leaves 'UserID' as both an index level and a column and makes
# later label lookups ambiguous. observed=True limits the grouping to the
# categories that actually occur in the frame.
df_scaled_train = (
    df_train
    .groupby('UserID', observed=True, group_keys=False)
    .apply(scale_to_neg_one_to_one)
)



In [None]:
# Display df_train, the frame that was passed to the per-user scaling step.
df_train

In [None]:
# Display the result of the per-user scaling of the training frame.
df_scaled_train

In [None]:
import pandas as pd

# Min-max scale the Load values of one user to the [0, 1] range.
# (The function name is historical: the formula maps to [0, 1], not [-1, 1].)
def scale_to_neg_one_to_one(user_df):
    """Return a copy of ``user_df`` with 'Load' min-max scaled to [0, 1].

    Parameters
    ----------
    user_df : pandas.DataFrame
        Rows of a single user; must contain a numeric 'Load' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns; 'Load' is replaced by
        ``(Load - min) / (max - min)``. The input frame is left untouched,
        because mutating the group handed over by ``groupby.apply`` is not
        supported by pandas and could leak the scaling into the source frame.

    Notes
    -----
    A constant series (max == min) is mapped to 0.0 instead of dividing by
    zero, which would fill the column with NaN.
    """
    load = user_df['Load']
    X_min = load.min()
    X_max = load.max()
    value_range = X_max - X_min

    if value_range == 0:
        # Degenerate case: every value equals the minimum.
        scaled = (load - X_min).astype(float)
    else:
        scaled = (load - X_min) / value_range

    # assign() builds a new frame, so the caller's data is not modified.
    return user_df.assign(Load=scaled)

# Scale every user's series independently.
# group_keys=False keeps the original row index. Since pandas 2.0 the default
# (group_keys=True) prepends 'UserID' to the index even for like-indexed
# results, which leaves 'UserID' as both an index level and a column and makes
# later label lookups ambiguous. observed=True limits the grouping to the
# categories that actually occur in the frame.
df_scaled_valid = (
    df_valid
    .groupby('UserID', observed=True, group_keys=False)
    .apply(scale_to_neg_one_to_one)
)

In [None]:
# Display the result of the per-user scaling of the validation frame.
df_scaled_valid

In [None]:
from pytorch_forecasting import TimeSeriesDataSet

# Window sizes: up to 48 steps of history are used to forecast up to 24 steps.
max_encoder_length = 48
max_prediction_length = 24

# Target normalizer: one set of statistics per user, with a softplus transformation.
load_normalizer = GroupNormalizer(groups=["UserID"], transformation="softplus")

training = TimeSeriesDataSet(
    df_scaled_train,
    # --- what is being modelled ---
    target="Load",
    time_idx="time_idx",
    group_ids=["UserID"],
    # --- window lengths (the minimum lengths may be shorter than the maxima) ---
    min_encoder_length=1,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # --- model inputs ---
    static_categoricals=["UserID"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["Load"],
    # --- preprocessing ---
    target_normalizer=load_normalizer,
    allow_missing_timesteps=True,
    # --- extra generated features ---
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)


In [None]:
from torch.utils.data import DataLoader

# Mini-batch size; adjust to what the machine can hold in memory.
batch_size = 16

# Training-mode dataloader built from the training dataset.
train_dataloader = training.to_dataloader(
    batch_size=batch_size,
    train=True,
    num_workers=1,
)

In [None]:
# Build the evaluation dataset with the same configuration as `training`.
# The model is fitted on the min-max scaled training frame, so it is fed the
# scaled validation frame as well; df_valid holds the loads on their original
# scale, which does not match what the network and its normalizer were fitted on.
# NOTE(review): df_scaled_valid is scaled with the validation window's own
# min/max, while the predictions are mapped back with the training min/max
# further down — confirm which statistics are intended.
test_ts_dataset = TimeSeriesDataSet.from_dataset(
    training,
    df_scaled_valid,
    stop_randomization=True,
    predict_mode=True,
)

# Evaluation dataloader (train=False), using a larger batch than training.
test_dataloader = test_ts_dataset.to_dataloader(
        train=False, batch_size=batch_size * 10, num_workers=1)

In [None]:
import lightning.pytorch as pl

# Seed the Python, NumPy and PyTorch RNGs so that weight initialisation and
# batch shuffling are repeatable across Restart & Run All.
pl.seed_everything(42, workers=True)

trainer = pl.Trainer(
    max_epochs=50,
    # 'auto' picks the GPU when one is available and falls back to CPU
    # otherwise; the previous hard-coded 'gpu' failed on CPU-only machines.
    accelerator='auto',
    devices=1,
    enable_model_summary=True,
    gradient_clip_val=0.1)

# Temporal Fusion Transformer configured from the training dataset's schema.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.001,
    hidden_size=32,
    attention_head_size=4,
    hidden_continuous_size=32,
    dropout=0.1,
    output_size=7,  # there are 7 quantiles by default: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
    loss=QuantileLoss())

# Fit on the training dataloader only (no validation dataloader is passed).
trainer.fit(
    tft,
    train_dataloaders=train_dataloader)


In [None]:
# Run the fitted model over the test dataloader and display the result.
predictions = tft.predict(test_dataloader)
predictions

In [None]:
# Shape of the prediction tensor (presumably one row per user — TODO confirm).
predictions.shape

In [None]:
import torch
import numpy as np

# Take the user order from the test dataset's own index rather than from the
# order of appearance in df_valid. The dataset arranges its samples by the
# (categorical) UserID, so zipping the prediction rows with
# df_valid['UserID'].unique() can attribute a forecast to the wrong user.
user_ids = test_ts_dataset.decoded_index['UserID'].tolist()

# Convert the tensor to a NumPy array (moved to host memory first).
numpy_data = predictions.cpu().numpy()

# Every prediction row must have exactly one owner.
assert len(user_ids) == len(numpy_data), "prediction rows and dataset index differ in length"

# Dictionary with UserIDs as keys and the matching prediction row as values.
user_predicted_times = dict(zip(user_ids, numpy_data))


In [None]:
# Display the UserID -> prediction-row mapping built in the previous cell.
user_predicted_times

In [None]:
# Function to reverse scale to original
# def reverse_scale_to_original(X_scaled, X_min, X_max):
#     return ((X_scaled + 1) / 2) * (X_max - X_min) + X_min

def reverse_min_max_scale(X_norm, X_min, X_max):
    """Undo a min-max scaling.

    Parameters
    ----------
    X_norm : scalar or array-like supporting arithmetic
        Value(s) on the scaled axis.
    X_min, X_max : scalar
        The minimum and maximum that were used for the forward scaling.

    Returns
    -------
    Same kind as ``X_norm``: ``X_norm * (X_max - X_min) + X_min``.
    """
    value_range = X_max - X_min
    stretched = X_norm * value_range
    restored = stretched + X_min
    return restored

# Per-user minimum and maximum of 'Load' in df_train, as
# {user_id: {'min': ..., 'max': ...}}.
# The aggregations are named by string: with agg([np.min, np.max]) the result
# columns are called 'amin'/'amax' on older pandas but 'min'/'max' on newer
# releases, so the hard-coded 'amin'/'amax' lookups raised KeyError there.
min_max_values = (
    df_train
    .groupby('UserID', observed=True)['Load']
    .agg(['min', 'max'])
    .to_dict('index')
)

# Map every user's scaled forecast back with that user's own min/max.
user_predicted_values = {
    user_id: reverse_min_max_scale(
        scaled_values,
        min_max_values[user_id]['min'],
        min_max_values[user_id]['max'],
    )
    for user_id, scaled_values in user_predicted_times.items()
}

user_predicted_values

In [None]:
# Select the columns needed for the comparison with the forecasts.
df = pd.DataFrame(df_valid, columns=['Date', 'Load', 'UserID', 'time_idx'])

# The test dataset needs at least one encoder step (min_encoder_length=1), so
# the first time step of each series is only ever used as history and the
# forecast covers the remaining steps. The actuals therefore have to drop the
# FIRST step; dropping the last one (time_idx != 23) produced a list of the
# right length but shifted by one step against the predictions.
first_time_idx = df['time_idx'].min()
# .copy() so that the column assignment below works on an independent frame
# instead of a slice (avoids SettingWithCopyWarning).
filtered_df = df[df['time_idx'] != first_time_idx].copy()

# Convert 'Date' to datetime
filtered_df['Date'] = pd.to_datetime(filtered_df['Date'])

# Dictionary keyed by UserID whose value is the list of that user's 'Load' values.
user_actual_times = filtered_df.groupby('UserID')['Load'].apply(list).to_dict()

In [None]:
import numpy as np

def calculate_smape_exclude_zeros(actual, forecast):
    """Calculate SMAPE (in percent) over the entries whose actual value is non-zero.

    The score is ``100 / n * sum(|forecast - actual| / (|actual| + |forecast|))``
    where ``n`` counts only the retained entries, so it lies in [0, 100].

    Parameters
    ----------
    actual : array-like of numbers
        Observed values. Entries equal to zero are dropped, together with the
        forecast at the same position.
    forecast : array-like of numbers
        Predicted values; must be broadcastable to the shape of ``actual``.

    Returns
    -------
    float
        The SMAPE of the retained entries, or NaN when no non-zero actual
        value is left to score.
    """
    actual = np.asarray(actual, dtype=float)
    forecast = np.asarray(forecast, dtype=float)
    # Align shapes up front so the same mask can be applied to both arrays.
    actual, forecast = np.broadcast_arrays(actual, forecast)

    # Previously nothing was excluded despite the function's name: a zero
    # actual contributed a full error term, or 0/0 = NaN when the forecast was
    # zero as well.
    keep = actual != 0
    if not keep.any():
        return float('nan')

    actual = actual[keep]
    forecast = forecast[keep]
    return 100/len(actual) * np.sum(np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast)))


# SMAPE per user, comparing the actual loads with the forecasts for the same key.
smape_per_user = {
    key: calculate_smape_exclude_zeros(actual_values, user_predicted_values[key])
    for key, actual_values in user_actual_times.items()
}

# Overall SMAPE: mean of the per-user scores, skipping NaN entries.
# np.mean would propagate a single NaN score into the overall figure, which
# contradicts the stated intent of excluding NaN values.
overall_smape = np.nanmean(list(smape_per_user.values()))

smape_per_user



In [None]:
# Headline metric: average of the per-user SMAPE values. NaN entries are
# ignored so that one undefined user score does not turn the summary into NaN.
overall_smape = np.nanmean(list(smape_per_user.values()))
print("SMAPE for TFT model across all users (%):",overall_smape)