In [1]:
import torch
import pandas as pd
from datasets import Dataset, load_dataset
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerModel, TimeSeriesTransformerForPrediction
import numpy as np
import os 



In [2]:
if not os.path.exists('/Users/dorianfitton/Library/Mobile Documents/com~apple~CloudDocs/Documents/Cours_Télécom/fil_rouge/fil_rouge.nosync/examples/datasets/EPEX_FR_2_transformer.csv'):

    df = pd.read_csv("/Users/dorianfitton/Library/Mobile Documents/com~apple~CloudDocs/Documents/Cours_Télécom/fil_rouge/fil_rouge.nosync/examples/datasets/EPEX_FR_NEW_UTC.csv")
    df['Date'] = pd.to_datetime(df['Date'])
    # Create features for positional encoding 
    df['Hour of the day'] = df['Date'].dt.hour
    df['Day of the week'] = df['Date'].dt.dayofweek
    df['Day of the year'] = df['Date'].dt.dayofyear
    df['Year'] = df['Date'].dt.year
    df['Month of the year'] = df['Date'].dt.month
    print(df.head())

    df.to_csv("/Users/dorianfitton/Library/Mobile Documents/com~apple~CloudDocs/Documents/Cours_Télécom/fil_rouge/fil_rouge.nosync/examples/datasets/EPEX_FR_2_transformer.csv")
    print("CSV done and saved")

else:
    print("Transformer csv already exists")

Transformer csv already exists


In [3]:
df = pd.read_csv("/Users/dorianfitton/Library/Mobile Documents/com~apple~CloudDocs/Documents/Cours_Télécom/fil_rouge/fil_rouge.nosync/examples/datasets/EPEX_FR_2_transformer.csv")
df['Date'] = pd.to_datetime(df['Date'])
dataset = Dataset.from_pandas(df)
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Load forecast,Generation forecast,Price,Hour of the day,Day of the week,Day of the year,Year,Month of the year
0,0,2017-01-01 00:00:00,73650.0,69539.0,58.23,0,6,1,2017,1
1,1,2017-01-01 01:00:00,72350.0,67376.0,51.95,1,6,1,2017,1
2,2,2017-01-01 02:00:00,68750.0,65412.0,47.27,2,6,1,2017,1
3,3,2017-01-01 03:00:00,65900.0,64557.0,45.49,3,6,1,2017,1
4,4,2017-01-01 04:00:00,65000.0,64552.0,44.5,4,6,1,2017,1


In [26]:
device = torch.device("cpu")

In [27]:
# Choix des paramètres 
nb_time_feat = 7
context = 90

In [28]:

# Séparation du dataset en données de test et train (pas nécessaire)
train_dataset = dataset.filter(lambda x: x['Date'].year < 2021)
#test_dataset = dataset.filter(lambda x: x['Date'].date() == pd.to_datetime('2021-01-01').date())

Filter:   0%|          | 0/52584 [00:00<?, ? examples/s]

In [29]:
# On fait la prédiction à partir de la première date 
date = train_dataset['Date'][24*context].date()

# On sépare les données d'entrainement et les données de test (ici on a un jour de test)

past_dataset = train_dataset.filter(lambda x: x['Date'].date() < date)
print("past_dataset shape: ", past_dataset.shape)

future_dataset = train_dataset.filter(lambda x: x['Date'].date() == date)
print("future_dataset shape: ", future_dataset.shape)

Filter:   0%|          | 0/35064 [00:00<?, ? examples/s]

past_dataset shape:  (2160, 10)


Filter:   0%|          | 0/35064 [00:00<?, ? examples/s]

future_dataset shape:  (24, 10)


In [30]:
# On définit quelles vont être les données utilisées/suprimées pour l'entrainement en utilisant un masque (en cas de données manquantes / NaN)
past_observed_mask = torch.ones(24*context).reshape(1, -1)
print("past_observed_mask shape:",past_observed_mask.shape)

past_observed_mask shape: torch.Size([1, 2160])


In [31]:
# On définit la matrice des features (variables exogènes et positional encoding) sur les données d'entrainement et les données de test

past_time_features = torch.tensor(list(zip(past_dataset['Day of the week'],
                                           past_dataset['Hour of the day'],
                                           past_dataset['Day of the year'],
                                           past_dataset['Year'],
                                           past_dataset['Month of the year'],
                                           past_dataset['Load forecast'],
                                           past_dataset['Generation forecast'])))\
                                          .reshape(1, -1, nb_time_feat)

print("past_time_features shape:", past_time_features.shape)

future_time_features = torch.tensor(list(zip(future_dataset['Day of the week'],
                                             future_dataset['Hour of the day'],
                                             future_dataset['Day of the year'],
                                             future_dataset['Year'],
                                             future_dataset['Month of the year'],
                                             future_dataset['Load forecast'],
                                             future_dataset['Generation forecast'])))\
                                             .reshape(1, -1, nb_time_feat)
print("future_time_features shape:",future_time_features.shape)

past_time_features shape: torch.Size([1, 2160, 7])
future_time_features shape: torch.Size([1, 24, 7])


In [32]:
# On définit les valeurs de la variable endogène pour les données d'entrainement et pour les données de test

past_values = torch.tensor(past_dataset['Price']).reshape(1, -1)
print("past_values shape:",past_values.shape)

future_values = torch.tensor(future_dataset['Price']).reshape(1, -1)
print("future_values shape:",future_values.shape)

past_values shape: torch.Size([1, 2160])
future_values shape: torch.Size([1, 24])


In [33]:
# On transfert les matrices vers le gpu 
past_values = past_values.to(device)
past_time_features = past_time_features.to(device)
past_observed_mask = past_observed_mask.to(device)
future_values = future_values.to(device)
future_time_features = future_time_features.to(device)

print("past_time_features shape:", past_time_features.shape)
print("past_values shape:",past_values.shape)
print("past_observed_mask shape:",past_observed_mask.shape)

print("future_time_features shape:",future_time_features.shape)
print("future_values shape:",future_values.shape)

past_time_features shape: torch.Size([1, 2160, 7])
past_values shape: torch.Size([1, 2160])
past_observed_mask shape: torch.Size([1, 2160])
future_time_features shape: torch.Size([1, 24, 7])
future_values shape: torch.Size([1, 24])


In [18]:
# Initializing a Time Series Transformer configuration with 24 time steps for prediction
configuration = TimeSeriesTransformerConfig(prediction_length   = 24, 
                                            context_length      = 24 * (context-1) + 17, 
                                            input_size          = 1, 
                                            output_size         = 1, 
                                            num_time_features   = nb_time_feat )

# Randomly initializing a model (with random weights) from the configuration
model = TimeSeriesTransformerForPrediction(configuration)

# Accessing the model configuration
configuration = model.config
model = model.to(device)



# during training, one provides both past and future values
# as well as possible additional features
outputs = model(
    past_values=past_values,
    past_time_features=past_time_features,
    past_observed_mask=past_observed_mask,
    future_values=future_values,
    future_time_features=future_time_features
)

loss = outputs.loss
loss.backward()




In [19]:
outputs

Seq2SeqTSPredictionOutput(loss=tensor(5.2091, grad_fn=<DivBackward0>), params=(tensor([[2.9365, 2.9245, 2.9067, 2.9391, 2.9028, 2.9107, 2.9167, 2.9190, 2.9258,
         2.9551, 2.9528, 2.9383, 2.8823, 2.9302, 2.9564, 2.9168, 2.9268, 2.9012,
         2.9575, 2.9761, 2.9372, 2.9155, 2.9449, 2.9032]],
       grad_fn=<SqueezeBackward1>), tensor([[ 0.0367,  0.0685,  0.0088, -0.0140, -0.0127,  0.0984,  0.0115,  0.0304,
         -0.0843,  0.0030,  0.0137,  0.0896, -0.0250,  0.0402, -0.1098,  0.0136,
          0.0586, -0.0508, -0.0627,  0.0188,  0.0389,  0.0467,  0.0518,  0.0420]],
       grad_fn=<SqueezeBackward1>), tensor([[0.9852, 1.0062, 1.0155, 0.9845, 0.9973, 0.9678, 1.0014, 0.9995, 0.9982,
         0.9987, 1.0161, 0.9935, 0.9873, 0.9437, 0.9712, 0.9854, 1.0272, 1.0164,
         0.9724, 0.9996, 0.9877, 0.9969, 0.9831, 0.9773]],
       grad_fn=<SqueezeBackward1>)), past_key_values=None, decoder_hidden_states=None, decoder_attentions=None, cross_attentions=None, encoder_last_hidden_state=t

In [None]:
# during inference, one only provides past values
# as well as possible additional features
# the model autoregressively generates future values
outputs = model.generate(
    past_values=past_values,
    past_time_features=past_time_features,
    past_observed_mask=past_observed_mask,
    future_time_features=future_time_features
)

In [14]:
mean_prediction = outputs.sequences.mean(dim=1).detach().cpu()
df_pred = pd.DataFrame(mean_prediction.numpy())
df_pred

#df_pred.to_csv('/Users/dorianfitton/Library/Mobile Documents/com~apple~CloudDocs/Documents/Cours_Télécom/fil_rouge/fil_rouge.nosync/forecasts/transformer_pred.csv')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,-13.782734,-26.099804,-3.193981,-14.078363,-12.001953,-3.627244,-12.409479,-21.700716,-15.958526,0.644061,...,-1.341909,14.395869,4.388855,-2.381189,-19.363369,3.30722,-28.608604,-3.585922,5.668192,0.359775


In [15]:
model.num_parameters()

399043

In [16]:
model

TimeSeriesTransformerForPrediction(
  (model): TimeSeriesTransformerModel(
    (scaler): TimeSeriesMeanScaler()
    (encoder): TimeSeriesTransformerEncoder(
      (value_embedding): TimeSeriesValueEmbedding(
        (value_projection): Linear(in_features=16, out_features=64, bias=False)
      )
      (embed_positions): TimeSeriesSinusoidalPositionalEmbedding(2177, 64)
      (layers): ModuleList(
        (0-1): 2 x TimeSeriesTransformerEncoderLayer(
          (self_attn): TimeSeriesTransformerAttention(
            (k_proj): Linear(in_features=64, out_features=64, bias=True)
            (v_proj): Linear(in_features=64, out_features=64, bias=True)
            (q_proj): Linear(in_features=64, out_features=64, bias=True)
            (out_proj): Linear(in_features=64, out_features=64, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=64, out_features=32

In [23]:
from huggingface_hub import hf_hub_download
import torch
from transformers import TimeSeriesTransformerForPrediction

file = hf_hub_download(
    repo_id="kashif/tourism-monthly-batch", filename="train-batch.pt", repo_type="dataset"
)
batch = torch.load(file)

model = TimeSeriesTransformerForPrediction.from_pretrained(
    "huggingface/time-series-transformer-tourism-monthly"
)

# during training, one provides both past and future values
# as well as possible additional features
outputs = model(
    past_values=batch["past_values"],
    past_time_features=batch["past_time_features"],
    past_observed_mask=batch["past_observed_mask"],
    static_categorical_features=batch["static_categorical_features"],
    static_real_features=batch["static_real_features"],
    future_values=batch["future_values"],
    future_time_features=batch["future_time_features"],
)

loss = outputs.loss
loss.backward()

# during inference, one only provides past values
# as well as possible additional features
# the model autoregressively generates future values
outputs = model.generate(
    past_values=batch["past_values"],
    past_time_features=batch["past_time_features"],
    past_observed_mask=batch["past_observed_mask"],
    static_categorical_features=batch["static_categorical_features"],
    static_real_features=batch["static_real_features"],
    future_time_features=batch["future_time_features"],
)

mean_prediction = outputs.sequences.mean(dim=1)

In [None]:
batch['past_time_features'].shape

torch.Size([64, 61, 2])

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class TransformerModel(nn.Module):
    def __init__(self, num_layers, d_model, num_heads, d_ff, input_sequence_length, output_sequence_length, dropout_rate):
        super(TransformerModel, self).__init__()

        self.embedding = nn.Linear(3, d_model)  # Change the input dimension to 3
        self.positional_encoding = PositionalEncoding(d_model)

        self.encoder_layers = nn.ModuleList([
            EncoderLayer(num_heads, d_model, d_ff, dropout_rate)
            for _ in range(num_layers)
        ])

        self.decoder_layers = nn.ModuleList([
            DecoderLayer(num_heads, d_model, d_ff, dropout_rate)
            for _ in range(num_layers)
        ])

        self.output_layer = nn.Linear(d_model, output_sequence_length)

    def forward(self, input):
        input = input.double()  # Convert input to DoubleTensor
        input_embedding = self.embedding(input.float())  # Convert input to FloatTensor
        input_embedding = self.positional_encoding(input_embedding)

        enc_output = input_embedding
        for encoder_layer in self.encoder_layers:
            enc_output = encoder_layer(enc_output)

        dec_output = enc_output
        for decoder_layer in self.decoder_layers:
            dec_output = decoder_layer(dec_output, enc_output)

        output = self.output_layer(dec_output)
        return output


class EncoderLayer(nn.Module):
    def __init__(self, num_heads, d_model, d_ff, dropout_rate):
        super(EncoderLayer, self).__init__()
        
        self.self_attention = MultiheadAttention(num_heads, d_model)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.norm1 = nn.LayerNorm(d_model)
        
        self.feed_forward = FeedForward(d_model, d_ff)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.norm2 = nn.LayerNorm(d_model)
    
    def forward(self, x):
        attention = self.self_attention(x, x, x)
        attention = self.dropout1(attention)
        x = self.norm1(x + attention)
        
        ff = self.feed_forward(x)
        ff = self.dropout2(ff)
        x = self.norm2(x + ff)
        
        return x


class DecoderLayer(nn.Module):
    def __init__(self, num_heads, d_model, d_ff, dropout_rate):
        super(DecoderLayer, self).__init__()
        
        self.self_attention = MultiheadAttention(num_heads, d_model)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.norm1 = nn.LayerNorm(d_model)
        
        self.encoder_attention = MultiheadAttention(num_heads, d_model)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.norm2 = nn.LayerNorm(d_model)
        
        self.feed_forward = FeedForward(d_model, d_ff)
        self.dropout3 = nn.Dropout(dropout_rate)
        self.norm3 = nn.LayerNorm(d_model)
    
    def forward(self, x, enc_output):
        self_attention = self.self_attention(x, x, x)
        self_attention = self.dropout1(self_attention)
        x = self.norm1(x + self_attention)
        
        encoder_attention = self.encoder_attention(x, enc_output, enc_output)
        encoder_attention = self.dropout2(encoder_attention)
        x = self.norm2(x + encoder_attention)
        
        ff = self.feed_forward(x)
        ff = self.dropout3(ff)
        x = self.norm3(x + ff)
        
        return x


class MultiheadAttention(nn.Module):
    def __init__(self, num_heads, d_model):
        super(MultiheadAttention, self).__init__()
        
        self.num_heads = num_heads
        self.d_model = d_model
        
        self.head_dim = d_model // num_heads
        
        self.query = nn.Linear(d_model, d_model)
        self.key = nn.Linear(d_model, d_model)
        self.value = nn.Linear(d_model, d_model)
        
        self.output = nn.Linear(d_model, d_model)
    
    def forward(self, query, key, value):
        batch_size = query.size(0)
        
        query = self.query(query).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        key = self.key(key).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        value = self.value(value).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        
        attention_weights = torch.matmul(query, key.transpose(-2, -1)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))
        attention_weights = F.softmax(attention_weights, dim=-1)
        
        output = torch.matmul(attention_weights, value)
        output = output.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        
        output = self.output(output)
        
        return output


class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super(FeedForward, self).__init__()
        
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_sequence_length=5000):
        super(PositionalEncoding, self).__init__()
        
        position = torch.arange(0, max_sequence_length).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(torch.log(torch.tensor(10000.0)) / d_model))
        pe = torch.zeros(max_sequence_length, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x


In [5]:
# Load the saved model
num_layers = 2
d_model = 16
num_heads = 8 
d_ff = 64
input_sequence_length = 5*24
output_sequence_length = 24
dropout_rate = 0.2
device = torch.device('cpu')
model = TransformerModel(num_layers, d_model, num_heads, d_ff, input_sequence_length, output_sequence_length, dropout_rate).to(device)
#model.load_state_dict(torch.load('path_to_saved_model.pth'))
model.eval()  # Set the model to evaluation mode
array_data = np.array(df.loc[:,['Price', 'Load forecast', 'Generation forecast']])
print(array_data.shape)
# Prepare the input data for prediction
input_data = torch.tensor(array_data)  # input_sequence should be of shape (k,)
input_data = input_data.unsqueeze(0).to(device)  # Add batch dimension (1, k)
print(input_data.shape)

# Perform the prediction
with torch.no_grad():
    output = model(input_data)

# Extract the predicted output
predicted_output = output.squeeze(0)  # Remove the batch dimension (1, m) -> (m,)

# Interpret the predicted output
predicted_prices = predicted_output.numpy()  # Convert to NumPy array
predicted_prices


(52584, 3)
torch.Size([1, 52584, 3])


: 

: 