In [2]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Colab Notebooks/LAB_mod_B/Project
import sys
print(sys.executable)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/LAB_mod_B/Project
/usr/bin/python3


# Importing libraries

In [3]:
!pip install optuna



In [4]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from tqdm import tqdm
import optuna
from sklearn.model_selection import KFold
import functools

#from sklearn.model_selection import TimeSeriesSplit

## Imporiting Data

In [5]:
# we use as temporal info the day of the year (1...365)
df_features = pd.read_csv("IT_NORD_VAE_input.csv")

features = ['timestamp', 'day_sin_t0','day_cos_t0',
           'day_sin_t1','day_cos_t1',
           'day_sin_t2','day_cos_t2',
           'day_sin_t3','day_cos_t3',
           'day_sin_t4','day_cos_t4',
           'day_sin_t5','day_cos_t5',
           'day_sin_t6','day_cos_t6',
           'day_sin_t7','day_cos_t7',
           'day_sin_t8','day_cos_t8',
           'month_sin_t0','month_cos_t0',
           'month_sin_t1','month_cos_t1',
           'month_sin_t2','month_cos_t2',
           'month_sin_t3','month_cos_t3',
           'month_sin_t4','month_cos_t4',
           'month_sin_t5','month_cos_t5',
           'month_sin_t6','month_cos_t6',
           'month_sin_t7','month_cos_t7',
           'month_sin_t8','month_cos_t8',
            'Unnamed: 0']

# in alternative one can use the following lines to keep day and month time info
'''
features = ['sin_dayofyear_t0','cos_dayofyear_t0',
            'sin_dayofyear_t1','cos_dayofyear_t1',
            'sin_dayofyear_t2','cos_dayofyear_t2',
            'sin_dayofyear_t3','cos_dayofyear_t3',
            'sin_dayofyear_t4','cos_dayofyear_t4',
            'sin_dayofyear_t5','cos_dayofyear_t5',
            'sin_dayofyear_t6','cos_dayofyear_t6',
            'sin_dayofyear_t7','cos_dayofyear_t7',
            'sin_dayofyear_t8','cos_dayofyear_t8','Unnamed: 0'] '''

df_features = df_features.drop(columns = features).iloc[:, 0:20]
df_features.shape

(8320, 20)

In [6]:
df_features.head()

Unnamed: 0,year_t0,2t_t0,solar_t0,tp_t0,ws_10m_t0,ws_100m_t0,hour_sin_t0,hour_cos_t0,sin_dayofyear_t0,cos_dayofyear_t0,year_t1,2t_t1,solar_t1,tp_t1,ws_10m_t1,ws_100m_t1,hour_sin_t1,hour_cos_t1,sin_dayofyear_t1,cos_dayofyear_t1
0,-1.224855,-1.266622,-0.785197,-0.540314,-0.169929,0.237315,0.249009,1.890047,0.025562,1.291842,-1.22515,-1.324388,-0.785197,-0.540313,-0.166965,0.001266,1.183622,1.408711,0.025565,1.291842
1,-1.224855,-1.324489,-0.785197,-0.540314,-0.167036,0.001132,1.183829,1.408519,0.025562,1.291842,-1.22515,-1.363017,-0.691746,-0.543041,-0.218534,-0.093002,1.570805,0.246088,0.025565,1.291842
2,-1.224855,-1.36312,-0.691746,-0.543042,-0.218606,-0.093139,1.571044,0.246007,0.025562,1.291842,-1.22515,-1.091346,0.35595,-0.543041,-0.39687,-0.52985,1.183622,-0.916535,0.025565,1.291842
3,-1.224855,-1.091436,0.35595,-0.543042,-0.396948,-0.530006,1.183829,-0.916506,0.025562,1.291842,-1.22515,-0.496896,0.545301,-0.543041,-0.719747,-0.962824,0.248877,-1.39811,0.025565,1.291842
4,-1.224855,-0.496955,0.545301,-0.543042,-0.719834,-0.962998,0.249009,-1.398034,0.025562,1.291842,-1.22515,-0.515202,-0.492313,-0.543041,-0.807978,-1.115623,-0.685868,-0.916535,0.025565,1.291842


Separate training and testing data

In [7]:
# Custom PyTorch Dataset
class TabularDataset(Dataset):                         # to use PyTorch's DataLoader
    def __init__(self, data_tensor):
        self.data = data_tensor

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, index):
        return self.data[index], 0  # dummy label

n_samples = len(df_features)
split_idx = int(n_samples * 0.8)
INPUT_DIM = df_features.shape[1]

train_data = df_features[:split_idx]
test_data = df_features[split_idx:]

train_tensor = torch.tensor(train_data.values, dtype=torch.float32)
test_tensor = torch.tensor(test_data.values, dtype=torch.float32)

train_dataset = TabularDataset(train_tensor)
test_dataset = TabularDataset(test_tensor)

BATCH_SIZE = 128
train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=False)     # no shuffle
test_loader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle=False)

print("Input Dim:", INPUT_DIM)
print("Training Dataframe Dim:", train_loader.dataset.data.shape)
print("Testing Dataframe Dim:", test_loader.dataset.data.shape)

Input Dim: 20
Training Dataframe Dim: torch.Size([6656, 20])
Testing Dataframe Dim: torch.Size([1664, 20])


# Model Definition

In [8]:
class VariationalAutoEncoder(nn.Module):
    def __init__(self, input_dim, h_dim1, z_dim, h_dim2, activation):
        super(VariationalAutoEncoder, self).__init__()

        # encoder
        self.fc1 = nn.Linear(input_dim, h_dim1)
        self.batch1 =  nn.BatchNorm1d(h_dim1)
        self.fc_mu = nn.Linear(h_dim1, z_dim)
        self.fc_sigma = nn.Linear(h_dim1, z_dim)

        # decoder
        self.fc2 = nn.Linear(z_dim, h_dim2)
        self.batch2 = nn.BatchNorm1d(h_dim2)
        self.fc3 = nn.Linear(h_dim2, input_dim)
        self.activation = activation

    def encode(self, x):
        h1 = self.activation(self.batch1(self.fc1(x)))
        mu, sigma = self.fc_mu(h1), self.fc_sigma(h1)
        return mu, sigma

    def decode(self, z):
        h = self.activation(self.batch2(self.fc2(z)))
        return self.fc3(h)

    def forward(self, x):
        mu, sigma = self.encode(x)
        epsilon = torch.randn_like(sigma)
        z_new = mu + sigma*epsilon
        x_reconstructed = self.decode(z_new)
        return x_reconstructed, mu, sigma

# Training Process

The hyperparameters we need to specify the architecture and train the VAE are:
* `INPUT_DIM`: the dimension of the **input** for the encoder
* `H_DIM`: the dimension of the **hidden layers** for encoders and decoders
* `Z_DIM`: the dimension of the **latent space**
* `BATCH_SIZE`, `NUM_EPOCHS`, `LR_RATE`: optimization hyper-parameters

Definisco la funzione per il training di una fold

In [9]:
def train_one_fold(model, train_loader, val_loader, optimizer, loss_fn, device, num_epochs=15, z_dim=2):
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  history = {
      'total_loss': [],
      'reconstruction_loss': [],
      'kl_div': [],
      'val_total_loss': [],
      'val_reconstruction_loss': [],
      'val_kl_div': []
  }
  outputs = []

  for epoch in range(num_epochs):
    epoch_loss = 0
    epoch_recon = 0
    epoch_kl = 0

    model.train()
    loop = tqdm(train_loader, desc=f"Z_DIM = {z_dim} | Epoch {epoch+1}/{num_epochs}")

    for i, (x_batch, _) in enumerate(loop):
      x_batch = x_batch.to(DEVICE)
      x_reconstructed, mu, sigma = model(x_batch)

      # Loss Function
      reconstruction_loss = loss_fn(x_reconstructed, x_batch)
      logvar = torch.log(sigma.pow(2) + 1e-8)
      kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
      loss = reconstruction_loss + kl_div

      # Backpropagation
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      loop.set_postfix(                                               # sto decidendo che info printare nella barra che vedo ad ogni epoca
          total_loss=loss.item(),
          reconstruction_loss=reconstruction_loss.item(),
          kl_div=kl_div.item())

      epoch_loss += loss.item()
      epoch_recon += reconstruction_loss.item()
      epoch_kl += kl_div.item()

      if i == 0:
        outputs.append((epoch, x_batch.clone(), x_reconstructed.clone()))

    # Compute and store averages after each epoch
    history['total_loss'].append(epoch_loss / len(train_loader.dataset))                     # appendo la loss (mediata dopo tutti i batches - quindi per un'intera epoca)
    history['reconstruction_loss'].append(epoch_recon / len(train_loader.dataset))
    history['kl_div'].append(epoch_kl / len(train_loader.dataset))

    # Validation
    model.eval()
    val_total = 0
    val_recon = 0
    val_kl = 0

    with torch.no_grad():
      for x_val, _ in val_loader:
        x_val = x_val.to(DEVICE)
        x_reconstructed, mu, sigma = model(x_val)
        loss_rec = loss_fn(x_reconstructed, x_val)
        logvar = torch.log(sigma.pow(2) + 1e-8)
        kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        loss = loss_rec + kl_div

        val_total += loss.item()
        val_recon += loss_rec.item()
        val_kl += kl_div.item()

    history['val_total_loss'].append(val_total / len(val_loader.dataset))
    history['val_reconstruction_loss'].append(val_recon / len(val_loader.dataset))
    history['val_kl_div'].append(val_kl / len(val_loader.dataset))

  return history

K-Fold Loop and Hyperparameter Grid

In [10]:
''' k-fold cross validation classico '''
def get_activation(name):
  return {
      "relu": nn.ReLU(),
      "sigmoid": nn.Sigmoid(),
      "leaky_relu": nn.LeakyReLU()}[name]

def objective(trial, train_tensor, INPUT_DIM):
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  full_tensor = train_tensor
  # hyperparameters
  h_dim1 = trial.suggest_int('h_dim1', 16, 128, step=8)                                     # setta qui gli hyperparametri
  h_dim2 = trial.suggest_int('h_dim2', 16, 128, step=8)
  z_dim = trial.suggest_int('z_dim', 2, 14, step=2)
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
  batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])

  act_name = trial.suggest_categorical("activation", ["relu", "sigmoid", "leaky_relu"])

  activation_fn = get_activation(act_name)

  kf = KFold(n_splits=5, shuffle=False)                                                     # 5 fold
  val_losses = []

  for train_idx, val_idx in kf.split(full_tensor):
    train_test = full_tensor[train_idx]
    val_test = full_tensor[val_idx]

    train_loader = DataLoader(TabularDataset(train_test), batch_size=128, shuffle=False)
    val_loader = DataLoader(TabularDataset(val_test), batch_size=128, shuffle=False)

    model = VariationalAutoEncoder(
        INPUT_DIM, h_dim1, z_dim, h_dim2,
        activation_fn).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss(reduction="sum")

    history = train_one_fold(model, train_loader, val_loader, optimizer, loss_fn, DEVICE, num_epochs=100, z_dim=z_dim)           # settare qui il numero di epochs

    val_losses.append(history['val_total_loss'][-1])

  avg_val_loss = np.mean(val_losses)
  return avg_val_loss

In [12]:
study = optuna.create_study(
    direction="minimize",
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10))

study.optimize(functools.partial(objective, train_tensor=train_tensor, INPUT_DIM=INPUT_DIM), n_trials=50, catch=(Exception,))

[I 2025-05-22 10:03:30,368] A new study created in memory with name: no-name-f5e818d0-cb8f-49ca-8e29-ba77828246b9
  h_dim1 = trial.suggest_int('h_dim1', 16, 128, 8)             # added 3 new params
  h_dim2 = trial.suggest_int('h_dim2', 16, 128, 8)
  z_dim = trial.suggest_int('z_dim', 2, 14, 2)
  lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
Z_DIM = 8 | Epoch 1/50: 100%|██████████| 42/42 [00:00<00:00, 92.84it/s, kl_div=0.117, reconstruction_loss=0.375, total_loss=0.493]
Z_DIM = 8 | Epoch 2/50: 100%|██████████| 42/42 [00:00<00:00, 126.74it/s, kl_div=0.103, reconstruction_loss=0.352, total_loss=0.455]
Z_DIM = 8 | Epoch 3/50: 100%|██████████| 42/42 [00:00<00:00, 84.26it/s, kl_div=0.1, reconstruction_loss=0.343, total_loss=0.443]
Z_DIM = 8 | Epoch 4/50: 100%|██████████| 42/42 [00:00<00:00, 87.97it/s, kl_div=0.0982, reconstruction_loss=0.329, total_loss=0.427]
Z_DIM = 8 | Epoch 5/50: 100%|██████████| 42/42 [00:00<00:00, 89.89it/s, kl_div=0.0962, reconstruction_loss=0.313, total_loss=0.409

KeyboardInterrupt: 

In [None]:
best_params = study.best_trial.params

activation_fn = {"relu": nn.ReLU(), "sigmoid": nn.Sigmoid(), "leaky_relu": nn.LeakyReLU()}[best_params['activation']]
print("Best hyperparameters found:")
for k, v in best_params.items():
    print(f"  {k}: {v}")

Usando i best parameters uso tutto il training set

In [None]:
# Rebuild model using best parameters
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model = VariationalAutoEncoder(INPUT_DIM,
                                    best_params['h_dim1'],
                                    best_params['h_dim2'],
                                    best_params['z_dim'],
                                    best_params['batch_size'],
                                    activation_fn=activation_fn
                                    ).to(DEVICE)
optimizer = torch.optim.Adam(best_model.parameters(), lr=best_params['lr'])
loss_fn = nn.MSELoss(reduction="sum")

# Use entire training dataset
train_loader = DataLoader(TabularDataset(train_tensor), batch_size=best_params['batch_size'], shuffle=False)
test_loader = DataLoader(TabularDataset(test_tensor), batch_size=best_params['batch_size'], shuffle=False)

# Train final model (increase epochs if needed)
_ = train_one_fold(best_model, train_loader, test_loader, optimizer, loss_fn, DEVICE, num_epochs=100, z_dim=best_params['z_dim'])       # setta qui le epoche


In [None]:
torch.save(best_model.state_dict(), f"vae_best_model_weights.pt")

with open("vae_best_model.json", "w") as f:
  json.dump(best_params, f)