<a href="https://colab.research.google.com/github/eli-js/UPMDeepLearning/blob/main/assignments/pytorch_basics/session_5/session_5_solutions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1 align="center">Deep Learning - Master in Deep Learning of UPM</h1>

**IMPORTANTE**

Antes de empezar debemos instalar PyTorch Lightning, por defecto, esto valdría:

In [1]:
!pip install pytorch-lightning

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.6.0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading pytorch_lightning-2.6.0-py3-none-any.whl (849 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.5/849.5 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.15.2 pytorch-lightning-2.6.0 torchmetrics-1.8.2


Además, si te encuentras ejecutando este código en Google Collab, lo mejor será que montes tu drive para tener acceso a los datos:

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


En este ejercicio práctico vamos a utilizar los conocimientos adquiridos para abordar un caso de Regresión mediante PyTorch Lightning

In [None]:
import pandas as pd

DATA_PATH = 'data/exercise.csv'

df = pd.read_csv(DATA_PATH)

df.head() # Imprimamos las primeras filas del dataframe

Unnamed: 0,0,1,2,3,4,5,6,7,8,target
0,-0.567889,1.939935,2.151471,-0.18695,-0.651792,1.509136,1.248835,1.023286,-2.2966,-74.736449
1,0.296674,-0.529911,0.273535,-1.220958,0.949093,-0.791798,-1.228863,0.676973,-0.280019,-4.239277
2,-0.379515,1.181062,-0.292617,-0.424034,-0.108128,1.749699,0.377352,-1.964881,-0.844832,-95.592664
3,-0.12071,0.269624,-0.009167,-0.852415,-0.121054,-0.589381,-0.321264,-0.736134,-0.88351,-132.503258
4,-0.270182,-1.466287,0.335747,-0.038218,-1.206132,-0.820438,-1.082228,-0.77405,0.330435,-181.452417


# Dataset

In [None]:
import torch
import pandas as pd

class RegressionDataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.data = df

        self.labels = self.data['target'].values
        self.features = self.data.drop('target', axis=1).values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features = self.features[idx]
        target = self.labels[idx]
        return features, target

# DataModule

In [None]:
from sklearn.model_selection import train_test_split

def split_train_val_test(df, val_size=0.2, test_size=0.2):
    eval_size = val_size + test_size # eval es un split intermedio que luego se divide en val y test
    test_prop = test_size / eval_size # proporción de test respecto a eval

    train, eval_ = train_test_split(df, test_size=eval_size)
    val, test = train_test_split(eval_, test_size=test_prop)
    return train, val, test

In [None]:
import pytorch_lightning
import numpy as np
from torch.utils.data import DataLoader

class RegressionDataModule(pytorch_lightning.LightningDataModule):
    def __init__(self, df, batch_size=16, num_workers=4, prefetch_factor=2, pin_memory=True):
        super().__init__()
        self.train_df, self.val_df, self.test_df = split_train_val_test(df)

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.prefetch_factor = prefetch_factor
        self.pin_memory = pin_memory

    def setup(self, stage=None): # esta función la ejecuta el trainer cuando se va a ejecutar el fit o el predict
        if stage == 'fit':
            self.train_dataset = RegressionDataset(self.train_df)
            self.val_dataset = RegressionDataset(self.val_df)

        elif stage == 'test':
            self.test_dataset = RegressionDataset(self.test_df)

    def collate_fn(self, batch):
        features, targets = zip(*batch)
        features = torch.tensor(np.stack(features), dtype=torch.float32)
        targets = torch.tensor(targets, dtype=torch.float32)
        return features, targets

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, collate_fn=self.collate_fn, num_workers=self.num_workers, prefetch_factor=self.prefetch_factor, pin_memory=self.pin_memory)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, collate_fn=self.collate_fn, num_workers=self.num_workers, prefetch_factor=self.prefetch_factor, pin_memory=self)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, collate_fn=self.collate_fn, num_workers=self.num_workers, prefetch_factor=self.prefetch_factor, pin_memory=self.pin_memory)

# LightningModule

In [None]:
from torchmetrics import R2Score
import torch.nn as nn

class Regressor(pytorch_lightning.LightningModule):
    def __init__(self, input_shape):
        super().__init__()

        # Inicializamos las capas de la red
        self.l1 = nn.Linear(input_shape, 32)
        self.l2 = nn.Linear(32, 64)
        self.l3 = nn.Linear(64, 32)

        self.out = nn.Linear(32, 1)

        self.act = nn.GELU()

        # Función de pérdida
        self.criterion = nn.MSELoss()

        # Inicializamos las métricas
        self.r2 = R2Score()

    # Función forward como en un nn.Module de PyTorch
    def forward(self, x):
        x = self.l1(x)
        x = self.act(x)

        x = self.l2(x)
        x = self.act(x)

        x = self.l3(x)
        x = self.act(x)

        x = self.out(x)
        return x

    def compute_batch(self, batch, split='train'):
        inputs, targets = batch
        output = self(inputs)

        preds = output.view(-1)
        targets = targets.view(-1)

        loss = self.criterion(preds, targets)
        self.log_dict(
            {
                f'{split}_loss': loss,
                f'{split}_r2': self.r2(preds, targets),
            },
            on_epoch=True)

        return loss

    def training_step(self, batch, batch_idx):
        return self.compute_batch(batch, 'train')

    def validation_step(self, batch, batch_idx):
        return self.compute_batch(batch, 'val')

    def test_step(self, batch, batch_idx):
        return self.compute_batch(batch, 'test')

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=1e-3, amsgrad=True) # self.parameters() son los parámetros del modelo

# Callbacks, Loggers y Trainer

In [None]:
import pytorch_lightning
import time

class Timer(pytorch_lightning.Callback):
    def on_train_start(self, trainer, pl_module):
        self.init = time.time()

    def on_train_end(self, trainer, pl_module):
        print(f"El entrenamiento ha durado {time.time() - self.init} segundos!")

In [None]:
import os
import datetime

SAVE_DIR = f'lightning_logs/exercise/{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}'

# DataModule
data = pd.read_csv(DATA_PATH)
data_module = RegressionDataModule(data, batch_size=64)

# LightningModule
input_shape = data.shape[1] - 1
model = Regressor(input_shape=input_shape)

# Callbacks
early_stopping_callback = pytorch_lightning.callbacks.EarlyStopping(
    monitor='val_r2', # monitorizamos la pérdida en el conjunto de validación
    mode='max',
    patience=10, # número de epochs sin mejora antes de parar
    verbose=False, # si queremos que muestre mensajes del estado del early stopping
)
model_checkpoint_callback = pytorch_lightning.callbacks.ModelCheckpoint(
    monitor='val_r2', # monitorizamos la pérdida en el conjunto de validación
    mode='max', # queremos minimizar la pérdida
    save_top_k=1, # guardamos solo el mejor modelo
    dirpath=SAVE_DIR, # directorio donde se guardan los modelos
    filename=f'best_model' # nombre del archivo
)

timer_callback = Timer()

callbacks = [early_stopping_callback, model_checkpoint_callback, timer_callback]

# Loggers
csv_logger = pytorch_lightning.loggers.CSVLogger(
    save_dir=SAVE_DIR,
    name='metrics',
    version=None
)

loggers = [csv_logger] # se pueden poner varios loggers (mirar documentación)

# Trainer
trainer = pytorch_lightning.Trainer(max_epochs=50, accelerator='gpu', devices=[0], callbacks=callbacks, logger=loggers)

trainer.fit(model, data_module)
results = trainer.test(model, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs/exercise/2024-11-28_18-49-52/metrics
/home/adrian/.local/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:652: Checkpoint directory /home/adrian/workspace/deep-learning-dlmasterupm/assignments/pytorch_basics/session_5/lightning_logs/exercise/2024-11-28_18-49-52 exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name      | Type    | Params | Mode 
----------------------------------------------
0 | l1        | Linear  | 320    | train
1 | l2        | Linear  | 2.1 K  | train
2 | l3        | Linear  | 2.1 K  | train
3 | out       | Linear  | 33     | train
4 | act       | GELU    | 0      | train
5 | criterion | MSELoss | 0      | train
6 | r2        | R2Score | 0      | train
----------------------------------------------
4.5 K     Trainable params
0         Non-trainable params
4.5 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


El entrenamiento ha durado 114.78146147727966 segundos!


Testing: |          | 0/? [00:00<?, ?it/s]

# Inferencia

In [None]:
test_sample = data_module.test_df.sample(10)

inputs = torch.tensor(test_sample.drop('target', axis=1).values, dtype=torch.float32)
targets = torch.tensor(test_sample['target'].values, dtype=torch.float32)

model.eval()

with torch.no_grad():
    outputs = model(inputs)
    preds = outputs.squeeze().numpy()

for i, (pred, target) in enumerate(zip(preds, targets)):
    print(f"Predicción {i}: {pred:.2f}, Valor real: {target:.2f}")

Predicción 0: -68.67, Valor real: -49.20
Predicción 1: -50.52, Valor real: -38.50
Predicción 2: -36.19, Valor real: -26.59
Predicción 3: -111.46, Valor real: -83.73
Predicción 4: 232.16, Valor real: 253.69
Predicción 5: -159.69, Valor real: -142.23
Predicción 6: -212.26, Valor real: -220.30
Predicción 7: 187.02, Valor real: 179.16
Predicción 8: -199.13, Valor real: -198.58
Predicción 9: 164.71, Valor real: 154.61
