Creating a CNN for Arousal Prediction

In [21]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, Subset
import pytorch_lightning as pl
import torch.nn as nn
import torch.optim as optim
from pytorch_lightning import Trainer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl



## Custom Dataset

In [22]:
class TensorDataset(Dataset):
    def __init__(self, data_dir, max_frames=115):
        self.data_dir = data_dir
        self.max_frames = max_frames

        # Ottieni tutti i file di tensori
        all_tensor_files = [f for f in os.listdir(data_dir) if f.endswith('.npy')]

        # Filtra i file con abbastanza frame
        self.tensor_files = []
        for file_name in all_tensor_files:
            file_path = os.path.join(data_dir, file_name)
            tensor = np.load(file_path, mmap_mode='r')  # Usa mmap per migliorare la velocità
            if tensor.shape[0] >= self.max_frames:
                self.tensor_files.append(file_name)

        print(f"Loaded {len(self.tensor_files)} of {len(all_tensor_files)} files from {data_dir} (filtered out {len(all_tensor_files) - len(self.tensor_files)} files)")

    def __len__(self):
        return len(self.tensor_files)

    def __getitem__(self, idx):
        file_name = self.tensor_files[idx]
        file_path = os.path.join(self.data_dir, file_name)

        # Carica il tensore
        tensor = np.load(file_path, mmap_mode='r')
        tensor = torch.tensor(tensor, dtype=torch.float32)

        # Trimma se necessario
        if tensor.shape[0] > self.max_frames:
            tensor = tensor[:self.max_frames]    

        # Estrai etichette dal nome file
        file_parts = file_name.split('_')
        arousal = float(file_parts[-4])
        arousal = 0 if arousal < 5 else 1
        labels = torch.tensor([arousal], dtype=torch.float32)

        return tensor, labels, file_name


# Definition of the CNN

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl

class EmotionCNN(pl.LightningModule):
    def __init__(self, input_shape):
        """
        input_shape: torch.Size([131, 478, 3])
            - 131: altezza (o numero di frame)
            - 478: larghezza (numero di landmark)
            - 3: canali (coordinate x, y, z)
        """
        super().__init__()
        print(f"Model initialized with input_shape: {input_shape}")

        # Impostazioni di base
        self.height = input_shape[0]   # 131
        self.width = input_shape[1]    # 478
        self.channels = input_shape[2] # 3

        # Layer convoluzionali
        self.conv1 = nn.Conv2d(in_channels=self.channels, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # Layer Fully Connected (inizializzato con dimensioni fisse, corretto dopo)
        self.fc1 = nn.Linear(1, 512)  # Placeholder, sarà corretto nel forward
        self.fc2 = nn.Linear(512, 256)
        self.fc_out = nn.Linear(256, 1)

        self.relu = nn.ReLU()
        self.loss_fn = nn.BCEWithLogitsLoss()

    def forward(self, x):
        batch_size = x.size(0)
        device = x.device  # Assicura che sia su GPU o CPU in modo corretto

        # Cambiamo l'ordine delle dimensioni: [batch, channels, height, width]
        x = x.permute(0, 3, 1, 2).contiguous()

        # Passaggio attraverso i layer convoluzionali
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))

        
        fc_input_dim = x.shape[1] * x.shape[2] * x.shape[3]

        if self.fc1.in_features != fc_input_dim:
            self.fc1 = nn.Linear(fc_input_dim, 512).to(device)

        # Flatten
        x = x.view(batch_size, -1)

        # Layer Fully Connected
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc_out(x)

        return x

    def training_step(self, batch, batch_idx):
        x, y, _ = batch

        # Assicuriamoci che input e target siano sullo stesso device
        device = self.device
        x, y = x.to(device), y.to(device)

        # Assicuriamoci che y sia float per la BCEWithLogitsLoss
        y = y.float()

        y_pred = self(x)
        loss = self.loss_fn(y_pred, y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.001)


# Leave-One-Out Training

In [24]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader, ConcatDataset
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def leave_one_out_training(data_dir, max_epochs=10):
    users = sorted(os.listdir(data_dir))
    all_fold_metrics = []

    for i, user in enumerate(users):
        print(f"\n=== Leave-One-Out Fold {i+1}/{len(users)} ===")
        test_user_path = os.path.join(data_dir, user, 'tensors')
        train_users = [u for u in users if u != user]

        train_datasets = []
        for train_user in train_users:
            user_tensors_path = os.path.join(data_dir, train_user, 'tensors')
            if os.path.exists(user_tensors_path):
                dataset = TensorDataset(user_tensors_path)
                if len(dataset) > 0:
                    train_datasets.append(dataset)
                else:
                    print(f"Warning: Nessun campione valido per {train_user}")

        if not train_datasets:
            print(f"Skipping fold {i+1} - no training samples found")
            continue

        train_dataset = ConcatDataset(train_datasets)
        test_dataset = TensorDataset(test_user_path)
        if len(test_dataset) == 0:
            print(f"Skipping fold {i+1} - no test samples found for user {user}")
            continue

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True, num_workers=0)
        test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

        sample, _, _ = train_datasets[0][0]
        input_shape = sample.shape
        print(f"Sample shape: {input_shape}")

        model = EmotionCNN(input_shape)


        trainer = Trainer(
            accelerator="auto",
            devices=1,
            max_epochs=max_epochs,
            logger=True,
            enable_checkpointing=True,
            callbacks=[pl.callbacks.EarlyStopping(monitor='train_loss', patience=3)],
            enable_progress_bar=True
        )

        trainer.fit(model, train_loader) 

        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for batch_idx, (test_sample, test_label, test_file) in enumerate(test_loader):
                print(next(model.parameters()).device)  # Dovrebbe stampare cuda:0
                print(test_sample.device)  # Dovrebbe stampare cuda:0

                print(f"Batch {batch_idx}: test_sample shape {test_sample.shape}")

                if test_sample.nelement() == 0:
                    print(f"Warning: test_sample è vuoto! Salto questo batch.")
                    continue
                
                if test_sample.dim() == 3:
                    test_sample = test_sample.unsqueeze(0).permute(0, 3, 1, 2).contiguous()
                
                test_pred = model(test_sample)

                all_preds.append(test_pred.cpu())
                all_labels.append(test_label.cpu())
                print(f"Test file: {test_file[0]}")
                print(f"Prediction: {test_pred.cpu().numpy()}, True label: {test_label.cpu().numpy()}")

        if all_preds and all_labels:
            all_preds = torch.cat(all_preds, dim=0).numpy()
            all_labels = torch.cat(all_labels, dim=0).numpy()

            mse = mean_squared_error(all_preds, all_labels)
            mae = mean_absolute_error(all_preds, all_labels)
            r2 = r2_score(all_labels, all_preds)

            print(f"Metrics for Fold {i+1}:")
            print(f"  MSE: {mse:.4f}")
            print(f"  MAE: {mae:.4f}")
            print(f"  R² Score: {r2:.4f}")

            all_fold_metrics.append({'fold': i+1, 'user': user, 'mse': mse, 'mae': mae, 'r2': r2})

    if all_fold_metrics:
        avg_mse = sum(fold['mse'] for fold in all_fold_metrics) / len(all_fold_metrics)
        avg_mae = sum(fold['mae'] for fold in all_fold_metrics) / len(all_fold_metrics)
        avg_r2 = sum(fold['r2'] for fold in all_fold_metrics) / len(all_fold_metrics)

        print("\n=== Overall Cross-Validation Results ===")
        print(f"Average MSE: {avg_mse:.4f}")
        print(f"Average MAE: {avg_mae:.4f}")
        print(f"Average R² Score: {avg_r2:.4f}")

    return all_fold_metrics


# Start the Leave-One-Out Training

In [25]:
data_dir = "data"
leave_one_out_training(data_dir, max_epochs=10)


=== Leave-One-Out Fold 1/25 ===
Loaded 94 of 99 files from data/0d890ad3184ded354855c291be64a07fe2e378fc/tensors (filtered out 5 files)
Loaded 99 of 99 files from data/0f95dde6c04e48170a73b2de493767d448c53cd0/tensors (filtered out 0 files)
Loaded 98 of 99 files from data/1ea730de507e68e64b264e1fab966415179206cb/tensors (filtered out 1 files)
Loaded 95 of 99 files from data/26892c39a1c19b692286394827f54d0b9aba3b9e/tensors (filtered out 4 files)
Loaded 99 of 99 files from data/344ea9aff53317ba32c08687216a401c9d47c156/tensors (filtered out 0 files)
Loaded 99 of 99 files from data/53df3de1199d9c57741e6cd82b2d32d7849de64d/tensors (filtered out 0 files)
Loaded 75 of 78 files from data/6d390e2eda0851bac98f7e7fcacca63bbe4ce49f/tensors (filtered out 3 files)
Loaded 97 of 99 files from data/70a079525dafd20b5b53647cd4ec30431a0e4515/tensors (filtered out 2 files)
Loaded 84 of 88 files from data/7ebf50ae01489b65369c936611535e8b11ce46ad/tensors (filtered out 4 files)
Loaded 97 of 99 files from data

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loaded 3 of 99 files from data/0ca7288b551d6863ade0cca0248bbcbe6e600fe0/tensors (filtered out 96 files)
Sample shape: torch.Size([115, 478, 3])
Model initialized with input_shape: torch.Size([115, 478, 3])



  | Name    | Type              | Params | Mode 
------------------------------------------------------
0 | conv1   | Conv2d            | 896    | train
1 | conv2   | Conv2d            | 18.5 K | train
2 | conv3   | Conv2d            | 73.9 K | train
3 | pool    | MaxPool2d         | 0      | train
4 | fc1     | Linear            | 1.0 K  | train
5 | fc2     | Linear            | 131 K  | train
6 | fc_out  | Linear            | 257    | train
7 | relu    | ReLU              | 0      | train
8 | loss_fn | BCEWithLogitsLoss | 0      | train
------------------------------------------------------
225 K     Trainable params
0         Non-trainable params
225 K     Total params
0.903     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 0/71 [14:02<?, ?it/s] 
Epoch 0:   0%|          | 0/71 [13:50<?, ?it/s]
Epoch 0:   0%|          | 0/71 [07:48<?, ?it/s]
Epoch 0:   0%|          | 0/71 [02:33<?, ?it/s]
Epoch 0:   0%|          | 0/71 [01:36<?, ?it/s]
Epoch 0:  11%|█▏        | 8/71 [01:16<10:03,  0.10it/s, v_num=5]
Epoch 0:  17%|█▋        | 12/71 [00:19<01:37,  0.61it/s, v_num=6]


OutOfMemoryError: CUDA out of memory. Tried to allocate 216.00 MiB. GPU 0 has a total capacity of 3.64 GiB of which 191.31 MiB is free. Process 3571 has 55.55 MiB memory in use. Including non-PyTorch memory, this process has 3.39 GiB memory in use. Of the allocated memory 3.21 GiB is allocated by PyTorch, and 110.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [2]:
import os
import numpy as np
import pandas as pd

def analyze_tensor_dimensions_for_all_users(base_dir, output_csv='tensor_dimensions_all_users.csv'):
    tensor_data = []

    # Itera su tutti gli utenti
    for user in os.listdir(base_dir):
        user_dir = os.path.join(base_dir, user)
        if not os.path.isdir(user_dir):  # Ignora se non è una cartella
            continue
        
        # Trova la cartella "tensors" all'interno di ogni utente
        tensors_dir = os.path.join(user_dir, 'tensors')
        if not os.path.isdir(tensors_dir):
            continue

        # Lista tutti i file .npy
        tensor_files = [f for f in os.listdir(tensors_dir) if f.endswith('.npy')]

        for file_name in tensor_files:
            file_path = os.path.join(tensors_dir, file_name)
            
            # Carica il tensore
            tensor = np.load(file_path)
            
            # Ottieni la forma del tensore (numero di frame, landmarks, 3D)
            tensor_shape = tensor.shape
            
            # Aggiungi i dati al dataframe
            tensor_data.append({
                'user': user,
                'file_name': file_name,
                'num_frames': tensor_shape[0],
                'num_landmarks': tensor_shape[1],
                'num_coordinates': tensor_shape[2]
            })
            
            print(f"User: {user}, File: {file_name}, Shape: {tensor_shape}")

    # Crea un DataFrame con i dati raccolti
    df = pd.DataFrame(tensor_data)
    
    # Salva il DataFrame in un file CSV
    df.to_csv(output_csv, index=False)
    
    print(f"\n--- Dati salvati in {output_csv} ---")
    print(df.head())

# Esegui l'analisi su tutti gli utenti
base_dir = "data"
analyze_tensor_dimensions_for_all_users(base_dir)


User: dc894f24a0678a276269c6b9d76d97edd082fa04, File: dc894f24a0678a276269c6b9d76d97edd082fa04_serene_283e2999-f591-4be4-9620-c69fec4f26a7.jpg_3_5_2_1.npy, Shape: (138, 478, 3)
User: dc894f24a0678a276269c6b9d76d97edd082fa04, File: dc894f24a0678a276269c6b9d76d97edd082fa04_neutre_7da731d0-f26e-4974-b661-da7cfada6fbc.jpg_5_5_2_2.npy, Shape: (133, 478, 3)
User: dc894f24a0678a276269c6b9d76d97edd082fa04, File: dc894f24a0678a276269c6b9d76d97edd082fa04_serene_f44c9cdd-53aa-46e7-afc2-6a1927cfdf89.jpg_3_4_1_1.npy, Shape: (109, 478, 3)
User: dc894f24a0678a276269c6b9d76d97edd082fa04, File: dc894f24a0678a276269c6b9d76d97edd082fa04_neutre_07f96b48-2b8e-4b1d-a450-559f2334b84d.jpg_6_6_4_2.npy, Shape: (129, 478, 3)
User: dc894f24a0678a276269c6b9d76d97edd082fa04, File: dc894f24a0678a276269c6b9d76d97edd082fa04_serene_e6732706-c811-46ca-a051-f6d321cb27db.jpg_4_5_3_1.npy, Shape: (139, 478, 3)
User: dc894f24a0678a276269c6b9d76d97edd082fa04, File: dc894f24a0678a276269c6b9d76d97edd082fa04_serene_5a43dc93-8068

In [None]:
import numpy as np
import os

def calculate_frame_stats(data_dir):
    frame_counts = []
    
    for user in os.listdir(data_dir):
        user_dir = os.path.join(data_dir, user)
        if not os.path.isdir(user_dir):
            continue
        
        tensors_dir = os.path.join(user_dir, 'tensors')
        if not os.path.isdir(tensors_dir):
            continue
        
        tensor_files = [f for f in os.listdir(tensors_dir) if f.endswith('.npy')]
        
        for file_name in tensor_files:
            file_path = os.path.join(tensors_dir, file_name)
            tensor = np.load(file_path)
            frame_counts.append(tensor.shape[0])
    
    frame_counts = np.array(frame_counts)
    
    # Calcola la media e la mediana
    mean_frames = np.mean(frame_counts)
    median_frames = np.median(frame_counts)
    
    # Calcola anche la deviazione standard per capire la variabilità
    std_frames = np.std(frame_counts)
    
    # Calcola i percentili (ad esempio il 90° percentile)
    percentile_90 = np.percentile(frame_counts, 90)
    
    print(f"Numero medio di frame: {mean_frames:.2f}")
    print(f"Numero mediano di frame: {median_frames}")
    print(f"Deviazione standard del numero di frame: {std_frames:.2f}")
    print(f"90° percentile del numero di frame: {percentile_90}")
    
    return mean_frames, median_frames, percentile_90

# Esegui il calcolo
data_dir = "data/users"
mean_frames, median_frames, percentile_90 = calculate_frame_stats(data_dir)


In [None]:
# copy the folder data and for each user copy only the folder tensors with all the file inside obv, create a folder called data2

import shutil
import os

def copy_tensors_only(data_dir, output_dir):
    for user in os.listdir(data_dir):
        user_dir = os.path.join(data_dir, user)
        if not os.path.isdir(user_dir):
            continue
        
        tensors_dir = os.path.join(user_dir, 'tensors')
        if not os.path.isdir(tensors_dir):
            continue
        
        output_user_dir = os.path.join(output_dir, user)
        os.makedirs(output_user_dir, exist_ok=True)
        
        output_tensors_dir = os.path.join(output_user_dir, 'tensors')
        shutil.copytree(tensors_dir, output_tensors_dir)

# Esegui la copia
data_dir = "data/users"
output_dir = "data/data2"
copy_tensors_only(data_dir, output_dir)

In [None]:
import torchvision.ops
print(torchvision.ops.nms)
