In [1]:
# Cell 1: Initial setup, connecting to Google Drive, installing libraries, and checking GPU availability.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Instalar y actualizar las librer√≠as
print("\nInstalando y actualizando librer√≠as...")
!pip install --upgrade -q mne pytorch-lightning timm
print("‚úÖ Librer√≠as listas.")

# Step 3: Prueba expl√≠cita de control de la GPU
import torch
print("\n--- INICIANDO PRUEBA DE CONTROL DE GPU ---")
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"‚úÖ GPU detectada: {torch.cuda.get_device_name(0)}")
    try:
        tensor_grande = torch.randn(1024, 1024, 512, device=device) # Asignar 2GB
        memoria_asignada = torch.cuda.memory_allocated(0) / 1024**3
        print(f"‚úÖ ¬°√âxito! Memoria asignada activamente: {memoria_asignada:.2f} GB")
        del tensor_grande
        torch.cuda.empty_cache()
        print("‚úÖ Memoria liberada correctamente.")
        print("--- PRUEBA DE CONTROL DE GPU COMPLETADA EXITOSAMENTE ---")
    except Exception as e:
        print(f"‚ùå ¬°ERROR DURANTE LA PRUEBA! No se pudo asignar memoria a la GPU: {e}")
else:
    print("‚ùå ¬°ERROR! No se detect√≥ ninguna GPU en este entorno de ejecuci√≥n.")

Mounted at /content/drive

Instalando y actualizando librer√≠as...
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m7.4/7.4 MB[0m [31m66.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m828.2/828.2 kB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m983.0/983.0 kB[0m [31m60.7 MB/s[0m eta [36m0:00:00[0m
[?25h‚úÖ Librer√≠as listas.

--- INICIANDO PRUEBA DE CONTROL DE GPU ---
‚úÖ GPU detectada: NVIDIA A100-SXM4-40GB
‚úÖ ¬°√âxito! Memoria asignada activamente: 2.00 GB
‚úÖ Memoria liberada correctamente.
--- PRUEBA DE CONTROL DE GPU COMPLETADA EXITOSAMENTE ---


In [2]:
# ==============================================================================
# 1. SETUP AND DEPENDENCY INSTALLATION
# ==============================================================================
print("Ensuring PyTorch Lightning and other libraries are installed...")
# Install the necessary libraries with pinned versions to avoid conflicts
!pip install --upgrade -q pytorch-lightning timm "pandas==2.2.2" "pyarrow==19.0.0"
print("‚úÖ Installation check complete.")

# ==============================================================================
# 2. IMPORTS AND INITIAL CONFIGURATION
# ==============================================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
from torch.utils.data import Dataset, DataLoader, random_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import CSVLogger
from torchmetrics.classification import MulticlassAccuracy, MulticlassF1Score
import numpy as np
import pandas as pd
from pathlib import Path
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import os

# Set matrix multiplication precision for A100/H100 GPUs for better performance
torch.set_float32_matmul_precision('medium')
print("‚úÖ Libraries imported and configuration set.")

# ==============================================================================
# 3. MODEL ARCHITECTURE DEFINITION (MULTI-MODEL SUPPORT)
# ==============================================================================
def get_model(model_name='efficientnet_b0', num_classes=5, pretrained=True):
    """
    Creates a model adapted for sleep stage classification.
    Supports multiple architectures.
    """
    if model_name == 'convnext_tiny':
        model = timm.create_model('convnextv2_tiny.fcmae_ft_in22k_in1k', pretrained=pretrained)
        original_conv = model.stem[0]
        new_first_conv = nn.Conv2d(1, original_conv.out_channels, kernel_size=original_conv.kernel_size, stride=original_conv.stride, padding=original_conv.padding, bias=(original_conv.bias is not None))
        with torch.no_grad():
            if original_conv.weight.shape[1] == 3:
                new_first_conv.weight[:, :] = original_conv.weight.clone().mean(dim=1, keepdim=True)
        model.stem[0] = new_first_conv
        num_ftrs = model.head.fc.in_features
        model.head.fc = nn.Linear(num_ftrs, num_classes)
        print(f"‚úÖ ConvNeXT Tiny model created.")

    elif model_name == 'convnext_base':
        model = timm.create_model('convnextv2_base.fcmae_ft_in22k_in1k', pretrained=pretrained)
        original_conv = model.stem[0]
        new_first_conv = nn.Conv2d(1, original_conv.out_channels, kernel_size=original_conv.kernel_size, stride=original_conv.stride, padding=original_conv.padding, bias=(original_conv.bias is not None))
        with torch.no_grad():
            if original_conv.weight.shape[1] == 3:
                new_first_conv.weight[:, :] = original_conv.weight.clone().mean(dim=1, keepdim=True)
        model.stem[0] = new_first_conv
        num_ftrs = model.head.fc.in_features
        model.head.fc = nn.Linear(num_ftrs, num_classes)
        print(f"‚úÖ ConvNeXT Base model created.")

    elif model_name == 'vit_base':
        model = timm.create_model('vit_base_patch16_224.augreg_in21k', pretrained=pretrained, img_size=(76, 60))
        original_conv = model.patch_embed.proj
        new_patch_embed = nn.Conv2d(1, original_conv.out_channels, kernel_size=original_conv.kernel_size, stride=original_conv.stride, padding=original_conv.padding, bias=(original_conv.bias is not None))
        with torch.no_grad():
            if original_conv.weight.shape[1] == 3:
                new_patch_embed.weight[:, :] = original_conv.weight.clone().mean(dim=1, keepdim=True)
        model.patch_embed.proj = new_patch_embed
        num_ftrs = model.head.in_features
        model.head = nn.Linear(num_ftrs, num_classes)
        print(f"‚úÖ Vision Transformer (ViT) Base model created.")

    elif model_name == 'efficientnet_b0':
        model = timm.create_model('efficientnet_b0', pretrained=pretrained)
        original_conv = model.conv_stem
        new_first_conv = nn.Conv2d(1, original_conv.out_channels, kernel_size=original_conv.kernel_size, stride=original_conv.stride, padding=original_conv.padding, bias=(original_conv.bias is not None))
        with torch.no_grad():
            if original_conv.weight.shape[1] == 3:
                new_first_conv.weight[:, :] = original_conv.weight.clone().mean(dim=1, keepdim=True)
        model.conv_stem = new_first_conv
        num_ftrs = model.classifier.in_features
        model.classifier = nn.Linear(num_ftrs, num_classes)
        print(f"‚úÖ EfficientNet B0 model created.")

    else:
        raise ValueError(f"Model '{model_name}' not supported.")

    return model

print("‚úÖ `get_model` function defined with multi-architecture support.")

# ==============================================================================
# 4. PYTORCH LIGHTNING MODULE
# ==============================================================================
class SleepStageClassifierLightning(pl.LightningModule):
    def __init__(self, model_name, learning_rate=1e-5, class_weights=None):
        super().__init__()
        self.save_hyperparameters()
        self.model = get_model(model_name=self.hparams.model_name, num_classes=5, pretrained=True)
        self.train_accuracy = MulticlassAccuracy(num_classes=5)
        self.val_accuracy = MulticlassAccuracy(num_classes=5)
        self.train_f1 = MulticlassF1Score(num_classes=5, average='macro')
        self.val_f1 = MulticlassF1Score(num_classes=5, average='macro')
        self.weights = torch.tensor(class_weights, dtype=torch.float) if class_weights is not None else None
        self.loss_fn = nn.CrossEntropyLoss(weight=self.weights)

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y_true = batch
        y_pred_logits = self(x)
        loss = self.loss_fn(y_pred_logits, y_true)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', self.train_accuracy(y_pred_logits, y_true), on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y_true = batch
        y_pred_logits = self(x)
        loss = self.loss_fn(y_pred_logits, y_true)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', self.val_accuracy(y_pred_logits, y_true), on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams.learning_rate)
        scheduler = {
            'scheduler': ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3),
            'monitor': 'val_loss',
            'interval': 'epoch',
            'frequency': 1,
        }
        return [optimizer], [scheduler]

print("‚úÖ `SleepStageClassifierLightning` module defined.")

# ==============================================================================
# 5. CUSTOM DATASET DEFINITION
# ==============================================================================
class CombinedDataset(Dataset):
    def __init__(self, file_paths_chunk):
        print(f"Initializing dataset with {len(file_paths_chunk)} files...")
        self.file_paths = file_paths_chunk
        self.epochs_per_file = []
        self._cache = {}
        for f_path in self.file_paths:
            try:
                df_labels = pd.read_parquet(f_path, columns=['label'])
                num_valid = df_labels['label'].isin([0, 1, 2, 3, 4]).sum()
                self.epochs_per_file.append(num_valid)
            except Exception as e:
                print(f"Warning: Could not read or process {f_path.name}. Skipping. Error: {e}")
                self.epochs_per_file.append(0)
        self.cumulative_epochs = np.cumsum(self.epochs_per_file)
        self.total_epochs = self.cumulative_epochs[-1] if len(self.cumulative_epochs) > 0 else 0
        print(f"‚úÖ Dataset initialized. Total valid epochs: {self.total_epochs}")

    def __len__(self):
        return self.total_epochs

    def __getitem__(self, idx):
        file_idx = np.searchsorted(self.cumulative_epochs, idx, side='right')
        local_idx = idx - (self.cumulative_epochs[file_idx - 1] if file_idx > 0 else 0)
        file_path = self.file_paths[file_idx]
        if file_path not in self._cache:
            try:
                df = pd.read_parquet(file_path)
                self._cache[file_path] = df[df['label'].isin([0, 1, 2, 3, 4])].reset_index(drop=True)
            except Exception as e:
                raise IOError(f"Error reading file {file_path.name} in __getitem__: {e}")
        row = self._cache[file_path].iloc[local_idx]
        label = np.int64(row['label'])
        spectrogram_flat = row.drop('label').values.astype(np.float32)
        mean, std = spectrogram_flat.mean(), spectrogram_flat.std()
        spectrogram_normalized = (spectrogram_flat - mean) / (std + 1e-6)
        spectrogram_2d = spectrogram_normalized.reshape(1, 76, 60)
        return torch.from_numpy(spectrogram_2d), torch.tensor(label)

print("‚úÖ `CombinedDataset` class defined.")

# ==============================================================================
# 6. TRAINING EXECUTION
# ==============================================================================
print("\n--- Starting EfficientNet B0 Experiment ---")

# --- General Parameters ---
# Only EfficientNet B0 will be tested in this run
MODELS_TO_TEST = ['efficientnet_b0']
EPOCHS = 40
BATCH_SIZE = 256
NUM_WORKERS = 8
CLASS_WEIGHTS = [0.7, 3.5, 0.5, 1.5, 1.2]
LEARNING_RATE = 5e-5

# --- Paths and File Identification (using Google Drive) ---
shhs1_processed_dir_base = Path('/content/drive/MyDrive/shhs1_processed')
shhs2_processed_dir_base = Path('/content/drive/MyDrive/shhs2_processed')

shhs1_files = list(shhs1_processed_dir_base.glob('*.parquet'))[:50]
shhs2_files = list(shhs2_processed_dir_base.glob('*.parquet'))[:50]
specific_shhs_file_paths = shhs1_files + shhs2_files

# --- Main Experiment Loop ---
if not specific_shhs_file_paths:
     print("\nERROR: No valid .parquet files were found. Aborting experiment.")
else:
    print(f"\nFound {len(specific_shhs_file_paths)} specific files for training.")

    full_dataset = CombinedDataset(specific_shhs_file_paths)

    if len(full_dataset) > 1:
        train_size = int(0.8 * len(full_dataset))
        val_size = len(full_dataset) - train_size
        train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

        print(f"Dataset split: {len(train_dataset)} training samples, {len(val_dataset)} validation samples.")

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, persistent_workers=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, persistent_workers=True)

        # The loop will only run once for the specified model
        for model_name in MODELS_TO_TEST:
            print(f"\n{'='*20} STARTING EXPERIMENT FOR MODEL: {model_name.upper()} {'='*20}")

            model = SleepStageClassifierLightning(
                model_name=model_name,
                learning_rate=LEARNING_RATE,
                class_weights=CLASS_WEIGHTS
            )

            experiment_name = f"{model_name}_100_files_solo_run"
            csv_logger = CSVLogger("/content/drive/MyDrive/sleep_logs/", name=experiment_name)

            checkpoint_callback = ModelCheckpoint(
                monitor='val_loss',
                dirpath='/content/drive/MyDrive/final_model_checkpoint/',
                filename=f"sleep-stage-model-{experiment_name}-{{epoch:02d}}-{{val_loss:.4f}}",
                save_top_k=1,
                mode='min'
            )

            early_stop_callback = EarlyStopping(
               monitor='val_loss',
               patience=7,
               verbose=True,
               mode='min'
            )

            trainer = pl.Trainer(
                max_epochs=EPOCHS,
                accelerator="gpu",
                devices=1,
                logger=csv_logger,
                callbacks=[checkpoint_callback, early_stop_callback],
                precision="bf16-mixed",
                gradient_clip_val=1.0
            )

            print(f"üöÄ Starting model training for {model_name.upper()}...")
            trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
            print(f"‚úÖ Training complete for {model_name.upper()}!")
            print(f"Best model for this run saved at: {checkpoint_callback.best_model_path}")
            print(f"{'='*20} FINISHED EXPERIMENT FOR MODEL: {model_name.upper()} {'='*20}")

    else:
        print("Dataset is too small to split. Aborting experiment.")

print("\n--- EfficientNet B0 Experiment Complete ---")

Ensuring PyTorch Lightning and other libraries are installed...
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m42.1/42.1 MB[0m [31m58.2 MB/s[0m eta [36m0:00:00[0m
[?25h‚úÖ Installation check complete.
‚úÖ Libraries imported and configuration set.
‚úÖ `get_model` function defined with multi-architecture support.
‚úÖ `SleepStageClassifierLightning` module defined.
‚úÖ `CombinedDataset` class defined.

--- Starting EfficientNet B0 Experiment ---

Found 100 specific files for training.
Initializing dataset with 100 files...
‚úÖ Dataset initialized. Total valid epochs: 82476
Dataset split: 65980 training samples, 16496 validation samples.



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)


‚úÖ EfficientNet B0 model created.


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


üöÄ Starting model training for EFFICIENTNET_B0...


/usr/local/lib/python3.12/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:701: Checkpoint directory /content/drive/MyDrive/final_model_checkpoint exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision bf16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.
INFO:pytorch_lightning.callbacks.model_summary:
  | Name           | Type               | Params | Mode 
--------------------------------------------------------------
0 | model          | EfficientNet       | 4.0 M  | train
1 | train_accuracy | MulticlassAccuracy | 0      | train
2 | val_accuracy   | MulticlassAccuracy | 0      | train
3 | train_f1       | MulticlassF1Score  | 0      | train
4 | val_f1         | MulticlassF1Score  | 0      | train
5 | loss_fn        | CrossEntropyLoss   | 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved. New best score: 0.793


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 0.729


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.717


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_loss did not improve in the last 7 records. Best score: 0.717. Signaling Trainer to stop.


‚úÖ Training complete for EFFICIENTNET_B0!
Best model for this run saved at: /content/drive/MyDrive/final_model_checkpoint/sleep-stage-model-efficientnet_b0_100_files_solo_run-epoch=02-val_loss=0.7169.ckpt

--- EfficientNet B0 Experiment Complete ---
