# **Installations**

In [None]:
!pip install -U bitsandbytes
!pip install -U transformers accelerate peft
!pip install python-dotenv
!pip install einops scikit-learn scipy wandb

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting torch<3,>=2.0 (from bitsandbytes)
  Downloading torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting numpy>=1.17 (from bitsandbytes)
  Downloading numpy-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting filelock (from torch<3,>=2.0->bitsandbytes)
  Downloading filelock-3.17.0-py3-none-any.whl.metadata (2.9 kB)
Collecting networkx (from torch<3,>=2.0->bitsandbytes)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch<3,>=2.0->bitsandbytes)
  Downloading jinja2-3.1.5-py3-none-any.whl.metadata (2.6 kB)
Collecting fsspec (from torch<3,>=2.0->bitsandbytes)
  Downloading fsspec-2025.2.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86

In [7]:
# Hugging Face login

secrets_path = os.path.join(project_path, "secrets.env")

# Load the token from .env file in Google Drive
load_dotenv(secrets_path)
hf_token = os.getenv("HUGGINGFACE_TOKEN")

# Authenticate with Hugging Face
login(hf_token)

In [1]:
# Download Llama model weights
!huggingface-cli download meta-llama/Meta-Llama-3.1-8B-Instruct --local-dir Llama-3.1-8B-Instruct --exclude "original/*"

Fetching 14 files:   0%|                                 | 0/14 [00:00<?, ?it/s]Downloading 'README.md' to 'Llama-3.1-8B-Instruct/.cache/huggingface/download/Xn7B-BWUGOee2Y6hCZtEhtFu4BE=.bbd5630a05b65c1a8b25141bd11ec44844107d58.incomplete'
Downloading 'config.json' to 'Llama-3.1-8B-Instruct/.cache/huggingface/download/8_PA_wEVGiVa2goH2H4KQOQpvVY=.0bb6fd75b3ad2fe988565929f329945262c2814e.incomplete'
Downloading 'generation_config.json' to 'Llama-3.1-8B-Instruct/.cache/huggingface/download/3EVKVggOldJcKSsGjSdoUCN1AyQ=.cc7276afd599de091142c6ed3005faf8a74aa257.incomplete'
Downloading 'model-00001-of-00004.safetensors' to 'Llama-3.1-8B-Instruct/.cache/huggingface/download/IO4xwqmZYzFmxznkwkiNSBwO1H0=.2b1879f356aed350030bb40eb45ad362c89d9891096f79a3ab323d3ba5607668.incomplete'
Downloading 'LICENSE' to 'Llama-3.1-8B-Instruct/.cache/huggingface/download/DhCjcNQuMpl4FL346qr3tvNUCgY=.a7c3ca16cee30425ed6ad841a809590f2bcbf290.incomplete'
Downloading 'model-00002-of-00004.safetensors' to 'Llama-3.1

# **Preprocessing**

In [11]:
# Utility class for EEG preprocessing and quantization
class DataProcessor:

    def __init__(self, preprocessed_output_dir, num_bins, bin_encoding, window_size, overlap):
        
        """
        Handles subject-level EEG loading, normalization, segmentation, and quantization.

        Args:
            preprocessed_output_dir (str): Directory to save preprocessed data.
            num_bins (int): Number of bins for quantization.
            bin_encoding (str): Encoding method ('binary' or 'symbolic').
            window_size (int): Number of samples per segment.
            overlap (float): Fraction of overlap between windows.
        """
        self.preprocessed_output_dir = preprocessed_output_dir
        self.num_bins = num_bins
        self.bin_encoding = bin_encoding
        self.window_size = window_size
        self.overlap = overlap
        self.bins = None
        self.labels = None



    def load_subject_data(self, file_path):

        """
        Load EEG data and corresponding labels from a .dat file.

        Args:
            file_path (str): Path to the .dat file.

        Returns:
            tuple: EEG data and labels as numpy arrays.
        """

        print(f"Loading data from {file_path}...")
        with open(file_path, 'rb') as f:
            subject_data = pickle.load(f, encoding='latin1')
            print("Data loaded successfully.")
            return subject_data['data'], subject_data['labels']



    def zscore_normalize(self, eeg_data):
        
        """
        Perform z-score normalization across channels and time for each subject’s entire data.
        eeg_data shape: (num_trials, num_eeg_channels, time) ->  (40, 32, 8064).

        Args:
            eeg_data (np.ndarray): EEG data to be normalized.

        Returns:
            np.ndarray: Z-score normalized EEG data.
        """

        # shape: (trial, channel, time)
        mean_vals = np.mean(eeg_data, axis=(0,2), keepdims=True)
        std_vals = np.std(eeg_data, axis=(0,2), keepdims=True)
        eeg_data = (eeg_data - mean_vals) / (std_vals + 1e-7)
        return eeg_data    
    


    def analyze_distribution(self, eeg_data):
        
        """
        Analyze EEG amplitude distribution and define quantization bins.

        Args:
            eeg_data (np.ndarray): EEG data of shape (num_trials, 32, time_steps).

        Returns:
            Compute quantization bins, and updates self.bins and self.labels according to them.
        """
        
        flattened_data = eeg_data.flatten()
        # Compute percentiles from 5th to 95th to avoid outliers
        percentiles = np.linspace(5, 95, self.num_bins + 1)
        self.bins = np.percentile(flattened_data, percentiles)

        # Assign labels (binary or symbolic)
        if self.bin_encoding == "binary":
            # e.g. 3-bit if num_bins=8 => '000', '001', '010', ...
            self.labels = [
                format(i, f'0{len(bin(self.num_bins - 1)[2:])}b')
                for i in range(self.num_bins)
            ]
        else:
            # e.g. A, B, C, ...
            self.labels = [chr(65 + i) for i in range(self.num_bins)]

        print(f"Quantization Bins: {self.bins}")
        print(f"Assigned Labels: {self.labels}")


    
    def segment_eeg_data(self, eeg_data):

        """
        Segment EEG data into overlapping windows.

        Args:
            eeg_data (np.ndarray): EEG data of shape (32, 8064).

        Returns:
            np.ndarray: Segmented EEG data of shape (num_segments, 32, window_size).
        """

        step = int(self.window_size * (1 - self.overlap))
        num_windows = (eeg_data.shape[1] - self.window_size) // step + 1
        print(f"Segmenting EEG data into {num_windows} windows...")
        segments = [
            eeg_data[:, i * step:i * step + self.window_size]
            for i in range(num_windows)
        ]
        print("Segmentation complete.")
        return np.stack(segments, axis=0)

    

    def quantize_signal(self, signal):

        """
        Convert an EEG signal into a space-separated quantized representation.

        Args:
            signal (np.ndarray): Single EEG trial of shape (32, window_size).

        Returns:
            str: Space-separated quantized representation.
        """

        if self.bins is None:
            raise ValueError("Bins not initialized. Run analyze_distribution() first.")
        
        # Flatten the 32 channels for that segment
        flat = signal.flatten()
        quantized_indices = np.digitize(flat, self.bins, right=False) - 1
        quantized_indices = np.clip(quantized_indices, 0, len(self.labels) - 1)
        return ' '.join(self.labels[i] for i in quantized_indices)
    
    
    
    def preprocess_subject(self, subject_file):

        """
        Preprocess a single subject's EEG data: z-score, segment, quantize, normalize labels.

        Args:
            subject_file (str): Path to the subject's .dat file.

        Returns:
            tuple: z-scored, segmented, quantized EEG data and normalized labels.
        """

        print(f"Preprocessing data for {subject_file}...")
        eeg_data, labels = self.load_subject_data(subject_file)
    
        # eeg_data => (40, 40, 8064) video/trial x channel x data, 
        # labels => (40, 4) video/trial x label (valence, arousal, dominance, liking) 

        # We only need the first 32 channels, 
        # because the remaining 8 are other physiological data, so:

        # 1) Keep only the first 32 channels and time dimension
        eeg_data = eeg_data[:, :32, :]  

        # 2) Z-score per subject
        eeg_data = self.zscore_normalize(eeg_data)

        # 3) Valence & arousal only => columns 0 & 1, normalizing from [1,9] to [0,1]
        labels = labels[:, :2]  
        labels = (labels - 1) / 8

        # 4) Compute quantization bins based on the entire subject’s EEG distribution
        #    (Now that it’s z-scored).
        self.analyze_distribution(eeg_data)

        all_sequences = []
        all_labels = []

        for trial_idx, trial_data in enumerate(eeg_data):
            segments = self.segment_eeg_data(trial_data)
            # Quantize each segment
            quantized_segments = [self.quantize_signal(seg) for seg in segments]

            all_sequences.extend(quantized_segments)

            # Duplicate this trial's valence/arousal label for each segment
            trial_labels = np.tile(labels[trial_idx], (len(quantized_segments), 1))
            all_labels.append(trial_labels)

        sequences = np.array(all_sequences, dtype=object)
        labels = np.concatenate(all_labels, axis=0)

        # For debugging
        print(f"Preprocessed data dimensions => Sequences: {sequences.shape}, Labels: {labels.shape}")
        return sequences, labels



    def preprocess_deap_data(self, data_path):

        """
        Preprocess all subjects' data in the DEAP dataset.

        Args:
            data_path (str): Path to the folder containing .dat files.

        Returns:
            Saves sequences with shape (num_segments, 32, window_size) and labels with shape (num_segments, 2).
        """

        os.makedirs(self.preprocessed_output_dir, exist_ok=True)

        for subject_file in os.listdir(data_path):
            if subject_file.endswith(".dat"):
                print(f"Processing {subject_file}...")
                subject_path = os.path.join(data_path, subject_file)
                sequences, labels = self.preprocess_subject(subject_path)

                # Overwrite existing files without checking
                np.save(os.path.join(self.preprocessed_output_dir, f"{subject_file}_sequences.npy"), sequences)
                np.save(os.path.join(self.preprocessed_output_dir, f"{subject_file}_labels.npy"), labels)
                print(f"Saved preprocessed data for {subject_file}.")


In [None]:
# Preprocess data

num_bins=8
bin_encoding="binary"
window_size = 512
overlap = 0.25

processor = DataProcessor(preprocessed_output_dir, num_bins, bin_encoding, window_size, overlap)
processor.preprocess_deap_data(data_path)

Processing s08.dat...
Preprocessing data for ./DEAP_Dataset/data_preprocessed_python/s08.dat...
Loading data from ./DEAP_Dataset/data_preprocessed_python/s08.dat...
Data loaded successfully.
Quantization Bins: [-1.38248047 -0.6225361  -0.34481999 -0.15768848  0.00141193  0.16003729
  0.34609178  0.62188324  1.37789575]
Assigned Labels: ['000', '001', '010', '011', '100', '101', '110', '111']
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmentation complete.
Segmenting EEG data into 30 windows...
Segmenta

# **Imports and Directories**

In [1]:
import os
import torch

gpu_id = 0

os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

torch.cuda.set_device(gpu_id)
device = torch.device(f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu")
print("✅ Using device:", device)

# Print CUDA device information
print("✅ Using GPU:", torch.cuda.get_device_name(gpu_id))
print("🖥️  Device Count:", torch.cuda.device_count())
print("🔢  Current Device ID:", torch.cuda.current_device())
print("🔥  CUDA is Available:", torch.cuda.is_available())

# Get device properties
device_props = torch.cuda.get_device_properties(gpu_id)
print("\n🔍 GPU Specifications:")
print(f"   - Name: {device_props.name}")
print(f"   - Total Memory: {device_props.total_memory / 1e9:.2f} GB")
print(f"   - Multiprocessors: {device_props.multi_processor_count}")
print(f"   - Compute Capability: {device_props.major}.{device_props.minor}")
print(f"   - Max Threads per Multiprocessor: {device_props.max_threads_per_multi_processor}")

✅ Using device: cuda:0
✅ Using GPU: NVIDIA RTX A6000
🖥️  Device Count: 1
🔢  Current Device ID: 0
🔥  CUDA is Available: True

🔍 GPU Specifications:
   - Name: NVIDIA RTX A6000
   - Total Memory: 51.03 GB
   - Multiprocessors: 84
   - Compute Capability: 8.6
   - Max Threads per Multiprocessor: 1536


In [2]:
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from einops import rearrange
from transformers import LlamaTokenizer, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from sklearn.metrics import accuracy_score, precision_score, recall_score
from scipy.signal import butter, lfilter
import os
import pickle
import wandb
import platform
from dotenv import load_dotenv
from huggingface_hub import login
from tqdm import tqdm
import time
import torch.profiler

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Paths
project_path = "./"
data_path = os.path.join(project_path, "DEAP_Dataset/data_preprocessed_python")
preprocessed_output_dir = os.path.join(project_path, "DEAP_preprocessed")
model_path = "Llama-3.1-8B-Instruct"
classification_output_dir = os.path.join(project_path, "Classification_Output")

# **Dataset**

In [4]:
def load_all_preprocessed_subjects(preprocessed_output_dir, max_subjects=None):
    """
    Reads each file ending with "_sequences.npy" in `preprocessed_output_dir`,
    and finds the corresponding "_labels.npy" file.
    
    If `max_subjects` is None, load ALL available subject files.
    Otherwise, load only the first `max_subjects` files (sorted alphabetically).

    Returns:
        all_sequences: (N,) array of quantized EEG text segments
        all_labels: (N, 2) array of valence, arousal
    """
    all_seq_files = sorted(
        f for f in os.listdir(preprocessed_output_dir) if f.endswith("_sequences.npy")
    )

    if max_subjects is not None:
        all_seq_files = all_seq_files[:max_subjects]

    all_sequences = []
    all_labels = []

    for seq_filename in all_seq_files:
        seq_path = os.path.join(preprocessed_output_dir, seq_filename)
        lab_path = seq_path.replace("_sequences.npy", "_labels.npy")
        
        if not os.path.exists(lab_path):
            print(f"Warning: Labels file not found for {seq_filename}")
            continue
        
        subject_sequences = np.load(seq_path, allow_pickle=True)
        subject_labels = np.load(lab_path, allow_pickle=True)

        all_sequences.append(subject_sequences)
        all_labels.append(subject_labels)

    if len(all_sequences) == 0:
        raise ValueError("No preprocessed subject files found in the directory.")

    all_sequences = np.concatenate(all_sequences, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    print(f"Total loaded sequences: {all_sequences.shape}")
    print(f"Total loaded labels: {all_labels.shape}")
    return all_sequences, all_labels

In [5]:
class DEAPDataset(Dataset):

    def __init__(self, sequences, labels, debug=False):

        """
        sequences: array/list of text strings (quantized EEG), one per segment
        labels: shape [num_segments, 2] => valence, arousal
        debug: print sample info for debugging
        """
        
        self.sequences = sequences
        self.labels = labels
        self.debug = debug

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):

        """
        Return the raw text segment and label (no tokenization here).
        """

        text_segment = self.sequences[idx]
        label = self.labels[idx]

        if self.debug and idx < 1:
            print(f"\n[Dataset __getitem__] Index: {idx} => Text: {text_segment}")

        # Return the raw text and label as a tuple
        return text_segment, label

In [6]:
# Decide how many subject files to load
# e.g. set `max_subjects=2` to load only 2 subject files, or None for all
max_subjects = None  # or None

# Load preprocessed (optionally limited) subject files
sequences, labels = load_all_preprocessed_subjects(
    preprocessed_output_dir,
    max_subjects=max_subjects
)
dataset = DEAPDataset(sequences, labels)

Total loaded sequences: (38400,)
Total loaded labels: (38400, 2)


# **Model**

In [7]:
# Model Definition
class LlamaEmotionClassifier(nn.Module):

    def __init__(self, model_path, classification_output_dir):

        """
        Initialize the Llama-based model for valence & arousal regression.

        Args:
            model_path (str): Local path to the pre-trained Llama model.
            classification_output_dir (str): Directory to save the trained model.

        Returns:
            LlamaEmotionClassifier: An instance of the LlamaEmotionClassifier model with a linear regression head.
        """

        super(LlamaEmotionClassifier, self).__init__()
        self.classification_output_dir = classification_output_dir
        self.model_path = model_path
        self.device = device

        print(f"Loading model on device: {self.device}")

        # Load in 4-bit
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=False
        )

        # 1) Load model
        model = AutoModelForCausalLM.from_pretrained(
            self.model_path,
            quantization_config=bnb_config,
            device_map="auto"
        )

        # 2) Prepare for k-bit training
        model = prepare_model_for_kbit_training(model)
        model.config.use_cache = False

        # 3) LoRA configuration
        peft_config = LoraConfig(
            lora_alpha=8,
            lora_dropout=0.1,
            r=16,
            bias="none",
            task_type="CAUSAL_LM",
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj"]
        )
        self.model = get_peft_model(model, peft_config).to(self.device)

        # 4) Tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.resize_token_embeddings(len(self.tokenizer))

        # 5) Simple regression head on top of last hidden state
        hidden_size = self.model.config.hidden_size
        self.fc = nn.Linear(hidden_size, 2).to(self.device)

        print("✅ LlamaEmotionClassifier initialized.")



    def forward(self, input_ids, attention_mask=None):

        """
        Forward pass for the Llama model.
        Use last_hidden_state for the final regression.
        Returns a 2D (batch_size, 2) => valence, arousal in [0,1].

        Args:
            input_ids (torch.Tensor): Input tensor for the model.

        Returns:
            torch.Tensor: Model output with reduced dimensionality for emotion classification.
        """

        start_time = time.time()
        outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    output_hidden_states=True,
                    return_dict=True
                ) # [batch_size, seq_len, hidden_size]
        
        model_time = time.time() - start_time
        print(f"Time for model forward pass: {model_time:.4f} seconds")

        last_hidden = outputs.hidden_states[-1]
        print(f"Last hidden state shape: {last_hidden.shape}")

        # Mean-pool across seq_len
        pooled = last_hidden.mean(dim=1)
        # Output => [batch_size, 2]
        logits = self.fc(pooled)
        # Sigmoid => valence & arousal in [0,1]
        return torch.sigmoid(logits)



    def train_model(self, train_loader, val_loader, hparams):

        """
        Fine-tune using MSE loss on valence/arousal.

        Args:
            train_loader (DataLoader): DataLoader for training data.
            val_loader (DataLoader): DataLoader for validation data.
            hparams (dict): Dictionary of hyperparameters.
        """

        print("Starting training...")
        wandb.init(project="DEAP_Llama_Finetuning", config=hparams)

        try:
            print(f"WandB Run URL: {wandb.run.url}")  # Print WandB link
            wandb.watch(self, log="all", log_freq=10)  # Track model parameters

            optimizer = torch.optim.AdamW(self.parameters(), lr=hparams["learning_rate"])
            loss_fn = nn.MSELoss()

            total_sequences = len(train_loader.dataset)
            print(f"Total sequences: {total_sequences}")

            for epoch in range(hparams["epochs"]):
                self.train()
                train_loss = 0.0
                start_time = time.time()
                batch_start_time = time.time()

                with tqdm(total=len(train_loader), desc=f"Epoch {epoch + 1}/{hparams['epochs']}") as pbar:
                    for batch_idx, batch in enumerate(train_loader):
                        optimizer.zero_grad()
                        inputs = batch['input_ids'].squeeze(1).to(self.device)
                        labels = batch['labels'].to(self.device)

                        # Print device information and content for the first three batches
                        if batch_idx < 3:
                            print(f"Batch {batch_idx + 1} - inputs device: {inputs.device}")
                            print(f"Batch {batch_idx + 1} - inputs content: {inputs}")
                            print(f"Batch {batch_idx + 1} - labels device: {labels.device}")
                            print(f"Batch {batch_idx + 1} - labels content: {labels}")

                        outputs = self(inputs)
                        loss = loss_fn(outputs, labels)
                        loss.backward()
                        optimizer.step()

                        train_loss += loss.item()

                        # Print outputs and labels for the first three batches
                        if batch_idx < 3:
                            print(f"Batch {batch_idx + 1} - outputs: {outputs}")
                            print(f"Batch {batch_idx + 1} - labels: {labels}")

                        # Calculate and print time per sequence
                        batch_time = time.time() - batch_start_time
                        batch_start_time = time.time()
                        time_per_sequence = batch_time / len(inputs)
                        print(f"Batch {batch_idx + 1} - Time per sequence: {time_per_sequence:.4f} seconds")

                        # Update progress bar
                        pbar.update(1)

                train_loss /= len(train_loader)
                epoch_time = time.time() - start_time

                # Validation step
                self.eval()
                val_loss = 0.0
                with torch.no_grad():
                    for batch_idx, batch in enumerate(val_loader):
                        inputs = batch['input_ids'].squeeze(1).to(self.device)
                        labels = batch['labels'].to(self.device)

                        # Print device information and content for the first three validation batches
                        if batch_idx < 3:
                            print(f"Validation Batch {batch_idx + 1} - inputs device: {inputs.device}")
                            print(f"Validation Batch {batch_idx + 1} - inputs content: {inputs}")
                            print(f"Validation Batch {batch_idx + 1} - labels device: {labels.device}")
                            print(f"Validation Batch {batch_idx + 1} - labels content: {labels}")

                        outputs = self(inputs)
                        loss = loss_fn(outputs, labels)

                        val_loss += loss.item()

                        # Print outputs and labels for the first three validation batches
                        if batch_idx < 3:
                            print(f"Validation Batch {batch_idx + 1} - outputs: {outputs}")
                            print(f"Validation Batch {batch_idx + 1} - labels: {labels}")

                val_loss /= len(val_loader)
                print(f"Epoch {epoch + 1}/{hparams['epochs']}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Epoch Time: {epoch_time:.2f} seconds")
                wandb.log({"train_loss": train_loss, "val_loss": val_loss, "epoch_time": epoch_time})

                # Estimate remaining time
                remaining_epochs = hparams["epochs"] - (epoch + 1)
                estimated_time_remaining = remaining_epochs * epoch_time
                print(f"Estimated time remaining: {estimated_time_remaining / 60:.2f} minutes")

        finally:
            wandb.finish()

    def test_model(self, test_loader):
        """
        Evaluate on the held-out test set, returning MSE or other regression metrics.
        """
        self.eval()
        loss_fn = nn.MSELoss()
        test_loss = 0.0

        with torch.no_grad():
            for batch_idx, batch in enumerate(test_loader):
                inputs = batch['input_ids']
                attn_mask = batch['attention_mask']
                labels = batch['labels']

                outputs = self(inputs, attention_mask=attn_mask)
                loss = loss_fn(outputs, labels)
                test_loss += loss.item()

        test_loss /= len(test_loader)
        print(f"\n[Test] MSE Loss on Test Set: {test_loss:.4f}")
        return test_loss

    def save(self):

        """
        Save the trained model to the specified directory.
        """
        
        print(f"Saving model to {self.classification_output_dir}...")
        self.model.save_pretrained(self.classification_output_dir)
        print("✅ Model saved successfully.")

In [8]:
# Initialize the Llama emotion classifier
llama_classifier = LlamaEmotionClassifier(
        model_path=model_path,
        classification_output_dir=classification_output_dir
    ).to(device)

Loading model on device: cuda:0


Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.11s/it]


✅ LlamaEmotionClassifier initialized.


# **DataLoader**

In [9]:
def dynamic_tokenize_collate_fn(tokenizer, max_length, device, debug=False):

    """
    Returns a function that can be used as collate_fn in the PyTorch DataLoader.
    The returned function tokenizes the raw text segments in batch.
    """

    def collate_fn(batch):

        """
        batch: list of (text_segment, label) tuples
        """
        
        # Separate text and labels
        text_segments = [item[0] for item in batch]
        labels = [item[1] for item in batch]  # shape: list of (2,) => valence, arousal

        if debug and len(text_segments) > 0:
            print(f"\n[CollateFn] Example text: {text_segments[0]}")

        # Tokenize in batch
        encoded = tokenizer(
            text_segments,
            truncation=True,
            padding="max_length",
            max_length=max_length,
            return_tensors="pt"
        )

        # Extract tensors
        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]
        labels_tensor = torch.tensor(labels, dtype=torch.float32)

        # Optional: move to GPU here
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels_tensor = labels_tensor.to(device)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels_tensor
        }
    return collate_fn


In [10]:
# Build the collate_fn
collate_fn = dynamic_tokenize_collate_fn(
    tokenizer=llama_classifier.tokenizer,
    max_length=128,
    device=device,
    debug=False
)

# **Training**

In [11]:
# Hyperparameters

hparams = {
    "epochs": 1,
    "batch_size": 64,
    "learning_rate": 2e-4,
    "train_split": 0.70,   # 70%
    "val_split": 0.15,     # 15%
    # the remaining 15% is test
}

# Print Hyperparameters for verification
print("hparams:")
for key, value in hparams.items():
    print(f"{key}: {value}")

hparams:
epochs: 1
batch_size: 64
learning_rate: 0.0002
train_split: 0.7
val_split: 0.15


In [12]:
# Train/val/test split
total_len = len(dataset)
train_len = int(hparams["train_split"] * total_len)
val_len = int(hparams["val_split"] * total_len)
test_len = total_len - (train_len + val_len)

train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_len, val_len, test_len]
)
print(f"Dataset splits => train: {len(train_dataset)}, val: {len(val_dataset)}, test: {len(test_dataset)}")

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=hparams["batch_size"], shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=hparams["batch_size"], shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=hparams["batch_size"], shuffle=False, collate_fn=collate_fn)

Dataset splits => train: 26880, val: 5760, test: 5760


In [13]:
# Train the model
os.environ["WANDB_API_KEY"] = "4f1fd29237ed9d0652777c7ebb441a170b7c8c6d"
llama_classifier.train_model(train_loader, val_loader, hparams)

# Save the model configuration after training
llama_classifier.save()

Starting training...


[34m[1mwandb[0m: Currently logged in as: [33mgiorgio-1836529[0m ([33mgiorgio-1836529-sapienza-universit-di-roma[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


WandB Run URL: https://wandb.ai/giorgio-1836529-sapienza-universit-di-roma/DEAP_Llama_Finetuning/runs/bplbc1b2
Total sequences: 26880


  labels_tensor = torch.tensor(labels, dtype=torch.float32)
  return fn(*args, **kwargs)


Batch 1 - inputs device: cuda:0
Batch 1 - inputs content: tensor([[128000,   4119,    220,  ...,    931,    220,   7755],
        [128000,   5037,    220,  ...,   7755,    220,   4645],
        [128000,   4119,    220,  ...,   1041,    220,   4645],
        ...,
        [128000,   4645,    220,  ...,    931,    220,    931],
        [128000,   5037,    220,  ...,   7755,    220,   5120],
        [128000,   4645,    220,  ...,    931,    220,   7755]],
       device='cuda:0')
Batch 1 - labels device: cuda:0
Batch 1 - labels content: tensor([[0.0000, 0.6325],
        [0.7487, 0.6300],
        [0.7600, 0.7563],
        [0.4600, 0.6762],
        [0.4988, 0.4963],
        [0.6250, 0.1250],
        [0.2763, 0.6288],
        [0.7325, 0.7375],
        [0.5000, 0.6325],
        [0.6212, 0.3625],
        [0.4588, 0.0225],
        [0.3787, 0.8138],
        [0.3137, 0.2350],
        [0.5013, 0.8900],
        [0.6438, 0.0213],
        [0.2962, 0.6400],
        [0.1838, 0.3063],
        [0.8388, 0.8

Epoch 1/1:   0%|          | 1/420 [00:20<2:23:47, 20.59s/it]

Batch 1 - outputs: tensor([[0.4489, 0.3961],
        [0.4374, 0.3854],
        [0.4835, 0.3214],
        [0.4529, 0.3577],
        [0.4783, 0.3453],
        [0.4482, 0.4267],
        [0.4507, 0.4413],
        [0.4878, 0.4037],
        [0.5163, 0.3481],
        [0.4607, 0.3167],
        [0.4935, 0.3752],
        [0.4462, 0.4320],
        [0.4357, 0.3299],
        [0.4496, 0.3508],
        [0.4915, 0.3474],
        [0.4800, 0.3621],
        [0.4692, 0.3208],
        [0.4710, 0.3324],
        [0.4696, 0.4243],
        [0.4694, 0.3726],
        [0.4278, 0.3272],
        [0.4475, 0.3850],
        [0.4348, 0.3138],
        [0.4547, 0.3163],
        [0.4727, 0.3231],
        [0.4607, 0.3316],
        [0.4409, 0.3241],
        [0.4821, 0.3869],
        [0.4650, 0.3381],
        [0.4508, 0.3057],
        [0.4267, 0.3210],
        [0.5004, 0.4638],
        [0.4855, 0.3463],
        [0.4442, 0.3572],
        [0.4661, 0.4184],
        [0.5277, 0.3887],
        [0.4466, 0.4879],
        [0.4337, 0.

Epoch 1/1:   0%|          | 2/420 [00:40<2:22:22, 20.44s/it]

Batch 2 - outputs: tensor([[0.6556, 0.6351],
        [0.6382, 0.6277],
        [0.6501, 0.7456],
        [0.6358, 0.6868],
        [0.6551, 0.6954],
        [0.6550, 0.6163],
        [0.6543, 0.6553],
        [0.6493, 0.6885],
        [0.6373, 0.7215],
        [0.6761, 0.7810],
        [0.6809, 0.6844],
        [0.7095, 0.6563],
        [0.6637, 0.6601],
        [0.6797, 0.6736],
        [0.6447, 0.6766],
        [0.6624, 0.5923],
        [0.6715, 0.6880],
        [0.6688, 0.7327],
        [0.6482, 0.6975],
        [0.6493, 0.6348],
        [0.6327, 0.6543],
        [0.6745, 0.6903],
        [0.6983, 0.6734],
        [0.6598, 0.6590],
        [0.6365, 0.6256],
        [0.6836, 0.7525],
        [0.6871, 0.6785],
        [0.6656, 0.6704],
        [0.6398, 0.6478],
        [0.6474, 0.6954],
        [0.6044, 0.6597],
        [0.6551, 0.6697],
        [0.6889, 0.6847],
        [0.6553, 0.7401],
        [0.6792, 0.6912],
        [0.6751, 0.7368],
        [0.6537, 0.6145],
        [0.6254, 0.

Epoch 1/1:   1%|          | 3/420 [01:01<2:22:17, 20.47s/it]

Batch 3 - outputs: tensor([[0.5420, 0.5284],
        [0.5586, 0.5485],
        [0.5115, 0.6289],
        [0.5964, 0.5593],
        [0.5236, 0.5597],
        [0.5375, 0.5256],
        [0.5697, 0.5480],
        [0.5368, 0.5086],
        [0.5913, 0.5340],
        [0.5895, 0.5133],
        [0.5606, 0.6891],
        [0.5859, 0.7260],
        [0.5634, 0.5870],
        [0.5726, 0.5713],
        [0.5744, 0.5579],
        [0.5749, 0.6005],
        [0.6023, 0.5524],
        [0.5619, 0.5732],
        [0.5617, 0.5301],
        [0.5970, 0.5906],
        [0.5212, 0.5202],
        [0.5590, 0.5261],
        [0.5688, 0.5687],
        [0.5760, 0.6365],
        [0.6068, 0.6252],
        [0.6009, 0.5511],
        [0.5587, 0.5669],
        [0.5588, 0.5421],
        [0.5825, 0.5436],
        [0.5184, 0.5667],
        [0.5287, 0.5684],
        [0.5380, 0.6469],
        [0.6090, 0.5266],
        [0.5339, 0.5873],
        [0.6029, 0.5134],
        [0.5643, 0.5520],
        [0.5074, 0.4961],
        [0.5754, 0.

Epoch 1/1:   1%|          | 3/420 [01:20<3:06:17, 26.80s/it]


BrokenPipeError: [Errno 32] Broken pipe

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f5b15e53a50>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f5b179aaf90, execution_count=13 error_before_exec=None error_in_exec=[Errno 32] Broken pipe info=<ExecutionInfo object at 7f5b14142b50, raw_cell="# Train the model
os.environ["WANDB_API_KEY"] = "4.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2B213.171.185.13/home/tesista6/EEG_Driven_Emotion_Classifier.ipynb#X33sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

# **Testing**

In [None]:
# Test the model
print("Evaluating on the test set...")
test_loss = llama_classifier.test_model(test_loader)