In [1]:
from torcheeg.datasets import SEEDDataset
from torcheeg import transforms

raw_dataset = SEEDDataset(
    root_path='./SEED/SEED_EEG/Preprocessed_EEG',
    io_path = 'E:/FYP/Egg-Based Emotion Recognition/EEg-based-Emotion-Recognition/.torcheeg/datasets_1733174610032_5iJyS',
    online_transform=None,  # Disable transforms
    label_transform=None,
    num_worker=4
)

raw_sample = raw_dataset[0]
print(f"Raw EEG data shape: {raw_sample[0].shape}")  # Should be [62, ...] for SEED
print(f"Label: {raw_sample[1]}")  # Should be a number between 0 and 3

[2024-12-09 02:18:38] INFO (torcheeg/MainThread) 🔍 | Detected cached processing results, reading cache from E:/FYP/Egg-Based Emotion Recognition/EEg-based-Emotion-Recognition/.torcheeg/datasets_1733174610032_5iJyS.


Raw EEG data shape: (62, 200)
Label: {'start_at': 0, 'end_at': 200, 'clip_id': '10_20131130.mat_0', 'subject_id': 10, 'trial_id': 'ww_eeg1', 'emotion': 1, 'date': 20131130, '_record_id': '_record_0'}


In [3]:
import numpy as np
from scipy.signal import butter, lfilter , filtfilt

#Bandpass filter function
def bandpass_filter(data, lowcut=4, highcut=47, fs=200, order=5):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    y = filtfilt(b, a, data, axis=1)
    return y

# Stratified normalization function
def stratified_normalization(data):
    mean = np.mean(data, axis=1, keepdims=True)
    std = np.std(data, axis=1, keepdims=True)
    return (data - mean) / (std + 1e-8)


In [4]:
# Preprocess the EEG data
preprocessed_data = []
labels = []

for i in range(len(raw_dataset)):
    eeg_data, label = raw_dataset[i]
    eeg_data = np.array(eeg_data)  # Convert to numpy array if needed

    # Step 1: Bandpass filtering
    eeg_data_filtered = bandpass_filter(eeg_data)

    # Step 2: Stratified normalization
    eeg_data_normalized = stratified_normalization(eeg_data_filtered)

    preprocessed_data.append(eeg_data_normalized)
    labels.append(label)

# Convert to numpy arrays
preprocessed_data = np.array(preprocessed_data)
labels = np.array(labels)

# Display shape of preprocessed data
print(f"Preprocessed EEG data shape: {preprocessed_data.shape}")  # Should be (num_samples, 62, 200)
print(f"Sample label: {labels[0]}")

Preprocessed EEG data shape: (152730, 62, 200)
Sample label: {'start_at': 0, 'end_at': 200, 'clip_id': '10_20131130.mat_0', 'subject_id': 10, 'trial_id': 'ww_eeg1', 'emotion': 1, 'date': 20131130, '_record_id': '_record_0'}


In [8]:
import torch
import torch.nn as nn

class BaseEncoder(nn.Module):
    def __init__(self, num_channels=62, spatial_filters=16, temporal_filters=16, temporal_filter_length=48):
        super(BaseEncoder, self).__init__()
        
        self.spatial_conv = nn.Conv1d(
            in_channels=num_channels, 
            out_channels=spatial_filters, 
            kernel_size=1
        )
        
        self.temporal_conv = nn.Conv1d(
            in_channels=spatial_filters, 
            out_channels=temporal_filters, 
            kernel_size=temporal_filter_length, 
            padding='same'  # Padding to maintain input length
        )
        
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        x = self.spatial_conv(x)  # Output shape: [batch_size, spatial_filters, time_points]
        x = self.relu(x)
        x = self.temporal_conv(x)  # Output shape: [batch_size, temporal_filters, time_points]
        x = self.relu(x)
        
        return x

# Instantiate the Base Encoder
base_encoder = BaseEncoder()

# Example input tensor: batch_size=1, num_channels=62, time_points=200
example_input = torch.randn(1, 62, 200)

# Forward pass
output = base_encoder(example_input)
print(f"Output shape: {output.shape}")  # Expected: [1, 16, 200]


Output shape: torch.Size([1, 16, 200])


In [None]:
import torch
import torch.nn as nn

class Projector(nn.Module):
    def __init__(self, spatial_filter_size=16, temporal_filter_size=4, avg_pool_kernel=24, c=2):
        super(Projector, self).__init__()
        
        # Average Pooling
        self.avg_pool = nn.AvgPool1d(kernel_size=avg_pool_kernel, stride=avg_pool_kernel)
        
        # Spatial Convolution (Depthwise)
        self.spatial_conv = nn.Conv1d(
            in_channels=spatial_filter_size, 
            out_channels=c * spatial_filter_size, 
            kernel_size=1, 
            groups=spatial_filter_size  # Depthwise convolution
        )
        
        # Temporal Convolution (Depthwise)
        self.temporal_conv = nn.Conv1d(
            in_channels=c * spatial_filter_size, 
            out_channels=c**2 * spatial_filter_size, 
            kernel_size=temporal_filter_size, 
            padding='same', 
            groups=c * spatial_filter_size  # Depthwise convolution
        )
        
        # Activation Function
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.avg_pool(x)  # Output shape: [batch_size, spatial_filter_size, reduced_time_points]
        x = self.spatial_conv(x)  # Output shape: [batch_size, c * spatial_filter_size, reduced_time_points]
        x = self.relu(x)
        x = self.temporal_conv(x)  # Output shape: [batch_size, c^2 * spatial_filter_size, reduced_time_points]
        x = self.relu(x)
        
        return x

# Instantiate the Projector
projector = Projector()

# Example input tensor: batch_size=1, spatial_filter_size=16, time_points=200
example_input = torch.randn(1, 16, 200)

# Forward pass
output = projector(example_input)
print(f"Output shape: {output.shape}")  # Expected: [1, 64, 8] (200 / 24 = ~8)


Output shape: torch.Size([1, 64, 8])


ACTUAL WORKING START

In [14]:
# Check the shape of the preprocessed data
print(f"Preprocessed Data Shape: {preprocessed_data.shape}")

# Check the first few labels
print(f"First Label: {labels[0]}")
print(f"Number of Samples: {len(preprocessed_data)}")


Preprocessed Data Shape: (152730, 62, 200)
First Label: {'start_at': 0, 'end_at': 200, 'clip_id': '10_20131130.mat_0', 'subject_id': 10, 'trial_id': 'ww_eeg1', 'emotion': 1, 'date': 20131130, '_record_id': '_record_0'}
Number of Samples: 152730


In [34]:
from sklearn.model_selection import LeaveOneGroupOut
from torch.utils.data import DataLoader, Dataset
import random

class EEGContrastiveDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        eeg_a = self.data[idx].astype(np.float32)
        label_a = self.labels[idx]['emotion']
        subject_a = self.labels[idx]['subject_id']

        # Find a positive sample (same emotion, different subject)
        positive_indices = [i for i, lbl in enumerate(self.labels) if lbl['emotion'] == label_a and lbl['subject_id'] != subject_a]
        
         # Fallback: If no positive sample is found, choose a random sample
        if not positive_indices:
            pos_idx = random.choice(range(len(self.data)))
        else:
            pos_idx = random.choice(positive_indices)


        eeg_b = self.data[pos_idx]
        return eeg_a, eeg_b

# Extract subject IDs
subject_ids = [label['subject_id'] for label in labels]

# Initialize LOSO cross-validator
logo = LeaveOneGroupOut()

# Iterate through each LOSO fold
for train_idx, val_idx in logo.split(preprocessed_data, labels, subject_ids):
    # Split the data into training and validation
    train_data, val_data = preprocessed_data[train_idx], preprocessed_data[val_idx]
    train_labels, val_labels = [labels[i] for i in train_idx], [labels[i] for i in val_idx]

    # Create DataLoader for training and validation
    train_loader = DataLoader(EEGContrastiveDataset(train_data, train_labels), batch_size=32, shuffle=True)
    val_loader = DataLoader(EEGContrastiveDataset(val_data, val_labels), batch_size=32, shuffle=False)

    # Print the current subject being used for validation
    val_subject_id = subject_ids[val_idx[0]]
    print(f"Validation Subject: {val_subject_id}")
    
    # Break after the first fold for demonstration purposes
    break


Validation Subject: 1


In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import random

# Combine Base Encoder and Projector into a single model
class CLISA(nn.Module):
    def __init__(self, base_encoder, projector):
        super(CLISA, self).__init__()
        self.base_encoder = base_encoder
        self.projector = projector

    def forward(self, x):
        x = self.base_encoder(x)
        x = self.projector(x)
        return x

# Contrastive Loss Function
def contrastive_loss(z_a, z_b, temperature=0.5):
    batch_size = z_a.shape[0]
    z = torch.cat([z_a, z_b], dim=0)
    sim = torch.matmul(z, z.T) / temperature

    labels = torch.cat([torch.arange(batch_size), torch.arange(batch_size)], dim=0).to(z.device)
    loss_fn = nn.CrossEntropyLoss()
    loss = loss_fn(sim, labels)
    return loss

# # Data Sampler for Positive and Negative Pairs
# class EEGContrastiveDataset(Dataset):
#     def __init__(self, data, labels):
#         self.data = data
#         self.labels = labels

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         eeg_a = self.data[idx]
#         label_a = self.labels[idx]['emotion']
#         subject_a = self.labels[idx]['subject_id']

#         # Find a positive sample (same emotion, different subject)
#         positive_indices = [i for i, lbl in enumerate(self.labels) if lbl['emotion'] == label_a and lbl['subject_id'] != subject_a]
#         pos_idx = random.choice(positive_indices)

#         eeg_b = self.data[pos_idx]
#         return eeg_a, eeg_b



In [36]:
def save_checkpoint(model, optimizer, epoch, loss, path):
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss
    }, path)

In [38]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import GradScaler, autocast  # For mixed precision



# Initialize TensorBoard Writer
log_dir = 'E:/FYP/Egg-Based Emotion Recognition/EEg-based-Emotion-Recognition/runs/CLISA'
writer = SummaryWriter(log_dir)

# Initialize Model, Optimizer, and Scheduler
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
base_encoder = BaseEncoder().to(device)
projector = Projector().to(device)
model = CLISA(base_encoder, projector).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.0007, weight_decay=0.015)
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

# Gradient Scaler for Mixed Precision
scaler = GradScaler()

# Early Stopping Parameters
early_stopping_tolerance = 5
best_val_loss = float('inf')
epochs_without_improvement = 0

# Gradient Accumulation Steps
accumulation_steps = 4

# Training Loop
num_epochs = 30
for epoch in range(num_epochs):
    print(f"\nEpoch [{epoch+1}/{num_epochs}]")
    model.train()
    total_train_loss = 0

    for batch_idx, (eeg_a, eeg_b) in enumerate(train_loader):
        print(f"Batch {batch_idx + 1}/{len(train_loader)}")
        eeg_a, eeg_b = eeg_a.to(device).float(), eeg_b.to(device).float()

        # Mixed precision training with autocast
        with autocast():
            z_a = model(eeg_a)
            z_b = model(eeg_b)
            loss = contrastive_loss(z_a.view(z_a.size(0), -1), z_b.view(z_b.size(0), -1))
            loss = loss / accumulation_steps  # Normalize loss for accumulation

        # Backward pass with gradient scaling
        scaler.scale(loss).backward()

        # Gradient accumulation step
        if (batch_idx + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        total_train_loss += loss.item() * accumulation_steps  # Reverse normalization for logging

        if (batch_idx + 1) % 10 == 0:
            print(f"Batch {batch_idx + 1}/{len(train_loader)}, Training Loss: {loss.item() * accumulation_steps:.4f}")
            writer.add_scalar('Loss/train', loss.item() * accumulation_steps, epoch * len(train_loader) + batch_idx)

    avg_loss = total_train_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Train Loss: {avg_loss:.4f}")

    # Save checkpoint every 10 epochs
    if (epoch + 1) % 5 == 0:
        save_checkpoint(model, optimizer, epoch + 1, avg_loss, f'./Checkpoints/checkpoint_epoch_{epoch + 1}.pt')

    # Log to TensorBoard
    writer.add_scalar('AvgLoss/train', avg_loss, epoch)
    writer.add_scalar('Learning Rate', scheduler.get_last_lr()[0], epoch)

    # Step the scheduler
    scheduler.step()

    # Validation Loop
    model.eval()
    total_val_loss = 0

    with torch.no_grad():
        for batch_idx, (eeg_a, eeg_b) in enumerate(val_loader):
            eeg_a, eeg_b = eeg_a.to(device).float(), eeg_b.to(device).float()

            with autocast():
                z_a = model(eeg_a)
                z_b = model(eeg_b)
                loss = contrastive_loss(z_a.view(z_a.size(0), -1), z_b.view(z_b.size(0), -1))

            total_val_loss += loss.item()

            if (batch_idx + 1) % 10 == 0:
                print(f"Batch {batch_idx + 1}/{len(val_loader)}, Validation Loss: {loss.item():.4f}")
                writer.add_scalar('Loss/val', loss.item(), epoch * len(val_loader) + batch_idx)

    avg_val_loss = total_val_loss / len(val_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Val Loss: {avg_val_loss:.4f}")
    writer.add_scalar('AvgLoss/val', avg_val_loss, epoch)

    # Early Stopping Check
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= early_stopping_tolerance:
        print("Early stopping triggered.")
        break

# Close TensorBoard Writer
writer.close()
print("Contrastive learning completed.")


  scaler = GradScaler()



Epoch [1/30]
Batch 1/4455


  with autocast():


Batch 2/4455
Batch 3/4455
Batch 4/4455
Batch 5/4455
Batch 6/4455
Batch 7/4455
Batch 8/4455
Batch 9/4455
Batch 10/4455
Batch 10/4455, Training Loss: 4.1392
Batch 11/4455
Batch 12/4455
Batch 13/4455
Batch 14/4455
Batch 15/4455
Batch 16/4455
Batch 17/4455
Batch 18/4455
Batch 19/4455
Batch 20/4455
Batch 20/4455, Training Loss: 4.1368
Batch 21/4455
Batch 22/4455
Batch 23/4455
Batch 24/4455
Batch 25/4455
Batch 26/4455
Batch 27/4455
Batch 28/4455
Batch 29/4455
Batch 30/4455
Batch 30/4455, Training Loss: 4.1177
Batch 31/4455
Batch 32/4455
Batch 33/4455
Batch 34/4455
Batch 35/4455
Batch 36/4455
Batch 37/4455
Batch 38/4455
Batch 39/4455
Batch 40/4455
Batch 40/4455, Training Loss: 4.1203
Batch 41/4455
Batch 42/4455
Batch 43/4455
Batch 44/4455
Batch 45/4455
Batch 46/4455
Batch 47/4455
Batch 48/4455
Batch 49/4455
Batch 50/4455
Batch 50/4455, Training Loss: 4.0647
Batch 51/4455
Batch 52/4455
Batch 53/4455
Batch 54/4455
Batch 55/4455
Batch 56/4455
Batch 57/4455
Batch 58/4455
Batch 59/4455
Batch 60/44

  with autocast():


Batch 10/319, Validation Loss: 2.9517
Batch 20/319, Validation Loss: 2.9700
Batch 30/319, Validation Loss: 2.8329
Batch 40/319, Validation Loss: 2.9157
Batch 50/319, Validation Loss: 2.9688
Batch 60/319, Validation Loss: 2.9794
Batch 70/319, Validation Loss: 2.9684
Batch 80/319, Validation Loss: 2.8771
Batch 90/319, Validation Loss: 2.8613
Batch 100/319, Validation Loss: 3.0548
Batch 110/319, Validation Loss: 2.9019
Batch 120/319, Validation Loss: 2.9375
Batch 130/319, Validation Loss: 2.8608
Batch 140/319, Validation Loss: 2.9356
Batch 150/319, Validation Loss: 2.8788
Batch 160/319, Validation Loss: 2.7939
Batch 170/319, Validation Loss: 2.8127
Batch 180/319, Validation Loss: 2.9538
Batch 190/319, Validation Loss: 2.7465
Batch 200/319, Validation Loss: 2.7855
Batch 210/319, Validation Loss: 2.7985
Batch 220/319, Validation Loss: 2.9857
Batch 230/319, Validation Loss: 3.0591
Batch 240/319, Validation Loss: 2.9914
Batch 250/319, Validation Loss: 3.0282
Batch 260/319, Validation Loss: 2.

KeyboardInterrupt: 