In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import os
import scipy.io
from scipy.signal import butter, lfilter
import scipy.signal

In [2]:
import scipy.io
import numpy as np

data_path = "E:\\FYP\\Finalise Fyp\\EEg-based-Emotion-Recognition\\data_preprocessed_matlab\\"

# Initialize arrays
subject_data = np.zeros((32, 40, 40, 8064), dtype=np.float32)
subject_labels = np.zeros((32, 40, 4), dtype=np.float32)

# Load all 32 subjects
for i in range(1, 33):
    mat = scipy.io.loadmat(f"{data_path}s{i:02d}.mat")
    subject_data[i-1] = mat["data"]
    subject_labels[i-1] = mat["labels"]

# Print shapes to confirm
print("EEG Data Shape:", subject_data.shape)  
print("Labels Shape:", subject_labels.shape)  


EEG Data Shape: (32, 40, 40, 8064)
Labels Shape: (32, 40, 4)


In [3]:
##Normalizing EEG data (by Hamza)

subject_data = (subject_data - np.mean(subject_data, axis=-1, keepdims=True)) / np.std(subject_data, axis=-1, keepdims=True)


In [4]:
import numpy as np
import scipy.signal
# Frequency band definitions
freq_bands = {
    "delta": (1, 4),
    "theta": (4, 8),
    "alpha": (8, 14),
    "beta": (14, 30),
    "gamma": (31, 50),
}

# Define function to compute Differential Entropy (DE)
def compute_de(signal):
    """Compute Differential Entropy (DE) for a given EEG segment"""
    variance = np.var(signal, axis=-1, keepdims=True)  # Compute variance
    de = 0.5 * np.log(2 * np.pi * np.e * variance)  # Apply DE formula
    return de.squeeze()  # Remove extra dimensions

# Define function to extract DE features
def extract_de_features(subject_data, fs=128, window_size=128):
    """
    Extract DE features from EEG data.
    - subject_data: EEG data of shape (32, 40, 40, 8064)
    - fs: Sampling frequency (128 Hz)
    - window_size: 1 second (128 samples)
    Returns: DE feature array of shape (32, 40, 40, 5, 63)
    """
    num_subjects, num_trials, num_channels, num_samples = subject_data.shape
    num_bands = len(freq_bands)
    num_windows = num_samples // window_size  # 8064 / 128 = 63 windows

    # Initialize DE feature array
    de_features = np.zeros((num_subjects, num_trials, num_channels, num_bands, num_windows))

    # Loop through subjects, trials, and channels
    for subj in range(num_subjects):
        for trial in range(num_trials):
            for ch in range(num_channels):
                # Extract single-channel EEG data for this trial
                signal = subject_data[subj, trial, ch, :]

                # Apply bandpass filters and compute DE for each frequency band
                for b_idx, (band, (low, high)) in enumerate(freq_bands.items()):
                    # Bandpass filter
                    sos = scipy.signal.butter(4, [low, high], btype="bandpass", fs=fs, output="sos")
                    filtered_signal = scipy.signal.sosfiltfilt(sos, signal) ## sosfiltfilt (by hamza)

                    # Segment into 1-second windows (128 samples each)
                    segmented = np.array(np.split(filtered_signal, num_windows, axis=-1))

                    # Compute DE for each window
                    de_features[subj, trial, ch, b_idx, :] = compute_de(segmented)

    return de_features

# Extract DE features
de_features = extract_de_features(subject_data)

# Print shape to confirm
print("DE Feature Shape:", de_features.shape)  # Expected: (32, 40, 40, 5, 63)

DE Feature Shape: (32, 40, 40, 5, 63)


In [5]:
## by (Hamza)
de_features = (de_features - np.mean(de_features, axis=(1, 2, 4), keepdims=True)) / np.std(de_features, axis=(1, 2, 4), keepdims=True)


In [6]:
#1. Common Feature Extractor (CFE)
class CommonFeatureExtractor(nn.Module):
    def __init__(self, input_dim=200, output_dim=64):
        super(CommonFeatureExtractor, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, output_dim)
        self.activation = nn.LeakyReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.activation(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.activation(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ContrastiveLossLcon1(nn.Module):
    def __init__(self, tau=0.2, chunk_size=128):
        super(ContrastiveLossLcon1, self).__init__()
        self.tau = tau
        self.chunk_size = chunk_size  # Controls mini-batch size for similarity computation

    def forward(self, q, labels=None):
        batch_size = q.shape[0]
        q = F.normalize(q, dim=-1)  # Normalize embeddings

        loss = 0.0
        num_chunks = (batch_size + self.chunk_size - 1) // self.chunk_size  # Compute number of chunks

        for i in range(num_chunks):
            start = i * self.chunk_size
            end = min((i + 1) * self.chunk_size, batch_size)

            q_chunk = q[start:end]  # Extract chunk of embeddings
            sim_matrix = torch.mm(q_chunk, q.T)  # Compute similarity (128, 80640)

            # Create a mask for self-similarity **only within the chunk**
            mask = torch.eye(q_chunk.shape[0], dtype=torch.bool, device=q.device)  # (128, 128)
            sim_matrix[:, start:end].masked_fill_(mask, float('-inf'))  # Only mask current chunk

            exp_sim = torch.exp(sim_matrix / self.tau)  # Compute softmax scores

            if labels is None:  # Unsupervised Contrastive Loss
                loss += -torch.log(exp_sim[:, 0] / (exp_sim.sum(dim=1) + 1e-9)).mean()
            else:  # Supervised Contrastive Loss
                mask_same_class = labels[start:end].unsqueeze(1) == labels.unsqueeze(0)
                exp_sim_same = exp_sim * mask_same_class.float()
                loss += -torch.log(exp_sim_same.sum(dim=-1) / (exp_sim.sum(dim=-1) + 1e-9)).mean()

        return loss / num_chunks  # Normalize loss across chunks


In [8]:
# 3. Subject-Specific Feature Extractor (SFE)

class SubjectSpecificFeatureExtractor(nn.Module):
    def __init__(self, input_dim=64, output_dim=32):
        super(SubjectSpecificFeatureExtractor, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.activation = nn.LeakyReLU()

    def forward(self, x):
        x = F.normalize(x, dim=-1) ## changes by (Hamza)
        return self.activation(self.fc(x))  # Output shape: (batch_size, 32)

In [9]:
#4. Maximum Mean Discrepancy (MMD) Loss
def mmd_loss(source_features, target_features):
    source_mean = source_features.mean(dim=0)
    target_mean = target_features.mean(dim=0)
    loss = torch.norm(source_mean - target_mean, p=2) ** 2  
    return loss

In [10]:
# 5. Contrastive Loss L_con2 with Class Prototypes
class ContrastiveLossLcon2(nn.Module):
    def __init__(self, feature_dim=32, num_classes=4, tau=0.3, gamma=0.5, queue_size=1024):
        super(ContrastiveLossLcon2, self).__init__()
        self.tau = tau
        self.gamma = gamma
        self.num_classes = num_classes
        self.queue_size = queue_size

        # Initialize class prototypes (μ_c)
        self.prototypes = nn.Parameter(torch.randn(num_classes, feature_dim))  # Shape: (4, 32)
        self.prototypes.data = F.normalize(self.prototypes.data, dim=-1)  # Normalize prototypes ##by(Hamza)

        # Initialize memory queue for negative samples
        self.queue = torch.randn(queue_size, feature_dim)  # Shape: (1024, 32)
        self.queue = F.normalize(self.queue, dim=-1)  # Normalize queue embeddings

    def forward(self, z_t, pseudo_labels):
        """
        Compute contrastive loss L_con2 for inter-domain alignment.
        - z_t: Target domain features (batch_size, trials, time_windows, 32)
        - pseudo_labels: Pseudo-labels for target samples (batch_size, trials, time_windows)

        Returns:
            Contrastive loss scalar
        """
        batch_size, trials, time_windows, feature_dim = z_t.shape

        # Flatten input for processing
        z_t = z_t.view(-1, feature_dim)  # Shape: (batch_size * trials * time_windows, 32)
        pseudo_labels = pseudo_labels.view(-1)  # Shape: (batch_size * trials * time_windows)

        # Normalize embeddings
        z_t = F.normalize(z_t, dim=-1)  # Normalize target embeddings
        self.prototypes.data = F.normalize(self.prototypes.data, dim=-1)  # Normalize prototypes

        # Compute similarity to class prototypes
        similarity = torch.mm(z_t, self.prototypes.T)  # Shape: (batch_size * trials * time_windows, 4)
        
        # Select correct class prototype based on pseudo-labels
        proto_sim = similarity.gather(1, pseudo_labels.unsqueeze(1))  # Shape: (batch_size * trials * time_windows, 1)

        # Compute softmax denominator (all possible embeddings)
        queue_sim = torch.mm(z_t, self.queue.T)  # Shape: (batch_size * trials * time_windows, queue_size)
        exp_sim = torch.cat([proto_sim, queue_sim], dim=1)  # Concatenate prototypes & queue
        exp_sim = torch.exp(exp_sim / self.tau)  # Apply temperature scaling

        # Contrastive loss computation
        loss = -torch.log(exp_sim[:, 0] / exp_sim.sum(dim=1))  # Only consider prototype similarity
        loss = loss.mean()

        # Update prototypes with momentum (γ)
        for i in range(self.num_classes):
            class_mask = (pseudo_labels == i).float().unsqueeze(1)  # Mask for samples of class i
            if class_mask.sum() > 0: ## changes by (Hamza)
                class_mean = (class_mask * z_t).sum(dim=0) / (class_mask.sum() + 1e-9)  # Compute class mean
                self.prototypes.data[i] = F.normalize(self.gamma * self.prototypes.data[i] + (1 - self.gamma) * class_mean, dim=-1)

        # Update memory queue (FIFO replacement)
        self.queue = torch.cat([self.queue[batch_size:], z_t.detach()], dim=0)  # Remove old, add new
        self.queue = F.normalize(self.queue, dim=-1) ## changes by (Hamza)

        return loss

In [11]:
# 6. Subject-Specific Classifier (SSC)
class SubjectSpecificClassifier(nn.Module):
    def __init__(self, input_dim=32, num_classes=4):
        super(SubjectSpecificClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.fc(x)  

In [12]:
# 7. Generalized Cross-Entropy (GCE) Loss
class GCELoss(nn.Module):
    def __init__(self, q=0.55):
        super(GCELoss, self).__init__()
        self.q = q

    def forward(self, logits, targets):
        probs = F.softmax(logits, dim=-1)
        true_probs = probs.gather(1, targets.unsqueeze(1)).squeeze()
        loss = (1 - true_probs ** self.q) / self.q
        return loss.mean()

In [13]:
de_features = torch.tensor(de_features, dtype=torch.float32)  # (32, 40, 40, 5, 63)
num_subjects, num_trials, num_channels, num_bands, num_windows = de_features.shape
de_features = de_features.view(num_subjects, num_trials, num_windows, num_channels * num_bands)  # (32, 40, 63, 200)
print("Final Shape:", de_features.shape)

Final Shape: torch.Size([32, 40, 63, 200])


In [14]:
de_features = (de_features - de_features.mean(dim=(0, 1), keepdims=True)) / (de_features.std(dim=(0, 1), keepdims=True) + 1e-9)


In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize models
cfe = CommonFeatureExtractor().to(device)
sfe = SubjectSpecificFeatureExtractor().to(device)
ssc = SubjectSpecificClassifier().to(device)
contrastive_loss_lcon1 = ContrastiveLossLcon1().to(device)
contrastive_loss_lcon2 = ContrastiveLossLcon2().to(device)
gce_loss_fn = GCELoss().to(device)

# Projection layer for MMD alignment (outside training loop)
feature_projection = torch.nn.Linear(64, 32).to(device)  # Projects CFE output to 32-dim

# Optimizer
optimizer = optim.Adam(list(cfe.parameters()) + list(sfe.parameters()) + list(ssc.parameters()), lr=0.01)

# Training settings
epochs = 50
batch_size = 32  # Matches subjects in dataset

for epoch in range(epochs):
    optimizer.zero_grad()

    # Reshape DE features for CFE input
    num_subjects, num_trials, num_windows, feature_dim = de_features.shape  # (32, 40, 63, 200)

    # Reshape into (batch_size * trials * time_windows, 200)
    reshaped_features = de_features.view(-1, feature_dim).to(device)  # Shape: (80640, 200)

    print("Final Shape:", reshaped_features.shape)  # Expected: (80640, 200)

    # Forward pass through CFE
    shared_features = cfe(reshaped_features)  # Shape: (80640, 64)
    common_loss = contrastive_loss_lcon1(shared_features)  # Keep on GPU

    # Forward pass through SFE
    subject_features = sfe(shared_features)  # Shape: (80640, 32)

    # Forward pass through SSC
    predictions = ssc(subject_features)  # Shape: (80640, 4)

    # Apply projection before MMD computation (using pre-defined layer)
    shared_features_projected = feature_projection(shared_features)  # Shape: (80640, 32)

    # Compute MMD loss (Only in Early Epochs)
    if epoch < 25:  # Matches MSCL training strategy
        mmd_value = mmd_loss(shared_features_projected, subject_features)
    else:
        mmd_value = torch.tensor(0.0, device=device)

    # Compute GCE Loss (Using actual DEAP labels)
    labels = subject_labels.view(-1).to(device)  # Shape: (80640,)
    gce_loss = gce_loss_fn(predictions, labels)

    # Reshape subject_features for L_con2
    subject_features = subject_features.view(num_subjects, num_trials, num_windows, -1)  # Shape: (32, 40, 63, 32)

    # Compute L_con2 (Using Pseudo Labels)
    pseudo_labels = torch.randint(0, 4, (num_subjects, num_trials, num_windows), device=device)  # Shape: (32, 40, 63)
    lcon2_value = contrastive_loss_lcon2(subject_features, pseudo_labels)

    # Final Loss
    total_loss = gce_loss + mmd_value + common_loss + lcon2_value

    # Backpropagation
    total_loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}/{epochs} - Total Loss: {total_loss.item():.4f}")

print("✅ MSCL Training Complete!")


Final Shape: torch.Size([80640, 200])


OutOfMemoryError: CUDA out of memory. Tried to allocate 40.00 MiB. GPU 0 has a total capacity of 2.00 GiB of which 0 bytes is free. Of the allocated memory 9.19 GiB is allocated by PyTorch, and 18.19 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)