In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import os
import matplotlib.pyplot as plt
import torch.optim as optim

In [2]:
import os
import scipy.io as sio
import torch
from torch.utils.data import Dataset

class MatFileDataset(Dataset):
    def __init__(self, directory):
        self.features = []  
        self.signal = []
        self.labels = []    
        self.fractal_feature_length = None
        self._load_data(directory)
        self._validate_labels()

    def _load_data(self, directory):
        for filename in os.listdir(directory):
            if filename.endswith(".mat"):
                filepath = os.path.join(directory, filename)
                mat_data = sio.loadmat(filepath, struct_as_record=False, squeeze_me=True)

                all_window_features = mat_data.get("all_window_features")
                if all_window_features is None:
                    continue
                
                for participant_data in all_window_features:
                    if participant_data is None:
                        continue

                    
                    
                    # Get fractal feature length from first valid window
                    if self.fractal_feature_length is None:
                        for window in participant_data:

                            if hasattr(window, "Dq") and window.Dq is not None:
                                self.fractal_feature_length = len(window.Dq)
                                break

                    for window in participant_data:
                        # Extract labels first for filtering
                        before_label = getattr(window, "before_label", None)
                        after_label = getattr(window, "after_label", None)
                        
                        # Filter condition: both labels must exist and be <=3
                        if (before_label is None or 
                            after_label is None or 
                            not (0 <= before_label <= 3) or 
                            not (0 <= after_label <= 3)):
                            continue
                        
                        # Process features only for valid samples
                        flattened_features = []
                        signals = []
                        
                        if hasattr(window, "raw_window_signal") and window.raw_window_signal is not None:
                            signals.extend(window.raw_window_signal.flatten())
                        if hasattr(window, "Dq") and window.Dq is not None:
                            flattened_features.extend(window.Dq.flatten())
                        # if hasattr(window, "hq") and window.Dq is not None:
                        #     flattened_features.extend(window.Dq.flatten())
                        
                        # Store data
                        self.features.append(
                            torch.tensor(flattened_features, dtype=torch.float32).unsqueeze(1)
                        )
                        self.signal.append(
                            torch.tensor(signals, dtype=torch.float32).unsqueeze(1)
                        )
                        self.labels.append((int(before_label), int(after_label)))
    def _validate_labels(self):
        """Ensure all labels are valid integers 0-3"""
        valid_before = all(0 <= lbl[0] <= 3 for lbl in self.labels)
        valid_after = all(0 <= lbl[1] <= 3 for lbl in self.labels)
        if not (valid_before and valid_after):
            invalid = [
                (i, lbl) for i, lbl in enumerate(self.labels)
                if not (0 <= lbl[0] <=3 and 0 <= lbl[1] <=3)
            ]
            raise ValueError(f"Invalid labels found at indices: {invalid[:10]}")

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        x = {
            'fractal': self.features[idx], 
            'signal': self.signal[idx]
        }
        y = {
            "before_label": torch.tensor(self.labels[idx][0], dtype=torch.long),
            "after_label": torch.tensor(self.labels[idx][1], dtype=torch.long),
        }
        return x, y

# Example Usage
dataset = MatFileDataset("/Users/athenasaghi/VSProjects/CognitiveFatigueDetection/Prediction/window3000/")
print(f"Loaded {len(dataset)} valid samples")

Loaded 1985 valid samples


In [15]:
len(dataset.features) , len(dataset.labels)

(1985, 1985)

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FractalFeatures(nn.Module):
    def __init__(self, input_dim=41):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(1, input_dim))
        self.norm = nn.LayerNorm(input_dim)
        nn.init.normal_(self.weight, mean=0.0, std=0.02)  # Safer initialization

    def forward(self, x):
        x = x.squeeze(-1)  # [batch, features]
        return self.norm(x * self.weight)


class FractalFeaturePipeline(nn.Module):
    def __init__(self, cnn_output_dim=64, fractal_feature_length=41, num_classes=4):
        super().__init__()
        self.fractal_filter = FractalFeatures(fractal_feature_length)
        
        # CNN
        self.conv1 = nn.Conv1d(1, 16, 3, padding=1)
        self.bn1 = nn.BatchNorm1d(16)
        self.pool1 = nn.MaxPool1d(2)
        
        self.conv2 = nn.Conv1d(16, 32, 3, padding=1)
        self.bn2 = nn.BatchNorm1d(32)
        self.pool2 = nn.MaxPool1d(2)
        
        self.fc_cnn = nn.Sequential(
            nn.Linear(32 * 50, cnn_output_dim),
            nn.LayerNorm(cnn_output_dim)
        )

        # Transformer with validated dimensions
        d_model = cnn_output_dim + fractal_feature_length
        assert d_model % 4 == 0, f"d_model={d_model} must be divisible by 4 (nhead=4)"
        
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=4, 
                dim_feedforward=256,
                batch_first=True,
                dropout=0.1
            ),
            num_layers=2
        )
        self.output = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, num_classes * 2)  # Output for two labels
        )

    def forward(self, inputs):
        # CNN branch
        raw_signal = inputs["raw_window_signal"].permute(0, 2, 1)
        x = self.pool1(F.relu(self.bn1(self.conv1(raw_signal))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        cnn_out = self.fc_cnn(x.flatten(1))  # [batch, cnn_output_dim]

        # Fractal branch
        fractal_out = self.fractal_filter(inputs["fractal_features"])  # [batch, fractal_feature_length]
        # Combined features
        combined = torch.cat([cnn_out, fractal_out], dim=1)  # [batch, cnn_output_dim + fractal_feature_length]
        combined = combined.unsqueeze(1)  # Add sequence dimension: [batch, seq_length=1, d_model]
        
        # Transformer
        transformer_out = self.transformer(combined).squeeze(1)  # [batch, d_model]
        return self.output(transformer_out).view(-1, 2, 4)  # Output for two labels


In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset split
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model, criterion, optimizer
model = FractalFeaturePipeline(cnn_output_dim=63).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0005, weight_decay=1e-5)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    total_loss, total_acc_1, total_acc_2 = 0, 0, 0
    
    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
        fractal = x_batch["fractal"].to(device).to(torch.float32)
        signal = x_batch["signal"].to(device).to(torch.float32)
        before_labels = y_batch["before_label"].to(device).long()
        after_labels = y_batch["after_label"].to(device).long()
        
        # Handle NaN and Inf
        if torch.isinf(fractal).any():
            fractal[torch.isinf(fractal)] = torch.max(fractal[~torch.isinf(fractal)])
        if torch.isnan(fractal).any():
            fractal[torch.isnan(fractal)] = torch.mean(fractal[~torch.isnan(fractal)])
        
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            outputs = model({
                "raw_window_signal": signal,
                "fractal_features": fractal
            })
        
        loss_1 = criterion(outputs[:, 0, :], before_labels) 
        loss_2 = criterion(outputs[:, 1, :], after_labels) 
        loss = loss_1 + loss_2
        
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        
        total_loss += loss.item()
        pred_1 = outputs[:, 0, :].argmax(dim=1)
        pred_2 = outputs[:, 1, :].argmax(dim=1)
        total_acc_1 += (pred_1 == before_labels).float().mean().item()
        total_acc_2 += (pred_2 == after_labels).float().mean().item()
        
    avg_loss = total_loss / len(train_loader)
    avg_acc_1 = total_acc_1 / len(train_loader)
    avg_acc_2 = total_acc_2 / len(train_loader)
    
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Acc_1: {avg_acc_1:.4f}, Acc_2: {avg_acc_2:.4f}")


RuntimeError: stack expects each tensor to be equal size, but got [3000, 1] at entry 0 and [2981, 1] at entry 29

In [414]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model, criterion, optimizer
model = FractalFeaturePipeline(cnn_output_dim=63).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)

# Online learning loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    total_loss, total_acc_1, total_acc_2 = 0, 0, 0
    
    for i in range(len(dataset)):
        x_sample, y_sample = dataset[i]
        fractal = x_sample["fractal"].unsqueeze(0).to(device).to(torch.float32)
        signal = x_sample["signal"].unsqueeze(0).to(device).to(torch.float32)
        before_label = torch.tensor([y_sample["before_label"]], device=device).long()
        after_label = torch.tensor([y_sample["after_label"]], device=device).long()
        
        # Handle NaN and Inf
        if torch.isinf(fractal).any():
            fractal[torch.isinf(fractal)] = torch.max(fractal[~torch.isinf(fractal)])
        if torch.isnan(fractal).any():
            fractal[torch.isnan(fractal)] = torch.mean(fractal[~torch.isnan(fractal)])
        
        optimizer.zero_grad()
        outputs = model({
            "raw_window_signal": signal,
            "fractal_features": fractal
        })
        
        loss_1 = criterion(outputs[:, 0, :], before_label)
        loss_2 = criterion(outputs[:, 1, :], after_label)
        loss = loss_1 + loss_2
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        pred_1 = outputs[:, 0, :].argmax(dim=1)
        pred_2 = outputs[:, 1, :].argmax(dim=1)
        total_acc_1 += (pred_1 == before_label).float().mean().item()
        total_acc_2 += (pred_2 == after_label).float().mean().item()
        
    avg_loss = total_loss / len(dataset)
    avg_acc_1 = total_acc_1 / len(dataset)
    avg_acc_2 = total_acc_2 / len(dataset)
    
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Acc_1: {avg_acc_1:.4f}, Acc_2: {avg_acc_2:.4f}")


Epoch 1/20, Loss: 0.7448, Acc_1: 0.8894, Acc_2: 0.8888
Epoch 2/20, Loss: 0.8599, Acc_1: 0.8717, Acc_2: 0.8701
Epoch 3/20, Loss: 0.8695, Acc_1: 0.8715, Acc_2: 0.8695
Epoch 4/20, Loss: 0.8667, Acc_1: 0.8724, Acc_2: 0.8703
Epoch 5/20, Loss: 0.8670, Acc_1: 0.8728, Acc_2: 0.8708
Epoch 6/20, Loss: 0.8854, Acc_1: 0.8685, Acc_2: 0.8665


KeyboardInterrupt: 