In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import pickle
import numpy as np

# Define constants
DATA_DIR = 'C:/Users/User/Documents/Lie detect data/EEGData'
BATCH_SIZE = 32
EPOCHS = 100  # Increased to allow early stopping
LEARNING_RATE = 0.001
NUM_FOLDS = 5
PATIENCE = 20  # Early stopping patience

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Custom Dataset class for EEG data
class EEGDataset(Dataset):
    def __init__(self, data_dir):
        self.data = []
        self.labels = []
        self.load_data(data_dir)
        self.normalize_data()

    def load_data(self, data_dir):
        max_length = 0
        temp_data = []
        
        for file_name in os.listdir(data_dir):
            file_path = os.path.join(data_dir, file_name)
            with open(file_path, 'rb') as f:
                eeg_data = pickle.load(f)
                label = 1 if 'lie' in file_name else 0  # Assuming file names contain 'lie' or 'truth'
                temp_data.append((eeg_data, label))
                max_length = max(max_length, eeg_data.shape[1])

        for eeg_data, label in temp_data:
            padded_data = np.pad(eeg_data, ((0, 0), (0, max_length - eeg_data.shape[1])), mode='constant')
            self.data.append(padded_data)
            self.labels.append(label)
        
        self.data = [torch.tensor(d, dtype=torch.float32, device=device) for d in self.data]
        self.labels = torch.tensor(self.labels, dtype=torch.long, device=device)
    
    def normalize_data(self):
        all_data = torch.cat([d.unsqueeze(0) for d in self.data], dim=0)
        mean = all_data.mean()
        std = all_data.std()
        self.data = [(d - mean) / std for d in self.data]

    def augment_data(self, data):
        # Advanced augmentations: Gaussian noise, time shift, scaling
        noise = torch.randn_like(data, device=device) * 0.01
        shift = torch.roll(data, shifts=int(data.shape[1] * 0.1), dims=1)
        scale = data * (1 + 0.1 * torch.randn(1, device=device))
        return noise + shift + scale

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data, label = self.data[idx], self.labels[idx]
        data = self.augment_data(data)  # Apply augmentation
        return data, label

# Define the EEGNet model
class EEGNet(nn.Module):
    def __init__(self, output_size):
        super(EEGNet, self).__init__()
        self.firstconv = nn.Sequential(
            nn.Conv2d(1, 16, (1, 51), padding=(0, 25)),
            nn.BatchNorm2d(16)
        )
        self.depthwiseConv = nn.Sequential(
            nn.Conv2d(16, 32, (65, 1), groups=16),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.AvgPool2d((1, 4)),
            nn.Dropout(0.5)
        )
        self.separableConv = nn.Sequential(
            nn.Conv2d(32, 32, (1, 15), padding=(0, 7)),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.AvgPool2d((1, 8)),
            nn.Dropout(0.5)
        )
        self.classify = nn.Sequential(
            nn.Flatten(),
            nn.Linear(output_size, 2)
        )

    def forward(self, x):
        x = self.firstconv(x)
        x = self.depthwiseConv(x)
        x = self.separableConv(x)
        return self.classify(x)

# Function to determine the output size of the EEGNet model before the linear layer
def get_output_size(model, shape):
    with torch.no_grad():
        x = torch.zeros(shape, device=device)
        x = model.firstconv(x)
        x = model.depthwiseConv(x)
        x = model.separableConv(x)
        return x.view(x.size(0), -1).size(1)

# Load data
dataset = EEGDataset(DATA_DIR)

# Determine the correct input size for the linear layer
dummy_input_shape = (1, 1, 65, max([d.shape[1] for d in dataset.data]))  # (batch_size, channels, height, width)
output_size = get_output_size(EEGNet(output_size=0).to(device), dummy_input_shape)

# K-Fold Cross Validation
kf = KFold(n_splits=NUM_FOLDS, shuffle=True)

final_accuracies = []
final_precisions = []
final_recalls = []
final_f1s = []
final_aucs = []
final_val_losses = []

for fold, (train_index, val_index) in enumerate(kf.split(dataset)):
    print(f'Fold {fold+1}')
    
    train_subset = Subset(dataset, train_index)
    val_subset = Subset(dataset, val_index)

    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False)

    # Initialize model, loss function, and optimizer
    model = EEGNet(output_size=output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    best_val_loss = float('inf')
    patience_counter = 0

    # Training loop
    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs.unsqueeze(1))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        scheduler.step()

        # Validation
        model.eval()
        val_running_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs.unsqueeze(1))
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()

        val_loss = val_running_loss / len(val_loader)

        print(f"Epoch {epoch+1}/{EPOCHS}, Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Val Loss: {val_loss:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Final evaluation
    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_predictions = []
    all_probs = []
    final_val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs.unsqueeze(1))
            loss = criterion(outputs, labels)
            final_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())
            all_probs.extend(torch.softmax(outputs, dim=1)[:, 1].cpu().numpy())

    final_val_loss /= len(val_loader)
    final_accuracy = 100 * correct / total
    final_precision = precision_score(all_labels, all_predictions, average='macro')
    final_recall = recall_score(all_labels, all_predictions, average='macro')
    final_f1 = f1_score(all_labels, all_predictions, average='macro')
    final_auc = roc_auc_score(all_labels, all_probs)

    # Store final metrics
    final_accuracies.append(final_accuracy)
    final_precisions.append(final_precision)
    final_recalls.append(final_recall)
    final_f1s.append(final_f1)
    final_aucs.append(final_auc)
    final_val_losses.append(final_val_loss)

    print(f'Final Accuracy for fold {fold+1}: {final_accuracy:.2f}%')
    print(f'Final Validation Loss for fold {fold+1}: {final_val_loss:.4f}')
    print(f'Final Precision for fold {fold+1}: {final_precision:.2f}')
    print(f'Final Recall for fold {fold+1}: {final_recall:.2f}')
    print(f'Final F1-Score for fold {fold+1}: {final_f1:.2f}')
    print(f'Final AUC for fold {fold+1}: {final_auc:.2f}\n')

# Report average performance across all folds
print(f'Average Accuracy: {np.mean(final_accuracies):.2f}%')
print(f'Average Validation Loss: {np.mean(final_val_losses):.4f}')
print(f'Average Precision: {np.mean(final_precisions):.2f}')
print(f'Average Recall: {np.mean(final_recalls):.2f}')
print(f'Average F1-Score: {np.mean(final_f1s):.2f}')
print(f'Average AUC: {np.mean(final_aucs):.2f}')


Using device: cuda
Fold 1




Epoch 1/100, Train Loss: 0.9543, Val Loss: 0.6554
Epoch 2/100, Train Loss: 0.5493, Val Loss: 0.6562
Epoch 3/100, Train Loss: 0.5053, Val Loss: 0.6117
Epoch 4/100, Train Loss: 0.4281, Val Loss: 0.5671
Epoch 5/100, Train Loss: 0.3936, Val Loss: 0.5345
Epoch 6/100, Train Loss: 0.3801, Val Loss: 0.5372
Epoch 7/100, Train Loss: 0.3132, Val Loss: 0.5711
Epoch 8/100, Train Loss: 0.2671, Val Loss: 0.6251
Epoch 9/100, Train Loss: 0.2822, Val Loss: 0.6984
Epoch 10/100, Train Loss: 0.2001, Val Loss: 0.6743
Epoch 11/100, Train Loss: 0.2014, Val Loss: 0.6858
Epoch 12/100, Train Loss: 0.3009, Val Loss: 0.6358
Epoch 13/100, Train Loss: 0.2299, Val Loss: 0.6390
Epoch 14/100, Train Loss: 0.2435, Val Loss: 0.6486
Epoch 15/100, Train Loss: 0.2473, Val Loss: 0.5985
Epoch 16/100, Train Loss: 0.2146, Val Loss: 0.6224
Epoch 17/100, Train Loss: 0.1971, Val Loss: 0.6056
Epoch 18/100, Train Loss: 0.2941, Val Loss: 0.5794
Epoch 19/100, Train Loss: 0.5614, Val Loss: 0.5702
Epoch 20/100, Train Loss: 0.1799, Val Lo