In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import pickle
import numpy as np

# Define constants
DATA_DIR = 'C:/Users/User/Documents/Lie detect data/EEGData'
BATCH_SIZE = 32
EPOCHS = 100  # Increased to allow early stopping
LEARNING_RATE = 0.001
NUM_FOLDS = 5
PATIENCE = 20  # Early stopping patience

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Custom Dataset class for EEG data
class EEGDataset(Dataset):
    def __init__(self, data_dir):
        self.data = []
        self.labels = []
        self.load_data(data_dir)
        self.normalize_data()

    def load_data(self, data_dir):
        max_length = 0
        temp_data = []
        
        for file_name in os.listdir(data_dir):
            file_path = os.path.join(data_dir, file_name)
            with open(file_path, 'rb') as f:
                eeg_data = pickle.load(f)
                label = 1 if 'lie' in file_name else 0  # Assuming file names contain 'lie' or 'truth'
                temp_data.append((eeg_data, label))
                max_length = max(max_length, eeg_data.shape[1])

        for eeg_data, label in temp_data:
            padded_data = np.pad(eeg_data, ((0, 0), (0, max_length - eeg_data.shape[1])), mode='constant')
            self.data.append(padded_data)
            self.labels.append(label)
        
        self.data = [torch.tensor(d, dtype=torch.float32, device=device) for d in self.data]
        self.labels = torch.tensor(self.labels, dtype=torch.long, device=device)
    
    def normalize_data(self):
        all_data = torch.cat([d.unsqueeze(0) for d in self.data], dim=0)
        mean = all_data.mean()
        std = all_data.std()
        self.data = [(d - mean) / std for d in self.data]

    def augment_data(self, data):
        # Advanced augmentations: Gaussian noise, time shift, scaling
        noise = torch.randn_like(data, device=device) * 0.01
        shift = torch.roll(data, shifts=int(data.shape[1] * 0.1), dims=1)
        scale = data * (1 + 0.1 * torch.randn(1, device=device))
        return noise + shift + scale

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data, label = self.data[idx], self.labels[idx]
        data = self.augment_data(data)  # Apply augmentation
        return data, label

# Define the EEGNet model
class EEGNet(nn.Module):
    def __init__(self, output_size):
        super(EEGNet, self).__init__()
        self.firstconv = nn.Sequential(
            nn.Conv2d(1, 16, (1, 51), padding=(0, 25)),
            nn.BatchNorm2d(16)
        )
        self.depthwiseConv = nn.Sequential(
            nn.Conv2d(16, 32, (65, 1), groups=16),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.AvgPool2d((1, 4)),
            nn.Dropout(0.5)
        )
        self.separableConv = nn.Sequential(
            nn.Conv2d(32, 32, (1, 15), padding=(0, 7)),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.AvgPool2d((1, 8)),
            nn.Dropout(0.5)
        )
        self.classify = nn.Sequential(
            nn.Flatten(),
            nn.Linear(output_size, 2)
        )

    def forward(self, x):
        x = self.firstconv(x)
        x = self.depthwiseConv(x)
        x = self.separableConv(x)
        return self.classify(x)

# Function to determine the output size of the EEGNet model before the linear layer
def get_output_size(model, shape):
    with torch.no_grad():
        x = torch.zeros(shape, device=device)
        x = model.firstconv(x)
        x = model.depthwiseConv(x)
        x = model.separableConv(x)
        return x.view(x.size(0), -1).size(1)

# Load data
dataset = EEGDataset(DATA_DIR)

# Determine the correct input size for the linear layer
dummy_input_shape = (1, 1, 65, max([d.shape[1] for d in dataset.data]))  # (batch_size, channels, height, width)
output_size = get_output_size(EEGNet(output_size=0).to(device), dummy_input_shape)

# K-Fold Cross Validation
kf = KFold(n_splits=NUM_FOLDS, shuffle=True)

accuracies = []
precisions = []
recalls = []
f1s = []
aucs = []
val_losses = []  # New list to store validation losses

for fold, (train_index, val_index) in enumerate(kf.split(dataset)):
    print(f'Fold {fold+1}')
    
    train_subset = Subset(dataset, train_index)
    val_subset = Subset(dataset, val_index)

    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False)

    # Initialize model, loss function, and optimizer
    model = EEGNet(output_size=output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    best_val_accuracy = 0
    best_val_loss = float('inf')  # Track the best validation loss
    patience_counter = 0

    # Training loop
    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs.unsqueeze(1))  # Adding channel dimension
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        scheduler.step()

        # Validation
        model.eval()
        correct = 0
        total = 0
        val_running_loss = 0.0
        all_labels = []
        all_predictions = []
        all_probs = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs.unsqueeze(1))  # Adding channel dimension
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(predicted.cpu().numpy())
                all_probs.extend(torch.softmax(outputs, dim=1)[:, 1].cpu().numpy())

        val_loss = val_running_loss / len(val_loader)
        val_accuracy = 100 * correct / total
        val_precision = precision_score(all_labels, all_predictions, average='macro')
        val_recall = recall_score(all_labels, all_predictions, average='macro')
        val_f1 = f1_score(all_labels, all_predictions, average='macro')
        val_auc = roc_auc_score(all_labels, all_probs)

        print(f"Epoch {epoch+1}/{EPOCHS}, Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%, AUC: {val_auc:.2f}")

        # Early stopping based on validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_accuracy = val_accuracy
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Final validation metrics for the fold
    accuracies.append(best_val_accuracy)
    precisions.append(val_precision)
    recalls.append(val_recall)
    f1s.append(val_f1)
    aucs.append(val_auc)
    val_losses.append(best_val_loss)  # Store the best validation loss
    print(f'Final Accuracy for fold {fold+1}: {best_val_accuracy:.2f}%')
    print(f'Final Validation Loss for fold {fold+1}: {best_val_loss:.4f}')
    print(f'Final Precision for fold {fold+1}: {val_precision:.2f}')
    print(f'Final Recall for fold {fold+1}: {val_recall:.2f}')
    print(f'Final F1-Score for fold {fold+1}: {val_f1:.2f}')
    print(f'Final AUC for fold {fold+1}: {val_auc:.2f}\n')

# Report average performance across all folds
print(f'Average Accuracy: {np.mean(accuracies):.2f}%')
print(f'Average Validation Loss: {np.mean(val_losses):.4f}')
print(f'Average Precision: {np.mean(precisions):.2f}')
print(f'Average Recall: {np.mean(recalls):.2f}')
print(f'Average F1-Score: {np.mean(f1s):.2f}')
print(f'Average AUC: {np.mean(aucs):.2f}')


Using device: cuda




Fold 1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/100, Train Loss: 0.6135, Val Loss: 0.6449, Val Accuracy: 66.67%, AUC: 0.88
Epoch 2/100, Train Loss: 0.5394, Val Loss: 0.6637, Val Accuracy: 77.78%, AUC: 0.82
Epoch 3/100, Train Loss: 0.4395, Val Loss: 0.6634, Val Accuracy: 66.67%, AUC: 0.78
Epoch 4/100, Train Loss: 0.4361, Val Loss: 0.6242, Val Accuracy: 77.78%, AUC: 0.83
Epoch 5/100, Train Loss: 0.3701, Val Loss: 0.6401, Val Accuracy: 66.67%, AUC: 0.88
Epoch 6/100, Train Loss: 0.2852, Val Loss: 0.5843, Val Accuracy: 77.78%, AUC: 0.86
Epoch 7/100, Train Loss: 0.2941, Val Loss: 0.5884, Val Accuracy: 77.78%, AUC: 0.85
Epoch 8/100, Train Loss: 0.3256, Val Loss: 0.5863, Val Accuracy: 77.78%, AUC: 0.85
Epoch 9/100, Train Loss: 0.4286, Val Loss: 0.6228, Val Accuracy: 72.22%, AUC: 0.86
Epoch 10/100, Train Loss: 0.2801, Val Loss: 0.7864, Val Accuracy: 77.78%, AUC: 0.76
Epoch 11/100, Train Loss: 0.3033, Val Loss: 0.7607, Val Accuracy: 77.78%, AUC: 0.79
Epoch 12/100, Train Loss: 0.2318, Val Loss: 0.7312, Val Accuracy: 77.78%, AUC: 0.81
E

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 3/100, Train Loss: 0.4701, Val Loss: 0.5760, Val Accuracy: 72.22%, AUC: 0.86
Epoch 4/100, Train Loss: 0.4763, Val Loss: 0.5603, Val Accuracy: 72.22%, AUC: 0.86
Epoch 5/100, Train Loss: 0.4157, Val Loss: 0.5296, Val Accuracy: 72.22%, AUC: 0.88
Epoch 6/100, Train Loss: 0.3829, Val Loss: 0.4991, Val Accuracy: 77.78%, AUC: 0.86
Epoch 7/100, Train Loss: 0.3248, Val Loss: 0.4893, Val Accuracy: 77.78%, AUC: 0.86
Epoch 8/100, Train Loss: 0.2651, Val Loss: 0.5185, Val Accuracy: 72.22%, AUC: 0.83
Epoch 9/100, Train Loss: 0.3408, Val Loss: 0.5600, Val Accuracy: 72.22%, AUC: 0.86
Epoch 10/100, Train Loss: 0.2119, Val Loss: 0.5748, Val Accuracy: 72.22%, AUC: 0.86
Epoch 11/100, Train Loss: 0.2353, Val Loss: 0.5578, Val Accuracy: 77.78%, AUC: 0.88
Epoch 12/100, Train Loss: 0.1960, Val Loss: 0.5407, Val Accuracy: 72.22%, AUC: 0.88
Epoch 13/100, Train Loss: 0.3255, Val Loss: 0.5265, Val Accuracy: 72.22%, AUC: 0.88
Epoch 14/100, Train Loss: 0.2145, Val Loss: 0.5185, Val Accuracy: 77.78%, AUC: 0.89

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 4/100, Train Loss: 0.4761, Val Loss: 0.5080, Val Accuracy: 72.22%, AUC: 0.83
Epoch 5/100, Train Loss: 0.4462, Val Loss: 0.4228, Val Accuracy: 77.78%, AUC: 0.89
Epoch 6/100, Train Loss: 0.3850, Val Loss: 0.3801, Val Accuracy: 77.78%, AUC: 0.90
Epoch 7/100, Train Loss: 0.2851, Val Loss: 0.3579, Val Accuracy: 83.33%, AUC: 0.89
Epoch 8/100, Train Loss: 0.2731, Val Loss: 0.3273, Val Accuracy: 83.33%, AUC: 0.89
Epoch 9/100, Train Loss: 0.2677, Val Loss: 0.2909, Val Accuracy: 83.33%, AUC: 0.92
Epoch 10/100, Train Loss: 0.2425, Val Loss: 0.2725, Val Accuracy: 83.33%, AUC: 0.93
Epoch 11/100, Train Loss: 0.2477, Val Loss: 0.2729, Val Accuracy: 83.33%, AUC: 0.94
Epoch 12/100, Train Loss: 0.1886, Val Loss: 0.2979, Val Accuracy: 83.33%, AUC: 0.92
Epoch 13/100, Train Loss: 0.2348, Val Loss: 0.2818, Val Accuracy: 88.89%, AUC: 0.92
Epoch 14/100, Train Loss: 0.2178, Val Loss: 0.2994, Val Accuracy: 83.33%, AUC: 0.90
Epoch 15/100, Train Loss: 0.2147, Val Loss: 0.3013, Val Accuracy: 83.33%, AUC: 0.9

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 4/100, Train Loss: 0.4796, Val Loss: 0.6141, Val Accuracy: 55.56%, AUC: 0.70
Epoch 5/100, Train Loss: 0.3956, Val Loss: 0.5750, Val Accuracy: 72.22%, AUC: 0.77
Epoch 6/100, Train Loss: 0.3808, Val Loss: 0.5494, Val Accuracy: 66.67%, AUC: 0.81
Epoch 7/100, Train Loss: 0.3286, Val Loss: 0.5386, Val Accuracy: 66.67%, AUC: 0.83
Epoch 8/100, Train Loss: 0.3472, Val Loss: 0.5181, Val Accuracy: 72.22%, AUC: 0.88
Epoch 9/100, Train Loss: 0.2980, Val Loss: 0.5199, Val Accuracy: 72.22%, AUC: 0.86
Epoch 10/100, Train Loss: 0.2834, Val Loss: 0.5233, Val Accuracy: 66.67%, AUC: 0.84
Epoch 11/100, Train Loss: 0.2301, Val Loss: 0.5396, Val Accuracy: 72.22%, AUC: 0.84
Epoch 12/100, Train Loss: 0.3883, Val Loss: 0.5409, Val Accuracy: 72.22%, AUC: 0.84
Epoch 13/100, Train Loss: 0.2794, Val Loss: 0.5258, Val Accuracy: 72.22%, AUC: 0.86
Epoch 14/100, Train Loss: 0.2972, Val Loss: 0.5139, Val Accuracy: 72.22%, AUC: 0.86
Epoch 15/100, Train Loss: 0.3112, Val Loss: 0.5103, Val Accuracy: 72.22%, AUC: 0.8

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 2/100, Train Loss: 0.4921, Val Loss: 0.5822, Val Accuracy: 55.56%, AUC: 0.90
Epoch 3/100, Train Loss: 0.4273, Val Loss: 0.5279, Val Accuracy: 72.22%, AUC: 0.90
Epoch 4/100, Train Loss: 0.4392, Val Loss: 0.4654, Val Accuracy: 83.33%, AUC: 0.90
Epoch 5/100, Train Loss: 0.3261, Val Loss: 0.4076, Val Accuracy: 83.33%, AUC: 0.91
Epoch 6/100, Train Loss: 0.2954, Val Loss: 0.3821, Val Accuracy: 83.33%, AUC: 0.91
Epoch 7/100, Train Loss: 0.2444, Val Loss: 0.3694, Val Accuracy: 83.33%, AUC: 0.91
Epoch 8/100, Train Loss: 0.2309, Val Loss: 0.3751, Val Accuracy: 83.33%, AUC: 0.91
Epoch 9/100, Train Loss: 0.2420, Val Loss: 0.3511, Val Accuracy: 88.89%, AUC: 0.91
Epoch 10/100, Train Loss: 0.3440, Val Loss: 0.3373, Val Accuracy: 88.89%, AUC: 0.90
Epoch 11/100, Train Loss: 0.1936, Val Loss: 0.3295, Val Accuracy: 88.89%, AUC: 0.90
Epoch 12/100, Train Loss: 0.2210, Val Loss: 0.3245, Val Accuracy: 88.89%, AUC: 0.89
Epoch 13/100, Train Loss: 0.2465, Val Loss: 0.3350, Val Accuracy: 88.89%, AUC: 0.89
