In [1]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset, Subset
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Input directory scanned by load_data() for pickled ('.pkl') data files.
data_dir = r'C:\Users\User\Documents\Lie detect data\AugmentedEEGData'
# Output directory for the per-fold checkpoints written by train_and_evaluate().
model_save_dir = r'C:\Users\User\Documents\Lie detect data\Model'
# Create the checkpoint directory up front; exist_ok avoids an error on re-runs.
os.makedirs(model_save_dir, exist_ok=True)

# Function to load and label data
def load_data(data_dir):
    """Load every ``.pkl`` file in *data_dir* and stack them into one dataset.

    Each file is read with ``pandas.read_pickle`` and contributes one label per
    row (first axis): 0 when the file name contains ``'lie'``, 1 otherwise.
    Files are visited in sorted name order so the row order of the result is
    reproducible (``os.listdir`` returns names in arbitrary order).

    Args:
        data_dir: Directory to scan (non-recursive).

    Returns:
        Tuple ``(X, y)``: ``X`` is the ``np.vstack`` of all loaded files and
        ``y`` is a 1-D array holding one label per row of ``X``.

    Raises:
        ValueError: If *data_dir* contains no ``.pkl`` files.
    """
    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at trusted data files.
    pkl_names = sorted(
        name for name in os.listdir(data_dir) if name.endswith('.pkl')
    )
    if not pkl_names:
        raise ValueError(f"No '.pkl' files found in {data_dir!r}")

    X = []
    y = []
    for file_name in pkl_names:
        data = pd.read_pickle(os.path.join(data_dir, file_name))
        # The match is case-sensitive: only a lowercase 'lie' selects label 0.
        label = 0 if 'lie' in file_name else 1
        X.append(data)
        y.extend([label] * data.shape[0])

    return np.vstack(X), np.array(y)

# Load and label data
X, y = load_data(data_dir)

# Normalize data
# StandardScaler expects 2-D input, so each sample is flattened, every
# (channel, time) feature is standardized across samples, and the 3-D layout
# is restored afterwards.
# NOTE(review): the scaler is fitted on all samples before the K-fold split,
# so validation folds influence the normalization statistics — consider
# fitting it per fold on the training indices only.
scaler = StandardScaler()
X = X.reshape(X.shape[0], -1)  # Reshape to (n_samples, n_features)
X = scaler.fit_transform(X)
X = X.reshape(-1, 65, 125)  # Reshape back to (n_samples, n_channels, n_times)

# Shuffle data
# A seeded generator keeps the sample order identical across runs; without it
# the seeded KFold below would still produce different folds on every run.
shuffle_rng = np.random.default_rng(42)
indices = shuffle_rng.permutation(X.shape[0])
X, y = X[indices], y[indices]

# Dataset wrapper around the in-memory arrays
class EEGDataset(Dataset):
    """Map-style dataset that keeps all samples and labels as tensors."""

    def __init__(self, X, y):
        # Convert once up front: float32 features, int64 class labels.
        features = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.long)
        self.X = features
        self.y = targets

    def __len__(self):
        """Return the number of samples (length of the first axis of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        sample = self.X[idx]
        label = self.y[idx]
        return sample, label

# Create dataset: wrap the normalized, shuffled arrays so they can be indexed
# through Subset/DataLoader during cross-validation.
dataset = EEGDataset(X, y)

# Define EEGNet model
class EEGNet(nn.Module):
    """Compact convolutional classifier for EEG epochs.

    Pipeline: temporal convolution -> batch norm -> grouped spatial
    convolution -> batch norm -> ELU -> average pooling over time ->
    dropout -> flatten -> linear classifier.

    Args:
        num_classes: Number of output logits.
        n_channels: Size of the channel axis of each input sample
            (default 65, the value the network was originally fixed to).
        n_times: Number of time samples per input sample (default 125).

    Raises:
        ValueError: If ``n_channels`` < 1 or ``n_times`` is smaller than the
            temporal pooling width, which would leave no features to classify.

    Input to ``forward`` is ``(batch, n_channels, n_times)``; the output is
    ``(batch, num_classes)`` raw logits.
    """

    def __init__(self, num_classes=2, n_channels=65, n_times=125):
        super().__init__()
        pool_width = 4
        if n_channels < 1 or n_times < pool_width:
            raise ValueError(
                f'n_channels must be >= 1 and n_times >= {pool_width}, '
                f'got n_channels={n_channels}, n_times={n_times}'
            )

        # Temporal filter; the odd width (63) keeps 'same' padding symmetric.
        self.conv1 = nn.Conv2d(1, 16, (1, 63), padding='same')
        self.batchnorm1 = nn.BatchNorm2d(16)
        # Spatial filter spanning all channels, two output maps per temporal
        # filter (groups=16). With padding='same' the channel axis keeps its
        # full size, which is why the classifier input scales with n_channels.
        self.depthwiseConv2d = nn.Conv2d(
            16, 32, (n_channels, 1), groups=16, padding='same'
        )
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.activation = nn.ELU()
        self.pooling = nn.AvgPool2d((1, pool_width))
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        # AvgPool2d floors the pooled length, hence the integer division.
        self.fc = nn.Linear(32 * n_channels * (n_times // pool_width), num_classes)

    def forward(self, x):
        """Return class logits for a batch shaped ``(batch, n_channels, n_times)``."""
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = self.depthwiseConv2d(x)
        x = self.batchnorm2(x)
        x = self.activation(x)
        x = self.pooling(x)
        x = self.dropout(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

# Select the compute device: CUDA GPU when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_and_evaluate(train_loader, val_loader, fold_idx):
    """Train a fresh EEGNet on one fold and return it with its best weights.

    Trains for up to 100 epochs with Adam (lr=0.001) and cross-entropy loss.
    The learning rate is halved after 3 epochs without validation-loss
    improvement, and training stops early after 10 such epochs. Every time
    the validation loss improves, a checkpoint is written to
    ``model_save_dir/model_fold_{fold_idx}.pth``.

    Args:
        train_loader: Iterable of ``(X_batch, y_batch)`` training batches.
        val_loader: Iterable of ``(X_batch, y_batch)`` validation batches.
        fold_idx: Fold number, used only in the checkpoint file name.

    Returns:
        The trained model carrying the weights of the epoch with the lowest
        validation loss (the same weights as the saved checkpoint), not the
        weights of the last epoch run.
    """
    model = EEGNet(num_classes=2).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

    num_epochs = 100
    best_val_loss = float('inf')
    best_state = None  # in-memory copy of the best weights seen so far
    patience = 10
    trigger_times = 0
    fold_model_path = os.path.join(model_save_dir, f'model_fold_{fold_idx}.pth')

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Mean of the per-batch losses.
        train_loss /= len(train_loader)

        # ---- validation pass ----
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += y_batch.size(0)
                correct += (predicted == y_batch).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

        scheduler.step(val_loss)

        # Early Stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            trigger_times = 0
            # state_dict() returns live references to the parameters, so
            # clone them; otherwise later epochs would overwrite the snapshot.
            best_state = {
                key: value.detach().clone()
                for key, value in model.state_dict().items()
            }
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch,
                'val_loss': best_val_loss,
            }, fold_model_path)
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print('Early stopping!')
                break

    # Roll back to the best epoch; without this the caller would evaluate
    # weights from up to `patience` epochs after the best checkpoint.
    if best_state is not None:
        model.load_state_dict(best_state)

    return model

# K-Fold Cross Validation
# Each sample is predicted exactly once, by the model trained on the other
# four folds; labels, predictions and scores are pooled across folds and the
# metrics are computed once on the pooled results.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
all_labels = []
all_predictions = []
all_scores = []  # predicted probability of class 1, needed for ROC AUC
fold_idx = 1

for train_index, val_index in kf.split(dataset):
    print(f'Fold {fold_idx}')
    train_subset = Subset(dataset, train_index)
    val_subset = Subset(dataset, val_index)

    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

    model = train_and_evaluate(train_loader, val_loader, fold_idx)

    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            # Softmax turns the two logits into probabilities; column 1 is
            # the score of the positive class (label 1).
            positive_prob = torch.softmax(outputs, dim=1)[:, 1]
            all_labels.extend(y_batch.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())
            all_scores.extend(positive_prob.cpu().numpy())

    fold_idx += 1

# Calculate additional metrics
precision = precision_score(all_labels, all_predictions)
recall = recall_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions)
# ROC AUC ranks samples by a continuous score; feeding it hard 0/1
# predictions would evaluate a single threshold only.
auc = roc_auc_score(all_labels, all_scores)
conf_matrix = confusion_matrix(all_labels, all_predictions)

print(f'Precision: {precision}, Recall: {recall}, F1-score: {f1}, AUC: {auc}')
print('Confusion Matrix:')
print(conf_matrix)

Fold 1
Epoch 1/100, Loss: 1.230060080687205, Validation Loss: 0.6744818563262621, Validation Accuracy: 0.8021390374331551
Epoch 2/100, Loss: 0.6352790612727404, Validation Loss: 2.087798945605755, Validation Accuracy: 0.8021390374331551
Epoch 3/100, Loss: 0.9822063148021698, Validation Loss: 0.8253769973913828, Validation Accuracy: 0.8449197860962567
Epoch 4/100, Loss: 0.6537043145536169, Validation Loss: 0.6723188397785028, Validation Accuracy: 0.893048128342246
Epoch 5/100, Loss: 0.42483580506329116, Validation Loss: 0.603432851533095, Validation Accuracy: 0.8823529411764706
Epoch 6/100, Loss: 0.31402360927313566, Validation Loss: 0.7337136800245693, Validation Accuracy: 0.8983957219251337
Epoch 7/100, Loss: 0.47109288176094805, Validation Loss: 0.5130029233793417, Validation Accuracy: 0.9197860962566845
Epoch 8/100, Loss: 0.31460323103237897, Validation Loss: 0.5028753137060752, Validation Accuracy: 0.8983957219251337
Epoch 9/100, Loss: 0.17682129903308427, Validation Loss: 0.404474