In [3]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import pickle
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, accuracy_score


# Define a function to load and pad data
def load_data(data_dir, max_length):
    X = []
    y = []
    file_list = os.listdir(data_dir)
    for file in file_list:
        with open(os.path.join(data_dir, file), 'rb') as f:
            data = pickle.load(f)
        label = 0 if 'truth' in file else 1
        padded_data = np.zeros((65, max_length))
        length = min(data.shape[1], max_length)
        padded_data[:, :length] = data[:, :length]
        X.append(padded_data)
        y.append(label)
    return np.array(X), np.array(y)

# Load dataset and pad the data
data_dir = "C:\\Users\\User\\Documents\\Lie detect data\\56M_DWTEEGData"
max_length = 1400  # Define maximum length for padding
X, y = load_data(data_dir, max_length)

# Define dataset class
class EEGDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)

# Define EEG model
class EnhancedEEGNet(nn.Module):
    def __init__(self, num_classes=2):
        super(EnhancedEEGNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, (1, 63), padding='same')
        self.batchnorm1 = nn.BatchNorm2d(16)
        self.depthwiseConv = nn.Conv2d(16, 32, (65, 1), groups=16, padding='same')
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.separableConv = nn.Conv2d(32, 64, (1, 16), padding='same')
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.activation = nn.LeakyReLU()
        self.avgPool = nn.AvgPool2d((1, 4))
        self.dropout = nn.Dropout(0.7)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(64 * 65 * 15, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = self.depthwiseConv(x)
        x = self.batchnorm2(x)
        x = self.activation(x)
        x = self.avgPool(x)
        x = self.separableConv(x)
        x = self.batchnorm3(x)
        x = self.activation(x)
        x = self.avgPool(x)
        x = self.dropout(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

# Initialize model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_and_evaluate(train_loader, val_loader, y_train):
    model = EEGNet(num_classes=2).to(device)
    
    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
    
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)

    num_epochs = 100
    best_val_loss = float('inf')
    patience = 20
    trigger_times = 0

    for epoch in range(num_epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            trigger_times = 0
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print(f'Early stopping at epoch {epoch}')
                break

        print(f'Epoch {epoch}: Train Loss: {loss.item()}, Validation Loss: {val_loss}')

    return model

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_idx = 0

all_labels = []
all_predictions = []

for train_index, val_index in kf.split(X, y):
    print(f'Fold {fold_idx + 1}')

    # Split data
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Normalize data
    scaler = MinMaxScaler()
    X_train = X_train.reshape(X_train.shape[0], -1)
    X_train = scaler.fit_transform(X_train)
    X_train = X_train.reshape(-1, 65, max_length)

    X_val = X_val.reshape(X_val.shape[0], -1)
    X_val = scaler.transform(X_val)
    X_val = X_val.reshape(-1, 65, max_length)

    # Create datasets
    train_dataset = EEGDataset(X_train, y_train)
    val_dataset = EEGDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    model = train_and_evaluate(train_loader, val_loader, y_train)

    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(y_batch.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    fold_idx += 1


# Calculate additional metrics
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions)
recall = recall_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions)
auc = roc_auc_score(all_labels, all_predictions)
conf_matrix = confusion_matrix(all_labels, all_predictions)

print(f'Accuracy: {accuracy},Precision: {precision}, Recall: {recall}, F1-score: {f1}, AUC: {auc}')
print('Confusion Matrix:')
print(conf_matrix)

Fold 1
Epoch 0: Train Loss: 0.675963282585144, Validation Loss: 0.675963282585144
Epoch 1: Train Loss: 0.6797987222671509, Validation Loss: 0.6797987222671509
Epoch 2: Train Loss: 0.664971113204956, Validation Loss: 0.664971113204956
Epoch 3: Train Loss: 0.6679155230522156, Validation Loss: 0.6679155230522156
Epoch 4: Train Loss: 0.6694998145103455, Validation Loss: 0.6694998145103455
Epoch 5: Train Loss: 0.668910026550293, Validation Loss: 0.668910026550293
Epoch 6: Train Loss: 0.6596152186393738, Validation Loss: 0.6596152186393738
Epoch 7: Train Loss: 0.6534891724586487, Validation Loss: 0.6534891724586487
Epoch 8: Train Loss: 0.6464304327964783, Validation Loss: 0.6464304327964783
Epoch 9: Train Loss: 0.6408336758613586, Validation Loss: 0.6408336758613586
Epoch 10: Train Loss: 0.6090766787528992, Validation Loss: 0.6090766787528992
Epoch 11: Train Loss: 0.593525767326355, Validation Loss: 0.593525767326355
Epoch 12: Train Loss: 0.5923876762390137, Validation Loss: 0.59238767623901