In [7]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import pickle

model_save_dir = r'C:\Users\User\Documents\Lie detect data\Model'
os.makedirs(model_save_dir, exist_ok=True)

# Define a function to load, cut and pad data
def load_data(data_dir, max_length):
    X = []
    y = []
    file_list = os.listdir(data_dir)
    for file in file_list:
        with open(os.path.join(data_dir, file), 'rb') as f:
            data = pickle.load(f)
        label = 1 if 'truth' in file else 0
        if data.shape[1] > max_length:
            processed_data = data[:, :max_length]  # Cut data if it exceeds max_length
        else:
            processed_data = np.zeros((data.shape[0], max_length))
            processed_data[:, :data.shape[1]] = data  # Pad data if it is shorter than max_length
        X.append(processed_data)
        y.append(label)
    return np.array(X), np.array(y)

# Load dataset and process the data
data_dir = "C:\\Users\\User\\Documents\\Lie detect data\\56M_DWTEEGData"
max_length = 1400  # Define maximum length for cutting and padding
X, y = load_data(data_dir, max_length)

# Define dataset class
class EEGDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)

# Define EEGNet model
class EEGNet(nn.Module):
    def __init__(self, num_classes=2):
        super(EEGNet, self).__init__()
        self.conv1 = nn.Conv1d(65, 32, kernel_size=63, padding=31)
        self.batchnorm1 = nn.BatchNorm1d(32)
        self.depthwiseConv1d = nn.Conv1d(32, 64, kernel_size=65, groups=32, padding=32)
        self.batchnorm2 = nn.BatchNorm1d(64)
        self.activation = nn.ReLU()
        
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)  # Additional convolutional layer
        self.batchnorm3 = nn.BatchNorm1d(128)
        
        self.pooling = nn.AvgPool1d(kernel_size=4)
        self.dropout = nn.Dropout(0.8)
        
        self.global_pool = nn.AdaptiveAvgPool1d(1)  # Global average pooling
        
        self._calculate_num_features()
        self.fc = nn.Linear(self.num_features, num_classes)

    def _calculate_num_features(self):
        with torch.no_grad():
            sample_input = torch.zeros(1, 65, 1400)
            sample_output = self._forward_features(sample_input)
            self.num_features = sample_output.shape[1]

    def _forward_features(self, x):
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = self.activation(x)
        
        x = self.depthwiseConv1d(x)
        x = self.batchnorm2(x)
        x = self.activation(x)
        
        x = self.conv2(x)  # Additional convolutional layer
        x = self.batchnorm3(x)
        x = self.activation(x)
        
        x = self.pooling(x)
        x = self.dropout(x)
        x = self.global_pool(x)  # Global average pooling layer
        x = torch.flatten(x, 1)  # Flatten the output for the fully connected layer
        return x

    def forward(self, x):
        x = self._forward_features(x)
        x = self.fc(x)
        return x


# Initialize model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_and_evaluate(train_loader, val_loader, y_train):
    model = EEGNet(num_classes=2).to(device)
    
    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
    
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)

    num_epochs = 100
    best_val_loss = float('inf')
    patience = 10
    trigger_times = 0

    for epoch in range(num_epochs):
        model.train()
        running_train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()
        
        avg_train_loss = running_train_loss / len(train_loader)
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            trigger_times = 0
            fold_model_path = os.path.join(model_save_dir, f'fold3_model_fold_{fold_idx}.pth')
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch,
                'val_loss': best_val_loss,
            }, fold_model_path)
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print(f'Early stopping at epoch {epoch}')
                break

        print(f'Epoch {epoch}: Train Loss: {avg_train_loss}, Validation Loss: {val_loss}')

    return model

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_idx = 0

all_labels = []
all_predictions = []

for train_index, val_index in kf.split(X, y):
    print(f'Fold {fold_idx + 1}')

    # Split data
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Normalize data using scaler fitted on training data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1))
    X_val = scaler.transform(X_val.reshape(X_val.shape[0], -1))
    X_train = X_train.reshape(-1, 65, max_length)
    X_val = X_val.reshape(-1, 65, max_length)

    # Save the scaler to a file
    with open(r'C:\Users\User\Documents\Lie detect data\Model\simpleEEGNet_scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)

    # Create datasets and dataloaders
    train_dataset = EEGDataset(X_train, y_train)
    val_dataset = EEGDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    model = train_and_evaluate(train_loader, val_loader, y_train)

    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(y_batch.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    fold_idx += 1


# Calculate additional metrics
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions)
recall = recall_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions)
auc = roc_auc_score(all_labels, all_predictions)
conf_matrix = confusion_matrix(all_labels, all_predictions)

print(f'Accuracy: {accuracy},Precision: {precision}, Recall: {recall}, F1-score: {f1}, AUC: {auc}')
print('Confusion Matrix:')
print(conf_matrix)


"""
weight decay = 1e-7
learning rate = 0.0001
epoch = 100
batch size = 32
early stopping patience = 10
standard scaler
ReLU
cross entropy loss
drop out = 0.8

"""

Fold 1
Epoch 0: Train Loss: 0.6569392482439677, Validation Loss: 0.6694141626358032
Epoch 1: Train Loss: 0.6232163111368815, Validation Loss: 0.6567577719688416
Epoch 2: Train Loss: 0.5966388583183289, Validation Loss: 0.641223669052124
Epoch 3: Train Loss: 0.5778760313987732, Validation Loss: 0.6257113814353943
Epoch 4: Train Loss: 0.5604292154312134, Validation Loss: 0.6116464734077454
Epoch 5: Train Loss: 0.5522220134735107, Validation Loss: 0.5995446443557739
Epoch 6: Train Loss: 0.5355083147684733, Validation Loss: 0.5892245173454285
Epoch 7: Train Loss: 0.5361021558443705, Validation Loss: 0.5808311700820923
Epoch 8: Train Loss: 0.5350726246833801, Validation Loss: 0.5731722116470337
Epoch 9: Train Loss: 0.5279630223910013, Validation Loss: 0.5679006576538086
Epoch 10: Train Loss: 0.5272724231084188, Validation Loss: 0.5510343909263611
Epoch 11: Train Loss: 0.5116100907325745, Validation Loss: 0.5325568914413452
Epoch 12: Train Loss: 0.489448885122935, Validation Loss: 0.52511537

'\nweight decay = 1e-7\nlearning rate = 0.0001\nepoch = 100\nbatch size = 32\nearly stopping patience = 10\nstandard scaler\nReLU\ncross entropy loss\ndrop out = 0.8\n\n'