In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import pickle


model_save_dir = r'C:\Users\User\Documents\Lie detect data\Model'
os.makedirs(model_save_dir, exist_ok=True)

# Define a function to load and pad data
def load_data(data_dir, max_length):
    X = []
    y = []
    file_list = os.listdir(data_dir)
    for file in file_list:
        with open(os.path.join(data_dir, file), 'rb') as f:
            data = pickle.load(f)
        label = 1 if 'truth' in file else 0
        if data.shape[1] > max_length:
            processed_data = data[:, :max_length]  # Cut data if it exceeds max_length
        else:
            processed_data = np.zeros((data.shape[0], max_length))
            processed_data[:, :data.shape[1]] = data  # Pad data if it is shorter than max_length
        X.append(processed_data)
        y.append(label)
    return np.array(X), np.array(y)

# Load dataset and pad the data
data_dir = "C:\\Users\\User\\Documents\\Lie detect data\\56M_DWTEEGData"
max_length = 500 # Define maximum length for padding
X, y = load_data(data_dir, max_length)

# Define dataset class
class EEGDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Ensure the data is reshaped to [1, Chans, Samples]
        return torch.tensor(self.X[idx], dtype=torch.float32).unsqueeze(0), torch.tensor(self.y[idx], dtype=torch.long)

# Define EEGNet model
class EEGNet(nn.Module):
    def __init__(self, nb_classes, Chans=65, Samples=500, dropoutRate=0.5, 
                 kernLength=125, F1=8, D=2, F2=None, norm_rate=0.25, dropoutType='Dropout'):
        super(EEGNet, self).__init__()
        if F2 is None:
            F2 = F1 * D
        
        self.conv1 = nn.Conv2d(1, F1, (1, kernLength), padding='same', bias=False)
        self.batchnorm1 = nn.BatchNorm2d(F1)
        
        self.depthwiseConv = nn.Conv2d(F1, F1 * D, (Chans, 1), groups=F1, bias=False)
        self.batchnorm2 = nn.BatchNorm2d(F1 * D)
        
        self.averagePool1 = nn.AvgPool2d((1, 4))
        
        if dropoutType == 'SpatialDropout2D':
            self.dropout1 = nn.Dropout2d(dropoutRate)
        elif dropoutType == 'Dropout':
            self.dropout1 = nn.Dropout(dropoutRate)
        else:
            raise ValueError('dropoutType must be one of SpatialDropout2D or Dropout')
        
        self.separableConv1 = nn.Conv2d(F1 * D, F2, (1, 16), padding='same', bias=False)
        self.batchnorm3 = nn.BatchNorm2d(F2)
        
        self.averagePool2 = nn.AvgPool2d((1, 8))
        
        if dropoutType == 'SpatialDropout2D':
            self.dropout2 = nn.Dropout2d(dropoutRate)
        elif dropoutType == 'Dropout':
            self.dropout2 = nn.Dropout(dropoutRate)
        
        # Add more depth by adding another separable convolutional block
        self.separableConv2 = nn.Conv2d(F2, F2, (1, 16), padding='same', bias=False)
        self.batchnorm4 = nn.BatchNorm2d(F2)
        self.averagePool3 = nn.AvgPool2d((1, 8))
        
        if dropoutType == 'SpatialDropout2D':
            self.dropout3 = nn.Dropout2d(dropoutRate)
        elif dropoutType == 'Dropout':
            self.dropout3 = nn.Dropout(dropoutRate)
        
        self.flatten = nn.Flatten()
        self.dense = nn.Linear(F2 * (Samples // 256), nb_classes)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = F.elu(x)
        
        x = self.depthwiseConv(x)
        x = self.batchnorm2(x)
        x = F.elu(x)
        x = self.averagePool1(x)
        x = self.dropout1(x)
        
        x = self.separableConv1(x)
        x = self.batchnorm3(x)
        x = F.elu(x)
        x = self.averagePool2(x)
        x = self.dropout2(x)
        
        # Pass through the additional depth layers
        x = self.separableConv2(x)
        x = self.batchnorm4(x)
        x = F.elu(x)
        x = self.averagePool3(x)
        x = self.dropout3(x)
        
        x = self.flatten(x)
        x = self.dense(x)
        x = self.softmax(x)
        
        return x


# Initialize model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_and_evaluate(train_loader, val_loader, y_train):
    nb_classes = 2
    model = EEGNet(nb_classes=nb_classes).to(device)
    
    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
    
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-8)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)

    num_epochs = 500
    best_val_loss = float('inf')
    patience = 100
    trigger_times = 0

    for epoch in range(num_epochs):
        model.train()
        running_train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()
        
        avg_train_loss = running_train_loss / len(train_loader)
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            trigger_times = 0
            fold_model_path = os.path.join(model_save_dir, f'fold3_model_fold_{fold_idx}.pth')
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch,
                'val_loss': best_val_loss,
            }, fold_model_path)
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print(f'Early stopping at epoch {epoch}')
                break

        print(f'Epoch {epoch}: Train Loss: {avg_train_loss}, Validation Loss: {val_loss}')

    return model

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_idx = 0

all_labels = []
all_predictions = []

for train_index, val_index in kf.split(X, y):
    print(f'Fold {fold_idx + 1}')

    # Split data
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Normalize data using scaler fitted on training data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1))
    X_val = scaler.transform(X_val.reshape(X_val.shape[0], -1))
    X_train = X_train.reshape(-1, 65, max_length)
    X_val = X_val.reshape(-1, 65, max_length)

    # Save the scaler to a file
    with open(r'C:\Users\User\Documents\Lie detect data\Model\simpleEEGNet_scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)

    # Create datasets and dataloaders
    train_dataset = EEGDataset(X_train, y_train)
    val_dataset = EEGDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    model = train_and_evaluate(train_loader, val_loader, y_train)

    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(y_batch.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    fold_idx += 1


# Calculate additional metrics
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions)
recall = recall_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions)
auc = roc_auc_score(all_labels, all_predictions)
conf_matrix = confusion_matrix(all_labels, all_predictions)

print(f'Accuracy: {accuracy},Precision: {precision}, Recall: {recall}, F1-score: {f1}, AUC: {auc}')
print('Confusion Matrix:')
print(conf_matrix)


Fold 1


  return F.conv2d(input, weight, bias, self.stride,


Epoch 0: Train Loss: 0.693996528784434, Validation Loss: 0.6935914754867554
Epoch 1: Train Loss: 0.6643290718396505, Validation Loss: 0.6902887225151062
Epoch 2: Train Loss: 0.6432450612386068, Validation Loss: 0.6884269118309021
Epoch 3: Train Loss: 0.6301511327425638, Validation Loss: 0.6905054450035095
Epoch 4: Train Loss: 0.6118823687235514, Validation Loss: 0.6926698684692383
Epoch 5: Train Loss: 0.6103557348251343, Validation Loss: 0.6950153708457947
Epoch 6: Train Loss: 0.5919282635052999, Validation Loss: 0.6973549723625183
Epoch 7: Train Loss: 0.5904016296068827, Validation Loss: 0.6991558074951172
Epoch 8: Train Loss: 0.5979834397633871, Validation Loss: 0.6993719935417175
Epoch 9: Train Loss: 0.5930612285931905, Validation Loss: 0.6999982595443726
Epoch 10: Train Loss: 0.5715248783429464, Validation Loss: 0.6943541169166565
Epoch 11: Train Loss: 0.5566069086392721, Validation Loss: 0.6972734332084656
Epoch 12: Train Loss: 0.5417172710100809, Validation Loss: 0.69524848461151