In [6]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from scipy.signal import butter, filtfilt
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from sklearn.utils.class_weight import compute_class_weight

# Directory containing data files
data_dir = r'C:\Users\User\Documents\Lie detect data\56M_AugmentedEEGData'
model_save_dir = r'C:\Users\User\Documents\Lie detect data\Model'
os.makedirs(model_save_dir, exist_ok=True)

# Function to load and label data
def load_data(data_dir):
    X = []
    y = []
    groups = []
    
    for idx, file_name in enumerate(os.listdir(data_dir)):
        if file_name.endswith('.pkl'):
            file_path = os.path.join(data_dir, file_name)
            data = pd.read_pickle(file_path)
            label = 0 if 'lie' in file_name else 1
            X.append(data)
            y.extend([label] * data.shape[0])
            groups.extend([idx] * data.shape[0])  # Use file index as group label
    
    X = np.vstack(X)
    y = np.array(y)
    groups = np.array(groups)
    return X, y, groups


# Define dataset class
class EEGDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class EnhancedEEGNet(nn.Module):
    def __init__(self, num_classes=2):
        super(EnhancedEEGNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, (1, 63), padding='same')
        self.batchnorm1 = nn.BatchNorm2d(16)
        self.depthwiseConv = nn.Conv2d(16, 32, (65, 1), groups=16, padding='same')
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.separableConv = nn.Conv2d(32, 64, (1, 16), padding='same')
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.activation = nn.LeakyReLU()
        self.avgPool = nn.AvgPool2d((1, 4))
        self.dropout = nn.Dropout(0.7)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(64 * 65 * 15, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = self.depthwiseConv(x)
        x = self.batchnorm2(x)
        x = self.activation(x)
        x = self.avgPool(x)
        x = self.separableConv(x)
        x = self.batchnorm3(x)
        x = self.activation(x)
        x = self.avgPool(x)
        x = self.dropout(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

def train_and_evaluate(train_loader, val_loader, model, criterion, optimizer, scheduler, device, num_epochs=100, patience=20):
    best_val_loss = float('inf')
    trigger_times = 0
    
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        train_loss /= len(train_loader)
        
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += y_batch.size(0)
                correct += (predicted == y_batch).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')
        
        scheduler.step()
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            trigger_times = 0
            torch.save(model.state_dict(), r'C:\Users\User\Documents\Lie detect data\Model\fold3_model.pth')
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print('Early stopping!')
                break
    
    return model

def normalize_eeg_data(X_train, X_val):
    # Assuming X_train and X_val shapes are (samples, channels, time_points)
    n_channels = X_train.shape[1]
    
    # Initialize a StandardScaler for each channel
    scalers = [StandardScaler() for _ in range(n_channels)]
    
    # Normalize each channel independently
    for i in range(n_channels):
        X_train[:, i, :] = scalers[i].fit_transform(X_train[:, i, :])
        X_val[:, i, :] = scalers[i].transform(X_val[:, i, :])
    
    return X_train, X_val

def main():
    X, y, groups = load_data(data_dir)
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    kf = GroupKFold(n_splits=5)
    all_labels = []
    all_predictions = []
    
    for fold, (train_index, val_index) in enumerate(kf.split(X, y, groups)):
        print(f'Fold {fold + 1}')
        
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        # Data Normalization
        X_train, X_val = normalize_eeg_data(X_train, X_val)
        
        train_dataset = EEGDataset(X_train, y_train)
        val_dataset = EEGDataset(X_val, y_val)
        
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
        
        model = EnhancedEEGNet().to(device)
        
        class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
        class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
        
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
        
        model = train_and_evaluate(train_loader, val_loader, model, criterion, optimizer, scheduler, device)
        
        model.eval()
        predictions = []
        with torch.no_grad():
            for X_batch, _ in val_loader:
                X_batch = X_batch.to(device)
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                predictions.extend(predicted.cpu().numpy())
        
        all_labels.extend(y_val)
        all_predictions.extend(predictions)
    
    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions, average='weighted')
    recall = recall_score(all_labels, all_predictions, average='weighted')
    f1 = f1_score(all_labels, all_predictions, average='weighted')
    auc = roc_auc_score(all_labels, all_predictions, average='weighted')
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1-score: {f1:.4f}')
    print(f'AUC: {auc:.4f}')
    print('Confusion Matrix:')
    print(conf_matrix)

if __name__ == '__main__':
    main()

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 625 and the array at index 1 has size 791