In [13]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import math
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix



# Number of samples every polygraph signal is truncated or zero-padded to
FIXED_LENGTH = 250  # Adjust as needed

# Directories holding the pickled recordings, one directory per modality
poly_data_dir = r'C:\Users\User\Documents\Lie detect data\CombinedPolyData'
eeg_data_dir = r'C:\Users\User\Documents\Lie detect data\AugmentedEEGData'

# Function to load and preprocess EEG data
def load_eeg_data(data_dir):
    """Load every ``.pkl`` file in *data_dir* and stack the contents row-wise.

    Files are visited in sorted name order so the sample order is
    reproducible (``os.listdir`` returns entries in arbitrary order).
    A file is labelled 0 when its name contains the substring ``'lie'``
    (case-sensitive) and 1 otherwise; that label is repeated once per
    first-axis entry of the file's array.

    Args:
        data_dir: Directory containing the pickled arrays.

    Returns:
        A tuple ``(X, y)`` where ``X`` is ``np.vstack`` of all loaded arrays
        and ``y`` is a 1-D array holding one label per first-axis entry.

    Raises:
        ValueError: If *data_dir* contains no ``.pkl`` files.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    X = []
    y = []

    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.pkl'):
            continue
        data = pd.read_pickle(os.path.join(data_dir, file_name))
        label = 0 if 'lie' in file_name else 1
        X.append(data)
        y.extend([label] * data.shape[0])

    if not X:
        # np.vstack([]) would raise a ValueError as well, but with a message
        # that says nothing about the directory being empty.
        raise ValueError(f"No .pkl files found in {data_dir!r}")

    return np.vstack(X), np.array(y)

# Function to load and preprocess polygraph data
def load_and_preprocess_poly_data(data_dir, fixed_length=None):
    """Load every ``.pkl`` file in *data_dir* as one fixed-length sample.

    Each file is expected to hold a 2-D array. Its second axis is truncated
    or zero-padded on the right to *fixed_length*, and the arrays are then
    stacked along a new leading axis, so ``X`` has one entry per file.
    Exactly one label is emitted per file (0 when the file name contains the
    case-sensitive substring ``'lie'``, 1 otherwise) so that ``len(y)``
    always equals ``len(X)``.

    Files are visited in sorted name order so the sample order is
    reproducible (``os.listdir`` returns entries in arbitrary order).

    Args:
        data_dir: Directory containing the pickled arrays.
        fixed_length: Target size of the second axis. Defaults to the
            module-level ``FIXED_LENGTH``.

    Returns:
        A tuple ``(X, y)``: ``X`` is ``np.array`` of the per-file arrays and
        ``y`` is a 1-D array with one label per file.
    """
    if fixed_length is None:
        fixed_length = FIXED_LENGTH

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    X = []
    y = []

    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.pkl'):
            continue
        data = np.asarray(pd.read_pickle(os.path.join(data_dir, file_name)))
        label = 0 if 'lie' in file_name else 1

        # Handle different lengths by truncating or padding
        n_times = data.shape[1]
        if n_times > fixed_length:
            data = data[:, :fixed_length]  # Truncate
        elif n_times < fixed_length:
            pad_width = fixed_length - n_times
            data = np.pad(data, ((0, 0), (0, pad_width)), mode='constant')  # Pad with zeros

        X.append(data)
        # One label per file: X gains exactly one entry per file, so repeating
        # the label once per row would leave y longer than X.
        y.append(label)

    return np.array(X), np.array(y)

# Load and preprocess data
eeg_X, eeg_y = load_eeg_data(eeg_data_dir)
poly_X, poly_y = load_and_preprocess_poly_data(poly_data_dir)

# Ensure both datasets have the same number of samples
# NOTE(review): this keeps the first n_samples entries in file-load order.
# Verify that both classes survive the cut and that entry i of the EEG data
# and entry i of the polygraph data really belong to the same trial.
n_samples = min(eeg_X.shape[0], poly_X.shape[0])
eeg_X, eeg_y = eeg_X[:n_samples], eeg_y[:n_samples]
poly_X, poly_y = poly_X[:n_samples], poly_y[:n_samples]

# NOTE(review): both scalers below are fit on every sample before the
# train/val/test split, so statistics of the held-out data leak into
# training. Consider fitting on the training portion only.

# Normalize EEG data
eeg_scaler = StandardScaler()
eeg_X = eeg_X.reshape(eeg_X.shape[0], -1)  # Reshape to (n_samples, n_features)
eeg_X = eeg_scaler.fit_transform(eeg_X)
eeg_X = eeg_X.reshape(-1, 65, 125)  # Reshape back to (n_samples, n_channels, n_times)

# Normalize Polygraph data
poly_scaler = StandardScaler()
poly_X = poly_X.reshape(poly_X.shape[0], -1)  # Reshape to (n_samples, n_features)
poly_X = poly_scaler.fit_transform(poly_X)
poly_X = poly_X.reshape(-1, 4, FIXED_LENGTH)  # Reshape back to (n_samples, n_channels, n_times)

# Shuffle data
# A single permutation is applied to both modalities. Shuffling them
# independently would pair each EEG sample (and the label used for the loss)
# with an unrelated polygraph sample.
eeg_indices = poly_indices = np.random.permutation(n_samples)
eeg_X, eeg_y = eeg_X[eeg_indices], eeg_y[eeg_indices]
poly_X, poly_y = poly_X[poly_indices], poly_y[poly_indices]

# Define a custom dataset class that handles both EEG and polygraph data
class CombinedDataset(Dataset):
    """Dataset yielding index-aligned EEG and polygraph samples with labels.

    Item ``i`` is the tuple ``(eeg_X[i], eeg_y[i], poly_X[i], poly_y[i])``.
    Features are stored as ``float32`` tensors and labels as ``long`` tensors.

    Args:
        eeg_X: EEG features, indexed by sample along the first axis.
        eeg_y: EEG labels, one per sample.
        poly_X: Polygraph features, indexed by sample along the first axis.
        poly_y: Polygraph labels, one per sample.

    Raises:
        ValueError: If the four inputs do not all have the same length.
    """

    def __init__(self, eeg_X, eeg_y, poly_X, poly_y):
        sizes = (len(eeg_X), len(eeg_y), len(poly_X), len(poly_y))
        if len(set(sizes)) != 1:
            # __len__ reports the EEG length only, so a mismatch would either
            # hide samples or fail with an IndexError in the middle of an epoch.
            raise ValueError(
                "eeg_X, eeg_y, poly_X and poly_y must have the same length, "
                f"got {sizes}"
            )
        self.eeg_X = torch.tensor(eeg_X, dtype=torch.float32)
        self.eeg_y = torch.tensor(eeg_y, dtype=torch.long)
        self.poly_X = torch.tensor(poly_X, dtype=torch.float32)
        self.poly_y = torch.tensor(poly_y, dtype=torch.long)

    def __len__(self):
        """Return the number of paired samples."""
        return len(self.eeg_X)

    def __getitem__(self, idx):
        """Return ``(eeg_x, eeg_label, poly_x, poly_label)`` for index *idx*."""
        return self.eeg_X[idx], self.eeg_y[idx], self.poly_X[idx], self.poly_y[idx]

# Create dataset and data loaders
combined_dataset = CombinedDataset(eeg_X, eeg_y, poly_X, poly_y)

# 70% train / 15% validation; the test split takes whatever is left so the
# three sizes always sum to the dataset length.
train_size = int(0.7 * len(combined_dataset))
val_size = int(0.15 * len(combined_dataset))
test_size = len(combined_dataset) - train_size - val_size

# Seed the split so train/val/test membership is identical between runs.
# NOTE(review): the split is not stratified. The recorded run shows a
# validation set without any class-1 samples, which makes precision, recall
# and F1 meaningless there; consider a stratified split.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    combined_dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)



# Define EEGNet model
class EEGNet(nn.Module):
    """Small convolutional classifier for inputs shaped (batch, n_channels, n_times).

    Layer order: a (1, 63) convolution along the last axis, batch norm, a
    grouped (n_channels, 1) convolution along the channel axis, batch norm,
    ELU, average pooling by 4 along the last axis, dropout, and a linear
    classifier. Both convolutions use ``padding='same'``, so only the pooling
    changes the spatial size.

    Args:
        num_classes: Number of output logits.
        n_channels: Size of the channel axis of the input (default 65).
        n_times: Size of the last axis of the input (default 125).

    Raises:
        ValueError: If ``n_channels < 1`` or ``n_times < 4`` (pooling by 4
            would leave nothing to classify).
    """

    def __init__(self, num_classes=2, n_channels=65, n_times=125):
        super().__init__()
        if n_channels < 1 or n_times < 4:
            raise ValueError(
                f"need n_channels >= 1 and n_times >= 4, got {n_channels} and {n_times}"
            )
        self.conv1 = nn.Conv2d(1, 16, (1, 63), padding='same')
        self.batchnorm1 = nn.BatchNorm2d(16)
        self.depthwiseConv2d = nn.Conv2d(16, 32, (n_channels, 1), groups=16, padding='same')
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.activation = nn.ELU()
        self.pooling = nn.AvgPool2d((1, 4))
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        # 'same' padding keeps (n_channels, n_times); pooling floors n_times / 4.
        self.fc = nn.Linear(32 * n_channels * (n_times // 4), num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for *x*."""
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = self.depthwiseConv2d(x)
        x = self.batchnorm2(x)
        x = self.activation(x)
        x = self.pooling(x)
        x = self.dropout(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

# Define PolygraphNet model
class PolygraphNet(nn.Module):
    """Small convolutional classifier for inputs shaped (batch, n_channels, n_times).

    Layer order: a (1, 4) convolution along the last axis, batch norm, a
    grouped (n_channels, 1) convolution along the channel axis, batch norm,
    ELU, average pooling by 4 along the last axis, dropout, and a linear
    classifier. Both convolutions use ``padding='same'``, so only the pooling
    changes the spatial size.

    Args:
        num_classes: Number of output logits.
        n_channels: Size of the channel axis of the input (default 4).
        n_times: Size of the last axis of the input (default 250). Pass the
            length the signals were truncated/padded to if it is not 250.

    Raises:
        ValueError: If ``n_channels < 1`` or ``n_times < 4`` (pooling by 4
            would leave nothing to classify).
    """

    def __init__(self, num_classes=2, n_channels=4, n_times=250):
        super().__init__()
        if n_channels < 1 or n_times < 4:
            raise ValueError(
                f"need n_channels >= 1 and n_times >= 4, got {n_channels} and {n_times}"
            )
        self.conv1 = nn.Conv2d(1, 16, (1, 4), padding='same')
        self.batchnorm1 = nn.BatchNorm2d(16)
        self.depthwiseConv2d = nn.Conv2d(16, 32, (n_channels, 1), groups=16, padding='same')
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.activation = nn.ELU()
        self.pooling = nn.AvgPool2d((1, 4))
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        # Derived from the input geometry instead of being hard-coded:
        # 'same' padding keeps (n_channels, n_times); pooling floors n_times / 4.
        self.fc = nn.Linear(32 * n_channels * (n_times // 4), num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for *x*."""
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = self.depthwiseConv2d(x)
        x = self.batchnorm2(x)
        x = self.activation(x)
        x = self.pooling(x)
        x = self.dropout(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

# Define Ensemble model
class EnsembleModel(nn.Module):
    """Fuse the outputs of an EEG model and a polygraph model.

    The two sub-model outputs are concatenated along dim 1 and passed
    through a single linear layer.

    Args:
        eeg_model: Module applied to the EEG input.
        poly_model: Module applied to the polygraph input.
        eeg_features: Width of ``eeg_model``'s output (default 2).
        poly_features: Width of ``poly_model``'s output (default 2).
        num_classes: Number of logits produced by the fusion layer (default 2).
    """

    def __init__(self, eeg_model, poly_model, eeg_features=2, poly_features=2, num_classes=2):
        super().__init__()
        self.eeg_model = eeg_model
        self.poly_model = poly_model
        # Combine outputs of both models; with the defaults this is Linear(4, 2).
        self.fc = nn.Linear(eeg_features + poly_features, num_classes)

    def forward(self, eeg_input, poly_input):
        """Return fused logits of shape (batch, num_classes)."""
        eeg_output = self.eeg_model(eeg_input)
        poly_output = self.poly_model(poly_input)
        combined_output = torch.cat((eeg_output, poly_output), dim=1)
        return self.fc(combined_output)

# Pick the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the two single-modality networks first, then the fusion wrapper.
# nn.Module.to() moves the module in place, so no re-assignment is needed.
eeg_model = EEGNet(num_classes=2)
eeg_model.to(device)
poly_model = PolygraphNet(num_classes=2)
poly_model.to(device)
ensemble_model = EnsembleModel(eeg_model, poly_model)
ensemble_model.to(device)

# One optimizer over the wrapper's parameters, which include both sub-models
ensemble_optimizer = optim.Adam(ensemble_model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Training hyperparameters
# The training loop has to run after evaluate_model is defined, so it lives
# below that function. Running a copy of the loop here would raise a
# NameError on a fresh interpreter and would otherwise train the model twice.
num_epochs = 100
best_val_loss = float('inf')

def evaluate_model(model, data_loader, criterion, device=None):
    """Evaluate *model* on *data_loader* and return loss and classification metrics.

    Each batch is expected to be ``(eeg_X, eeg_y, poly_X, poly_y)``. The model
    is called as ``model(eeg_X, poly_X)`` and scored against ``eeg_y`` only
    (the polygraph labels are assumed to be the same and are ignored).

    Args:
        model: Module taking ``(eeg_input, poly_input)`` and returning logits.
        data_loader: Iterable of batches as described above.
        criterion: Loss callable applied to ``(logits, labels)``.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU if the model has none).

    Returns:
        ``(loss, accuracy, precision, recall, f1, conf_matrix)`` where
        ``loss`` is the mean of the per-batch losses and ``conf_matrix`` is
        the 2x2 confusion matrix for labels ``[0, 1]``.

    Raises:
        ValueError: If *data_loader* yields no samples.
    """
    if device is None:
        # Follow the model instead of relying on a module-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    total_loss = 0.0
    n_batches = 0
    correct = 0
    total = 0
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for eeg_X_batch, eeg_y_batch, poly_X_batch, _poly_y_batch in data_loader:
            eeg_X_batch = eeg_X_batch.to(device)
            eeg_y_batch = eeg_y_batch.to(device)
            poly_X_batch = poly_X_batch.to(device)

            outputs = model(eeg_X_batch, poly_X_batch)
            loss = criterion(outputs, eeg_y_batch)  # Assuming same labels for both modalities
            total_loss += loss.item()
            n_batches += 1

            predicted = outputs.argmax(dim=1)
            total += eeg_y_batch.size(0)
            correct += (predicted == eeg_y_batch).sum().item()

            all_labels.extend(eeg_y_batch.cpu().tolist())
            all_predictions.extend(predicted.cpu().tolist())

    if total == 0:
        # Without this guard the divisions below fail with ZeroDivisionError.
        raise ValueError("data_loader yielded no samples; nothing to evaluate")

    total_loss /= n_batches
    accuracy = correct / total
    # NOTE(review): zero_division=1 reports 1.0 when a metric is undefined
    # (e.g. recall when there are no positive samples), which can look like a
    # perfect score; keep that in mind when reading the numbers.
    precision = precision_score(all_labels, all_predictions, average='binary', zero_division=1)
    recall = recall_score(all_labels, all_predictions, average='binary', zero_division=1)
    f1 = f1_score(all_labels, all_predictions, average='binary', zero_division=1)
    conf_matrix = confusion_matrix(all_labels, all_predictions, labels=[0, 1])

    return total_loss, accuracy, precision, recall, f1, conf_matrix
# Training loop
num_epochs = 100
best_val_loss = float('inf')
best_model_path = 'best_ensemble_model.pth'
best_model_saved = False  # stays False if the validation loss never improves (e.g. NaN)

for epoch in range(num_epochs):
    ensemble_model.train()
    train_loss = 0.0
    for eeg_X_batch, eeg_y_batch, poly_X_batch, poly_y_batch in train_loader:
        eeg_X_batch, eeg_y_batch = eeg_X_batch.to(device), eeg_y_batch.to(device)
        poly_X_batch, poly_y_batch = poly_X_batch.to(device), poly_y_batch.to(device)

        ensemble_optimizer.zero_grad()
        outputs = ensemble_model(eeg_X_batch, poly_X_batch)
        loss = criterion(outputs, eeg_y_batch)  # Assuming same labels for both modalities
        loss.backward()
        ensemble_optimizer.step()
        train_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    train_loss /= len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss}')

    # Evaluate on validation set
    val_loss, val_accuracy, val_precision, val_recall, val_f1, val_conf_matrix = evaluate_model(ensemble_model, val_loader, criterion)
    print(f'Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')
    print(f'Precision: {val_precision}, Recall: {val_recall}, F1-score: {val_f1}')
    print('Confusion Matrix:')
    print(val_conf_matrix)

    # Save the best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(ensemble_model.state_dict(), best_model_path)
        best_model_saved = True

# Evaluate on test set
# Restore the weights with the lowest validation loss first; otherwise the
# saved checkpoint is never used and the test set is scored with whatever the
# last epoch produced.
if best_model_saved:
    ensemble_model.load_state_dict(torch.load(best_model_path, map_location=device))

test_loss, test_accuracy, test_precision, test_recall, test_f1, test_conf_matrix = evaluate_model(ensemble_model, test_loader, criterion)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')
print(f'Precision: {test_precision}, Recall: {test_recall}, F1-score: {test_f1}')
print('Confusion Matrix:')
print(test_conf_matrix)



Epoch 1/100, Loss: 0.33768240362405777
Validation Loss: 0.39915546774864197, Validation Accuracy: 0.9230769230769231
Precision: 0.0, Recall: 1.0, F1-score: 0.0
Confusion Matrix:
[[12  1]
 [ 0  0]]
Epoch 2/100, Loss: 0.006093421077821404
Validation Loss: 0.20346347987651825, Validation Accuracy: 1.0
Precision: 1.0, Recall: 1.0, F1-score: 1.0
Confusion Matrix:
[[13  0]
 [ 0  0]]
Epoch 3/100, Loss: 0.0003505947533994913
Validation Loss: 0.1051301583647728, Validation Accuracy: 1.0
Precision: 1.0, Recall: 1.0, F1-score: 1.0
Confusion Matrix:
[[13  0]
 [ 0  0]]
Epoch 4/100, Loss: 7.43446325941477e-05
Validation Loss: 0.05544733256101608, Validation Accuracy: 1.0
Precision: 1.0, Recall: 1.0, F1-score: 1.0
Confusion Matrix:
[[13  0]
 [ 0  0]]
Epoch 5/100, Loss: 2.896267415053444e-05
Validation Loss: 0.0279568862169981, Validation Accuracy: 1.0
Precision: 1.0, Recall: 1.0, F1-score: 1.0
Confusion Matrix:
[[13  0]
 [ 0  0]]
Epoch 6/100, Loss: 1.8920603906735778e-05
Validation Loss: 0.0131017453