In [22]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F 

# Constants
# Absolute, machine-specific directories that are scanned for ``.pkl`` files:
# EEG_DATA_DIR is passed to load_eeg_data() and POLY_DATA_DIR to
# load_poly_data() further down in this cell.
EEG_DATA_DIR = r'C:\Users\User\Documents\Lie detect data\6M_AugmentedEEGData'
POLY_DATA_DIR = r'C:\Users\User\Documents\Lie detect data\6M_CombinedPolyData'

def pad_sequence(sequence, target_length):
    """Force the second axis of a 2-D array to ``target_length``.

    Arrays that are too short are zero-padded on the right of axis 1; arrays
    that are long enough are cropped to their first ``target_length`` columns.
    The first axis is never changed.
    """
    missing = target_length - sequence.shape[1]
    if missing <= 0:
        # Already long enough: crop (a no-op slice when the length matches).
        return sequence[:, :target_length]
    rows_pad, cols_pad = (0, 0), (0, missing)
    return np.pad(sequence, (rows_pad, cols_pad), mode='constant')

def load_eeg_data(data_dir):
    """Load every ``.pkl`` EEG file in ``data_dir`` into one flat sample set.

    Each file is expected to unpickle to an array-like whose first axis
    enumerates samples. All samples of a file receive the same label:
    ``0`` when the lower-cased file name contains ``'lie'``, otherwise ``1``.

    Files are visited in sorted name order. ``os.listdir`` makes no ordering
    guarantee, and the returned ``file_sample_counts`` is consumed
    positionally by ``CombinedDataset`` to pair EEG files with poly files,
    so the order must be deterministic.

    Files that fail to load are reported on stdout and skipped (best effort).

    Args:
        data_dir: Directory containing the pickled EEG files.

    Returns:
        A tuple ``(X, y, file_sample_counts)`` where ``X`` is the array of all
        samples, ``y`` the matching array of labels, and
        ``file_sample_counts`` a list with the number of samples contributed
        by each successfully loaded file, in load order.
    """
    X, y = [], []
    lie_count, truth_count = 0, 0
    file_sample_counts = []
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.pkl'):
            continue
        file_path = os.path.join(data_dir, file_name)
        try:
            # NOTE: unpickling executes arbitrary code; only point this at
            # trusted data directories.
            data = pd.read_pickle(file_path)
            print(f"EEG File: {file_name}, Shape: {data.shape}, Type: {type(data)}")
            label = 0 if 'lie' in file_name.lower() else 1
            n_samples = data.shape[0]
            X.extend(data)
            y.extend([label] * n_samples)
            file_sample_counts.append(n_samples)
            if label == 0:
                lie_count += n_samples
            else:
                truth_count += n_samples
        except Exception as e:
            print(f"Error loading EEG file {file_name}: {str(e)}")
    print(f"Loaded from EEG {data_dir}: {lie_count} lie samples, {truth_count} truth samples")
    return np.array(X), np.array(y), file_sample_counts

def load_poly_data(data_dir):
    """Load every ``.pkl`` polygraph file in ``data_dir``, one sample per file.

    Each file is expected to unpickle to a 2-D array-like; its label is ``0``
    when the lower-cased file name contains ``'lie'``, otherwise ``1``. All
    recordings are padded along axis 1 to the longest one found so they can be
    stacked into a single array.

    Files are visited in sorted name order. ``os.listdir`` makes no ordering
    guarantee, and the row index of the returned array is used as the
    "poly file index" by ``CombinedDataset``, so the order must be
    deterministic.

    Files that fail to load are reported on stdout and skipped (best effort).

    Args:
        data_dir: Directory containing the pickled polygraph files.

    Returns:
        A tuple ``(X_padded, y)``: the stacked, padded recordings and the
        array of per-file labels, both in load order.
    """
    X, y = [], []
    lie_count, truth_count = 0, 0
    max_length = 0
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.pkl'):
            continue
        file_path = os.path.join(data_dir, file_name)
        try:
            # NOTE: unpickling executes arbitrary code; only point this at
            # trusted data directories.
            data = pd.read_pickle(file_path)
            print(f"Poly File: {file_name}, Shape: {data.shape}, Type: {type(data)}")
            max_length = max(max_length, data.shape[1])
            label = 0 if 'lie' in file_name.lower() else 1
            X.append(data)
            y.append(label)
            if label == 0:
                lie_count += 1
            else:
                truth_count += 1
        except Exception as e:
            print(f"Error loading Poly file {file_name}: {str(e)}")
    print(f"Loaded from Poly {data_dir}: {lie_count} lie samples, {truth_count} truth samples")

    # Pad all poly samples to the maximum length
    X_padded = np.array([pad_sequence(x, max_length) for x in X])
    return X_padded, np.array(y)

class CombinedDataset(Dataset):
    """Pair each EEG sample with the polygraph recording of its source file.

    The dataset has one item per EEG sample. ``file_sample_counts[i]`` says
    how many consecutive EEG samples came from file ``i``; all of them are
    paired with ``poly_X[i]`` / ``poly_y[i]``.

    Args:
        eeg_X: EEG samples, indexable along the first axis.
        eeg_y: One label per EEG sample.
        poly_X: Polygraph recordings, one per file, stacked on the first axis.
        poly_y: One label per polygraph recording.
        file_sample_counts: Number of EEG samples contributed by each file,
            in the same order as the rows of ``poly_X``.
        poly_target_length: Minimum length of axis 1 of the returned poly
            sample; shorter recordings are zero-padded on the right. Longer
            recordings are returned unchanged.

    Raises:
        ValueError: If ``file_sample_counts`` does not account for exactly
            ``len(eeg_X)`` samples, or refers to more files than ``poly_X``
            contains.
    """

    def __init__(self, eeg_X, eeg_y, poly_X, poly_y, file_sample_counts,
                 poly_target_length=4475):
        self.eeg_X = torch.tensor(eeg_X, dtype=torch.float32)
        self.eeg_y = torch.tensor(eeg_y, dtype=torch.long)
        self.poly_X = torch.tensor(poly_X, dtype=torch.float32)
        self.poly_y = torch.tensor(poly_y, dtype=torch.long)
        self.poly_target_length = poly_target_length

        # Create a mapping from EEG sample index to Poly file index
        self.eeg_to_poly_map = []
        for poly_index, count in enumerate(file_sample_counts):
            self.eeg_to_poly_map.extend([poly_index] * count)

        # Fail fast on inconsistent inputs instead of silently mis-pairing
        # samples or raising IndexError in the middle of an epoch.
        if len(self.eeg_to_poly_map) != len(self.eeg_X):
            raise ValueError(
                f"file_sample_counts covers {len(self.eeg_to_poly_map)} samples "
                f"but eeg_X has {len(self.eeg_X)}"
            )
        # The map is non-decreasing, so its last entry is the largest index.
        if self.eeg_to_poly_map and self.eeg_to_poly_map[-1] >= len(self.poly_X):
            raise ValueError(
                f"file_sample_counts refers to poly index {self.eeg_to_poly_map[-1]} "
                f"but poly_X has only {len(self.poly_X)} entries"
            )

    def __len__(self):
        return len(self.eeg_X)

    def __getitem__(self, idx):
        """Return ``(eeg_sample, eeg_label, poly_sample, poly_label)``."""
        eeg_sample = self.eeg_X[idx]
        eeg_label = self.eeg_y[idx]
        poly_idx = self.eeg_to_poly_map[idx]
        poly_sample = self.poly_X[poly_idx]

        # Zero-pad the last axis up to the configured length if it's shorter
        missing = self.poly_target_length - poly_sample.shape[1]
        if missing > 0:
            poly_sample = F.pad(poly_sample, (0, missing))

        poly_label = self.poly_y[poly_idx]

        return eeg_sample, eeg_label, poly_sample, poly_label

# Load data
print("Loading EEG data...")
eeg_X, eeg_y, file_sample_counts = load_eeg_data(EEG_DATA_DIR)
print("Loading Poly data...")
poly_X, poly_y = load_poly_data(POLY_DATA_DIR)

# Check for class imbalance
unique, counts = np.unique(eeg_y, return_counts=True)
print("EEG Class distribution:", dict(zip(unique, counts)))
unique, counts = np.unique(poly_y, return_counts=True)
print("Poly Class distribution:", dict(zip(unique, counts)))

# Normalize EEG data
# The array is flattened to 2-D so that the last axis becomes the scaler's
# feature axis (one mean/std per position along that axis), then restored.
eeg_scaler = StandardScaler()
eeg_X = eeg_scaler.fit_transform(eeg_X.reshape(-1, eeg_X.shape[-1])).reshape(eeg_X.shape)

# Normalize Poly data (same flatten / scale / restore scheme as above)
poly_scaler = StandardScaler()
poly_X = poly_scaler.fit_transform(poly_X.reshape(-1, poly_X.shape[-1])).reshape(poly_X.shape)

# Create full dataset
full_dataset = CombinedDataset(eeg_X, eeg_y, poly_X, poly_y, file_sample_counts)

# Print dataset size and shapes
print("Total number of samples:", len(full_dataset))
print("EEG data shape:", eeg_X.shape)
print("Poly data shape:", poly_X.shape)


# Example of how to use the dataset with DataLoader
train_loader = DataLoader(full_dataset, batch_size=32, shuffle=True)

# Print a sample batch and verify alignment
for eeg_batch, eeg_labels, poly_batch, poly_labels in train_loader:
    print("EEG batch shape:", eeg_batch.shape)
    print("EEG labels shape:", eeg_labels.shape)
    print("Poly batch shape:", poly_batch.shape)
    print("Poly labels shape:", poly_labels.shape)

    # Verify label alignment
    print("Labels match:", torch.all(eeg_labels == poly_labels))

    # Print a few sample indices and their corresponding data.
    # Bounded by the batch size so a batch with fewer than 5 samples does not
    # raise IndexError.
    for i in range(min(5, eeg_batch.shape[0])):
        eeg_sample = eeg_batch[i]
        poly_sample = poly_batch[i]
        print(f"Sample {i}:")
        print(f"  EEG label: {eeg_labels[i].item()}, Poly label: {poly_labels[i].item()}")
        print(f"  EEG data (first 5 values): {eeg_sample[0, :5]}")
        print(f"  Poly data (first 5 values): {poly_sample[0, :5]}")

    break  # Just print the first batch



Loading EEG data...
EEG File: augmented_lie_1.pkl, Shape: (6, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_10.pkl, Shape: (11, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_11.pkl, Shape: (17, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_12.pkl, Shape: (13, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_13.pkl, Shape: (13, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_14.pkl, Shape: (15, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_15.pkl, Shape: (5, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_16.pkl, Shape: (4, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_17.pkl, Shape: (6, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_18.pkl, Shape: (4, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_19.pkl, Shape: (5, 65, 125), Type: <class 'numpy.ndarray'>
EEG File: augmented_lie_2.pkl, Shape: (6, 65, 125), Type: <clas

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset

# Define the model architectures (these should be identical to your training code)
class EEGNet(nn.Module):
    """Convolutional classifier for EEG windows.

    ``forward`` takes a batch without a channel axis and adds one itself.
    The final linear layer is sized for 32 feature maps of 65 x 31, which is
    what a ``(batch, 65, 125)`` input produces after the width-4 pooling.

    The attribute names below are also the state-dict keys of saved
    checkpoints, so they must not be renamed.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Convolution along the last axis only (kernel height 1).
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(1, 51), padding='same')
        self.batchnorm1 = nn.BatchNorm2d(16)
        # Grouped convolution spanning 65 rows, two output maps per group.
        self.depthwiseConv2d = nn.Conv2d(
            16, 32, kernel_size=(65, 1), groups=16, padding='same'
        )
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.activation = nn.ELU()
        self.pooling = nn.AvgPool2d((1, 4))
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(32 * 65 * 31, num_classes)

    def forward(self, x):
        # Add channel dimension, then run the stages strictly in sequence.
        out = x.unsqueeze(1)
        stages = (
            self.conv1,
            self.batchnorm1,
            self.depthwiseConv2d,
            self.batchnorm2,
            self.activation,
            self.pooling,
            self.dropout,
            self.flatten,
            self.fc,
        )
        for stage in stages:
            out = stage(out)
        return out

class PolygraphNet(nn.Module):
    """Convolutional classifier for polygraph recordings.

    ``forward`` takes a batch without a channel axis and adds one itself.
    The final linear layer is sized for 32 feature maps of 4 x 1118, which is
    what a ``(batch, 4, 4475)`` input produces after the width-4 pooling.

    The attribute names below are also the state-dict keys of saved
    checkpoints, so they must not be renamed.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Convolution along the last axis only (kernel height 1).
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(1, 51), padding='same')
        self.batchnorm1 = nn.BatchNorm2d(16)
        # Grouped convolution spanning 4 rows, two output maps per group.
        self.depthwiseConv2d = nn.Conv2d(
            16, 32, kernel_size=(4, 1), groups=16, padding='same'
        )
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.activation = nn.ELU()
        self.pooling = nn.AvgPool2d((1, 4))
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(32 * 4 * 1118, num_classes)

    def forward(self, x):
        # Add channel dimension, then run the stages strictly in sequence.
        out = x.unsqueeze(1)
        stages = (
            self.conv1,
            self.batchnorm1,
            self.depthwiseConv2d,
            self.batchnorm2,
            self.activation,
            self.pooling,
            self.dropout,
            self.flatten,
            self.fc,
        )
        for stage in stages:
            out = stage(out)
        return out

class EnsembleModel(nn.Module):
    """Fuse an EEG model and a polygraph model with one linear layer.

    The outputs of both sub-models are concatenated along dim 1 and mapped by
    ``fc`` (4 inputs, 2 outputs), so each sub-model must emit two values per
    sample.
    """

    def __init__(self, eeg_model, poly_model):
        super().__init__()
        self.eeg_model = eeg_model
        self.poly_model = poly_model
        self.fc = nn.Linear(4, 2)

    def forward(self, eeg_input, poly_input):
        branch_outputs = (
            self.eeg_model(eeg_input),
            self.poly_model(poly_input),
        )
        return self.fc(torch.cat(branch_outputs, dim=1))

# Load the state dictionary
model_path = r'C:\Users\User\Documents\Lie detect data\Ipynb\ensemble_model_fold_3.pth'
# Load onto the CPU first; the model is moved to the target device below.
loaded_dict = torch.load(model_path, map_location=torch.device('cpu'))

# Instantiate the models
eeg_model = EEGNet()
poly_model = PolygraphNet()
ensemble_model = EnsembleModel(eeg_model, poly_model)

# Load state dictionaries (the checkpoint stores the weights under
# 'model_state_dict') and switch to inference mode.
ensemble_model.load_state_dict(loaded_dict['model_state_dict'])
ensemble_model.eval()

# Move the model to the appropriate device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ensemble_model.to(device)

# Create the dataset and DataLoader.
# CombinedDataset, eeg_X, eeg_y, poly_X, poly_y and file_sample_counts are not
# defined in this cell; they must already exist in the session.
test_dataset = CombinedDataset(eeg_X, eeg_y, poly_X, poly_y, file_sample_counts)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

all_labels = []
all_predictions = []

with torch.no_grad():
    for eeg_X_batch, eeg_y_batch, poly_X_batch, poly_y_batch in test_loader:
        print(f"EEG batch shape: {eeg_X_batch.shape}")
        print(f"Poly batch shape: {poly_X_batch.shape}")

        eeg_X_batch, poly_X_batch = eeg_X_batch.to(device), poly_X_batch.to(device)
        # The EEG labels are used as the ground truth for the ensemble.
        labels = eeg_y_batch.to(device)

        outputs = ensemble_model(eeg_X_batch, poly_X_batch)
        # Predicted class = index of the largest output per sample.
        _, predicted = torch.max(outputs, 1)
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

# Convert lists to numpy arrays for metric calculations
all_labels = np.array(all_labels)
all_predictions = np.array(all_predictions)

# Calculate metrics
# accuracy_score must be imported from sklearn.metrics alongside the other
# metric functions; it was previously used here without being imported.
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions, average='binary')
recall = recall_score(all_labels, all_predictions, average='binary')
f1 = f1_score(all_labels, all_predictions, average='binary')
conf_matrix = confusion_matrix(all_labels, all_predictions)


# Print the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print('Confusion Matrix:')
print(conf_matrix)




FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\User\\Documents\\Lie detect data\\Ipynb\\ensemble_model_fold_1.pth'