# Apply DWT

In [6]:
import os
import pickle
import numpy as np
import pywt
from sklearn.preprocessing import StandardScaler

# Directory containing the data files
data_dir = r'C:\Users\User\Documents\Lie detect data\EEGData'

def pad_or_truncate(array, target_length):
    """Pad or truncate an array to reach a specific length."""
    if len(array) > target_length:
        return array[:target_length]
    else:
        return np.pad(array, (0, target_length - len(array)), mode='constant')

def apply_dwt(eeg_signal, wavelet='db4', level=4, target_length=1300):
    coeffs = pywt.wavedec(eeg_signal, wavelet, level=level)
    dwt_features = np.concatenate(coeffs)
    dwt_features = pad_or_truncate(dwt_features, target_length)
    return dwt_features

def preprocess_data(eeg_data, wavelet='db4', level=4, seq_length=1300):
    preprocessed_data = []
    for signal in eeg_data:
        if signal.ndim == 1:
            signal_dwt = apply_dwt(signal, wavelet, level, seq_length)
        else:
            signal_dwt = np.array([apply_dwt(channel, wavelet, level, seq_length) for channel in signal])
        preprocessed_data.append(signal_dwt.T)  # Transpose to ensure shape is (seq_length, channels)
    return np.array(preprocessed_data)

def load_and_preprocess_data(data_dir, wavelet='db4', level=4, seq_length=1300):
    data = []
    labels = []
    
    for filename in os.listdir(data_dir):
        if filename.endswith('.pkl'):
            filepath = os.path.join(data_dir, filename)
            with open(filepath, 'rb') as f:
                eeg = pickle.load(f)
            
            # Apply DWT and preprocess
            preprocessed_signal = preprocess_data(eeg, wavelet, level, seq_length)
            data.append(preprocessed_signal)
            
            # Extract label from filename
            label = 1 if filename.startswith('truth') else 0
            labels.append(label)
    
    return np.array(data), np.array(labels)

# Load and preprocess all data
X, y = load_and_preprocess_data(data_dir)

# Normalize the features
scaler = StandardScaler()
X_reshaped = X.reshape(-1, X.shape[-1])
X_normalized = scaler.fit_transform(X_reshaped).reshape(X.shape)

# Transpose the normalized data to get the final shape (samples, seq_length, channels)
X_normalized = X_normalized.transpose(0, 2, 1)

# Save preprocessed data for later use
np.save('X_normalized.npy', X_normalized)
np.save('y.npy', y)

print("Data preprocessing complete. Saved X_normalized.npy and y.npy")
print(f"Final X shape: {X_normalized.shape}")
print(f"Final y shape: {y.shape}")


Data preprocessing complete. Saved X_normalized.npy and y.npy
Final X shape: (90, 1300, 65)
Final y shape: (90,)


# LSTM-NCP

In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Convert numpy arrays to PyTorch tensors
X_tensor = torch.tensor(np.load(r'C:\Users\User\Documents\Lie detect data\X_normalized.npy'), dtype=torch.float32)
y_tensor = torch.tensor(np.load(r'C:\Users\User\Documents\Lie detect data\y.npy'), dtype=torch.float32)  # Ensure labels are float for BCELoss

# Create a dataset and split into training and validation sets
dataset = TensorDataset(X_tensor, y_tensor)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Print class distribution
print("Class Distribution in Training Set:", torch.bincount(y_tensor[train_dataset.indices].long()))
print("Class Distribution in Validation Set:", torch.bincount(y_tensor[val_dataset.indices].long()))

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_size, 128, num_layers=1, batch_first=True)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.15)
        self.bn = nn.BatchNorm1d(128)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(128, 1)  # Fully connected layer
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h_0 = torch.zeros(1, x.size(0), self.hidden_size).to(device)
        c_0 = torch.zeros(1, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm1(x, (h_0, c_0))

        h_1 = torch.zeros(1, x.size(0), 128).to(device)
        c_1 = torch.zeros(1, x.size(0), 128).to(device)
        out, _ = self.lstm2(out, (h_1, c_1))

        out = self.relu(out)
        out = this.dropout(out)
        out = out[:, -1, :]  # Take the output of the last time step
        out = self.bn(out)
        out = self.flatten(out)
        out = self.fc(out)
        out = self.sigmoid(out)
        return out

# Calculate class weights
num_pos = (y_tensor == 1).sum().item()
num_neg = (y_tensor == 0).sum().item()
weight = num_neg / num_pos

# Initialize the model
input_size = 65  # Number of channels
hidden_size = 128
num_layers = 1  # Adjusted due to the specific structure requirements
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_size, hidden_size, num_layers).to(device)

# Loss and optimizer
criterion = nn.BCELoss(pos_weight=torch.tensor([weight]).to(device))  # Binary Cross-Entropy Loss with class weights
optimizer = optim.Adam(model.parameters(), lr=0.00001)  # Lower learning rate

# Training loop
num_epochs = 500
for epoch in range(num_epochs):
    model.train()
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        outputs = model(inputs).squeeze()  # Squeeze to match the label shape
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Validation loop
    model.eval()
    val_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs).squeeze()  # Squeeze to match the label shape
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            preds = (outputs > 0.5).float()  # Convert probabilities to binary predictions
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate metrics
    val_loss /= len(val_loader)
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)

    print(f'Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}')




Class Distribution in Training Set: tensor([29, 43])
Class Distribution in Validation Set: tensor([ 6, 12])


TypeError: BCELoss.__init__() got an unexpected keyword argument 'pos_weight'