In [7]:
import numpy as np
import pickle
import pywt

# Locations of the pickled truth and lie EEG recordings
truth_file_path = r'C:\Users\User\Documents\Lie detect data\EEGData\truth_42.pkl'
lie_file_path = r'C:\Users\User\Documents\Lie detect data\EEGData\lie_1.pkl'

# Unpickle both recordings in turn (truth first, then lie).
# NOTE: pickle.load can execute code embedded in the file, so these paths
# should only ever point at files from a trusted source.
recordings = []
for path in (truth_file_path, lie_file_path):
    with open(path, 'rb') as f:
        recordings.append(pickle.load(f))
truth_data, lie_data = recordings

# Report the array shapes as a quick sanity check
print(f'Truth data shape: {truth_data.shape}')
print(f'Lie data shape: {lie_data.shape}')

def apply_dwt(eeg_signal, wavelet='db4', level=4):
    """Return the multilevel DWT coefficients of a signal as a single array.

    Args:
        eeg_signal: Array-like of samples. The decomposition runs along the
            last axis, so a 1-D signal or a (..., n_samples) stack both work.
        wavelet: Wavelet name or object passed to ``pywt.wavedec``
            (default ``'db4'``, the value previously hard-coded).
        level: Decomposition level passed to ``pywt.wavedec``
            (default ``4``, the value previously hard-coded).

    Returns:
        numpy.ndarray: the approximation coefficients followed by the detail
        coefficients, in the order ``pywt.wavedec`` returns them, joined
        along the last axis.
    """
    coeffs = pywt.wavedec(eeg_signal, wavelet, level=level)
    # Join along the same axis the transform ran on. For 1-D input this is
    # identical to the default axis 0; for stacked input it keeps each row's
    # coefficients together instead of failing on mismatched band lengths.
    return np.concatenate(coeffs, axis=-1)

def preprocess_data(eeg_data):
    """Turn each trial in ``eeg_data`` into a flat vector of DWT coefficients.

    Args:
        eeg_data: Iterable of trials. Each trial is either a 1-D sequence of
            samples or a sequence of channels (iterated along its first
            axis), given as a NumPy array or any array-like such as a list.

    Returns:
        list[numpy.ndarray]: one flattened 1-D coefficient vector per trial,
        in input order. For multi-channel trials the per-channel vectors are
        laid out back to back.
    """
    preprocessed_data = []
    for signal in eeg_data:
        # Coerce so that plain lists/tuples expose ``ndim`` like arrays do;
        # this is a no-op for inputs that are already ndarrays.
        signal = np.asarray(signal)
        if signal.ndim == 1:
            signal_dwt = apply_dwt(signal)
        else:
            # Transform every channel separately, then stack the results
            signal_dwt = np.array([apply_dwt(channel) for channel in signal])
        preprocessed_data.append(signal_dwt.flatten())
    return preprocessed_data

# Extract DWT feature vectors for each class
truth_preprocessed = preprocess_data(truth_data)
lie_preprocessed = preprocess_data(lie_data)

# Record the length of every feature vector so that differences between
# trials (or between the two classes) are visible before padding
truth_lengths = list(map(len, truth_preprocessed))
lie_lengths = list(map(len, lie_preprocessed))

print(f"Truth data - Min length: {min(truth_lengths)}, Max length: {max(truth_lengths)}")
print(f"Lie data - Min length: {min(lie_lengths)}, Max length: {max(lie_lengths)}")

def _right_pad(signals, target_len):
    """Zero-pad every 1-D vector in ``signals`` on the right to ``target_len``."""
    padded = []
    for vec in signals:
        deficit = target_len - len(vec)
        padded.append(np.pad(vec, (0, deficit)))
    return padded


# Longest feature vector across both classes
max_length = max(
    max(map(len, truth_preprocessed)),
    max(map(len, lie_preprocessed)),
)

# Bring every vector up to that common length with trailing zeros.
# NOTE(review): in the recorded run the truth vectors were 1229 long and the
# lie vectors 490, so every lie row ends in 739 padded zeros. The amount of
# padding alone then separates the two classes; consider equalizing the raw
# signal lengths before the DWT instead.
truth_padded = _right_pad(truth_preprocessed, max_length)
lie_padded = _right_pad(lie_preprocessed, max_length)

# Stack the equal-length vectors into 2-D arrays (one row per trial)
truth_preprocessed = np.array(truth_padded)
lie_preprocessed = np.array(lie_padded)

# Show the resulting shapes for verification
print(f"Truth preprocessed shape: {truth_preprocessed.shape}")
print(f"Lie preprocessed shape: {lie_preprocessed.shape}")

# Stack the two classes (truth rows first) and build matching labels:
# 0 for every truth row, 1 for every lie row
n_truth = len(truth_preprocessed)
n_lie = len(lie_preprocessed)
X = np.concatenate((truth_preprocessed, lie_preprocessed))
y = np.array([0] * n_truth + [1] * n_lie)

# Rescale the features with MinMaxScaler's default settings.
# NOTE(review): the scaler is fit on all samples at once; if a train/test
# split happens later, it should be fit on the training part only — confirm.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

# Append a trailing axis of size 1 -> (samples, features, 1), the layout the
# LSTM stage is meant to consume
X_normalized = X_normalized[:, :, np.newaxis]

# Persist the feature array and the labels as .npy files in the current
# working directory so later steps can reload them
for filename, array in (('X_normalized.npy', X_normalized), ('y.npy', y)):
    np.save(filename, array)


Truth data shape: (65, 1204)
Lie data shape: (65, 463)
Truth data - Min length: 1229, Max length: 1229
Lie data - Min length: 490, Max length: 490
Truth preprocessed shape: (65, 1229)
Lie preprocessed shape: (65, 1229)
