In [None]:
import pandas as pd
import numpy as np
from scipy.signal import butter, lfilter
from sklearn.preprocessing import StandardScaler
from scipy.stats import zscore

# Define bandpass filter
def bandpass_filter(data, lowcut, highcut, fs, order=4):
    """Apply a causal Butterworth band-pass filter to *data*.

    The filter is designed and applied as second-order sections (SOS)
    rather than as a single ``(b, a)`` transfer function. The SOS form is
    the representation SciPy recommends for general-purpose filtering,
    because converting to polynomial coefficients loses precision when a
    band edge sits very close to 0 Hz relative to the sampling rate.

    Args:
        data: Array-like signal; filtering runs along the last axis.
        lowcut: Lower band edge, in the same units as ``fs``.
        highcut: Upper band edge, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        numpy.ndarray with the same shape as ``data``.

    Raises:
        ValueError: If the cutoffs do not satisfy
            ``0 < lowcut < highcut < fs / 2``.
    """
    # Imported locally so the function does not depend on the file-level
    # import list, which only brings in ``butter`` and ``lfilter``.
    from scipy.signal import sosfilt

    nyquist = 0.5 * fs
    # Fail early with a message in Hz instead of butter()'s error about
    # normalised critical frequencies.
    if not 0 < lowcut < highcut < nyquist:
        raise ValueError(
            f"Cutoffs must satisfy 0 < lowcut < highcut < fs/2 "
            f"(got lowcut={lowcut}, highcut={highcut}, fs={fs})"
        )

    low = lowcut / nyquist
    high = highcut / nyquist
    sos = butter(order, [low, high], btype='band', output='sos')
    # sosfilt is forward-only (causal), like the lfilter call it replaces.
    return sosfilt(sos, data)

# Load EEG data from CSV
file_path = "/content/drive/MyDrive/RAW EEG_Clean/divya-chess_EPOCX_204902_2024.11.24T15.55.21+05.30.mc.pm.fe.bp.csv"  # Replace with your file path
eeg_data = pd.read_csv(file_path)

# Separate EEG.Counter and EEG signals: the counter column is carried through
# untouched, every other column is filtered and normalized.
eeg_counter = eeg_data['EEG.Counter']  # Keep EEG.Counter separate
eeg_signals = eeg_data.drop(columns=['EEG.Counter'])  # Process only EEG signal columns

# Fill missing samples with the per-column mean before filtering.
if eeg_signals.isnull().values.any():
    print("Handling missing values...")
    eeg_signals = eeg_signals.fillna(eeg_signals.mean())

# Apply bandpass filter to EEG signals
fs = 128  # Sampling frequency in Hz (adjust to your dataset)
lowcut = 0.1  # Low cutoff frequency in Hz
highcut = 45  # High cutoff frequency in Hz

print("Applying bandpass filter...")
filtered_signals = eeg_signals.apply(lambda col: bandpass_filter(col, lowcut, highcut, fs))

# Normalize the filtered signals (zero mean, unit variance per column)
print("Normalizing the data...")
scaler = StandardScaler()
normalized_signals = pd.DataFrame(scaler.fit_transform(filtered_signals), columns=filtered_signals.columns)

# Remove outliers using z-score.
# The standardized values already ARE the z-scores, so threshold them
# directly. Recomputing zscore() here is redundant and returns NaN for a
# zero-variance column, which would make every row fail the test below
# and leave the output empty.
print("Removing outliers...")
z_scores = normalized_signals.abs()
threshold = 3  # Common threshold for z-score
cleaned_signals = normalized_signals[(z_scores < threshold).all(axis=1)]

# Retain corresponding EEG.Counter values for the cleaned signals
# (label-based lookup on the surviving row labels).
cleaned_counter = eeg_counter.loc[cleaned_signals.index]

# Combine cleaned EEG.Counter and signals into one DataFrame; both sides are
# re-indexed from 0 so they line up row for row.
preprocessed_data = pd.concat([cleaned_counter.reset_index(drop=True), cleaned_signals.reset_index(drop=True)], axis=1)

# Save the preprocessed data to a new CSV file
output_file = "preprocessed_eeg_data_divya.csv"
print(f"Saving preprocessed data to {output_file}...")
preprocessed_data.to_csv(output_file, index=False)

# If using Google Colab, download the file. This is the only download call:
# a second, unguarded files.download() after this block raises NameError
# outside Colab (the import never bound `files`) and downloads twice inside it.
try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab. File saved locally.")
else:
    files.download(output_file)

print("Preprocessing complete!")



Applying bandpass filter...
Normalizing the data...
Removing outliers...
Saving preprocessed data to preprocessed_eeg_data_divya.csv...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Preprocessing complete!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>