In [4]:
import pandas as pd
import numpy as np
import timestamp
import scipy.signal as sp_signal
from scipy.fftpack import fft
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from serial import Serial


In [38]:
eeg = pd.read_csv('data/EEG_run-1.csv')
target = pd.read_csv('move_log.csv')
eeg = eeg.drop(columns=['EEG_Channel_4','EEG_Channel_5','EEG_Channel_6','EEG_Channel_7'])
eeg.head()

Unnamed: 0,EEG_Channel_0,EEG_Channel_1,EEG_Channel_2,EEG_Channel_3
0,0.0,0.0,0.0,0.0
1,-11567.295977,-3618.948593,-348.664862,162.184258
2,-11566.044279,-3614.098264,-349.603635,155.858714
3,-11567.966529,-3618.188634,-355.526847,149.220246
4,-11565.440782,-3620.44616,-354.521019,156.864543


In [39]:
target.head()

Unnamed: 0,timestamp,direction,speed
0,2025-03-10 16:12:05,down,9
1,2025-03-10 16:12:08,right,12
2,2025-03-10 16:12:10,right,8
3,2025-03-10 16:12:14,up,16
4,2025-03-10 16:12:16,right,20


In [42]:
eeg_df = pd.read_csv("data/EEG_run-1.csv")
move_df = pd.read_csv("move1_log.csv")

# Convert movement log timestamp column to datetime
move_df['timestamp'] = pd.to_datetime(move_df['timestamp'])

# Determine the start time for EEG data (EEG started 16 seconds before the earliest movement)
offset = pd.Timedelta(seconds=16)
start_time_eeg = move_df['timestamp'].min() - offset

# Generate a timestamp for each EEG sample based on your sampling rate (250 Hz)
n_samples = len(eeg_df)
time_deltas = pd.to_timedelta(np.arange(n_samples) / 250, unit='s')
eeg_df['timestamp'] = start_time_eeg + time_deltas

# Sort dataframes by timestamp (required by merge_asof)
eeg_df = eeg_df.sort_values('timestamp')
move_df = move_df.sort_values('timestamp')

# Merge the two dataframes on timestamp with a 10 ms tolerance.
# We include the "event" and "speed" columns from move_df.
merged_df = pd.merge_asof(
    eeg_df,
    move_df[['timestamp', 'event', 'speed']],
    on='timestamp',
    tolerance=pd.Timedelta(milliseconds=10),
    direction='nearest'
)

# Fill missing values in event and speed
merged_df['event'] = merged_df['event'].fillna('none')
merged_df['speed'] = merged_df['speed'].fillna(0)

# Create a 'ms' column to measure time in milliseconds from the start
merged_df['ms'] = (merged_df['timestamp'] - merged_df['timestamp'].iloc[0]).dt.total_seconds() * 1000

# Save the merged dataframe
merged_df.to_csv('merged_df.csv', index=False)
print("Merged data saved to merged_df.csv")


Merged data saved to merged_df.csv


In [48]:
merged_df[3995:4010]

Unnamed: 0,EEG_Channel_0,EEG_Channel_1,EEG_Channel_2,EEG_Channel_3,EEG_Channel_4,EEG_Channel_5,EEG_Channel_6,EEG_Channel_7,timestamp,event,speed,ms
3995,-11597.783756,-3737.457542,-422.023287,146.493333,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:41.980,none,0.0,15980.0
3996,-11594.788622,-3736.071734,-421.643307,148.169714,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:41.984,none,0.0,15984.0
3997,-11588.798355,-3731.713144,-419.430485,149.421412,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:41.988,none,0.0,15988.0
3998,-11596.643817,-3736.78699,-422.805598,144.369917,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:41.992,blink,0.0,15992.0
3999,-11593.492221,-3740.475028,-425.286642,144.325214,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:41.996,blink,0.0,15996.0
4000,-11587.501954,-3740.206807,-422.939709,144.928711,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:42.000,blink,0.0,16000.0
4001,-11597.895515,-3739.268033,-423.766723,145.93454,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:42.004,blink,0.0,16004.0
4002,-11596.442651,-3733.613042,-423.096171,148.214417,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:42.008,blink,0.0,16008.0
4003,-11596.442651,-3741.100876,-429.220549,139.810162,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:42.012,none,0.0,16012.0
4004,-11595.727396,-3737.904577,-428.751162,138.37965,187500.0,-187500.022352,-187500.022352,-187500.022352,2025-03-12 03:45:42.016,none,0.0,16016.0


In [53]:
df = pd.read_csv("merged_df.csv")

# Ensure timestamp is a datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])


# (Assumes columns are labeled like 'EEG_Channel_0', 'EEG_Channel_1', etc.)
eeg_channels = [col for col in df.columns if "EEG_Channel" in col]

# EOG Filtering
def bandpass_filter(data, lowcut=0.5, highcut=35, fs=250, order=6):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = sp_signal.butter(order, [low, high], btype='band')
    return sp_signal.filtfilt(b, a, data)

def notch_filter(data, freq=50, fs=250, Q=30):
    b, a = sp_signal.iirnotch(freq / (fs/2), Q)
    return sp_signal.filtfilt(b, a, data)

# Apply filters to each EEG channel
for ch in eeg_channels:
    df[ch] = bandpass_filter(df[ch], fs=250)
    df[ch] = notch_filter(df[ch], freq=50, fs=250, Q=30)

# Feature Extraction
def extract_features(df, eeg_channels):
    feature_df = pd.DataFrame()
    
    # 1. Rolling Mean (window=5 samples)
    for ch in eeg_channels:
        feature_df[f'RM_{ch}'] = df[ch].rolling(window=5).mean()
    
    # 2. FFT features: absolute FFT values, use the first half of the spectrum.
    for ch in eeg_channels:
        n = len(df)
        fft_vals = np.abs(fft(df[ch].values))[:n//2]
        # Pad or trim to ensure length equals n
        if len(fft_vals) < n:
            fft_vals = np.pad(fft_vals, (0, n - len(fft_vals)), mode='constant')
        else:
            fft_vals = fft_vals[:n]
        feature_df[f'FFT_{ch}'] = fft_vals
    
    # 3. Gradient features: the rate of change.
    for ch in eeg_channels:
        feature_df[f'Grad_{ch}'] = np.gradient(df[ch])
    
    # Include behavioral features: speed and event.
    feature_df["speed"] = df["speed"]
    feature_df["event"] = df["event"]
    
    return feature_df.dropna()

df_features = extract_features(df, eeg_channels)

In [None]:
# ----------------------------
# Balance the Data
# ----------------------------
# Print original distribution
print("Original distribution of events:")
print(df_features['event'].value_counts())

# Separate non-'none' events and 'none'
non_none_df = df_features[df_features['event'] != "none"]
none_df = df_features[df_features['event'] == "none"]

# target a ratio of 2:1 for 'none' to non-'none' events.
target_none_count = len(non_none_df) * 2
target_none_count = min(target_none_count, len(none_df))

# Randomly sample 'none' events to match target_none_count
sampled_none_df = none_df.sample(n=target_none_count, random_state=42)

# Combine with non-'none' events and shuffle
balanced_df = pd.concat([non_none_df, sampled_none_df]).sample(frac=1, random_state=42)
print("Balanced distribution of events:")
print(balanced_df['event'].value_counts())

# Encode Labels
le = LabelEncoder()
balanced_df["event"] = le.fit_transform(balanced_df["event"])

# Prepare Data for Training
X = balanced_df.drop("event", axis=1)
y = balanced_df["event"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train and Evaluate SVM Model
svm_model = SVC(kernel='linear', C=1)
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nBalanced Model Accuracy: {accuracy:.2f}")

# Get unique labels present in the test set and generate corresponding target names
unique_labels = np.unique(y_test)
target_names = [le.inverse_transform([lab])[0] for lab in unique_labels]

print("\nBalanced Classification Report:")
print(classification_report(y_test, y_pred, labels=unique_labels, target_names=target_names))

# Hyperparameter Tuning (GridSearchCV)
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear']}
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)
print("Best Parameters:", grid_search.best_params_)

Original distribution of events:
event
none     15244
blink        5
down         5
up           5
left         5
right        5
Name: count, dtype: int64
Balanced distribution of events:
event
none     50
blink     5
up        5
left      5
down      5
right     5
Name: count, dtype: int64

Balanced Model Accuracy: 0.93

Balanced Classification Report:
              precision    recall  f1-score   support

       blink       0.00      0.00      0.00         1
        left       1.00      1.00      1.00         1
        none       0.92      1.00      0.96        12
       right       1.00      1.00      1.00         1

    accuracy                           0.93        15
   macro avg       0.73      0.75      0.74        15
weighted avg       0.87      0.93      0.90        15

Best Parameters: {'C': 1, 'kernel': 'linear'}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
