In [1]:
pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.16-py2.py3-none-any.whl.metadata (13 kB)
Collecting click (from lazypredict)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting pandas (from lazypredict)
  Downloading pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting lightgbm (from lazypredict)
  Downloading lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl.metadata (17 kB)
Collecting xgboost (from lazypredict)
  Downloading xgboost-2.1.4-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting pytest-runner (from lazypredict)
  Downloading pytest_runner-6.0.1-py3-none-any.whl.metadata (7.3 kB)
Collecting mlflow>=2.0.0 (from lazypredict)
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.22.0 (from mlflow>=2.0.0->lazypredict)
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl.metadata (31 kB)
Collecting Flask<4 (from mlflow>=2.0.0->lazypredict)
  Downloading flask-3.1.

In [1]:
from lazypredict.Supervised import LazyClassifier

In [2]:
import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd


In [None]:
import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Path to preprocessed .pkl EEG files
data_dir = "preprocessed_epochs"
all_files = sorted(glob(os.path.join(data_dir, "*_epochs.pkl")))

# Group .pkl files by subject (assuming filenames like session1_sub01_epochs.pkl)
subject_sessions = {}
for file in all_files:
    basename = os.path.basename(file)
    subject_id = basename.split("_")[1]  # e.g., sub01
    subject_sessions.setdefault(subject_id, []).append(file)

# Dictionary to store results
all_results = {}

for subject, files in subject_sessions.items():
    X_list, y_list = [], []
    print(f"\n🔄 Processing subject: {subject}")

    for file in files:
        try:
            X, y = joblib.load(file)
            X_list.append(X)
            y_list.append(y)
        except Exception as e:
            print(f"❌ Failed to load {file}: {e}")
            continue

    if len(X_list) == 0:
        print(f"⚠️ No valid data found for {subject}")
        continue

    # Combine 3 sessions
    X_subject = np.concatenate(X_list)
    y_subject = np.concatenate(y_list)

    # Flatten each EEG sample from (channels, timepoints) to 1D
    X_flat = X_subject.reshape(X_subject.shape[0], -1)

    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_flat)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_subject, test_size=0.2, random_state=42)

    # LazyPredict classification
    clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
    models, _ = clf.fit(X_train, X_test, y_train, y_test)

    print(f"📊 Classification results for {subject}:")
    print(models[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    all_results[subject] = models

# Save results to CSV (optional)
summary_df = pd.concat([df.assign(Subject=subj) for subj, df in all_results.items()])
summary_df.to_csv("lazy_subjectwise_results.csv")
print("\n✅ All subject results saved to lazy_subjectwise_results.csv")


🔄 Processing subject: sub10


  0%|          | 0/32 [00:00<?, ?it/s]

In [None]:
import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Path to preprocessed .pkl EEG files
data_dir = "preprocessed_epochs"
all_files = sorted(glob(os.path.join(data_dir, "*_epochs.pkl")))

# Group .pkl files by subject (assuming filenames like session1_sub01_epochs.pkl)
subject_sessions = {}
for file in all_files:
    basename = os.path.basename(file)
    subject_id = basename.split("_")[1]  # e.g., sub01
    subject_sessions.setdefault(subject_id, []).append(file)

# Dictionary to store results
all_results = {}

for subject, files in subject_sessions.items():
    print(f"\n🔄 Processing subject: {subject}")
    X_all, y_all = [], []

    for file in files:
        try:
            X, y = joblib.load(file)
            # Flatten and scale this session's data
            X_flat = X.reshape(X.shape[0], -1)
            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(X_flat)

            # Store only temporarily
            X_all.append(X_scaled)
            y_all.append(y)
            
            # Immediately delete large variables to reduce RAM
            del X, y, X_flat
        except Exception as e:
            print(f"❌ Failed to load {file}: {e}")

    # Process subject if valid data found
    if len(X_all) == 0:
        print(f"⚠️ No valid data for {subject}")
        continue

    for X_sub, y_sub in zip(X_all, y_all):
        try:
            X_train, X_test, y_train, y_test = train_test_split(X_sub, y_sub, test_size=0.2, random_state=42)
            clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
            models, _ = clf.fit(X_train, X_test, y_train, y_test)

            if subject not in all_results:
                all_results[subject] = []
            all_results[subject].append(models)

            # Clear to save memory
            del X_train, X_test, y_train, y_test, X_sub, y_sub
        except Exception as e:
            print(f"❌ LazyClassifier failed for {subject}: {e}")

# Save results to CSV (optional)
summary_df = pd.concat([df.assign(Subject=subj) for subj, df_list in all_results.items() for df in df_list])
summary_df.to_csv("lazy_subjectwise_results.csv")
print("\n✅ All subject results saved to lazy_subjectwise_results.csv")



🔄 Processing subject: sub10


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.678154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037200
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.690610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036789
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.694361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036802
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083

🔄 Processing subject: sub11


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.686781 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036730
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.682742 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036987
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.648368 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037081
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083

🔄 Processing subject: sub12


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.661623 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037073
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.661244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037405
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010

🔄 Processing subject: sub13


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.677136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037135
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.668292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036927
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010

🔄 Processing subject: sub14


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.683443 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037186
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.751654 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036803
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.676552 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037227
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010

🔄 Processing subject: sub15


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.698021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036707
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.735134 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037148
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.673813 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036979
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083

🔄 Processing subject: sub16


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.715909 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037242
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.722062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036703
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 43, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.763599 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037419
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.537500 -> initscore=0.150282
[LightGBM] [Info] Start training from score 0.150282

🔄 Processing subject: sub17


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 43, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.747587 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036746
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.537500 -> initscore=0.150282
[LightGBM] [Info] Start training from score 0.150282


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 43, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.737188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037443
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.537500 -> initscore=0.150282
[LightGBM] [Info] Start training from score 0.150282


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 43, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.744510 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037312
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.537500 -> initscore=0.150282
[LightGBM] [Info] Start training from score 0.150282

🔄 Processing subject: sub18


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 43, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.727374 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037409
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.537500 -> initscore=0.150282
[LightGBM] [Info] Start training from score 0.150282


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.758896 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036751
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.839217 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037412
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083

🔄 Processing subject: sub19


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.714724 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037455
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.711227 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036681
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.660250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037506
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010

🔄 Processing subject: sub1


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.739333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037108
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.656403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036750
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.666124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037201
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

🔄 Processing subject: sub20


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 42, number of negative: 38
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.694794 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036942
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525000 -> initscore=0.100083
[LightGBM] [Info] Start training from score 0.100083


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.682083 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036687
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.669423 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037067
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083

🔄 Processing subject: sub21


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 37, number of negative: 43
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.671705 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036796
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.462500 -> initscore=-0.150282
[LightGBM] [Info] Start training from score -0.150282


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 43, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.705439 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036857
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.537500 -> initscore=0.150282
[LightGBM] [Info] Start training from score 0.150282


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.659893 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036960
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

🔄 Processing subject: sub22


  0%|          | 0/32 [00:00<?, ?it/s]

In [1]:
import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from lazypredict.Supervised import LazyClassifier
import pandas as pd

In [2]:
#ONLY ONE SUBJECT

import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
#from lazypredict.Supervised import LazyClassifier

# ----------- CONFIGURE THIS ----------
subject_id = "1"  # e.g., 'sub1', 'sub10', etc.
data_dir = "preprocessed_epochs"
# -------------------------------------

# Get all 3 session files for this subject
subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

X_list, y_list = [], []

for file in subject_files:
    try:
        print(f"📂 Loading: {file}")
        X, y = joblib.load(file)
        X_list.append(X)
        y_list.append(y)
    except Exception as e:
        print(f"❌ Error loading {file}: {e}")

# Check if data was loaded
if not X_list:
    print("❌ No data loaded. Check subject ID or file path.")
else:
    # Process subject data
    for i in range(len(X_list)):
        # Reshape & standardize session data
        X_flat = X_list[i].reshape(X_list[i].shape[0], -1)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_flat)

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_list[i], test_size=0.2, random_state=42)

        print(f"\n🚀 Running LazyPredict on Session {i+1} of {subject_id}...")
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        print(f"📊 Results for session {i+1} of {subject_id}:\n")
        print(models[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

📂 Loading: preprocessed_epochs/session1_sub1_twist_MI_epochs.pkl
📂 Loading: preprocessed_epochs/session2_sub1_twist_MI_epochs.pkl
📂 Loading: preprocessed_epochs/session3_sub1_twist_MI_epochs.pkl

🚀 Running LazyPredict on Session 1 of 1...


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.696818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037108
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083
📊 Results for session 1 of 1:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
RandomForestClassifier             0.70      0.70     0.71        1.68
LGBMClassifier                     0.65      0.65     0.67       35.57
LinearDiscriminantAnalysis         0.60      0.60     0.62        3.01
BaggingClassifier                  0.60      0.60     0.60       16.14
XGBClassifier                      0.55      0.55  

  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.684637 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036750
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📊 Results for session 2 of 1:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
BaggingClassifier                  0.60      0.59     0.62       16.19
XGBClassifier                      0.50      0.50     0.51       14.76
LabelPropagation                   0.45      0.28     0.50        1.13
LabelSpreading                     0.45      0.28     0.50        1.13
DummyClassifier                    0.45      0.28  

  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.705722 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037201
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
📊 Results for session 3 of 1:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
DecisionTreeClassifier             0.65      0.65     0.65        3.98
ExtraTreeClassifier                0.60      0.60     0.60        1.09
LinearDiscriminantAnalysis         0.60      0.60     0.60        2.23
RandomForestClassifier             0.60      0.60     0.60        1.66
CalibratedClassifierCV             0.55      0.55     0.55        5.50
LabelPropagation                   

In [3]:
# ----------- CONFIGURE THIS ----------
subject_id = "6"  # e.g., 'sub1', 'sub10', etc.
data_dir = "preprocessed_epochs"
# -------------------------------------

# Get all 3 session files for this subject
subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

X_list, y_list = [], []

for file in subject_files:
    try:
        print(f"📂 Loading: {file}")
        X, y = joblib.load(file)
        X_list.append(X)
        y_list.append(y)
    except Exception as e:
        print(f"❌ Error loading {file}: {e}")

# Check if data was loaded
if not X_list:
    print("❌ No data loaded. Check subject ID or file path.")
else:
    # Process subject data
    for i in range(len(X_list)):
        # Reshape & standardize session data
        X_flat = X_list[i].reshape(X_list[i].shape[0], -1)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_flat)

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_list[i], test_size=0.2, random_state=42)

        print(f"\n🚀 Running LazyPredict on Session {i+1} of {subject_id}...")
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        print(f"📊 Results for session {i+1} of {subject_id}:\n")
        print(models[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

📂 Loading: preprocessed_epochs/session1_sub6_twist_MI_epochs.pkl
📂 Loading: preprocessed_epochs/session3_sub6_twist_MI_epochs.pkl

🚀 Running LazyPredict on Session 1 of 6...


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 37, number of negative: 43
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.655792 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037353
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.462500 -> initscore=-0.150282
[LightGBM] [Info] Start training from score -0.150282
📊 Results for session 1 of 6:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.60      0.61     0.63       58.49
GaussianNB                         0.60      0.61     0.63        1.21
RidgeClassifier                    0.55      0.54     0.62        1.22
XGBClassifier                      0.55      0.54     0.62       13.96
RidgeClassifierCV                  0.55      0.54  

  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.658235 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036938
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
📊 Results for session 2 of 6:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
ExtraTreesClassifier               0.65      0.63     0.65        1.34
LGBMClassifier                     0.65      0.65     0.65       36.56
LogisticRegression                 0.60      0.60     0.60        1.26
NearestCentroid                    0.55      0.55     0.55        1.23
QuadraticDiscriminantAnalysis      0.55      0.44     0.55        1.56
XGBClassifier                      

In [4]:
# ----------- CONFIGURE THIS ----------
subject_id = "10"  # e.g., 'sub1', 'sub10', etc.
data_dir = "preprocessed_epochs"
# -------------------------------------

# Get all 3 session files for this subject
subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

X_list, y_list = [], []

for file in subject_files:
    try:
        print(f"📂 Loading: {file}")
        X, y = joblib.load(file)
        X_list.append(X)
        y_list.append(y)
    except Exception as e:
        print(f"❌ Error loading {file}: {e}")

# Check if data was loaded
if not X_list:
    print("❌ No data loaded. Check subject ID or file path.")
else:
    # Process subject data
    for i in range(len(X_list)):
        # Reshape & standardize session data
        X_flat = X_list[i].reshape(X_list[i].shape[0], -1)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_flat)

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_list[i], test_size=0.2, random_state=42)

        print(f"\n🚀 Running LazyPredict on Session {i+1} of {subject_id}...")
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        print(f"📊 Results for session {i+1} of {subject_id}:\n")
        print(models[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

📂 Loading: preprocessed_epochs/session1_sub10_twist_MI_epochs.pkl
📂 Loading: preprocessed_epochs/session2_sub10_twist_MI_epochs.pkl
📂 Loading: preprocessed_epochs/session3_sub10_twist_MI_epochs.pkl

🚀 Running LazyPredict on Session 1 of 10...


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.671712 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037200
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010
📊 Results for session 1 of 10:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
Perceptron                         0.70      0.70     0.70        1.23
SGDClassifier                      0.70      0.70     0.70        1.23
NearestCentroid                    0.65      0.65     0.64        1.22
LogisticRegression                 0.60      0.60     0.61        1.26
LinearDiscriminantAnalysis         0.60      0.60   

  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.646945 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036789
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📊 Results for session 2 of 10:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
GaussianNB                         0.80      0.80     0.80        1.19
NearestCentroid                    0.65      0.65     0.66        1.25
BernoulliNB                        0.60      0.59     0.62        1.14
LinearDiscriminantAnalysis         0.60      0.60     0.61        2.17
RandomForestClassifier             0.55      0.53 

  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.653639 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036802
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083
📊 Results for session 3 of 10:

                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
LinearDiscriminantAnalysis         0.60      0.59     0.65        2.17
Perceptron                         0.60      0.60     0.60        1.23
SGDClassifier                      0.60      0.60     0.60        1.23
KNeighborsClassifier               0.55      0.53     0.60        1.09
RidgeClassifierCV                  0.55      0.54 

In [None]:
import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Path to preprocessed .pkl EEG files
                        
subject_id = "1"  # e.g., 'sub1', 'sub10', etc.
data_dir = "preprocessed_epochs"
# -------------------------------------

# Get all 3 session files for this subject
subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

X_all = []
y_all = []

for file in subject_files:
    try:
        print(f"📂 Loading: {file}")
        X, y = joblib.load(file)
        X_all.append(X)
        y_all.append(y)
    except Exception as e:
        print(f"❌ Error loading {file}: {e}")

# Check if data was loaded
if not X_all:
    print("❌ No data loaded. Check subject ID or file path.")
else:
    # Combine all 3 sessions
    X_combined = np.concatenate(X_all, axis=0)
    y_combined = np.concatenate(y_all, axis=0)

    # Flatten and scale
    X_flat = X_combined.reshape(X_combined.shape[0], -1)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_flat)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_combined, test_size=0.2, random_state=42)

    print(f"\n🚀 Running LazyPredict on combined sessions for subject {subject_id}...")
    clf = LazyClassifier(verbose=0, ignore_warnings=True)
    models, _ = clf.fit(X_train, X_test, y_train, y_test)

    print(f"📊 Final classification results for subject {subject_id} (3 sessions combined):")
    print(models[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    # Optional: Save to CSV
    #models[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']].to_csv(f"{subject_id}_lazy_results.csv")

📂 Loading: preprocessed_epochs/session1_sub1_twist_MI_epochs.pkl
📂 Loading: preprocessed_epochs/session2_sub1_twist_MI_epochs.pkl
📂 Loading: preprocessed_epochs/session3_sub1_twist_MI_epochs.pkl

🚀 Running LazyPredict on combined sessions for subject 1...


  0%|          | 0/32 [00:00<?, ?it/s]

In [1]:
import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from lazypredict.Supervised import LazyClassifier
import pandas as pd

# -------- CONFIGURE --------
subject_id = "1"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    #avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    #print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub1_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.682793 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037108
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083
📂 Processing: preprocessed_epochs/session2_sub1_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.739040 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036750
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session3_sub1_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.643413 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037201
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

✅ Averaged LazyPredict Results for 1 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.43      0.42     0.43       58.63
BaggingClassifier                  0.57      0.56     0.57       18.68
BernoulliNB                        0.42      0.40     0.43        1.49
CalibratedClassifierCV             0.47      0.44     0.49        5.86
DecisionTreeClassifier             0.52      0.50     0.49        4.33
DummyCl

In [2]:
# -------- CONFIGURE --------
subject_id = "6"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    #avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    #print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub6_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 37, number of negative: 43
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.661809 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037353
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.462500 -> initscore=-0.150282
[LightGBM] [Info] Start training from score -0.150282
📂 Processing: preprocessed_epochs/session3_sub6_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.649633 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036938
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

✅ Averaged LazyPredict Results for 6 (across 2 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.53      0.52     0.54       56.87
BaggingClassifier                  0.45      0.44     0.49       15.94
BernoulliNB                        0.45      0.45     0.46        1.15
CalibratedClassifierCV             0.40      0.32     0.47        4.88
DecisionTreeClassifier             0.50      0.49     0.52        3.95
DummyCl

In [3]:
# -------- CONFIGURE --------
subject_id = "10"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    #avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    #print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub10_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.736777 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037200
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010
📂 Processing: preprocessed_epochs/session2_sub10_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.771098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036789
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session3_sub10_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.793956 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036802
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083

✅ Averaged LazyPredict Results for 10 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.45      0.45     0.46       61.86
BaggingClassifier                  0.42      0.39     0.43       19.19
BernoulliNB                        0.48      0.48     0.49        1.22
CalibratedClassifierCV             0.50      0.47     0.53        6.36
DecisionTreeClassifier

In [5]:
# -------- CONFIGURE --------
subject_id = "2"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    #avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    #print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub2_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.778731 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037082
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session2_sub2_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.750958 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036790
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session3_sub2_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.882527 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036950
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

✅ Averaged LazyPredict Results for 2 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.42      0.40     0.43       61.56
BaggingClassifier                  0.43      0.43     0.43       18.38
BernoulliNB                        0.43      0.43     0.43        1.20
CalibratedClassifierCV             0.53      0.52     0.54        5.99
DecisionTreeClassifier             0.47      0.47     0.47        4.02
DummyCl

In [2]:
# -------- CONFIGURE --------
subject_id = "3"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    #avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    #print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub3_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.779506 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037032
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083
📂 Processing: preprocessed_epochs/session2_sub3_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 37, number of negative: 43
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.656200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036936
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.462500 -> initscore=-0.150282
[LightGBM] [Info] Start training from score -0.150282
📂 Processing: preprocessed_epochs/session3_sub3_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.692731 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037710
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

✅ Averaged LazyPredict Results for 3 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.40      0.40     0.42       56.86
BaggingClassifier                  0.33      0.29     0.38       16.30
BernoulliNB                        0.35      0.35     0.35        2.42
CalibratedClassifierCV             0.35      0.29     0.40        6.30
DecisionTreeClassifier             0.38      0.38     0.40        3.92
DummyCl

In [6]:
# -------- CONFIGURE --------
subject_id = "4"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    #avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    #print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub4_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.674038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037033
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session2_sub4_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.671089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6034161
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session3_sub4_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.684160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037011
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

✅ Averaged LazyPredict Results for 4 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.53      0.53     0.54       61.40
BaggingClassifier                  0.47      0.45     0.48       18.06
BernoulliNB                        0.57      0.56     0.57        1.17
CalibratedClassifierCV             0.52      0.51     0.53        5.78
DecisionTreeClassifier             0.52      0.51     0.51        4.39
DummyCl

In [7]:
# -------- CONFIGURE --------
subject_id = "5"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub5_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 41, number of negative: 39
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.653808 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036994
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.512500 -> initscore=0.050010
[LightGBM] [Info] Start training from score 0.050010
📂 Processing: preprocessed_epochs/session2_sub5_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 37, number of negative: 43
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.641678 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037571
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.462500 -> initscore=-0.150282
[LightGBM] [Info] Start training from score -0.150282
📂 Processing: preprocessed_epochs/session3_sub5_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.656895 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037085
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

✅ Averaged LazyPredict Results for 5 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.52      0.51     0.54       59.03
BaggingClassifier                  0.43      0.41     0.46       16.86
BernoulliNB                        0.45      0.43     0.48        1.14
CalibratedClassifierCV             0.53      0.53     0.57        5.11
DecisionTreeClassifier             0.45      0.45     0.44        4.44
DummyCl

In [3]:
# -------- CONFIGURE --------
subject_id = "7"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub7_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 42, number of negative: 38
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.682872 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036601
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.525000 -> initscore=0.100083
[LightGBM] [Info] Start training from score 0.100083
📂 Processing: preprocessed_epochs/session3_sub7_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 38, number of negative: 42
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.647390 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036932
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.475000 -> initscore=-0.100083
[LightGBM] [Info] Start training from score -0.100083

✅ Averaged LazyPredict Results for 7 (across 2 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.55      0.54     0.58       58.17
BaggingClassifier                  0.55      0.54     0.54       16.88
BernoulliNB                        0.55      0.54     0.57        1.19
CalibratedClassifierCV             0.45      0.36     0.53        6.36
DecisionTreeClassifier 

In [4]:
# -------- CONFIGURE --------
subject_id = "8"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub8_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.748172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036617
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
📂 Processing: preprocessed_epochs/session2_sub8_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.668343 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037086
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session3_sub8_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 43, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.647159 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036746
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.537500 -> initscore=0.150282
[LightGBM] [Info] Start training from score 0.150282

✅ Averaged LazyPredict Results for 8 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.50      0.50     0.50       57.48
BaggingClassifier                  0.57      0.56     0.58       15.91
BernoulliNB                        0.52      0.51     0.53        1.17
CalibratedClassifierCV             0.47      0.45     0.48        5.00
DecisionTreeClassifier   

In [5]:
# -------- CONFIGURE --------
subject_id = "9"  # e.g., "sub01"
data_dir = "preprocessed_epochs"
# ---------------------------

subject_files = sorted(glob(os.path.join(data_dir, f"session*_sub{subject_id}_twist_MI_epochs.pkl")))

results = []

for file in subject_files:
    try:
        print(f"📂 Processing: {file}")
        X, y = joblib.load(file)

        # Flatten and scale
        X = X.reshape(X.shape[0], -1)
        X = StandardScaler().fit_transform(X)

        # Split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # LazyPredict
        clf = LazyClassifier(verbose=0, ignore_warnings=True)
        models, _ = clf.fit(X_train, X_test, y_train, y_test)

        models['Session'] = os.path.basename(file)
        results.append(models)

        # Immediately delete large arrays to free memory
        del X, y, X_train, X_test, y_train, y_test, models

    except Exception as e:
        print(f"❌ Failed on {file}: {e}")

# -------------------------
# Aggregate + Save Results
# -------------------------
if results:
    df_all = pd.concat(results)
    avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)

    print(f"\n✅ Averaged LazyPredict Results for {subject_id} (across {len(results)} sessions):")
    print(avg_df[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']])

    avg_df.to_csv(f"{subject_id}_lazy_average_results.csv")
    print(f"📁 Saved: {subject_id}_lazy_average_results.csv")
else:
    print("⚠️ No results generated.")


📂 Processing: preprocessed_epochs/session1_sub9_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.669118 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6036961
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
📂 Processing: preprocessed_epochs/session2_sub9_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 39, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.630622 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037111
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.487500 -> initscore=-0.050010
[LightGBM] [Info] Start training from score -0.050010
📂 Processing: preprocessed_epochs/session3_sub9_twist_MI_epochs.pkl


  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 40, number of negative: 40
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.660026 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6037511
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 213071
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000

✅ Averaged LazyPredict Results for 9 (across 3 sessions):
                               Accuracy  F1 Score  ROC AUC  Time Taken
Model                                                                 
AdaBoostClassifier                 0.45      0.45     0.45       58.20
BaggingClassifier                  0.42      0.40     0.42       15.51
BernoulliNB                        0.45      0.45     0.45        1.13
CalibratedClassifierCV             0.43      0.43     0.44        5.10
DecisionTreeClassifier             0.47      0.46     0.46        3.96
DummyCl

In [None]:
#Per-Subject Averaging across all subjects

import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from lazypredict.Supervised import LazyClassifier
import pandas as pd

# Path to preprocessed .pkl EEG files
data_dir = "preprocessed_epochs"
all_files = sorted(glob(os.path.join(data_dir, "session*_sub*_twist_MI_epochs.pkl")))

# Group .pkl files by subject
subject_sessions = {}
for file in all_files:
    basename = os.path.basename(file)
    subject_id = basename.split("_")[1]  # sub10, sub11 etc.
    subject_sessions.setdefault(subject_id, []).append(file)

# To store all subjects' averaged results
all_subject_averages = []

for subject, files in subject_sessions.items():
    print(f"\n🔄 Processing subject: {subject}")
    session_results = []

    for file in sorted(files):
        try:
            print(f"📂  Loading: {os.path.basename(file)}")
            X, y = joblib.load(file)

            # Flatten and scale
            X = X.reshape(X.shape[0], -1)
            X = StandardScaler().fit_transform(X)

            # Split
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # LazyPredict
            clf = LazyClassifier(verbose=0, ignore_warnings=True)
            models, _ = clf.fit(X_train, X_test, y_train, y_test)

            models["Session"] = os.path.basename(file)
            session_results.append(models)

            # Cleanup
            del X, y, X_train, X_test, y_train, y_test, models

        except Exception as e:
            print(f"❌ Failed on {file}: {e}")

    # Compute average over sessions
    if session_results:
        df_all = pd.concat(session_results)
        avg_df = df_all.groupby(df_all.index).mean(numeric_only=True)
        avg_df["Subject"] = subject
        all_subject_averages.append(avg_df)

        print(f"✅ Finished {subject}, averaged across {len(session_results)} sessions")

# -------------------------
# Save All Results
# -------------------------
if all_subject_averages:
    result_df = pd.concat(all_subject_averages)
    result_df.to_csv("lazy_all_subjects_averaged_results.csv")
    print("\n📁 Saved final results to lazy_all_subjects_averaged_results.csv")
else:
    print("\n⚠️ No subject data processed.")


In [None]:
#Entire Dataset

import os
import numpy as np
import joblib
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from lazypredict.Supervised import LazyClassifier
import pandas as pd

# Folder with all session .pkl files
data_dir = "preprocessed_epochs"
all_files = sorted(glob(os.path.join(data_dir, "session*_sub*_twist_MI_epochs.pkl")))

# Load and flatten samples one by one to conserve memory
X_list = []
y_list = []

for file in all_files:
    try:
        print(f"📂 Loading: {os.path.basename(file)}")
        X, y = joblib.load(file)
        X = X.reshape(X.shape[0], -1)  # Flatten (channels*time)
        X_list.append(X)
        y_list.append(y)

        # Free memory
        del X, y

    except Exception as e:
        print(f"❌ Error in {file}: {e}")

# Concatenate at once
print("🔧 Concatenating all samples...")
X_all = np.vstack(X_list)
y_all = np.hstack(y_list)

# Free the lists
del X_list, y_list

# Scale
scaler = StandardScaler()
X_all_scaled = scaler.fit_transform(X_all)

# Split for training/testing
X_train, X_test, y_train, y_test = train_test_split(X_all_scaled, y_all, test_size=0.2, random_state=42)

# LazyPredict classification
print("🧠 Running LazyPredict on full dataset...")
clf = LazyClassifier(verbose=0, ignore_warnings=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

# Save results
#models.to_csv("lazy_full_dataset_results.csv")
#print("\n✅ LazyPredict complete. Results saved to 'lazy_full_dataset_results.csv'.")

# Optional: Show top results
print(models[['Accuracy', 'F1 Score', 'ROC AUC', 'Time Taken']].head())


📂 Loading: session1_sub10_twist_MI_epochs.pkl
📂 Loading: session1_sub11_twist_MI_epochs.pkl
📂 Loading: session1_sub12_twist_MI_epochs.pkl
📂 Loading: session1_sub13_twist_MI_epochs.pkl
📂 Loading: session1_sub14_twist_MI_epochs.pkl
📂 Loading: session1_sub15_twist_MI_epochs.pkl
📂 Loading: session1_sub16_twist_MI_epochs.pkl
📂 Loading: session1_sub17_twist_MI_epochs.pkl
📂 Loading: session1_sub18_twist_MI_epochs.pkl
📂 Loading: session1_sub19_twist_MI_epochs.pkl
📂 Loading: session1_sub1_twist_MI_epochs.pkl
📂 Loading: session1_sub20_twist_MI_epochs.pkl
📂 Loading: session1_sub21_twist_MI_epochs.pkl
📂 Loading: session1_sub22_twist_MI_epochs.pkl
📂 Loading: session1_sub24_twist_MI_epochs.pkl
📂 Loading: session1_sub25_twist_MI_epochs.pkl
📂 Loading: session1_sub2_twist_MI_epochs.pkl
📂 Loading: session1_sub3_twist_MI_epochs.pkl
📂 Loading: session1_sub4_twist_MI_epochs.pkl
📂 Loading: session1_sub5_twist_MI_epochs.pkl
📂 Loading: session1_sub6_twist_MI_epochs.pkl
📂 Loading: session1_sub7_twist_MI_epochs