In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import f1_score, classification_report
from mne.decoding import CSP
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, LearningRateScheduler

# --- 0) Config ---
PREPROCESSED_PATH = '/kaggle/input/preprocessed-mtc-aic/mtc-aic3_dataset_preprocessed'
output_dir = './models'
os.makedirs(output_dir, exist_ok=True)

eeg_indices = [1, 3]  # C3 (index1), C4 (index3)

# --- 1) Load metadata ---
train_df = pd.read_csv(os.path.join(PREPROCESSED_PATH, 'train.csv'))
val_df = pd.read_csv(os.path.join(PREPROCESSED_PATH, 'validation.csv'))

# Filter MI trials
train_mi = train_df[train_df['task'] == 'MI']
val_mi = val_df[val_df['task'] == 'MI']

# --- 2) Data Loading Functions ---
def get_trial_path(row, split):
    """Reconstruct absolute path to NPZ file"""
    return os.path.join(
        PREPROCESSED_PATH,
        row['task'],
        split,
        row['subject_id'],
        str(row['trial_session']),
        f"trial_{row['trial']}.npz"
    )

def load_trials(df, split):
    """Load preprocessed EEG data"""
    X_list, y_list = [], []
    for _, row in df.iterrows():
        # Build correct file path
        npz_path = get_trial_path(row, split)
        
        # Load data
        with np.load(npz_path) as data:
            eeg_data = data['data']
        
        # Select C3/C4 channels
        data_sub = eeg_data[:, eeg_indices].T  # (2, time)
        
        # Binary label
        label_bin = 1 if row['label'] == 'Right' else 0
        
        X_list.append(data_sub)
        y_list.append(label_bin)
    
    return np.array(X_list), np.array(y_list)

# Load data
X_train_all, y_train = load_trials(train_mi, 'train')
X_val_all, y_val = load_trials(val_mi, 'validation')

# Transpose to (n_trials, time, channels)
X_train_raw = X_train_all.transpose(0, 2, 1).astype('float32')  # (n, 2250, 2)
X_val_raw = X_val_all.transpose(0, 2, 1).astype('float32')      # (n, 2250, 2)

# --- 3) CSP Processing ---
csp = CSP(n_components=2, log=False, norm_trace=False)
X_train_csp_input = X_train_raw.transpose(0, 2, 1).astype('float64')
csp.fit(X_train_csp_input, y_train)

W = csp.filters_[:2]

def apply_csp(X):
    X_t = X.transpose(0, 2, 1)
    transformed = np.stack([W.dot(ep) for ep in X_t], axis=0)
    return transformed.transpose(0, 2, 1).astype('float32')

Xtr_csp = apply_csp(X_train_raw)
Xvl_csp = apply_csp(X_val_raw)

# For 2D models
Xtr_csp_2d = Xtr_csp[..., np.newaxis]
Xvl_csp_2d = Xvl_csp[..., np.newaxis]

# --- 4) One-hot labels ---
ytr_oh = keras.utils.to_categorical(y_train, 2)
yvl_oh = keras.utils.to_categorical(y_val, 2)

# --- 5) Data augmentation ---
def aug_gen(X, y, seed=0, batch_size=32):
    n = X.shape[0]
    rng = np.random.RandomState(seed)
    while True:
        idx = rng.randint(0, n, batch_size)
        bx, by = X[idx], y[idx]
        # Gentle noise augmentation
        noise = rng.normal(0, 0.1, bx.shape) * np.std(bx, axis=(1,2), keepdims=True)
        yield bx + noise, by

# Calculate steps
batch_size = 64
steps_raw = int(np.ceil(len(X_train_raw) / batch_size))
steps_csp1d = int(np.ceil(len(Xtr_csp) / batch_size))
steps_csp2d = int(np.ceil(len(Xtr_csp_2d) / batch_size))

train_gen_raw = aug_gen(X_train_raw, ytr_oh, seed=0, batch_size=batch_size)
train_gen_csp1d = aug_gen(Xtr_csp, ytr_oh, seed=1, batch_size=batch_size)
train_gen_csp2d = aug_gen(Xtr_csp_2d, ytr_oh, seed=2, batch_size=batch_size)

# --- 6) Cosine LR schedule ---
def cosine_lr(epoch, lr_max=1e-3, epochs=100):
    """Cosine learning rate decay"""
    return lr_max * (1 + np.cos(np.pi * epoch / epochs)) / 2

# --- 7) F1 Score Metric ---
class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.tp = self.add_weight(name='tp', initializer='zeros')
        self.fp = self.add_weight(name='fp', initializer='zeros')
        self.fn = self.add_weight(name='fn', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.argmax(y_pred, axis=1)
        y_true = tf.argmax(y_true, axis=1)
        
        tp = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 1), tf.equal(y_pred, 1)), tf.float32))
        fp = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 0), tf.equal(y_pred, 1)), tf.float32))
        fn = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 1), tf.equal(y_pred, 0)), tf.float32))
        
        self.tp.assign_add(tp)
        self.fp.assign_add(fp)
        self.fn.assign_add(fn)

    def result(self):
        p = self.tp / (self.tp + self.fp + tf.keras.backend.epsilon())
        r = self.tp / (self.tp + self.fn + tf.keras.backend.epsilon())
        return 2 * p * r / (p + r + tf.keras.backend.epsilon())

    def reset_states(self):
        self.tp.assign(0)
        self.fp.assign(0)
        self.fn.assign(0)

# --- 8) Callback factory ---
def get_callbacks(name):
    return [
        EarlyStopping(monitor="val_f1_score", mode="max", patience=15, restore_best_weights=True),
        ModelCheckpoint(os.path.join(output_dir, f"best_{name}.h5"),
                        monitor="val_f1_score", mode="max", save_best_only=True),
        CSVLogger(os.path.join(output_dir, f"log_{name}.csv")),
        LearningRateScheduler(cosine_lr)
    ]

# --- 9) Simplified Model Builders ---
def build_model_simple(input_shape):
    """Simplified model for EEG classification"""
    model = keras.Sequential([
        layers.Input(input_shape),
        layers.Conv1D(32, 50, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling1D(4),
        layers.Conv1D(64, 25, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling1D(4),
        layers.Conv1D(128, 10, activation='relu'),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
        layers.Dropout(0.5),
        layers.Dense(2, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', F1Score()])
    return model

def build_model_lstm(input_shape):
    """LSTM model for temporal patterns"""
    model = keras.Sequential([
        layers.Input(input_shape),
        layers.Conv1D(32, 10, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling1D(2),
        layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
        layers.Bidirectional(layers.LSTM(32)),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(2, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', F1Score()])
    return model

def build_model_csp(input_shape):
    """Model specifically designed for CSP features"""
    model = keras.Sequential([
        layers.Input(input_shape),
        layers.Conv1D(64, 10, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling1D(3),
        layers.Conv1D(128, 5, activation='relu'),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
        layers.Dropout(0.5),
        layers.Dense(2, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', F1Score()])
    return model

# --- 10) Train & evaluate ---
builders = {
    'simple_raw': build_model_simple,
    'lstm_csp': build_model_lstm,
    'csp_enhanced': build_model_csp,
}

results = {}
shape_raw   = X_train_raw.shape[1:]    # (2250, 2)
shape_csp1d = Xtr_csp.shape[1:]        # (2250, 2)
shape_csp2d = Xtr_csp_2d.shape[1:]     # (2250, 2, 1)

for name, build_fn in builders.items():
    print(f"\n>>> Training {name}")
    
    # Select appropriate input shape and data
    if 'csp' in name:
        if name == 'csp_enhanced':
            model = build_fn(shape_csp1d)
            gen, steps, val_x = train_gen_csp1d, steps_csp1d, Xvl_csp
        else:  # lstm_csp
            model = build_fn(shape_csp1d)
            gen, steps, val_x = train_gen_csp1d, steps_csp1d, Xvl_csp
    else:  # simple_raw
        model = build_fn(shape_raw)
        gen, steps, val_x = train_gen_raw, steps_raw, X_val_raw

    history = model.fit(
        gen, 
        steps_per_epoch=steps,
        validation_data=(val_x, yvl_oh),
        epochs=100,
        callbacks=get_callbacks(name),
        verbose=2
    )

    # Evaluate
    preds = np.argmax(model.predict(val_x), axis=1)
    f1 = f1_score(y_val, preds)
    print(f"{name} → val F1 = {f1:.4f}")
    print(classification_report(y_val, preds, target_names=["Left", "Right"]))
    results[name] = (f1, model)

# --- 11) Save best model ---
if results:
    best_name, (best_f1, best_model) = max(results.items(), key=lambda kv: kv[1][0])
    print(f"\n=== Final best: {best_name} (F1={best_f1:.4f}) ===")
    best_model.save(os.path.join(output_dir, 'best_final.h5'))
else:
    print("No models trained successfully")

Computing rank from data with rank=None
    Using tolerance 0.45 (2.2e-16 eps * 2 dim * 1e+15  max singular value)
    Estimated rank (data): 2
    data: rank 2 computed from 2 data channels with 0 projectors
Reducing data rank from 2 -> 2
Estimating class=0 covariance using EMPIRICAL
Done.
Estimating class=1 covariance using EMPIRICAL
Done.

>>> Training simple_raw
Epoch 1/100


2025-06-26 06:13:37.963197: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


38/38 - 18s - 465ms/step - accuracy: 0.5078 - f1_score: 0.5015 - loss: 0.7840 - val_accuracy: 0.4600 - val_f1_score: 0.5424 - val_loss: 0.7281 - learning_rate: 0.0010
Epoch 2/100
38/38 - 12s - 310ms/step - accuracy: 0.5062 - f1_score: 0.5407 - loss: 0.7454 - val_accuracy: 0.4600 - val_f1_score: 0.5424 - val_loss: 0.7680 - learning_rate: 9.9975e-04
Epoch 3/100
38/38 - 11s - 294ms/step - accuracy: 0.5181 - f1_score: 0.5769 - loss: 0.7203 - val_accuracy: 0.5000 - val_f1_score: 0.6032 - val_loss: 0.7297 - learning_rate: 9.9877e-04
Epoch 4/100
38/38 - 12s - 308ms/step - accuracy: 0.5086 - f1_score: 0.5305 - loss: 0.7128 - val_accuracy: 0.4600 - val_f1_score: 0.5091 - val_loss: 0.6972 - learning_rate: 9.9655e-04
Epoch 5/100
38/38 - 11s - 301ms/step - accuracy: 0.5152 - f1_score: 0.5879 - loss: 0.7142 - val_accuracy: 0.5600 - val_f1_score: 0.0000e+00 - val_loss: 0.6826 - learning_rate: 9.9262e-04
Epoch 6/100
38/38 - 11s - 298ms/step - accuracy: 0.5325 - f1_score: 0.5894 - loss: 0.6994 - val_a

In [None]:
import numpy as np
import pandas as pd
import os
import joblib
import pywt
from scipy import signal, stats
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from tensorflow.keras.models import Model, save_model
from tensorflow.keras.layers import (Input, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, 
                                     Dense, Flatten, LSTM, Reshape, Dropout, BatchNormalization,
                                     Attention, Multiply, GlobalAveragePooling1D, Permute, 
                                     concatenate, SimpleRNN, GRU)
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers
from tensorflow.keras.activations import elu, relu
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from mne.decoding import CSP

# Configuration
DATA_ROOT = '/kaggle/input/preprocessed-mtc-aic'  # Root directory of the dataset
BASE_DATA_PATH = os.path.join(DATA_ROOT, 'mtc-aic3_dataset_preprocessed')  # Full path to preprocessed data
FS = 250  # Sampling rate
MI_CHANNELS = ['C3', 'CZ', 'C4']  # Focus on central channels for motor imagery
CHANNEL_INDICES = [1, 2, 3]  # Indices of C3, CZ, C4 in the original data
N_CHANNELS = len(MI_CHANNELS)
CLASSES = ['left', 'right']
N_CLASSES = len(CLASSES)
RESULTS_DIR = '/kaggle/working/results'
os.makedirs(RESULTS_DIR, exist_ok=True)

# Load dataframes
train_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'train.csv'))
val_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'validation.csv'))
test_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'test.csv'))

# Filter MI trials and focus on central channels
def filter_mi_channels(df):
    mi_df = df[df['task'] == 'MI'].copy()
    return mi_df

train_mi = filter_mi_channels(train_df)
val_mi = filter_mi_channels(val_df)
test_mi = filter_mi_channels(test_df)

# Load preprocessed data and select MI channels
def load_data(df):
    data = []
    for path in df['processed_path']:
        # Construct full path by joining with DATA_ROOT
        full_path = os.path.join(DATA_ROOT, path.lstrip('./'))
        with np.load(full_path) as npz_file:
            full_data = npz_file['data']
            # Select only central channels (C3, CZ, C4)
            data.append(full_data[:, CHANNEL_INDICES])
    return np.array(data)

X_train_raw = load_data(train_mi)
X_val_raw = load_data(val_mi)
X_test_raw = load_data(test_mi)  # Test data for final predictions

# Convert labels - handle case sensitivity
def map_label(label):
    # Normalize label to lowercase for case-insensitive matching
    normalized = label.strip().lower()
    if normalized == 'left':
        return 0
    elif normalized == 'right':
        return 1
    else:
        raise ValueError(f"Invalid label: {label}")

# Use case-insensitive column lookup
def get_label_column(df):
    for col in df.columns:
        if col.lower() == 'label':
            return col
    return None  # Return None if not found

# Get labels for train and validation
label_col_train = get_label_column(train_mi)
label_col_val = get_label_column(val_mi)

if label_col_train is None or label_col_val is None:
    raise KeyError("No 'label' column found in training or validation data")

y_train = train_mi[label_col_train].apply(map_label).values
y_val = val_mi[label_col_val].apply(map_label).values

# Standardize data
scaler = StandardScaler()
X_train_raw = scaler.fit_transform(
    X_train_raw.reshape(-1, X_train_raw.shape[-1])
).reshape(X_train_raw.shape)
X_val_raw = scaler.transform(
    X_val_raw.reshape(-1, X_val_raw.shape[-1])
).reshape(X_val_raw.shape)
X_test_raw = scaler.transform(
    X_test_raw.reshape(-1, X_test_raw.shape[-1])
).reshape(X_test_raw.shape)

# Frequency bands for feature extraction
FREQ_BANDS = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30),
    'gamma': (30, 45)
}

# Feature extraction functions
def compute_stft(data, nperseg=256, noverlap=192):
    n_trials, n_times, n_channels = data.shape
    f, t, Zxx = signal.stft(data[:, :, 0], fs=FS, nperseg=nperseg, noverlap=noverlap, axis=1)
    n_freq, n_time = Zxx.shape[1:3]
    stft_data = np.zeros((n_trials, n_time, n_freq, n_channels), dtype=np.float32)
    
    for c in range(n_channels):
        _, _, Zxx = signal.stft(data[:, :, c], fs=FS, nperseg=nperseg, noverlap=noverlap, axis=1)
        stft_data[..., c] = np.abs(Zxx).transpose(0, 2, 1)
    return stft_data

# Fixed CSP feature extraction function
def compute_csp_features(X, y, n_components=4):
    """Return full CSP time-series (n_trials, timesteps, n_components)"""
    # Create CSP with transform_into='csp_space' to get time-series output
    # Set log=None as required by transform_into='csp_space'
    csp = CSP(n_components=n_components, reg=None, log=None, norm_trace=False,
              transform_into='csp_space')
    # Input shape: (trials, channels, time)
    X_csp_time = csp.fit_transform(X.transpose(0, 2, 1), y)
    # Output is (trials, components, time) -> transpose to (trials, time, components)
    return X_csp_time.transpose(0, 2, 1), csp

# Compute STFT features
X_train_stft = compute_stft(X_train_raw)
X_val_stft = compute_stft(X_val_raw)

# Print STFT shapes for verification
print("\n" + "="*50)
print("STFT Shape Verification")
print("="*50)
print(f"STFT Training Shape: {X_train_stft.shape}")
print(f"STFT Validation Shape: {X_val_stft.shape}")
print("="*50 + "\n")

# Compute CSP features
X_train_csp, csp = compute_csp_features(X_train_raw, y_train)
# Transform validation set (ensure same format as training)
X_val_csp = csp.transform(X_val_raw.transpose(0, 2, 1)).transpose(0, 2, 1)

# Print CSP shapes for verification
print("\n" + "="*50)
print("CSP Shape Verification")
print("="*50)
print(f"CSP Training Shape: {X_train_csp.shape}")
print(f"CSP Validation Shape: {X_val_csp.shape}")
print("="*50 + "\n")

# Handcrafted feature extraction (focused on MI channels)
def extract_handcrafted_features(X):
    """Extract domain-specific features for traditional ML models"""
    n_trials, n_timesteps, n_channels = X.shape
    features = []
    
    for i in range(n_trials):
        trial_features = []
        
        # Channel-specific features
        for ch in range(n_channels):
            channel_data = X[i, :, ch]
            
            # Time-domain features
            trial_features.append(np.mean(channel_data))
            trial_features.append(np.std(channel_data))
            trial_features.append(stats.skew(channel_data))
            trial_features.append(stats.kurtosis(channel_data))
            trial_features.append(np.median(np.abs(channel_data)))
            
            # Frequency-domain features
            f, Pxx = signal.welch(channel_data, fs=FS, nperseg=256)
            for band, (low, high) in FREQ_BANDS.items():
                band_mask = (f >= low) & (f <= high)
                trial_features.append(np.log1p(np.sum(Pxx[band_mask])))
        
        # Cross-channel features (C3-C4 asymmetry - most important for MI)
        c3_data = X[i, :, 0]  # C3 is first channel
        c4_data = X[i, :, 2]  # C4 is third channel
            
        # Time-domain asymmetry
        trial_features.append(np.mean(c3_data - c4_data))
        trial_features.append(np.mean(np.abs(c3_data) - np.mean(np.abs(c4_data))))
        
        # Frequency-domain asymmetry
        for band in FREQ_BANDS:
            c3_band = trial_features[5*0 + 4 + list(FREQ_BANDS.keys()).index(band) + 1]
            c4_band = trial_features[5*2 + 4 + list(FREQ_BANDS.keys()).index(band) + 1]
            trial_features.append(c3_band - c4_band)
        
        # Hjorth parameters
        def hjorth_parameters(data):
            diff1 = np.diff(data)
            diff2 = np.diff(diff1)
            var0 = np.var(data)
            var1 = np.var(diff1)
            var2 = np.var(diff2)
            activity = var0
            mobility = np.sqrt(var1 / var0)
            complexity = np.sqrt(var2 / var1) / mobility
            return activity, mobility, complexity
        
        for ch, name in zip([0, 2], ['C3', 'C4']):
            activity, mobility, complexity = hjorth_parameters(X[i, :, ch])
            trial_features.extend([activity, mobility, complexity])
        
        features.append(trial_features)
    
    return np.array(features)

# Data augmentation for CNN models
def augment_data(X, y, augmentation_factor=1):
    X_aug = [X]
    y_aug = [y]
    
    for _ in range(augmentation_factor):
        # Gaussian noise
        noise = np.random.normal(0, 0.05, X.shape)
        X_aug.append(X + noise)
        y_aug.append(y)
        
        # Time warping
        warp_factor = 0.2
        warp_points = int(X.shape[1] * warp_factor)
        X_warped = np.zeros_like(X)
        for i in range(X.shape[0]):
            start = np.random.randint(0, warp_points)
            end = np.random.randint(X.shape[1] - warp_points, X.shape[1])
            for c in range(X.shape[2]):
                X_warped[i, :, c] = np.interp(
                    np.arange(X.shape[1]),
                    np.linspace(0, X.shape[1]-1, num=end-start),
                    X[i, start:end, c]
                )
        X_aug.append(X_warped)
        y_aug.append(y)
    
    return np.vstack(X_aug), np.hstack(y_aug)

# Model definitions
def build_model1(input_shape):
    model = tf.keras.Sequential([
        Conv1D(32, 5, activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(64, 5, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.5),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

def build_model2(input_shape):
    model = tf.keras.Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

def build_model3(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='elu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='elu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(128, 5, activation='elu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(256, 5, activation='elu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    x = Dense(256, activation='elu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model4(input_shape):
    model = tf.keras.Sequential([
        Conv1D(32, 5, activation='elu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(64, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(128, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='elu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.4),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

def build_model5(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(128, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = LSTM(64, return_sequences=True)(x)
    x = LSTM(32)(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation='relu')(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model6(input_shape):
    model = tf.keras.Sequential([
        Conv1D(32, 5, activation='elu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(64, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(128, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(256, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Flatten(),
        Dense(512, activation='elu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.6),
        Dense(256, activation='elu'),
        Dense(128, activation='elu'),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

def build_model7(input_shape):
    inputs = Input(shape=input_shape)
    
    # CNN Branch
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    
    # Self-Attention
    attention = Dense(1, activation='tanh')(x)
    attention = Flatten()(attention)
    attention = tf.keras.activations.softmax(attention)
    attention = Reshape((-1, 1))(attention)
    x = Multiply()([x, attention])
    
    x = Flatten()(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model8(input_shape):
    inputs = Input(shape=input_shape)
    
    # CNN Part
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    
    # MLP Part
    x = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model9(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    regression = Dense(1, activation='linear')(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, [outputs, regression])

# Simplified Regression Model
def build_model9_simple(input_shape):
    """Single-output version without regression head"""
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model10(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    feature_extractor = Model(inputs, x)
    return feature_extractor

def build_model11(input_shape):
    return build_model10(input_shape)

def build_model12(input_shape):
    # SVM model will use handcrafted features
    pass

def build_model13(input_shape):
    # Logistic Regression model will use handcrafted features
    pass

# Model configurations with updated STFT_CNN input shape
models_config = [
    {"name": "SimpleCNN", "build_func": build_model1, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "STFT_CNN", "build_func": build_model2, "input_shape": (37, 129, N_CHANNELS), "rep": "stft", "type": "keras"},  # Updated shape
    {"name": "7Conv_ELU", "build_func": build_model3, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "3Conv_ELU", "build_func": build_model4, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_LSTM", "build_func": build_model5, "input_shape": (2250, 4), "rep": "csp", "type": "keras"},
    {"name": "CSP_CNN", "build_func": build_model6, "input_shape": (2250, 4), "rep": "csp", "type": "keras"},
    {"name": "CNN_Attention", "build_func": build_model7, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_MLP", "build_func": build_model8, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_Regression", "build_func": build_model9_simple, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_RF", "build_func": build_model10, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "hybrid"},
    {"name": "CNN_XGB", "build_func": build_model11, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "hybrid"},
    {"name": "Handcrafted_SVM", "type": "handcrafted"},
    {"name": "Handcrafted_LR", "type": "handcrafted"},
]

# Training and evaluation functions
def train_evaluate_keras_model(model_cfg, X_train, y_train, X_val, y_val):
    model_name = model_cfg["name"]
    print(f"\n{'='*50}\nTraining {model_name}\n{'='*50}")
    
    # Data preparation
    if model_cfg["rep"] == "raw":
        X_tr, X_v = X_train, X_val
    elif model_cfg["rep"] == "stft":
        X_tr, X_v = X_train_stft, X_val_stft
    elif model_cfg["rep"] == "csp":
        X_tr, X_v = X_train_csp, X_val_csp
    
    # Data augmentation
    if model_cfg["rep"] == "raw":
        X_tr, y_tr = augment_data(X_tr, y_train, augmentation_factor=2)
    else:
        X_tr, y_tr = X_tr, y_train

    expected_shape = model_cfg["input_shape"]
    if X_tr.shape[1:] != expected_shape:
        raise ValueError(f"Input shape mismatch! Expected {expected_shape}, "f"got {X_tr.shape[1:]} for {model_cfg['name']}")
    
    # Build and compile model
    model = model_cfg["build_func"](model_cfg["input_shape"])
    
    # Special compilation for regression hybrid
    if model_name == "CNN_Regression":
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss=['sparse_categorical_crossentropy', 'mse'],
                      metrics={'dense_2': 'accuracy'},
                      loss_weights=[0.9, 0.1])
    else:
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
    
    # Callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        ModelCheckpoint(os.path.join(RESULTS_DIR, f"{model_name}_best_model.keras"), 
                        save_best_only=True, monitor='val_accuracy'),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    ]
    
    # Train model
    history = model.fit(
        X_tr, y_tr,
        validation_data=(X_v, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )
    
    # Save final model
    model.save(os.path.join(RESULTS_DIR, f"{model_name}_final_model.keras"))
    
    y_proba = model.predict(X_v)
    y_pred = np.argmax(y_proba, axis=1)

    # Calculate weighted F1 score
    f1_weighted = f1_score(y_val, y_pred, average='weighted')
    print(f"\n✅ {model_cfg['name']} Validation Weighted F1: {f1_weighted:.4f}")
    
    return generate_reports(model_name, y_val, y_pred, y_proba)

def train_evaluate_hybrid_model(model_cfg, X_train, y_train, X_val, y_val):
    model_name = model_cfg["name"]
    print(f"\nTraining {model_name} with CNN feature extraction")
    
    # Feature extraction with batch processing
    feature_extractor = model_cfg["build_func"](model_cfg["input_shape"])
    
    # Process training data in batches
    batch_size = 64  # Reduced batch size to prevent memory issues
    X_train_features = []
    for i in range(0, len(X_train), batch_size):
        batch = X_train[i:i+batch_size]
        features = feature_extractor.predict(batch, verbose=0)
        X_train_features.append(features)
    X_train_features = np.vstack(X_train_features)
    
    # Process validation data in batches
    X_val_features = []
    for i in range(0, len(X_val), batch_size):
        batch = X_val[i:i+batch_size]
        features = feature_extractor.predict(batch, verbose=0)
        X_val_features.append(features)
    X_val_features = np.vstack(X_val_features)
    
    # Train traditional model
    if "RF" in model_name:
        model = RandomForestClassifier(n_estimators=300, max_depth=15, 
                                      min_samples_split=5, n_jobs=-1,
                                      class_weight='balanced', random_state=42)
    else:  # XGBoost
        # Calculate scale_pos_weight for binary classification
        num_pos = np.sum(y_train == 1)
        num_neg = len(y_train) - num_pos
        scale_pos_weight = num_neg / num_pos if num_pos > 0 else 1.0
        
        model = XGBClassifier(n_estimators=500, max_depth=8, learning_rate=0.05,
                             subsample=0.8, colsample_bytree=0.8, 
                             scale_pos_weight=scale_pos_weight,
                             use_label_encoder=False, eval_metric='logloss')
    
    model.fit(X_train_features, y_train)
    
    # Evaluate
    y_pred = model.predict(X_val_features)
    y_proba = model.predict_proba(X_val_features) if hasattr(model, "predict_proba") else None
    
    # Save model
    joblib.dump(model, os.path.join(RESULTS_DIR, f"{model_name}_model.joblib"))
    
    return generate_reports(model_name, y_val, y_pred, y_proba)

def train_evaluate_handcrafted_model(model_cfg, X_train, y_train, X_val, y_val):
    model_name = model_cfg["name"]
    print(f"\nTraining {model_name} with hand-crafted features")
    
    # Extract features
    X_train_feats = extract_handcrafted_features(X_train)
    X_val_feats = extract_handcrafted_features(X_val)
    
    # Build model
    if "SVM" in model_name:
        # SVM with class weighting
        num_pos = np.sum(y_train == 1)
        num_neg = len(y_train) - num_pos
        class_weight = {0: 1, 1: num_neg/num_pos} if num_pos > 0 else {0: 1, 1: 1}
        
        model = SVC(
            C=1.0,
            kernel='rbf',
            gamma='scale',
            class_weight=class_weight,
            probability=True,
            random_state=42
        )
    else:  # Logistic Regression
        model = LogisticRegression(
            penalty='elasticnet',
            solver='saga',
            C=1.0,
            l1_ratio=0.5,
            class_weight='balanced',
            max_iter=1000,
            random_state=42
        )
    
    # Train model
    model.fit(X_train_feats, y_train)
    
    # Evaluate
    y_pred = model.predict(X_val_feats)
    y_proba = model.predict_proba(X_val_feats) if hasattr(model, "predict_proba") else None
    
    # Save model
    joblib.dump(model, os.path.join(RESULTS_DIR, f"{model_name}_model.joblib"))
    
    return generate_reports(model_cfg['name'], y_val, y_pred, y_proba)

def generate_reports(model_name, y_true, y_pred, y_proba=None):
    # Generate classification report
    clf_report = classification_report(y_true, y_pred, target_names=CLASSES)
    cm = confusion_matrix(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    acc = accuracy_score(y_true, y_pred)
    
    print(f"\nClassification Report for {model_name}:\n{clf_report}")
    print(f"Confusion Matrix for {model_name}:\n{cm}")
    print(f"✅ Validation F1 Score: {f1:.4f}, Accuracy: {acc:.4f}")
    
    # Create enhanced confusion matrix with percentages and counts
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
    cm_percent = np.round(cm_percent, 1)  # Round to 1 decimal place
    
    plt.figure(figsize=(8, 6))
    ax = sns.heatmap(cm_percent, annot=False, fmt=".1f", cmap="Blues",
                    xticklabels=CLASSES, yticklabels=CLASSES,
                    cbar=True, linewidths=1, linecolor='gray')
    
    # Add percentage annotations
    for i in range(len(CLASSES)):
        for j in range(len(CLASSES)):
            color = "white" if cm_percent[i, j] > 50 else "black"
            ax.text(j + 0.5, i + 0.3, 
                    f"{cm_percent[i, j]:.1f}%", 
                    ha='center', va='center', 
                    color=color, fontsize=10)
            # Add count below percentage
            ax.text(j + 0.5, i + 0.7, 
                    f"({cm[i, j]})", 
                    ha='center', va='center', 
                    color=color, fontsize=9)
    
    plt.title(f'{model_name} Confusion Matrix\nAccuracy: {acc:.4f}, F1: {f1:.4f}', fontsize=14)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10, rotation=0)
    
    # Add border
    for _, spine in ax.spines.items():
        spine.set_visible(True)
        spine.set_linewidth(1.5)
    
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, f'{model_name}_cm.png'), dpi=300)
    plt.close()
    
    # Save classification report to text file
    with open(os.path.join(RESULTS_DIR, f'{model_name}_report.txt'), 'w') as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"Accuracy: {acc:.4f}\n")
        f.write(f"F1 Score (weighted): {f1:.4f}\n\n")
        f.write("Classification Report:\n")
        f.write(clf_report)
        f.write("\n\nConfusion Matrix (counts):\n")
        f.write(np.array2string(cm, separator=', '))
    
    return {
        'model': model_name,
        'accuracy': acc,
        'f1_score': f1,
        'y_true': y_true,
        'y_pred': y_pred,
        'y_proba': y_proba
    }

# Main training and validation loop
results = []
for model_cfg in models_config:
    try:
        if model_cfg["type"] == "keras":
            model_result = train_evaluate_keras_model(
                model_cfg, 
                X_train_raw, y_train,
                X_val_raw, y_val
            )
        elif model_cfg["type"] == "hybrid":
            model_result = train_evaluate_hybrid_model(
                model_cfg, 
                X_train_raw, y_train,
                X_val_raw, y_val
            )
        elif model_cfg["type"] == "handcrafted":
            model_result = train_evaluate_handcrafted_model(
                model_cfg, 
                X_train_raw, y_train,
                X_val_raw, y_val
            )
        results.append(model_result)
    except Exception as e:
        print(f"Error training {model_cfg['name']}: {str(e)}")

# Save results
results_df = pd.DataFrame([{
    'model': r['model'],
    'accuracy': r['accuracy'],
    'f1_score': r['f1_score']
} for r in results])

results_df.to_csv(os.path.join(RESULTS_DIR, 'model_results.csv'), index=False)

# Find best models
best_acc_model = results_df.loc[results_df['accuracy'].idxmax()]
best_f1_model = results_df.loc[results_df['f1_score'].idxmax()]

print("\n\n=== FINAL RESULTS ===")
print(f"Best Accuracy Model: {best_acc_model['model']} (Accuracy: {best_acc_model['accuracy']:.4f})")
print(f"Best F1 Model: {best_f1_model['model']} (F1 Score: {best_f1_model['f1_score']:.4f})")

# Generate predictions on test set using the best model
print("\nGenerating predictions on test set...")
best_model_name = best_f1_model['model']
best_model_type = next((m['type'] for m in models_config if m['name'] == best_model_name), None)

def predict_in_batches(model, X, batch_size=64):
    """Make predictions in batches to avoid memory issues"""
    predictions = []
    for i in range(0, len(X), batch_size):
        batch = X[i:i+batch_size]
        batch_pred = model.predict(batch, verbose=0)
        predictions.append(batch_pred)
    return np.vstack(predictions)

if best_model_type == "keras":
    # Load the best keras model
    model = tf.keras.models.load_model(os.path.join(RESULTS_DIR, f"{best_model_name}_best_model.keras"))
    test_predictions = predict_in_batches(model, X_test_raw)
    test_pred_classes = np.argmax(test_predictions, axis=1)
elif best_model_type == "hybrid":
    # Load feature extractor and classifier
    model_cfg = next(m for m in models_config if m['name'] == best_model_name)
    feature_extractor = model_cfg["build_func"](model_cfg["input_shape"])
    
    # Extract test features in batches
    X_test_features = []
    batch_size = 64
    for i in range(0, len(X_test_raw), batch_size):
        batch = X_test_raw[i:i+batch_size]
        features = feature_extractor.predict(batch, verbose=0)
        X_test_features.append(features)
    X_test_features = np.vstack(X_test_features)
    
    model = joblib.load(os.path.join(RESULTS_DIR, f"{best_model_name}_model.joblib"))
    test_pred_classes = model.predict(X_test_features)
else:  # handcrafted
    model = joblib.load(os.path.join(RESULTS_DIR, f"{best_model_name}_model.joblib"))
    X_test_feats = extract_handcrafted_features(X_test_raw)
    test_pred_classes = model.predict(X_test_feats)

# Map predictions back to class names
test_pred_labels = [CLASSES[i] for i in test_pred_classes]

# Save test predictions
test_predictions_df = pd.DataFrame({
    'processed_path': test_mi['processed_path'],
    'prediction': test_pred_labels
})
test_predictions_df.to_csv(os.path.join(RESULTS_DIR, 'test_predictions.csv'), index=False)

print("\nTest predictions saved to:", os.path.join(RESULTS_DIR, 'test_predictions.csv'))
print("Detailed results saved to:", RESULTS_DIR)

In [None]:
# ============== PART 1: MODELS 1-6 ==============

import numpy as np
import pandas as pd
import os
import joblib
import pywt
from scipy import signal, stats
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from tensorflow.keras.models import Model, save_model
from tensorflow.keras.layers import (Input, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, 
                                     Dense, Flatten, LSTM, Reshape, Dropout, BatchNormalization,
                                     Attention, Multiply, GlobalAveragePooling1D, Permute, 
                                     concatenate, SimpleRNN, GRU)
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers
from tensorflow.keras.activations import elu, relu
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from mne.decoding import CSP

# Configuration
DATA_ROOT = '/kaggle/input/preprocessed-mtc-aic'  # Root directory of the dataset
BASE_DATA_PATH = os.path.join(DATA_ROOT, 'mtc-aic3_dataset_preprocessed')  # Full path to preprocessed data
FS = 250  # Sampling rate
MI_CHANNELS = ['C3', 'CZ', 'C4']  # Focus on central channels for motor imagery
CHANNEL_INDICES = [1, 2, 3]  # Indices of C3, CZ, C4 in the original data
N_CHANNELS = len(MI_CHANNELS)
CLASSES = ['left', 'right']
N_CLASSES = len(CLASSES)
RESULTS_DIR = '/kaggle/working/results'
os.makedirs(RESULTS_DIR, exist_ok=True)

# Load dataframes
train_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'train.csv'))
val_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'validation.csv'))
test_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'test.csv'))

# Filter MI trials and focus on central channels
def filter_mi_channels(df):
    mi_df = df[df['task'] == 'MI'].copy()
    return mi_df

train_mi = filter_mi_channels(train_df)
val_mi = filter_mi_channels(val_df)
test_mi = filter_mi_channels(test_df)

# Load preprocessed data and select MI channels
def load_data(df):
    data = []
    for path in df['processed_path']:
        # Construct full path by joining with DATA_ROOT
        full_path = os.path.join(DATA_ROOT, path.lstrip('./'))
        with np.load(full_path) as npz_file:
            full_data = npz_file['data']
            # Select only central channels (C3, CZ, C4)
            data.append(full_data[:, CHANNEL_INDICES])
    return np.array(data)

X_train_raw = load_data(train_mi)
X_val_raw = load_data(val_mi)
X_test_raw = load_data(test_mi)  # Test data for final predictions

# Convert labels - handle case sensitivity
def map_label(label):
    # Normalize label to lowercase for case-insensitive matching
    normalized = label.strip().lower()
    if normalized == 'left':
        return 0
    elif normalized == 'right':
        return 1
    else:
        raise ValueError(f"Invalid label: {label}")

# Use case-insensitive column lookup
def get_label_column(df):
    for col in df.columns:
        if col.lower() == 'label':
            return col
    return None  # Return None if not found

# Get labels for train and validation
label_col_train = get_label_column(train_mi)
label_col_val = get_label_column(val_mi)

if label_col_train is None or label_col_val is None:
    raise KeyError("No 'label' column found in training or validation data")

y_train = train_mi[label_col_train].apply(map_label).values
y_val = val_mi[label_col_val].apply(map_label).values

# Standardize data
scaler = StandardScaler()
X_train_raw = scaler.fit_transform(
    X_train_raw.reshape(-1, X_train_raw.shape[-1])
).reshape(X_train_raw.shape)
X_val_raw = scaler.transform(
    X_val_raw.reshape(-1, X_val_raw.shape[-1])
).reshape(X_val_raw.shape)
X_test_raw = scaler.transform(
    X_test_raw.reshape(-1, X_test_raw.shape[-1])
).reshape(X_test_raw.shape)

# Frequency bands for feature extraction
FREQ_BANDS = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30),
    'gamma': (30, 45)
}

# Feature extraction functions - UPDATED STFT FUNCTION
def compute_stft(data, nperseg=256, noverlap=192):
    n_trials, n_times, n_channels = data.shape
    # Desired time dimension (32)
    n_time_desired = 32
    
    # Initialize array with fixed dimensions
    stft_data = np.zeros((n_trials, n_time_desired, 129, n_channels), dtype=np.float32)
    
    for c in range(n_channels):
        # Compute STFT without boundary extension
        _, _, Zxx = signal.stft(
            data[:, :, c], 
            fs=FS, 
            nperseg=nperseg, 
            noverlap=noverlap,
            axis=1,
            boundary=None  # Disable boundary extension
        )
        Zxx = np.abs(Zxx)
        
        # Current time dimension
        current_time = Zxx.shape[-1]
        
        # Pad or truncate to exactly 32 time steps
        if current_time < n_time_desired:
            # Pad with zeros at the end
            pad_width = n_time_desired - current_time
            Zxx = np.pad(Zxx, ((0, 0), (0, 0), (0, pad_width)), mode='constant')
        elif current_time > n_time_desired:
            # Truncate to desired length
            Zxx = Zxx[..., :n_time_desired]
        
        # Transpose to (trials, time, freq)
        stft_data[..., c] = Zxx.transpose(0, 2, 1)
    
    return stft_data

# Fixed CSP feature extraction function - CORRECTED TO USE 3 COMPONENTS
def compute_csp_features(X, y, n_components=3):  # Changed to 3 components
    """Return full CSP time-series (n_trials, timesteps, n_components)"""
    # Create CSP with transform_into='csp_space' to get time-series output
    # Set log=None as required by transform_into='csp_space'
    csp = CSP(n_components=n_components, reg=None, log=None, norm_trace=False,
              transform_into='csp_space')
    # Input shape: (trials, channels, time)
    X_csp_time = csp.fit_transform(X.transpose(0, 2, 1), y)
    # Output is (trials, components, time) -> transpose to (trials, time, components)
    return X_csp_time.transpose(0, 2, 1), csp

# Compute STFT features
X_train_stft = compute_stft(X_train_raw)
X_val_stft = compute_stft(X_val_raw)

# Print STFT shapes for verification
print("\n" + "="*50)
print("STFT Shape Verification")
print("="*50)
print(f"STFT Training Shape: {X_train_stft.shape}")
print(f"STFT Validation Shape: {X_val_stft.shape}")
print("="*50 + "\n")

# Compute CSP features - now using 3 components
X_train_csp, csp = compute_csp_features(X_train_raw, y_train)
# Transform validation set (ensure same format as training)
X_val_csp = csp.transform(X_val_raw.transpose(0, 2, 1)).transpose(0, 2, 1)

# Print CSP shapes for verification
print("\n" + "="*50)
print("CSP Shape Verification")
print("="*50)
print(f"CSP Training Shape: {X_train_csp.shape}")
print(f"CSP Validation Shape: {X_val_csp.shape}")
print("="*50 + "\n")

# Create unified dataset dictionary for all representations
datasets = {
    'raw': (X_train_raw, X_val_raw),
    'stft': (X_train_stft, X_val_stft),
    'csp': (X_train_csp, X_val_csp)
}

# Handcrafted feature extraction (focused on MI channels)
def extract_handcrafted_features(X):
    """Extract domain-specific features for traditional ML models"""
    n_trials, n_timesteps, n_channels = X.shape
    features = []
    
    for i in range(n_trials):
        trial_features = []
        
        # Channel-specific features
        for ch in range(n_channels):
            channel_data = X[i, :, ch]
            
            # Time-domain features
            trial_features.append(np.mean(channel_data))
            trial_features.append(np.std(channel_data))
            trial_features.append(stats.skew(channel_data))
            trial_features.append(stats.kurtosis(channel_data))
            trial_features.append(np.median(np.abs(channel_data)))
            
            # Frequency-domain features
            f, Pxx = signal.welch(channel_data, fs=FS, nperseg=256)
            for band, (low, high) in FREQ_BANDS.items():
                band_mask = (f >= low) & (f <= high)
                trial_features.append(np.log1p(np.sum(Pxx[band_mask])))
        
        # Cross-channel features (C3-C4 asymmetry - most important for MI)
        c3_data = X[i, :, 0]  # C3 is first channel
        c4_data = X[i, :, 2]  # C4 is third channel
            
        # Time-domain asymmetry
        trial_features.append(np.mean(c3_data - c4_data))
        trial_features.append(np.mean(np.abs(c3_data) - np.mean(np.abs(c4_data))))
        
        # Frequency-domain asymmetry
        for band in FREQ_BANDS:
            c3_band = trial_features[5*0 + 4 + list(FREQ_BANDS.keys()).index(band) + 1]
            c4_band = trial_features[5*2 + 4 + list(FREQ_BANDS.keys()).index(band) + 1]
            trial_features.append(c3_band - c4_band)
        
        # Hjorth parameters
        def hjorth_parameters(data):
            diff1 = np.diff(data)
            diff2 = np.diff(diff1)
            var0 = np.var(data)
            var1 = np.var(diff1)
            var2 = np.var(diff2)
            activity = var0
            mobility = np.sqrt(var1 / var0)
            complexity = np.sqrt(var2 / var1) / mobility
            return activity, mobility, complexity
        
        for ch, name in zip([0, 2], ['C3', 'C4']):
            activity, mobility, complexity = hjorth_parameters(X[i, :, ch])
            trial_features.extend([activity, mobility, complexity])
        
        features.append(trial_features)
    
    return np.array(features)

# Data augmentation for CNN models
def augment_data(X, y, augmentation_factor=1):
    X_aug = [X]
    y_aug = [y]
    
    for _ in range(augmentation_factor):
        # Gaussian noise
        noise = np.random.normal(0, 0.05, X.shape)
        X_aug.append(X + noise)
        y_aug.append(y)
        
        # Time warping
        warp_factor = 0.2
        warp_points = int(X.shape[1] * warp_factor)
        X_warped = np.zeros_like(X)
        for i in range(X.shape[0]):
            start = np.random.randint(0, warp_points)
            end = np.random.randint(X.shape[1] - warp_points, X.shape[1])
            for c in range(X.shape[2]):
                X_warped[i, :, c] = np.interp(
                    np.arange(X.shape[1]),
                    np.linspace(0, X.shape[1]-1, num=end-start),
                    X[i, start:end, c]
                )
        X_aug.append(X_warped)
        y_aug.append(y)
    
    return np.vstack(X_aug), np.hstack(y_aug)

# Model definitions
def build_model1(input_shape):
    model = tf.keras.Sequential([
        Conv1D(32, 5, activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(64, 5, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.5),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

def build_model2(input_shape):
    model = tf.keras.Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

def build_model3(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='elu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='elu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(128, 5, activation='elu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(256, 5, activation='elu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    x = Dense(256, activation='elu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model4(input_shape):
    model = tf.keras.Sequential([
        Conv1D(32, 5, activation='elu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(64, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(128, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='elu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.4),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

def build_model5(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(128, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = LSTM(64, return_sequences=True)(x)
    x = LSTM(32)(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation='relu')(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model6(input_shape):
    model = tf.keras.Sequential([
        Conv1D(32, 5, activation='elu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(64, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(128, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Conv1D(256, 5, activation='elu'),
        BatchNormalization(),
        MaxPooling1D(2),
        Flatten(),
        Dense(512, activation='elu', kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.6),
        Dense(256, activation='elu'),
        Dense(128, activation='elu'),
        Dense(N_CLASSES, activation='softmax')
    ])
    return model

# Model configurations with corrected input shapes
models_config = [
    {"name": "SimpleCNN", "build_func": build_model1, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "STFT_CNN", "build_func": build_model2, "input_shape": (32, 129, N_CHANNELS), "rep": "stft", "type": "keras"},  # Corrected shape
    {"name": "7Conv_ELU", "build_func": build_model3, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "3Conv_ELU", "build_func": build_model4, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_LSTM", "build_func": build_model5, "input_shape": (2250, 3), "rep": "csp", "type": "keras"},  # Corrected to 3 components
    {"name": "CSP_CNN", "build_func": build_model6, "input_shape": (2250, 3), "rep": "csp", "type": "keras"},  # Corrected to 3 components
]

# Training and evaluation functions
def train_evaluate_keras_model(model_cfg, datasets, y_train, y_val):
    model_name = model_cfg["name"]
    rep = model_cfg["rep"]
    print(f"\n{'='*50}\nTraining {model_name} (using {rep} representation)\n{'='*50}")
    
    # Get data from representation dictionary
    X_tr, X_v = datasets[rep]
    
    # Data augmentation only for raw data
    if rep == "raw":
        X_tr, y_tr = augment_data(X_tr, y_train, augmentation_factor=2)
    else:
        y_tr = y_train

    # Verify input shape
    expected_shape = model_cfg["input_shape"]
    print(f"Expected input shape: {expected_shape}, Actual shape: {X_tr.shape[1:]}")
    
    if X_tr.shape[1:] != expected_shape:
        raise ValueError(
            f"Input shape mismatch in {model_name}! Expected {expected_shape}, "
            f"got {X_tr.shape[1:]}. Full shape: {X_tr.shape}"
        )
    
    # Build and compile model
    model = model_cfg["build_func"](model_cfg["input_shape"])
    
    # Special compilation for regression hybrid
    if model_name == "CNN_Regression":
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss=['sparse_categorical_crossentropy', 'mse'],
                      metrics={'dense_2': 'accuracy'},
                      loss_weights=[0.9, 0.1])
    else:
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
    
    # Callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        ModelCheckpoint(os.path.join(RESULTS_DIR, f"{model_name}_best_model.keras"), 
                        save_best_only=True, monitor='val_accuracy'),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    ]
    
    # Train model
    history = model.fit(
        X_tr, y_tr,
        validation_data=(X_v, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )
    
    # Save final model
    model.save(os.path.join(RESULTS_DIR, f"{model_name}_final_model.keras"))
    
    y_proba = model.predict(X_v)
    y_pred = np.argmax(y_proba, axis=1)

    # Calculate weighted F1 score
    f1_weighted = f1_score(y_val, y_pred, average='weighted')
    print(f"\n✅ {model_cfg['name']} Validation Weighted F1: {f1_weighted:.4f}")
    
    return generate_reports(model_name, y_val, y_pred, y_proba)

def generate_reports(model_name, y_true, y_pred, y_proba=None):
    # Generate classification report
    clf_report = classification_report(y_true, y_pred, target_names=CLASSES)
    cm = confusion_matrix(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    acc = accuracy_score(y_true, y_pred)
    
    print(f"\nClassification Report for {model_name}:\n{clf_report}")
    print(f"Confusion Matrix for {model_name}:\n{cm}")
    print(f"✅ Validation F1 Score: {f1:.4f}, Accuracy: {acc:.4f}")
    
    # Create enhanced confusion matrix with percentages and counts
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
    cm_percent = np.round(cm_percent, 1)  # Round to 1 decimal place
    
    plt.figure(figsize=(8, 6))
    ax = sns.heatmap(cm_percent, annot=False, fmt=".1f", cmap="Blues",
                    xticklabels=CLASSES, yticklabels=CLASSES,
                    cbar=True, linewidths=1, linecolor='gray')
    
    # Add percentage annotations
    for i in range(len(CLASSES)):
        for j in range(len(CLASSES)):
            color = "white" if cm_percent[i, j] > 50 else "black"
            ax.text(j + 0.5, i + 0.3, 
                    f"{cm_percent[i, j]:.1f}%", 
                    ha='center', va='center', 
                    color=color, fontsize=10)
            # Add count below percentage
            ax.text(j + 0.5, i + 0.7, 
                    f"({cm[i, j]})", 
                    ha='center', va='center', 
                    color=color, fontsize=9)
    
    plt.title(f'{model_name} Confusion Matrix\nAccuracy: {acc:.4f}, F1: {f1:.4f}', fontsize=14)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10, rotation=0)
    
    # Add border
    for _, spine in ax.spines.items():
        spine.set_visible(True)
        spine.set_linewidth(1.5)
    
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, f'{model_name}_cm.png'), dpi=300)
    plt.close()
    
    # Save classification report to text file
    with open(os.path.join(RESULTS_DIR, f'{model_name}_report.txt'), 'w') as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"Accuracy: {acc:.4f}\n")
        f.write(f"F1 Score (weighted): {f1:.4f}\n\n")
        f.write("Classification Report:\n")
        f.write(clf_report)
        f.write("\n\nConfusion Matrix (counts):\n")
        f.write(np.array2string(cm, separator=', '))
    
    return {
        'model': model_name,
        'accuracy': acc,
        'f1_score': f1,
        'y_true': y_true,
        'y_pred': y_pred,
        'y_proba': y_proba
    }

# Main training and validation loop for models 1-6
results = []
for model_cfg in models_config:
    try:
        if model_cfg["type"] == "keras":
            model_result = train_evaluate_keras_model(
                model_cfg, 
                datasets,  # Pass representations dictionary
                y_train, 
                y_val
            )
            results.append(model_result)
    except Exception as e:
        print(f"\n❌ Error training {model_cfg['name']}: {str(e)}")
        import traceback
        traceback.print_exc()

# Save results
results_df = pd.DataFrame([{
    'model': r['model'],
    'accuracy': r['accuracy'],
    'f1_score': r['f1_score']
} for r in results])

results_df.to_csv(os.path.join(RESULTS_DIR, 'model_results_part1.csv'), index=False)
print("\nPart 1 completed. Results saved for models 1-6.")

In [None]:
# ============== PART 2: MODELS 7-13 ==============

import numpy as np
import pandas as pd
import os
import joblib
import pywt
from scipy import signal, stats
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from tensorflow.keras.models import Model, save_model
from tensorflow.keras.layers import (Input, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, 
                                     Dense, Flatten, LSTM, Reshape, Dropout, BatchNormalization,
                                     Attention, Multiply, GlobalAveragePooling1D, Permute, 
                                     concatenate, SimpleRNN, GRU)
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers
from tensorflow.keras.activations import elu, relu
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from mne.decoding import CSP

# Configuration
DATA_ROOT = '/kaggle/input/preprocessed-mtc-aic'  # Root directory of the dataset
BASE_DATA_PATH = os.path.join(DATA_ROOT, 'mtc-aic3_dataset_preprocessed')  # Full path to preprocessed data
FS = 250  # Sampling rate
MI_CHANNELS = ['C3', 'CZ', 'C4']  # Focus on central channels for motor imagery
CHANNEL_INDICES = [1, 2, 3]  # Indices of C3, CZ, C4 in the original data
N_CHANNELS = len(MI_CHANNELS)
CLASSES = ['left', 'right']
N_CLASSES = len(CLASSES)
RESULTS_DIR = '/kaggle/working/results'
os.makedirs(RESULTS_DIR, exist_ok=True)

# Load dataframes
train_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'train.csv'))
val_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'validation.csv'))
test_df = pd.read_csv(os.path.join(BASE_DATA_PATH, 'test.csv'))

# Filter MI trials and focus on central channels
def filter_mi_channels(df):
    mi_df = df[df['task'] == 'MI'].copy()
    return mi_df

train_mi = filter_mi_channels(train_df)
val_mi = filter_mi_channels(val_df)
test_mi = filter_mi_channels(test_df)

# Load preprocessed data and select MI channels
def load_data(df):
    data = []
    for path in df['processed_path']:
        # Construct full path by joining with DATA_ROOT
        full_path = os.path.join(DATA_ROOT, path.lstrip('./'))
        with np.load(full_path) as npz_file:
            full_data = npz_file['data']
            # Select only central channels (C3, CZ, C4)
            data.append(full_data[:, CHANNEL_INDICES])
    return np.array(data)

X_train_raw = load_data(train_mi)
X_val_raw = load_data(val_mi)
X_test_raw = load_data(test_mi)  # Test data for final predictions

# Convert labels - handle case sensitivity
def map_label(label):
    # Normalize label to lowercase for case-insensitive matching
    normalized = label.strip().lower()
    if normalized == 'left':
        return 0
    elif normalized == 'right':
        return 1
    else:
        raise ValueError(f"Invalid label: {label}")

# Use case-insensitive column lookup
def get_label_column(df):
    for col in df.columns:
        if col.lower() == 'label':
            return col
    return None  # Return None if not found

# Get labels for train and validation
label_col_train = get_label_column(train_mi)
label_col_val = get_label_column(val_mi)

if label_col_train is None or label_col_val is None:
    raise KeyError("No 'label' column found in training or validation data")

y_train = train_mi[label_col_train].apply(map_label).values
y_val = val_mi[label_col_val].apply(map_label).values

# Standardize data
scaler = StandardScaler()
X_train_raw = scaler.fit_transform(
    X_train_raw.reshape(-1, X_train_raw.shape[-1])
).reshape(X_train_raw.shape)
X_val_raw = scaler.transform(
    X_val_raw.reshape(-1, X_val_raw.shape[-1])
).reshape(X_val_raw.shape)
X_test_raw = scaler.transform(
    X_test_raw.reshape(-1, X_test_raw.shape[-1])
).reshape(X_test_raw.shape)

# Frequency bands for feature extraction
FREQ_BANDS = {
    'delta': (1, 4),
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30),
    'gamma': (30, 45)
}

# Feature extraction functions
def compute_stft(data, nperseg=256, noverlap=192):
    n_trials, n_times, n_channels = data.shape
    f, t, Zxx = signal.stft(data[:, :, 0], fs=FS, nperseg=nperseg, noverlap=noverlap, axis=1)
    n_freq, n_time = Zxx.shape[1:3]
    stft_data = np.zeros((n_trials, n_time, n_freq, n_channels), dtype=np.float32)
    
    for c in range(n_channels):
        _, _, Zxx = signal.stft(data[:, :, c], fs=FS, nperseg=nperseg, noverlap=noverlap, axis=1)
        stft_data[..., c] = np.abs(Zxx).transpose(0, 2, 1)
    return stft_data

# Fixed CSP feature extraction function
def compute_csp_features(X, y, n_components=4):
    """Return full CSP time-series (n_trials, timesteps, n_components)"""
    # Create CSP with transform_into='csp_space' to get time-series output
    # Set log=None as required by transform_into='csp_space'
    csp = CSP(n_components=n_components, reg=None, log=None, norm_trace=False,
              transform_into='csp_space')
    # Input shape: (trials, channels, time)
    X_csp_time = csp.fit_transform(X.transpose(0, 2, 1), y)
    # Output is (trials, components, time) -> transpose to (trials, time, components)
    return X_csp_time.transpose(0, 2, 1), csp

# Compute STFT features
X_train_stft = compute_stft(X_train_raw)
X_val_stft = compute_stft(X_val_raw)

# Compute CSP features
X_train_csp, csp = compute_csp_features(X_train_raw, y_train)
# Transform validation set (ensure same format as training)
X_val_csp = csp.transform(X_val_raw.transpose(0, 2, 1)).transpose(0, 2, 1)

# Handcrafted feature extraction (focused on MI channels)
def extract_handcrafted_features(X):
    """Extract domain-specific features for traditional ML models"""
    n_trials, n_timesteps, n_channels = X.shape
    features = []
    
    for i in range(n_trials):
        trial_features = []
        
        # Channel-specific features
        for ch in range(n_channels):
            channel_data = X[i, :, ch]
            
            # Time-domain features
            trial_features.append(np.mean(channel_data))
            trial_features.append(np.std(channel_data))
            trial_features.append(stats.skew(channel_data))
            trial_features.append(stats.kurtosis(channel_data))
            trial_features.append(np.median(np.abs(channel_data)))
            
            # Frequency-domain features
            f, Pxx = signal.welch(channel_data, fs=FS, nperseg=256)
            for band, (low, high) in FREQ_BANDS.items():
                band_mask = (f >= low) & (f <= high)
                trial_features.append(np.log1p(np.sum(Pxx[band_mask])))
        
        # Cross-channel features (C3-C4 asymmetry - most important for MI)
        c3_data = X[i, :, 0]  # C3 is first channel
        c4_data = X[i, :, 2]  # C4 is third channel
            
        # Time-domain asymmetry
        trial_features.append(np.mean(c3_data - c4_data))
        trial_features.append(np.mean(np.abs(c3_data) - np.mean(np.abs(c4_data))))
        
        # Frequency-domain asymmetry
        for band in FREQ_BANDS:
            c3_band = trial_features[5*0 + 4 + list(FREQ_BANDS.keys()).index(band) + 1]
            c4_band = trial_features[5*2 + 4 + list(FREQ_BANDS.keys()).index(band) + 1]
            trial_features.append(c3_band - c4_band)
        
        # Hjorth parameters
        def hjorth_parameters(data):
            diff1 = np.diff(data)
            diff2 = np.diff(diff1)
            var0 = np.var(data)
            var1 = np.var(diff1)
            var2 = np.var(diff2)
            activity = var0
            mobility = np.sqrt(var1 / var0)
            complexity = np.sqrt(var2 / var1) / mobility
            return activity, mobility, complexity
        
        for ch, name in zip([0, 2], ['C3', 'C4']):
            activity, mobility, complexity = hjorth_parameters(X[i, :, ch])
            trial_features.extend([activity, mobility, complexity])
        
        features.append(trial_features)
    
    return np.array(features)

# Model definitions for 7-13
def build_model7(input_shape):
    inputs = Input(shape=input_shape)
    
    # CNN Branch
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    
    # Self-Attention
    attention = Dense(1, activation='tanh')(x)
    attention = Flatten()(attention)
    attention = tf.keras.activations.softmax(attention)
    attention = Reshape((-1, 1))(attention)
    x = Multiply()([x, attention])
    
    x = Flatten()(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model8(input_shape):
    inputs = Input(shape=input_shape)
    
    # CNN Part
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    
    # MLP Part
    x = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model9_simple(input_shape):
    """Single-output version without regression head"""
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    outputs = Dense(N_CLASSES, activation='softmax')(x)
    return Model(inputs, outputs)

def build_model10(input_shape):
    inputs = Input(shape=input_shape)
    x = Conv1D(32, 5, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Conv1D(64, 5, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    feature_extractor = Model(inputs, x)
    return feature_extractor

def build_model11(input_shape):
    return build_model10(input_shape)

# Model configurations for 7-13
models_config = [
    {"name": "CNN_Attention", "build_func": build_model7, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_MLP", "build_func": build_model8, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_Regression", "build_func": build_model9_simple, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "keras"},
    {"name": "CNN_RF", "build_func": build_model10, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "hybrid"},
    {"name": "CNN_XGB", "build_func": build_model11, "input_shape": (2250, N_CHANNELS), "rep": "raw", "type": "hybrid"},
    {"name": "Handcrafted_SVM", "type": "handcrafted"},
    {"name": "Handcrafted_LR", "type": "handcrafted"},
]

# Training and evaluation functions
def train_evaluate_keras_model(model_cfg, X_train, y_train, X_val, y_val):
    model_name = model_cfg["name"]
    print(f"\n{'='*50}\nTraining {model_name}\n{'='*50}")
    
    # Data preparation
    if model_cfg["rep"] == "raw":
        X_tr, X_v = X_train, X_val
    elif model_cfg["rep"] == "stft":
        X_tr, X_v = X_train_stft, X_val_stft
    elif model_cfg["rep"] == "csp":
        X_tr, X_v = X_train_csp, X_val_csp
    
    # Data augmentation
    if model_cfg["rep"] == "raw":
        X_tr, y_tr = augment_data(X_tr, y_train, augmentation_factor=2)
    else:
        X_tr, y_tr = X_tr, y_train

    expected_shape = model_cfg["input_shape"]
    if X_tr.shape[1:] != expected_shape:
        raise ValueError(f"Input shape mismatch! Expected {expected_shape}, "f"got {X_tr.shape[1:]} for {model_cfg['name']}")
    
    # Build and compile model
    model = model_cfg["build_func"](model_cfg["input_shape"])
    
    # Special compilation for regression hybrid
    if model_name == "CNN_Regression":
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss=['sparse_categorical_crossentropy', 'mse'],
                      metrics={'dense_2': 'accuracy'},
                      loss_weights=[0.9, 0.1])
    else:
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
    
    # Callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        ModelCheckpoint(os.path.join(RESULTS_DIR, f"{model_name}_best_model.keras"), 
                        save_best_only=True, monitor='val_accuracy'),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    ]
    
    # Train model
    history = model.fit(
        X_tr, y_tr,
        validation_data=(X_v, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )
    
    # Save final model
    model.save(os.path.join(RESULTS_DIR, f"{model_name}_final_model.keras"))
    
    y_proba = model.predict(X_v)
    y_pred = np.argmax(y_proba, axis=1)

    # Calculate weighted F1 score
    f1_weighted = f1_score(y_val, y_pred, average='weighted')
    print(f"\n✅ {model_cfg['name']} Validation Weighted F1: {f1_weighted:.4f}")
    
    return generate_reports(model_name, y_val, y_pred, y_proba)

def train_evaluate_hybrid_model(model_cfg, X_train, y_train, X_val, y_val):
    model_name = model_cfg["name"]
    print(f"\nTraining {model_name} with CNN feature extraction")
    
    # Feature extraction with batch processing
    feature_extractor = model_cfg["build_func"](model_cfg["input_shape"])
    
    # Process training data in batches
    batch_size = 64  # Reduced batch size to prevent memory issues
    X_train_features = []
    for i in range(0, len(X_train), batch_size):
        batch = X_train[i:i+batch_size]
        features = feature_extractor.predict(batch, verbose=0)
        X_train_features.append(features)
    X_train_features = np.vstack(X_train_features)
    
    # Process validation data in batches
    X_val_features = []
    for i in range(0, len(X_val), batch_size):
        batch = X_val[i:i+batch_size]
        features = feature_extractor.predict(batch, verbose=0)
        X_val_features.append(features)
    X_val_features = np.vstack(X_val_features)
    
    # Train traditional model
    if "RF" in model_name:
        model = RandomForestClassifier(n_estimators=300, max_depth=15, 
                                      min_samples_split=5, n_jobs=-1,
                                      class_weight='balanced', random_state=42)
    else:  # XGBoost
        # Calculate scale_pos_weight for binary classification
        num_pos = np.sum(y_train == 1)
        num_neg = len(y_train) - num_pos
        scale_pos_weight = num_neg / num_pos if num_pos > 0 else 1.0
        
        model = XGBClassifier(n_estimators=500, max_depth=8, learning_rate=0.05,
                             subsample=0.8, colsample_bytree=0.8, 
                             scale_pos_weight=scale_pos_weight,
                             use_label_encoder=False, eval_metric='logloss')
    
    model.fit(X_train_features, y_train)
    
    # Evaluate
    y_pred = model.predict(X_val_features)
    y_proba = model.predict_proba(X_val_features) if hasattr(model, "predict_proba") else None
    
    # Save model
    joblib.dump(model, os.path.join(RESULTS_DIR, f"{model_name}_model.joblib"))
    
    return generate_reports(model_name, y_val, y_pred, y_proba)

def train_evaluate_handcrafted_model(model_cfg, X_train, y_train, X_val, y_val):
    model_name = model_cfg["name"]
    print(f"\nTraining {model_name} with hand-crafted features")
    
    # Extract features
    X_train_feats = extract_handcrafted_features(X_train)
    X_val_feats = extract_handcrafted_features(X_val)
    
    # Build model
    if "SVM" in model_name:
        # SVM with class weighting
        num_pos = np.sum(y_train == 1)
        num_neg = len(y_train) - num_pos
        class_weight = {0: 1, 1: num_neg/num_pos} if num_pos > 0 else {0: 1, 1: 1}
        
        model = SVC(
            C=1.0,
            kernel='rbf',
            gamma='scale',
            class_weight=class_weight,
            probability=True,
            random_state=42
        )
    else:  # Logistic Regression
        model = LogisticRegression(
            penalty='elasticnet',
            solver='saga',
            C=1.0,
            l1_ratio=0.5,
            class_weight='balanced',
            max_iter=1000,
            random_state=42
        )
    
    # Train model
    model.fit(X_train_feats, y_train)
    
    # Evaluate
    y_pred = model.predict(X_val_feats)
    y_proba = model.predict_proba(X_val_feats) if hasattr(model, "predict_proba") else None
    
    # Save model
    joblib.dump(model, os.path.join(RESULTS_DIR, f"{model_name}_model.joblib"))
    
    return generate_reports(model_cfg['name'], y_val, y_pred, y_proba)

def generate_reports(model_name, y_true, y_pred, y_proba=None):
    # Generate classification report
    clf_report = classification_report(y_true, y_pred, target_names=CLASSES)
    cm = confusion_matrix(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    acc = accuracy_score(y_true, y_pred)
    
    print(f"\nClassification Report for {model_name}:\n{clf_report}")
    print(f"Confusion Matrix for {model_name}:\n{cm}")
    print(f"✅ Validation F1 Score: {f1:.4f}, Accuracy: {acc:.4f}")
    
    # Create enhanced confusion matrix with percentages and counts
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
    cm_percent = np.round(cm_percent, 1)  # Round to 1 decimal place
    
    plt.figure(figsize=(8, 6))
    ax = sns.heatmap(cm_percent, annot=False, fmt=".1f", cmap="Blues",
                    xticklabels=CLASSES, yticklabels=CLASSES,
                    cbar=True, linewidths=1, linecolor='gray')
    
    # Add percentage annotations
    for i in range(len(CLASSES)):
        for j in range(len(CLASSES)):
            color = "white" if cm_percent[i, j] > 50 else "black"
            ax.text(j + 0.5, i + 0.3, 
                    f"{cm_percent[i, j]:.1f}%", 
                    ha='center', va='center', 
                    color=color, fontsize=10)
            # Add count below percentage
            ax.text(j + 0.5, i + 0.7, 
                    f"({cm[i, j]})", 
                    ha='center', va='center', 
                    color=color, fontsize=9)
    
    plt.title(f'{model_name} Confusion Matrix\nAccuracy: {acc:.4f}, F1: {f1:.4f}', fontsize=14)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10, rotation=0)
    
    # Add border
    for _, spine in ax.spines.items():
        spine.set_visible(True)
        spine.set_linewidth(1.5)
    
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, f'{model_name}_cm.png'), dpi=300)
    plt.close()
    
    # Save classification report to text file
    with open(os.path.join(RESULTS_DIR, f'{model_name}_report.txt'), 'w') as f:
        f.write(f"Model: {model_name}\n")
        f.write(f"Accuracy: {acc:.4f}\n")
        f.write(f"F1 Score (weighted): {f1:.4f}\n\n")
        f.write("Classification Report:\n")
        f.write(clf_report)
        f.write("\n\nConfusion Matrix (counts):\n")
        f.write(np.array2string(cm, separator=', '))
    
    return {
        'model': model_name,
        'accuracy': acc,
        'f1_score': f1,
        'y_true': y_true,
        'y_pred': y_pred,
        'y_proba': y_proba
    }

# Main training and validation loop for models 7-13
results = []
for model_cfg in models_config:
    try:
        if model_cfg["type"] == "keras":
            model_result = train_evaluate_keras_model(
                model_cfg, 
                X_train_raw, y_train,
                X_val_raw, y_val
            )
        elif model_cfg["type"] == "hybrid":
            model_result = train_evaluate_hybrid_model(
                model_cfg, 
                X_train_raw, y_train,
                X_val_raw, y_val
            )
        elif model_cfg["type"] == "handcrafted":
            model_result = train_evaluate_handcrafted_model(
                model_cfg, 
                X_train_raw, y_train,
                X_val_raw, y_val
            )
        results.append(model_result)
    except Exception as e:
        print(f"Error training {model_cfg['name']}: {str(e)}")

# Save results for Part 2
results_df_part2 = pd.DataFrame([{
    'model': r['model'],
    'accuracy': r['accuracy'],
    'f1_score': r['f1_score']
} for r in results])
results_df_part2.to_csv(os.path.join(RESULTS_DIR, 'model_results_part2.csv'), index=False)

# Load Part 1 results and combine
results_df_part1 = pd.read_csv(os.path.join(RESULTS_DIR, 'model_results_part1.csv'))
results_df = pd.concat([results_df_part1, results_df_part2], ignore_index=True)

# Find best models
best_acc_model = results_df.loc[results_df['accuracy'].idxmax()]
best_f1_model = results_df.loc[results_df['f1_score'].idxmax()]

print("\n\n=== FINAL RESULTS ===")
print(f"Best Accuracy Model: {best_acc_model['model']} (Accuracy: {best_acc_model['accuracy']:.4f})")
print(f"Best F1 Model: {best_f1_model['model']} (F1 Score: {best_f1_model['f1_score']:.4f})")

# Generate predictions on test set using the best model
print("\nGenerating predictions on test set...")
best_model_name = best_f1_model['model']
best_model_type = next((m['type'] for m in models_config if m['name'] == best_model_name), None)

def predict_in_batches(model, X, batch_size=64):
    """Make predictions in batches to avoid memory issues"""
    predictions = []
    for i in range(0, len(X), batch_size):
        batch = X[i:i+batch_size]
        batch_pred = model.predict(batch, verbose=0)
        predictions.append(batch_pred)
    return np.vstack(predictions)

if best_model_type == "keras":
    # Load the best keras model
    model = tf.keras.models.load_model(os.path.join(RESULTS_DIR, f"{best_model_name}_best_model.keras"))
    test_predictions = predict_in_batches(model, X_test_raw)
    test_pred_classes = np.argmax(test_predictions, axis=1)
elif best_model_type == "hybrid":
    # Load feature extractor and classifier
    model_cfg = next(m for m in models_config if m['name'] == best_model_name)
    feature_extractor = model_cfg["build_func"](model_cfg["input_shape"])
    
    # Extract test features in batches
    X_test_features = []
    batch_size = 64
    for i in range(0, len(X_test_raw), batch_size):
        batch = X_test_raw[i:i+batch_size]
        features = feature_extractor.predict(batch, verbose=0)
        X_test_features.append(features)
    X_test_features = np.vstack(X_test_features)
    
    model = joblib.load(os.path.join(RESULTS_DIR, f"{best_model_name}_model.joblib"))
    test_pred_classes = model.predict(X_test_features)
else:  # handcrafted
    model = joblib.load(os.path.join(RESULTS_DIR, f"{best_model_name}_model.joblib"))
    X_test_feats = extract_handcrafted_features(X_test_raw)
    test_pred_classes = model.predict(X_test_feats)

# Map predictions back to class names
test_pred_labels = [CLASSES[i] for i in test_pred_classes]

# Save test predictions
test_predictions_df = pd.DataFrame({
    'processed_path': test_mi['processed_path'],
    'prediction': test_pred_labels
})
test_predictions_df.to_csv(os.path.join(RESULTS_DIR, 'test_predictions.csv'), index=False)

print("\nTest predictions saved to:", os.path.join(RESULTS_DIR, 'test_predictions.csv'))
print("Detailed results saved to:", RESULTS_DIR)

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
import tensorflow as tf

# Function to compute best threshold for a model
def find_best_threshold(model, val_x, y_true_bin):
    probs = model.predict(val_x)[:, 1]  # Probabilities for class "Right"
    thresholds = np.linspace(0.1, 0.9, 81)
    best_f1 = 0
    best_threshold = 0.5
    for t in thresholds:
        preds = (probs > t).astype(int)
        f1 = f1_score(y_true_bin, preds)
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = t
    return best_threshold, best_f1

# Configuration for new preprocessed data
PREPROCESSED_DATASET_ROOT = '/kaggle/input/preprocessed-mtc-aic'
PREPROCESSED_BASE = os.path.join(PREPROCESSED_DATASET_ROOT, 'mtc-aic3_dataset_preprocessed')
VAL_CSV_PATH = os.path.join(PREPROCESSED_BASE, 'validation.csv')

# Load validation metadata
val_df = pd.read_csv(VAL_CSV_PATH)

# Filter for MI tasks only
mi_val_df = val_df[val_df['task'] == 'MI']

# Prepare data storage
X_list = []
y_list = []

# Load and prepare each MI trial from preprocessed files
for _, row in mi_val_df.iterrows():
    # Extract and clean the processed path
    raw_path = row['processed_path']
    
    # Remove leading './' if present
    if raw_path.startswith('./'):
        raw_path = raw_path[2:]
    
    # Construct full path to preprocessed trial data
    trial_path = os.path.join(PREPROCESSED_DATASET_ROOT, raw_path)
    
    # Load trial data (shape: [timesteps, channels])
    trial_data = np.load(trial_path)['data']
    
    # FIX: Select only C3 and C4 channels (indices 1 and 3)
    # Channel order: ['FZ', 'C3', 'CZ', 'C4', 'PZ', 'PO7', 'OZ', 'PO8']
    trial_subset = trial_data[:, [1, 3]]  # Extract ONLY C3 and C4
    
    # Reshape to [1, timesteps, channels] and add to list
    X_list.append(trial_subset[np.newaxis, ...])
    y_list.append(row['label'])

# Combine all trials
X_val_all = np.concatenate(X_list, axis=0)  # [n_trials, 2250, 2] - MATCHES MODEL INPUT
y_val = np.array(y_list)

# Convert labels to binary: "Right" = 1, "Left" = 0
y_val_bin = (y_val == "Right").astype(int)

# Model directories to evaluate
model_dirs = [
    "/kaggle/working/models3",
    "/kaggle/working/models",
    "/kaggle/working/models2"
]

# Evaluate all models
results = {}
for model_dir in model_dirs:
    if not os.path.exists(model_dir):
        print(f"Directory not found: {model_dir}")
        continue
        
    for model_file in os.listdir(model_dir):
        if model_file.endswith(".h5"):
            model_path = os.path.join(model_dir, model_file)
            try:
                print(f"Loading model: {model_path}")
                model = tf.keras.models.load_model(model_path, compile=False)
                print(f"Evaluating model: {model_path}")
                threshold, f1 = find_best_threshold(model, X_val_all, y_val_bin)
                results[model_path] = (threshold, f1)
                print(f"Completed: {model_path} → threshold={threshold:.3f}, F1={f1:.4f}")
            except Exception as e:
                results[model_path] = f"Error: {e}"
                print(f"Error with {model_path}: {e}")

# Print evaluation results
print("\nFinal Results:")
for model_path, result in results.items():
    if isinstance(result, tuple):
        threshold, f1 = result
        print(f"{model_path} → Best threshold = {threshold:.3f}, F1 score = {f1:.4f}")
    else:
        print(f"{model_path} → {result}")

In [54]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import random

# Configuration
PREPROCESSED_DATASET_ROOT = '/kaggle/input/preprocessed-mtc-aic'
PREPROCESSED_BASE = os.path.join(PREPROCESSED_DATASET_ROOT, 'mtc-aic3_dataset_preprocessed')
MODEL_PATH = '/kaggle/working/best/best_simple_raw.h5'  # Your best MI model
TEST_CSV_PATH = os.path.join(PREPROCESSED_BASE, 'test.csv')
SAMPLE_SUBMISSION_PATH = os.path.join(PREPROCESSED_BASE, 'sample_submission.csv')
OUTPUT_PATH = '/kaggle/working/submission.csv'

# Load the trained model
model = tf.keras.models.load_model(MODEL_PATH, compile=False)
print(f"Loaded model: {MODEL_PATH}")

# Load test metadata
test_df = pd.read_csv(TEST_CSV_PATH)
print(f"Loaded test metadata with {len(test_df)} trials")

# Prepare to collect predictions
predictions = []

# Process each test trial
for _, row in test_df.iterrows():
    trial_id = row['id']
    
    if row['task'] == 'MI':
        # Load and preprocess MI trial data
        raw_path = row['processed_path']
        if raw_path.startswith('./'):
            raw_path = raw_path[2:]
        trial_path = os.path.join(PREPROCESSED_DATASET_ROOT, raw_path)
        
        # Load EEG data and select C3/C4 channels
        trial_data = np.load(trial_path)['data']
        trial_subset = trial_data[:, [1, 3]]  # C3 and C4 only
        
        # Prepare for model input
        X = trial_subset[np.newaxis, ...].astype('float32')  # Shape: (1, 2250, 2)
        
        # Make prediction
        prob = model.predict(X, verbose=0)[0][1]  # Probability of "Right"
        label = "Right" if prob > 0.520 else "Left"  # Using 0.5 threshold
    
    else:  # SSVEP task
        # Randomly select a label
        label = random.choice(["Left", "Right", "Forward", "Backward"])
    
    predictions.append((trial_id, label))
    if len(predictions) % 20 == 0:
        print(f"Processed {len(predictions)}/{len(test_df)} trials")

# Create submission DataFrame
submission_df = pd.DataFrame(predictions, columns=['id', 'label'])

# Ensure correct ordering by id
submission_df = submission_df.sort_values('id')

# Save to CSV
submission_df.to_csv(OUTPUT_PATH, index=False)
print(f"Submission saved to {OUTPUT_PATH}")
print("\nFirst 5 predictions:")
print(submission_df.head())

Loaded model: /kaggle/working/best/best_simple_raw.h5
Loaded test metadata with 100 trials
Processed 20/100 trials
Processed 40/100 trials
Processed 60/100 trials
Processed 80/100 trials
Processed 100/100 trials
Submission saved to /kaggle/working/submission.csv

First 5 predictions:
     id  label
0  4901  Right
1  4902  Right
2  4903  Right
3  4904  Right
4  4905  Right


In [44]:
mkdir ./best

In [45]:
cp /kaggle/working/models/best_simple_raw.h5 /kaggle/working/best