In [3]:
from functools import lru_cache

import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.signal import butter, filtfilt, iirnotch
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import (
    Conv1D, MaxPooling1D, Dense, Activation, Flatten, Input, 
    GlobalAveragePooling1D, Multiply, Add, Concatenate, BatchNormalization,
    Reshape, Dropout
)
from tensorflow.keras.models import Model

# Configuration
BASE_PATH = '/kaggle/input/mtcaic3'  # Directory holding train.csv / validation.csv and the EEG files
EEG_CHANNELS = ['C3', 'CZ', 'C4']  # Matching BCI Competition 2008 channels
FS = 250  # Sampling frequency (samples per second)
NOTCH_FREQ = 50.0  # Notch frequency (Hz)
LOWCUT, HIGHCUT = 4.0, 40.0  # Bandpass low / high cutoff (Hz)
TRIAL_LENGTH = 4 * FS  # Use first 4 seconds (1000 samples)

# Preprocessing functions
def butter_bandpass(lowcut, highcut, fs, order=4):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower band edge, in the same frequency units as ``fs``.
        highcut: Upper band edge, in the same frequency units as ``fs``.
        fs: Sampling frequency.
        order: Order handed to ``scipy.signal.butter`` (default 4).

    Returns:
        The ``(b, a)`` coefficient pair produced by ``scipy.signal.butter``.
    """
    nyquist = 0.5 * fs
    # butter() is called without fs, so the edges must be fractions of Nyquist.
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band')

def butter_notch(f0, fs, Q=30):
    """Design a notch filter with ``scipy.signal.iirnotch``.

    Args:
        f0: Frequency to remove, in the same units as ``fs``.
        fs: Sampling frequency.
        Q: Quality factor forwarded to ``iirnotch`` (default 30).

    Returns:
        The ``(b, a)`` coefficient pair produced by ``iirnotch``.
    """
    # iirnotch() is called without fs, so f0 is expressed relative to Nyquist.
    normalized_freq = f0 / (fs / 2)
    return iirnotch(normalized_freq, Q)

def apply_filters(data, fs=FS):
    """Apply notch and bandpass filters to EEG data.

    Both filters run forward-backward (``filtfilt``) along axis 0: the notch
    at ``NOTCH_FREQ`` first, then the ``LOWCUT``-``HIGHCUT`` band-pass.

    Args:
        data: Array to filter; axis 0 is the axis being filtered.
        fs: Sampling frequency used to design both filters (default ``FS``).

    Returns:
        The filtered array.
    """
    filter_stages = (
        butter_notch(NOTCH_FREQ, fs),               # notch stage
        butter_bandpass(LOWCUT, HIGHCUT, fs),       # band-pass stage
    )
    filtered = data
    for numerator, denominator in filter_stages:
        filtered = filtfilt(numerator, denominator, filtered, axis=0)
    return filtered

def normalize(data):
    """Z-score normalization per channel.

    The mean and standard deviation are taken along axis 0, so each column is
    normalized independently.

    Args:
        data: Array-like whose axis 0 is the axis to normalize over.

    Returns:
        ``(data - mean) / std`` per column. A column whose standard deviation
        is exactly zero is returned as zeros instead of NaN.
    """
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant channel has std == 0; dividing by it yields NaN (0/0), which
    # would propagate into the model. Dividing by 1 leaves the centred zeros.
    safe_std = np.where(std == 0, 1.0, std)
    return (data - mean) / safe_std

# Load dataset index
train_df = pd.read_csv(f'{BASE_PATH}/train.csv')
val_df = pd.read_csv(f'{BASE_PATH}/validation.csv')

# Filter MI tasks only (copied so the column added below lands on an independent frame)
train_mi = train_df.loc[lambda frame: frame['task'] == 'MI'].copy()
val_mi = val_df.loc[lambda frame: frame['task'] == 'MI'].copy()

# Encode labels: the mapping is learned on the training rows and reused for validation
le = LabelEncoder()
train_mi['label_encoded'] = le.fit_transform(train_mi['label'])
val_mi['label_encoded'] = le.transform(val_mi['label'])

# Function to load and preprocess trial data
@lru_cache(maxsize=8)
def _read_eeg_file(eeg_path):
    """Read one EEGdata.csv, memoized by path.

    Several trials are sliced out of the same file, so a small LRU cache
    avoids re-parsing that CSV for each of them. Callers must treat the
    returned frame as read-only because it is shared between calls.
    """
    return pd.read_csv(eeg_path)


def load_and_preprocess_trial(row):
    """Load one trial's EEG, filter it, normalize it and crop it.

    Args:
        row: Index row providing ``id``, ``task``, ``subject_id``,
            ``trial_session`` and ``trial`` (1-based position in the file).

    Returns:
        Array holding the first ``TRIAL_LENGTH`` samples of the filtered,
        normalized trial, one column per entry of ``EEG_CHANNELS``.

    Raises:
        ValueError: If the file does not contain a complete trial at the
            requested position.
    """
    samples_per_trial = 2250  # MI: 2250 samples per trial
    last_train_id = 4800      # ids above this are read from the validation folder

    # Determine dataset split
    split = 'train' if row['id'] <= last_train_id else 'validation'

    # Build EEG file path
    eeg_path = f"{BASE_PATH}/{row['task']}/{split}/{row['subject_id']}/{row['trial_session']}/EEGdata.csv"
    eeg_data = _read_eeg_file(eeg_path)

    # Extract trial
    start_idx = (row['trial'] - 1) * samples_per_trial
    end_idx = start_idx + samples_per_trial
    trial_data = eeg_data.iloc[start_idx:end_idx]
    if len(trial_data) != samples_per_trial:
        # A short slice would otherwise yield a shorter array and make the
        # stacked dataset ragged.
        raise ValueError(
            f"Expected {samples_per_trial} samples for trial {row['trial']} "
            f"in {eeg_path}, found {len(trial_data)}"
        )

    # Select relevant channels; column selection returns new data, so the
    # cached frame is never modified by the steps below.
    trial_eeg = trial_data[EEG_CHANNELS].values

    # Filter and normalize on the full trial, then crop.
    trial_eeg = apply_filters(trial_eeg)
    trial_eeg = normalize(trial_eeg)

    # Use first 4 seconds (1000 samples)
    return trial_eeg[:TRIAL_LENGTH]

# Preprocess all trials
def preprocess_dataset(df):
    """Load and preprocess every trial listed in ``df``.

    Rows that fail to load are reported and skipped (best effort). A trial
    and its label are stored together only after both were obtained, so the
    two returned arrays always stay aligned.

    Args:
        df: Trial index with a ``label_encoded`` column plus the columns
            read by ``load_and_preprocess_trial``.

    Returns:
        Tuple ``(X, y)``: the stacked trials and their encoded labels.
    """
    trials, labels = [], []
    skipped = 0
    for _, row in df.iterrows():
        try:
            trial = load_and_preprocess_trial(row)
            label = row['label_encoded']
        except Exception as e:
            skipped += 1
            print(f"Error processing {row}: {str(e)}")
            continue
        # Append only once both values exist; appending the trial first could
        # leave X one element longer than y if the label lookup failed.
        trials.append(trial)
        labels.append(label)
    if skipped:
        # Make silent data loss visible instead of burying it in per-row logs.
        print(f"Skipped {skipped} of {len(df)} trials due to errors")
    return np.array(trials), np.array(labels)

# Preprocess training and validation data
print("Preprocessing training data...")
X_train, y_train = preprocess_dataset(train_mi)
print("Preprocessing validation data...")
X_val, y_val = preprocess_dataset(val_mi)

# Convert to categorical for sigmoid output.
# The width is pinned to the number of classes the encoder learned: without
# num_classes, to_categorical infers it from the largest label present, so a
# split missing the highest class would get a narrower one-hot matrix.
n_classes = len(le.classes_)
y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes=n_classes)
y_val_cat = tf.keras.utils.to_categorical(y_val, num_classes=n_classes)

# Define model components (exact match to original)
def inception_block(x, ince_filter, ince_length, stride, activation):
    """Build a four-branch inception block on top of ``x``.

    Three branches are Conv1D -> BatchNormalization -> Activation with an L2
    kernel regularizer of 0.01. The fourth is MaxPooling1D followed by a
    kernel-size-1 Conv1D, BatchNormalization and Activation. All branches use
    'same' padding and are concatenated on the last axis.

    Args:
        x: Input tensor.
        ince_filter: Four filter counts, one per branch.
        ince_length: Four lengths: kernel sizes for the three convolution
            branches, pool size for the fourth.
        stride: Stride of the three convolutions and of the pooling layer.
        activation: Activation used in every branch.

    Returns:
        The concatenated branch outputs.
    """
    f1, f2, f3, pool_filters = ince_filter
    len1, len2, len3, pool_len = ince_length

    def bn_then_activate(tensor):
        tensor = BatchNormalization()(tensor)
        return Activation(activation)(tensor)

    branches = []

    # Branches 1-3, created in order so layer ordering stays the same.
    for n_filters, kernel_len in ((f1, len1), (f2, len2), (f3, len3)):
        conv = Conv1D(n_filters, kernel_len, strides=stride, padding='same',
                      kernel_regularizer=regularizers.l2(0.01))(x)
        branches.append(bn_then_activate(conv))

    # Branch 4: pooling path; its convolution carries no regularizer.
    pooled = MaxPooling1D(pool_size=pool_len, strides=stride, padding='same')(x)
    pooled = Conv1D(pool_filters, 1, strides=1, padding='same')(pooled)
    branches.append(bn_then_activate(pooled))

    return Concatenate(axis=-1)(branches)

def conv_block(x, nb_filter, length, activation):
    """Build a residual block: three convolutions plus a projected shortcut.

    Args:
        x: Input tensor.
        nb_filter: Three filter counts for the main-path convolutions; the
            last one is also the shortcut's filter count.
        length: Kernel size of the three main-path convolutions.
        activation: Activation applied after the first two convolutions and
            after the residual sum.

    Returns:
        The block output after the activation and a Dropout of rate 0.8.
    """
    f1, f2, f3 = nb_filter

    def regularized_conv(filters, tensor):
        return Conv1D(filters, length, strides=1, padding='same',
                      kernel_regularizer=regularizers.l2(0.002))(tensor)

    # Main path: two conv/BN/activation stages, then conv/BN with no activation.
    main = x
    for filters in (f1, f2):
        main = regularized_conv(filters, main)
        main = BatchNormalization()(main)
        main = Activation(activation)(main)
    main = regularized_conv(f3, main)
    main = BatchNormalization()(main)

    # Shortcut path: a kernel-size-1 convolution gives the input f3 channels for the Add.
    shortcut = Conv1D(f3, 1, strides=1, padding='same')(x)
    shortcut = BatchNormalization()(shortcut)

    # Combine paths
    merged = Add()([main, shortcut])
    merged = Activation(activation)(merged)
    return Dropout(0.8)(merged)  # Fixed dropout from original

def squeeze_excitation_layer(x, out_dim, activation, ratio=8):
    """Rescale the channels of ``x`` with a learned gate.

    Args:
        x: Input tensor; its last dimension must be compatible with ``out_dim``
            for the final Multiply.
        out_dim: Size of the gate vector.
        activation: Activation applied after the bottleneck Dense layer.
        ratio: Bottleneck reduction; the hidden Dense has ``out_dim // ratio``
            units.

    Returns:
        ``x`` multiplied by the sigmoid gate.
    """
    pooled = GlobalAveragePooling1D()(x)
    gate = Dense(units=out_dim // ratio)(pooled)
    gate = Activation(activation)(gate)
    gate = Dense(units=out_dim, activation='sigmoid')(gate)
    # Shape (1, out_dim) lets the gate broadcast against x in Multiply.
    gate = Reshape((1, out_dim))(gate)
    return Multiply()([x, gate])

# Rebuild model architecture with original hyperparameters
def build_adapted_model():
    """Build the five-branch model on a ``(TRIAL_LENGTH, n_channels)`` input.

    Every branch applies, in order: inception_block, MaxPooling1D(4, 4),
    BatchNormalization, Dropout, conv_block, squeeze_excitation_layer,
    a second MaxPooling1D and Flatten. Branches differ only in their
    per-branch hyperparameters. The flattened outputs are concatenated,
    passed through Dropout and a 2-unit sigmoid Dense layer.

    Layers are created branch by branch in a fixed order, matching the order
    of the per-branch hyperparameter lists.

    Returns:
        An uncompiled ``Model`` mapping the input to the 2-unit output.
    """
    input_tensor = Input(shape=(TRIAL_LENGTH, len(EEG_CHANNELS)))

    # Hyperparameters from original model
    inception_filters = [16, 16, 16, 16]
    inception_kernel_length = [
        [5, 10, 15, 10],
        [40, 45, 50, 100],
        [60, 65, 70, 100],
        [80, 85, 90, 100],
        [160, 180, 200, 180]
    ]
    inception_stride = [2, 4, 4, 4, 16]
    res_block_filters = [16, 16, 16]
    # Passed to conv_block as its kernel length.
    res_block_kernel_stride = [8, 7, 7, 7, 6]
    second_maxpooling_size = [4, 3, 3, 3, 2]
    second_maxpooling_stride = [4, 3, 3, 3, 2]
    activation = 'elu'
    dropout = 0.8

    # One tuple per branch (EIN-a .. EIN-e), in the original order.
    branch_settings = zip(
        inception_kernel_length,
        inception_stride,
        res_block_kernel_stride,
        second_maxpooling_size,
        second_maxpooling_stride,
    )

    output_conns = []
    for kernel_lengths, stride, res_kernel, pool_size, pool_stride in branch_settings:
        branch = inception_block(input_tensor, inception_filters, kernel_lengths, stride, activation)
        branch = MaxPooling1D(pool_size=4, strides=4, padding='same')(branch)
        branch = BatchNormalization()(branch)
        branch = Dropout(dropout)(branch)
        branch = conv_block(branch, res_block_filters, res_kernel, activation)
        # The SE gate width must equal conv_block's output channels (its last filter count).
        branch = squeeze_excitation_layer(branch, res_block_filters[-1], activation, ratio=8)
        branch = MaxPooling1D(pool_size=pool_size, strides=pool_stride, padding='same')(branch)
        branch = Flatten()(branch)
        output_conns.append(branch)

    # Concatenate all branches
    concat = Concatenate(axis=-1)(output_conns)
    concat = Dropout(dropout)(concat)
    output_tensor = Dense(2, activation='sigmoid')(concat)

    return Model(input_tensor, output_tensor)

# Seed Python, NumPy and TensorFlow so dropout masks and batch shuffling are
# repeatable across runs (some GPU kernels may still be non-deterministic).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Build and load pre-trained model
print("Building model...")
model = build_adapted_model()
model.load_weights('/kaggle/input/trained-bcic-iv-bs2b/final_model_fold_1_2a.h5')

# Compile with original settings
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['mae', 'binary_accuracy']
)

# Train with early stopping: stop after 10 epochs without val_loss improvement
# and roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

print("Starting fine-tuning...")
history = model.fit(
    X_train, y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping]
)

# Evaluate on validation set; values come back as loss followed by the
# compiled metrics in order (mae, binary_accuracy).
val_loss, val_mae, val_acc = model.evaluate(X_val, y_val_cat)
print(f'\nValidation Accuracy: {val_acc:.4f}')

Preprocessing training data...
Preprocessing validation data...
Building model...


2025-06-23 11:06:18.000300: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Starting fine-tuning...
Epoch 1/100
75/75 ━━━━━━━━━━━━━━━━━━━━ 66s 171ms/step - binary_accuracy: 0.5110 - loss: 1.5482 - mae: 0.4923 - val_binary_accuracy: 0.5200 - val_loss: 0.8799 - val_mae: 0.5086
Epoch 2/100
75/75 ━━━━━━━━━━━━━━━━━━━━ 8s 112ms/step - binary_accuracy: 0.5339 - loss: 1.0350 - mae: 0.4845 - val_binary_accuracy: 0.5000 - val_loss: 0.9155 - val_mae: 0.5072
Epoch 3/100
75/75 ━━━━━━━━━━━━━━━━━━━━ 9s 113ms/step - binary_accuracy: 0.5295 - loss: 0.9273 - mae: 0.4758 - val_binary_accuracy: 0.5200 - val_loss: 0.8003 - val_mae: 0.4992
Epoch 4/100
75/75 ━━━━━━━━━━━━━━━━━━━━ 9s 114ms/step - binary_accuracy: 0.5261 - loss: 0.9105 - mae: 0.4849 - val_binary_accuracy: 0.5000 - val_loss: 0.7537 - val_mae: 0.4904
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 119ms/step - binary_accuracy: 0.5316 - loss: 0.8669 - mae: 0.48