In [None]:
import scipy.io as sio
import numpy as np
from google.colab import drive
drive.mount('/content/drive')
data = sio.loadmat('/content/drive/My Drive/BCI_Comp_III_Wads_2004/Subject_A_Train.mat')



Mounted at /content/drive


In [None]:
# TODO: Identify which variables correspond to
# 1. Continuous EEG signal
# 2. Stimulus onset information
# 3. Target vs non-target labels
# 1. Continuous EEG signal: Reshape from (85, 7794, 64) to (662490, 64)
# The dataset stores Signal as (85, 7794, 64)
# 85 characters, 7794 time samples per character, 64 channels.
raw_signal = data['Signal']

# RESHAPE IS MANDATORY: Flatten characters and time into one long timeline
# New shape: (662490, 64)
eeg_signal = raw_signal.reshape(-1, raw_signal.shape[-1])

# Flatten these so their indices (0 to 662489) match the reshaped signal
flashing = data['Flashing'].flatten()
stimulus_type = data['StimulusType'].flatten()

fs = 240


In [None]:
def extract_epochs(signal, stimulus_onsets, labels, fs, t_start=0.0, t_end=0.8):
    """
    Extract EEG epochs around each stimulus onset.

    Parameters:
    - signal: continuous EEG array of shape (time, channels)
    - stimulus_onsets: indices where stimuli occur
    - labels: target/non-target labels per stimulus
    - fs: sampling frequency in Hz
    - t_start: start time (seconds) relative to stimulus
    - t_end: end time (seconds) relative to stimulus

    Returns:
    - epochs: array of shape (num_trials, channels, time)
    - y: corresponding labels
    """
    # Ensure 1D arrays
    stimulus_onsets = stimulus_onsets.flatten()
    labels = labels.flatten()

    start_samp = int(t_start * fs)
    end_samp = int(t_end * fs)
    expected_len = end_samp - start_samp

    # Find where the flash begins
    onsets = np.where(np.diff(stimulus_onsets.astype(int)) == 1)[0] + 1

    epochs = []
    y = []

    for idx in onsets:
        start = idx + start_samp
        end = idx + end_samp

        # This check failed before because signal.shape[0] was 85.
        # Now signal.shape[0] is 662,490, so it will PASS.
        if start >= 0 and end <= signal.shape[0]:
            epoch = signal[start:end, :].T # Result: (Channels, Time)

            if epoch.shape[1] == expected_len:
                epochs.append(epoch)
                y.append(labels[idx])

    return np.array(epochs), np.array(y)

# Execute extraction
X, y = extract_epochs(eeg_signal, flashing, stimulus_type, fs)

# Verify counts before moving to Part 4
print(f"X samples: {len(X)}, y samples: {len(y)}")


X samples: 15299, y samples: 15299


In [None]:
def prepare_for_eegnet(epochs):
    """
    Prepare EEG epochs for input into EEGNet.

    Expected input shape: (trials, channels, time)
    Expected output shape: (trials, 1, channels, time)
    """
    # TODO: Add singleton dimension required by Conv2D
    # Add the singleton dimension (kernels/channels_first)
    # Resulting shape: (trials, 1, channels, time)
    return np.expand_dims(epochs, axis=-1)

X_prepared = prepare_for_eegnet(X)
print(f"New X_prepared shape: {X_prepared.shape}")
# Hint: Use numpy.expand_dims

New X_prepared shape: (15299, 64, 192, 1)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, DepthwiseConv2D, SeparableConv2D,
    BatchNormalization, AveragePooling2D, Dropout, Flatten, Dense, Activation
)
from tensorflow.keras.constraints import max_norm

# Set the image data format globally for Keras backend
tf.keras.backend.set_image_data_format('channels_first')

def EEGNet(nb_classes, Chans, Samples, F1=8, D=2, F2=16, dropoutRate=0.5):
    """
    EEGNet architecture.

    Parameters:
    - nb_classes: number of output classes
    - Chans: number of EEG channels
    - Samples: number of time samples per epoch
    - F1: number of temporal filters
    - D: depth multiplier for spatial filters
    - F2: number of pointwise filters
    """
    # Switching to channels_last to solve the NHWC error
    input_format = 'channels_last'
    bn_axis = -1 # Matches the last dimension

    # New input shape: (64, 192, 1)
    inputs = Input(shape=(Chans, Samples, 1))

    # Block 1: Temporal Convolution
    # We convolve across the 'Samples' (192) dimension
    block1 = Conv2D(F1, (1, 120), padding='same', use_bias=False, data_format=input_format)(inputs)
    block1 = BatchNormalization(axis=bn_axis)(block1)

    # Block 1: Spatial Convolution (Depthwise)
    # We convolve across the 'Chans' (64) dimension
    block1 = DepthwiseConv2D((Chans, 1), use_bias=False,
                               depth_multiplier=D,
                               depthwise_constraint=max_norm(1.),
                               data_format=input_format)(block1)
    block1 = BatchNormalization(axis=bn_axis)(block1)
    block1 = Activation('elu')(block1)
    block1 = AveragePooling2D((1, 4), data_format=input_format)(block1)
    block1 = Dropout(dropoutRate)(block1)

    # Block 2: Separable Convolution
    block2 = SeparableConv2D(F2, (1, 16), use_bias=False, padding='same', data_format=input_format)(block1)
    block2 = BatchNormalization(axis=bn_axis)(block2)
    block2 = Activation('elu')(block2)
    block2 = AveragePooling2D((1, 8), data_format=input_format)(block2)
    block2 = Dropout(dropoutRate)(block2)

    # Classification
    flatten = Flatten()(block2)
    dense = Dense(nb_classes, name='dense', kernel_constraint=max_norm(0.25))(flatten)
    softmax = Activation('softmax', name='softmax')(dense)

    return Model(inputs=inputs, outputs=softmax)

# Instantiate with the same parameters
model = EEGNet(nb_classes=2, Chans=64, Samples=192)

In [6]:
# TODO: Split the dataset into training and validation sets
# TODO: Compile the model with an appropriate loss and optimizer
# Hint: Use categorical cross-entropy and Adam optimizer
# TODO: Train the model and store the training history
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Split data
X_train, X_val, y_train, y_val = train_test_split(X_prepared, y, test_size=0.2, random_state=42, stratify=y)

# Convert labels to one-hot encoding
y_train_cat = to_categorical(y_train, num_classes=2)
y_val_cat = to_categorical(y_val, num_classes=2)
# Double check shapes
print(f"X_train shape: {X_train.shape}")
print(f"y_train_cat shape: {y_train_cat.shape}")
print(f"X_train shape: {X_train.shape}")
print(f"Model expected input: {model.input_shape}")

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Use a smaller batch size initially to isolate memory errors
history = model.fit(X_train, y_train_cat,
                    batch_size=16,
                    epochs=50,
                    validation_data=(X_val, y_val_cat),
                    verbose=1)

X_train shape: (12239, 64, 192, 1)
y_train_cat shape: (12239, 2)
X_train shape: (12239, 64, 192, 1)
Model expected input: (None, 64, 192, 1)
Epoch 1/50
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m365s[0m 473ms/step - accuracy: 0.8046 - loss: 0.5045 - val_accuracy: 0.8363 - val_loss: 0.4251
Epoch 2/50
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m377s[0m 467ms/step - accuracy: 0.8339 - loss: 0.4207 - val_accuracy: 0.8350 - val_loss: 0.4093
Epoch 3/50
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m360s[0m 471ms/step - accuracy: 0.8385 - loss: 0.3999 - val_accuracy: 0.8389 - val_loss: 0.4048
Epoch 4/50
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 470ms/step - accuracy: 0.8429 - loss: 0.3935 - val_accuracy: 0.8382 - val_loss: 0.4049
Epoch 5/50
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m349s[0m 456ms/step - accuracy: 0.8386 - loss: 0.3979 - val_accuracy: 0.8444 - val_loss: 0.3961
Epoch 6/50
[1m765/765[0m [