In [1]:
# Per-subject accumulators, one list per modality. The loading cell appends
# one array per subject to each list and stacks them afterwards.

# ---- seen split ----
all_brain_seen = []
all_image_seen = []
all_text_seen = []
all_label_seen = []

# ---- unseen split ----
all_brain_unseen = []
all_image_unseen = []
all_text_unseen = []
all_label_unseen = []

In [2]:

# load data
import mmbra
import mmbracategories
import torch
import os
import scipy.io as sio
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

import mmbra
import mmbracategories
import torch
import os
import scipy.io as sio
import numpy as np

# ---- configuration ----
data_dir_root = os.path.join('./data', 'ThingsEEG-Text')

sbj_list = ['sub-01', 'sub-02', 'sub-03']

image_model = 'pytorch/cornet_s'
text_model = 'CLIPText'
roi = '17channels'

# Scaling / slicing constants (same values the cell has always used).
BRAIN_SCALE = 2.0
IMAGE_SCALE = 50.0
TEXT_SCALE = 2.0
# Keeps indices 27..59 of the last axis of the EEG array.
# NOTE(review): presumably the last axis is time -- confirm against the data description.
BRAIN_LAST_AXIS_WINDOW = slice(27, 60)
# Number of leading image-feature columns that are kept.
N_IMAGE_FEATURES = 100


def _load_brain_and_labels(mat_path):
    """Load EEG features and class labels from a single .mat file.

    The file is parsed once and both the 'data' and 'class_idx' entries are
    read from the same result.

    Returns:
        brain: float64 array; 'data' scaled by BRAIN_SCALE, restricted to
            BRAIN_LAST_AXIS_WINDOW on its last axis and flattened to
            (n_samples, -1).
        labels: int array; the transpose of 'class_idx'.
    """
    mat = sio.loadmat(mat_path)
    brain = mat['data'].astype('double') * BRAIN_SCALE
    brain = brain[:, :, BRAIN_LAST_AXIS_WINDOW]
    brain = np.reshape(brain, (brain.shape[0], -1))
    labels = mat['class_idx'].T.astype('int')
    return brain, labels


def _load_features(mat_path, scale):
    """Return the 'data' entry of a .mat file as float64, multiplied by `scale`."""
    return sio.loadmat(mat_path)['data'].astype('double') * scale


def _report(split, brain, image, text, label):
    """Print sample/feature counts for one split ('seen' or 'unseen')."""
    for modality, tensor in (('brain', brain), ('image', image), ('text', text)):
        print(f'{split}_{modality}_samples=', tensor.shape[0],
              f', {split}_{modality}_features=', tensor.shape[1])
    print(f'{split}_label=', label.shape)


# Reset the accumulators here so that re-running this cell does not append the
# same subjects a second time (which would silently duplicate every sample).
all_brain_seen, all_image_seen, all_text_seen, all_label_seen = [], [], [], []
all_brain_unseen, all_image_unseen, all_text_unseen, all_label_unseen = [], [], [], []

for sbj in sbj_list:
    brain_dir = os.path.join(data_dir_root, 'brain_feature', roi, sbj)
    image_dir_seen = os.path.join(data_dir_root, 'visual_feature/ThingsTrain', image_model, sbj)
    image_dir_unseen = os.path.join(data_dir_root, 'visual_feature/ThingsTest', image_model, sbj)
    text_dir_seen = os.path.join(data_dir_root, 'textual_feature/ThingsTrain/text', text_model, sbj)
    text_dir_unseen = os.path.join(data_dir_root, 'textual_feature/ThingsTest/text', text_model, sbj)

    # ---- seen ----
    sbj_brain_seen, sbj_label_seen = _load_brain_and_labels(
        os.path.join(brain_dir, 'eeg_train_data_within.mat'))
    sbj_image_seen = _load_features(
        os.path.join(image_dir_seen, 'feat_pca_train.mat'), IMAGE_SCALE)[:, :N_IMAGE_FEATURES]
    sbj_text_seen = _load_features(
        os.path.join(text_dir_seen, 'text_feat_train.mat'), TEXT_SCALE)

    # ---- unseen ----
    sbj_brain_unseen, sbj_label_unseen = _load_brain_and_labels(
        os.path.join(brain_dir, 'eeg_test_data.mat'))
    sbj_image_unseen = _load_features(
        os.path.join(image_dir_unseen, 'feat_pca_test.mat'), IMAGE_SCALE)[:, :N_IMAGE_FEATURES]
    sbj_text_unseen = _load_features(
        os.path.join(text_dir_unseen, 'text_feat_test.mat'), TEXT_SCALE)

    # collect
    all_brain_seen.append(sbj_brain_seen)
    all_image_seen.append(sbj_image_seen)
    all_text_seen.append(sbj_text_seen)
    all_label_seen.append(sbj_label_seen)

    all_brain_unseen.append(sbj_brain_unseen)
    all_image_unseen.append(sbj_image_unseen)
    all_text_unseen.append(sbj_text_unseen)
    all_label_unseen.append(sbj_label_unseen)

# stack across subjects (rows of all subjects are concatenated in sbj_list order)
brain_seen = torch.from_numpy(np.vstack(all_brain_seen))
image_seen = torch.from_numpy(np.vstack(all_image_seen))
text_seen = torch.from_numpy(np.vstack(all_text_seen))
label_seen = torch.from_numpy(np.vstack(all_label_seen))

brain_unseen = torch.from_numpy(np.vstack(all_brain_unseen))
image_unseen = torch.from_numpy(np.vstack(all_image_unseen))
text_unseen = torch.from_numpy(np.vstack(all_text_unseen))
label_unseen = torch.from_numpy(np.vstack(all_label_unseen))

# Later cells index all modalities with the same row mask, so the row counts
# within a split must agree.
assert len({t.shape[0] for t in (brain_seen, image_seen, text_seen, label_seen)}) == 1, \
    "seen modalities have different numbers of samples"
assert len({t.shape[0] for t in (brain_unseen, image_unseen, text_unseen, label_unseen)}) == 1, \
    "unseen modalities have different numbers of samples"

_report('seen', brain_seen, image_seen, text_seen, label_seen)
_report('unseen', brain_unseen, image_unseen, text_unseen, label_unseen)


seen_brain_samples= 49620 , seen_brain_features= 561
seen_image_samples= 49620 , seen_image_features= 100
seen_text_samples= 49620 , seen_text_features= 512
seen_label= torch.Size([49620, 1])
unseen_brain_samples= 48000 , unseen_brain_features= 561
unseen_image_samples= 48000 , unseen_image_features= 100
unseen_text_samples= 48000 , unseen_text_features= 512
unseen_label= torch.Size([48000, 1])


In [3]:
'''   
To ensure a strict zero-shot learning setup, we perform a 
class-level split of the label space into disjoint seen and unseen sets. 
All model development, including embedding refinement, 
uses only samples from seen classes, while evaluation is performed exclusively 
on unseen classes. This prevents information leakage and aligns with the formal 
definition of zero-shot learning.
'''

'   \nTo ensure a strict zero-shot learning setup, we perform a \nclass-level split of the label space into disjoint seen and unseen sets. \nAll model development, including embedding refinement, \nuses only samples from seen classes, while evaluation is performed exclusively \non unseen classes. This prevents information leakage and aligns with the formal \ndefinition of zero-shot learning.\n'

In [4]:
# To avoid data leakage, split by class (80% of class ids seen, 20% unseen).
# ============================
# ZERO-SHOT CLASS-LEVEL SPLIT
# ============================

SEEN_CLASS_FRACTION = 0.8   # share of class ids assigned to the seen set
SPLIT_SEED = 0              # seed for the reproducible class shuffle

# flatten labels to 1D numpy arrays
y_seen_all = label_seen.numpy().reshape(-1)
y_unseen_all = label_unseen.numpy().reshape(-1)

# NOTE(review): taking the union of both label arrays assumes that a given
# integer id denotes the same class in the train file and in the test file.
# In the recorded run only 48 of the 331 ids assigned to "unseen" actually
# occur in the test file -- confirm that the two files share one id space.
all_classes = np.unique(np.concatenate([y_seen_all, y_unseen_all]))

print(f"Total number of classes: {len(all_classes)}")

# reproducible split (seeds and uses NumPy's global RNG, as before)
np.random.seed(SPLIT_SEED)
np.random.shuffle(all_classes)

n_seen_classes = int(SEEN_CLASS_FRACTION * len(all_classes))
seen_classes = all_classes[:n_seen_classes]
unseen_classes = all_classes[n_seen_classes:]

print(f"Seen classes: {len(seen_classes)}")
print(f"Unseen classes: {len(unseen_classes)}")

# create row masks: keep a sample only if its class belongs to the matching set
seen_mask = np.isin(y_seen_all, seen_classes)
unseen_mask = np.isin(y_unseen_all, unseen_classes)

# final zero-shot datasets
X_seen = brain_seen[seen_mask]
y_seen = label_seen[seen_mask]

X_unseen = brain_unseen[unseen_mask]
y_unseen = label_unseen[unseen_mask]

# Verify the no-leakage property instead of only stating it.
assert np.intersect1d(seen_classes, unseen_classes).size == 0, \
    "seen and unseen class sets overlap"
assert not np.isin(y_seen.numpy(), unseen_classes).any(), \
    "a training sample carries an unseen class id"
assert not np.isin(y_unseen.numpy(), seen_classes).any(), \
    "an evaluation sample carries a seen class id"

print("Final zero-shot split:")
print("Seen samples:", X_seen.shape[0])
print("Unseen samples:", X_unseen.shape[0])

# Surface unseen class ids that have no evaluation samples at all, so the
# effective number of evaluated classes is visible next to the split sizes.
n_unseen_classes_with_samples = np.unique(y_unseen_all[unseen_mask]).size
if n_unseen_classes_with_samples < len(unseen_classes):
    print(f"Unseen classes with evaluation samples: "
          f"{n_unseen_classes_with_samples} of {len(unseen_classes)}")

Total number of classes: 1654
Seen classes: 1323
Unseen classes: 331
Final zero-shot split:
Seen samples: 39690
Unseen samples: 11520


## Creating baseline zero-shot model

In [5]:
# EEG features for the baseline: the scaler and the PCA are both fitted on the
# seen split only and then applied unchanged to the unseen (evaluation) split.
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np

X_seenNP, X_unseenNP = X_seen.numpy(), X_unseen.numpy()

# Standardise every feature with statistics estimated on the seen samples.
scaler = StandardScaler().fit(X_seenNP)
X_seen_scaled = scaler.transform(X_seenNP)
X_unseen_scaled = scaler.transform(X_unseenNP)

# Reduce the EEG features to 512 dimensions so they have the same width as the
# text embeddings; the projection is learned from the seen samples.
pca = PCA(n_components=512, random_state=0)
X_seen_512 = pca.fit_transform(X_seen_scaled)
X_test = pca.transform(X_unseen_scaled)

print("EEG test shape:", X_test.shape)


EEG test shape: (11520, 512)


In [None]:
# Evaluation labels as a flat 1-D integer array.
y_unseenNP = y_unseen.numpy().reshape(-1).astype(int)

# Sorted class ids present in the evaluation set, together with the row index
# at which each id occurs for the first time.
unseen_classes, first_row_of_class = np.unique(y_unseenNP, return_index=True)
print('Unseen classes (in eval set):', len(unseen_classes))

# Text embeddings restricted to the evaluation rows (same mask as the EEG data).
text_unseen_filtered = text_unseen[unseen_mask]
text_unseen_np = text_unseen_filtered.numpy()

# Semantic prototypes: {class_id: vector}. The prototype of a class is the text
# embedding of its first evaluation sample (no averaging over samples).
semantic_proto = {
    class_id: text_unseen_np[row]
    for class_id, row in zip(unseen_classes, first_row_of_class)
}

Unseen classes (in eval set): 48


In [7]:
# Cosine-similarity inference: assign every EEG sample to the class whose
# prototype vector has the highest cosine similarity.
from numpy.linalg import norm

# Class ids and their prototype vectors, in the dict's insertion order.
proto_label = list(semantic_proto)
proto_matrix = np.stack(list(semantic_proto.values()))

# Row-wise L2 normalisation; the small constant guards against division by zero.
Xn = X_test / (norm(X_test, axis=1, keepdims=True) + 1e-8)
Pn = proto_matrix / (norm(proto_matrix, axis=1, keepdims=True) + 1e-8)

# Similarity matrix with one row per evaluation sample and one column per prototype.
S = Xn @ Pn.T

# Map the best-matching column of each row back to its class id.
best_proto = np.argmax(S, axis=1)
y_pred = np.asarray(proto_label)[best_proto]

In [8]:
from sklearn.metrics import accuracy_score

# Fraction of evaluation samples whose predicted class id equals the true one.
# For reference, uniform guessing over the candidate prototypes would score
# about 1 / len(proto_label).
acc = accuracy_score(y_true=y_unseenNP, y_pred=y_pred)
print(f"Zero-shot baseline accuracy: {acc:.4f}")

Zero-shot baseline accuracy: 0.0191
