In [5]:
import os, json, sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score
from torch.autograd import Variable
from tqdm import tqdm

# Data Loading

In [6]:
# Storage/layout settings for the EyeNet recordings. `load_mode` selects which
# index-loading and slicing branch the later cells take.
eye_class = dict(
    class_name="EyeNet",
    time_col="time",
    prefix_filepath="/itet-stor/maxihuber/deepeye_storage/foundation_prepared/",
    load_mode=1,
)

# Task definition: one JSON index file per split, in train / val / test order.
eye_dir_amp = dict(
    task_name="EyeNetDirectionAmp",
    task_type="Regression",
    json_path=[
        "/itet-stor/maxihuber/deepeye_storage/eegeyenet_tasks/EEGEyeNet_Direction_Amp_train.json",
        "/itet-stor/maxihuber/deepeye_storage/eegeyenet_tasks/EEGEyeNet_Direction_Amp_val.json",
        "/itet-stor/maxihuber/deepeye_storage/eegeyenet_tasks/EEGEyeNet_Direction_Amp_test.json",
    ],
    out_dim=1,
)

# Unpack the selected dataset class into the module-level names used below.
used_class = eye_class
class_name, time_col, prefix_filepath, load_mode = (
    used_class[key]
    for key in ("class_name", "time_col", "prefix_filepath", "load_mode")
)

# Unpack the selected task into the module-level names used below.
used_task = eye_dir_amp
task_name, task_type, json_path, out_dim = (
    used_task[key]
    for key in ("task_name", "task_type", "json_path", "out_dim")
)

def load_index0(data_index_path):
    """Read one JSON index file and return its 'train' and 'test' entries.

    Args:
        data_index_path: Path to a JSON file whose top-level object has the
            keys 'train' and 'test'.

    Returns:
        Tuple ``(train_samples, test_samples)`` exactly as stored in the file.

    Raises:
        KeyError: If either key is missing from the JSON object.
    """
    with open(data_index_path, 'r') as handle:
        index = json.load(handle)
    return index['train'], index['test']

def load_index1(data_index_paths):
    """Read one JSON index file per split and return the first three splits.

    Each file holds a JSON object; only the first value of that object is
    used, whatever its key is called.

    Args:
        data_index_paths: Sequence of paths; the first three are returned in
            the order given (the caller passes train, val, test).

    Returns:
        Tuple of the first three loaded sample collections.

    Raises:
        IndexError: If fewer than three paths are given or a file's JSON
            object is empty.
    """
    def _first_entry(path):
        # Pull the single payload out of a one-key index file.
        with open(path, 'r') as handle:
            return list(json.load(handle).values())[0]

    splits = [_first_entry(path) for path in data_index_paths]
    return splits[0], splits[1], splits[2]

# Dataset name -> integer id. The id is the position of the name in this
# ordered list, so the order below must not be changed.
dataset_dict = {
    name: idx
    for idx, name in enumerate((
        "ERP_ERP_ANA", "RS_RS_ALPHA", "ERP_ERP_BISC", "ERP_ERP_BBI",
        "ERP_ERP_BICF", "ERP_ERP_BICD", "RS_RS_SPIS", "MI_MI_HGD",
        "MI_MI_SCP", "ErrP_ErrP_MERP", "MI_MI_ULM", "MI_MI_VEP",
        "MI_MI_LR", "MI_BBCI_IV_Graz_b", "MI_MI_EB", "MI_BBCI_IV_Graz_a",
        "MI_MI_GVH_V", "MI_MI_GAL", "MI_MI_Two", "MI_MI_GVH_H",
        "MI_MI_II", "ErrP_ErrP_BCI", "MI_MI_GVH_G", "MI_MI_Limb",
        "MI_MI_SCI", "MI_BBCI_IV_Berlin", "MI_eegmmidb", "ERP_ERP_FHD",
        "RS_RS_EID",
    ))
}

def extract_dataset_name(file_path, dataset_dict):
    """Return the first key of ``dataset_dict`` that occurs in ``file_path``.

    Keys are tried in the dictionary's iteration order and matched as plain
    substrings. If no key matches, the string "Unknown" is returned.
    """
    matches = (candidate for candidate in dataset_dict if candidate in file_path)
    return next(matches, "Unknown")


def load_file_data(data_index, task_channels):
    """Load every sample listed in ``data_index`` into per-sample tensors.

    For each sample the pickled DataFrames named in ``sample["input"]`` are
    concatenated row-wise, sliced to the sample window, restricted to the
    requested channels and converted to a float32 tensor whose rows are the
    DataFrame rows and whose columns are the kept channels.

    Reads the module-level settings ``load_mode``, ``prefix_filepath``,
    ``time_col``, ``task_name`` and ``dataset_dict``.

    Args:
        data_index: Iterable of sample dicts. Keys read here: "input",
            "start", "length" (or "end"), and "output" (or "label").
        task_channels: Collection of column names to keep. Any iterable is
            accepted; it is converted to a set once.

    Returns:
        Tuple ``(data, outputs, srs, durs, channels, datasets)`` of dicts that
        all share the same keys ``0..n-1`` (one per successfully loaded
        sample): signal tensor, target, sampling rate, duration
        (rows / sampling rate), kept channel names, and dataset name.

    Samples with at most one row after slicing are skipped silently. Samples
    that raise are reported on stdout and skipped; nothing is re-raised.
    """
    task_channel_set = set(task_channels)

    data, outputs, srs, durs, channels, datasets = {}, {}, {}, {}, {}, {}
    failed_samples = []
    num_samples = 0

    for sample in tqdm(data_index, desc="Loading data", position=0, leave=True):
        try:
            # Load all input files, then concatenate once (avoids re-copying
            # the accumulated frame on every file).
            frames = []
            dataset_name = "Unknown"
            for file in sample["input"]:
                if load_mode != 1:
                    file = prefix_filepath + file
                else:
                    file = file.replace("/itet-stor/kard", "/itet-stor/maxihuber")
                # NOTE(security): unpickling executes arbitrary code; only
                # point this at trusted files.
                with open(file, 'rb') as f:
                    frames.append(pd.read_pickle(f))
                dataset_name = extract_dataset_name(file, dataset_dict)
            df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

            start = int(sample["start"])
            length = int(sample["length"]) if "length" in sample else int(sample["end"])
            if load_mode != 1:
                # NOTE(review): `length` is used as the END position here even
                # when it came from sample["length"] - confirm this is intended.
                df = df.iloc[start:length, :]
            else:
                # Label-based slice: both endpoints are included.
                df = df.loc[start:start + length, :]

            # Need at least two rows to derive a sampling rate.
            if len(df) <= 1:
                continue

            # Sampling rate from the spacing of the first two time stamps.
            # Rounded, not truncated: 1 / 0.0020000000000000005 is 499.99...,
            # which int() would turn into 499.
            dt = float(df[time_col].iloc[1]) - float(df[time_col].iloc[0])
            if dt <= 0:
                raise ValueError(f"non-increasing time stamps in column '{time_col}' (dt={dt})")
            sr = int(round(1.0 / dt))

            if load_mode != 1:
                output = sample["output"] if "output" in sample else sample["label"]
            elif task_name == "EyeNetPosition":
                output = list(sample["output"].values())
            else:
                output = list(sample["output"].values())[0]

            duration = len(df) / sr

            # Keep the DataFrame's own column order. Set intersection order
            # depends on string hashing and therefore differs between runs.
            sample_channels = list(dict.fromkeys(
                col for col in df.columns if col in task_channel_set
            ))
            signals = torch.tensor(
                df[sample_channels].astype(float).to_numpy(), dtype=torch.float32
            )

            # Commit only after every step succeeded, so all six dicts always
            # have exactly the same keys.
            data[num_samples] = signals
            outputs[num_samples] = output
            srs[num_samples] = sr
            durs[num_samples] = duration
            channels[num_samples] = sample_channels
            datasets[num_samples] = dataset_name
            num_samples += 1

        except Exception as e:
            # Best-effort loading: report and move on to the next sample.
            print(f"Failed to process sample: {sample}. Error: {e}")
            failed_samples.append(sample)

    if failed_samples:
        print(f"Skipped {len(failed_samples)} sample(s) that failed to load.")

    return data, outputs, srs, durs, channels, datasets

# NOTE(review): `task_channels` is not defined in any cell above (the recorded
# run of this cell stopped with NameError). Define the collection of channel
# names to keep before running this cell.

# Read the sample index for each split.
if load_mode == 0:
    train_index, test_index = load_index0(json_path)
elif load_mode == 1:
    train_index, val_index, test_index = load_index1(json_path)
else:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(f"Unsupported load_mode: {load_mode!r} (expected 0 or 1)")

print(f"Full train size: {len(train_index)}")
print(f"Full test size: {len(test_index)}")

# To iterate quickly, slice the index lists here (e.g. train_index[:100]).

# Load the signals for each split; a validation split exists only for load_mode 1.
train_data, train_outputs, train_sr, train_dur, train_channels, train_datasets = load_file_data(train_index, task_channels)
if load_mode == 1:
    val_data, val_outputs, val_sr, val_dur, val_channels, val_datasets = load_file_data(val_index, task_channels)
test_data, test_outputs, test_sr, test_dur, test_channels, test_datasets = load_file_data(test_index, task_channels)

Full train size: 12275
Full test size: 2719


NameError: name 'task_channels' is not defined

# Data Processing

In [3]:
# Label Encoder
from sklearn.preprocessing import LabelEncoder

# String targets mean the labels must be mapped to integer ids; numeric or
# list targets are used as-is.
first_output = next(iter(train_outputs.values()), None)
if isinstance(first_output, str):
    # Fit on every available split so that transforming any of them cannot
    # fail on an unseen label. The validation split exists only for load_mode 1.
    label_splits = [train_outputs, test_outputs]
    if load_mode == 1:
        label_splits.append(val_outputs)
    all_outputs = sorted({label for split in label_splits for label in split.values()})
    label_encoder = LabelEncoder()
    label_encoder.fit(all_outputs)

    print(f"Train classes: {set(train_outputs.values())}")
    print(f"Test classes: {set(test_outputs.values())}")
else:
    label_encoder = None

# TODO (potentially): Class Weights

# Ensure fixed input size for train/test set
# Each signal tensor is (time samples, channels): axis 0 is the duration and
# axis 1 is the channel count. These were previously read from the opposite
# axes, which cropped time to the channel count and padded channels to the
# time length.
durs = [signals.shape[0] for signals in train_data.values()] + [signals.shape[0] for signals in test_data.values()]
n_chns = [signals.shape[1] for signals in train_data.values()] + [signals.shape[1] for signals in test_data.values()]
dur_90 = int(np.percentile(durs, 90))
chn_90 = int(np.percentile(n_chns, 90))

def pad_tensor(tensor, target_height, target_width):
    """Return ``tensor`` resized to ``(target_height, target_width)``.

    Along each axis the 2-D tensor is either extended with trailing zeros
    (when it is shorter than the target) or cut to its leading entries
    (otherwise). The zero filler uses the dtype of ``tensor``.

    Args:
        tensor: 2-D tensor.
        target_height: Desired size of axis 0.
        target_width: Desired size of axis 1.

    Returns:
        Tensor with the original values in the top-left corner.
    """
    n_rows, n_cols = tensor.shape
    # Handle rows first, then columns, using the sizes measured up front.
    for axis, (have, want) in enumerate(((n_rows, target_height), (n_cols, target_width))):
        if have < want:
            fill_shape = list(tensor.shape)
            fill_shape[axis] = want - have
            filler = torch.zeros(tuple(fill_shape), dtype=tensor.dtype)
            tensor = torch.cat((tensor, filler), dim=axis)
        elif axis == 0:
            tensor = tensor[:want, :]
        else:
            tensor = tensor[:, :want]
    return tensor

def _pad_split(split_data):
    """Pad/crop every signal of one split to (dur_90, chn_90)."""
    return {
        k: pad_tensor(tensor=signals, target_height=dur_90, target_width=chn_90)
        for k, signals in split_data.items()
    }


def _stack_targets(split_outputs, keys):
    """Build one target tensor for a split, ordered like ``keys``.

    The loaded outputs are plain Python values (numbers, lists or strings),
    not tensors, so they cannot be passed to torch.stack directly.
    """
    values = [split_outputs[k] for k in keys]
    if label_encoder is not None:
        # String labels -> integer class ids.
        return torch.as_tensor(label_encoder.transform(values), dtype=torch.long)
    return torch.tensor(values, dtype=torch.float32)


train_data_pad = _pad_split(train_data)
test_data_pad = _pad_split(test_data)

# Targets are looked up by the data keys so X and y always line up.
X_train = torch.stack(list(train_data_pad.values()), dim=0)
y_train = _stack_targets(train_outputs, train_data_pad.keys())

# The validation split is only loaded for load_mode 1.
if load_mode == 1:
    val_data_pad = _pad_split(val_data)
    X_val = torch.stack(list(val_data_pad.values()), dim=0)
    y_val = _stack_targets(val_outputs, val_data_pad.keys())

X_test = torch.stack(list(test_data_pad.values()), dim=0)
y_test = _stack_targets(test_outputs, test_data_pad.keys())

NameError: name 'train_outputs' is not defined

# Network

In [None]:

class EEGNet(nn.Module):
    """Small three-stage convolutional network for fixed-size EEG windows.

    The layer sizes are hard-wired for input of shape
    ``(batch, 1, 120, 64)``: the first convolution spans 64 columns, and 120
    rows reduce to the ``4 * 2 * 7`` features the final linear layer expects.
    Other input sizes need a different ``fc1`` input dimension.

    Args:
        out_dim: Number of output units of the final linear layer.
        apply_sigmoid: If True (default) the output is squashed into (0, 1)
            with a sigmoid. Pass False to get the raw linear output, e.g. for
            regression targets outside that range.
    """

    def __init__(self, out_dim=1, apply_sigmoid=True):
        super().__init__()
        self.T = 120
        self.apply_sigmoid = apply_sigmoid

        # A module (not the functional call) so dropout follows
        # train()/eval() and is switched off at inference time.
        self.dropout = nn.Dropout(0.25)

        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, (1, 64), padding=0)
        # The second positional argument of BatchNorm2d is `eps`; passing
        # False there set eps to 0. Use the default eps instead.
        self.batchnorm1 = nn.BatchNorm2d(16)

        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(1, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4)
        self.pooling2 = nn.MaxPool2d(2, 4)

        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4)
        self.pooling3 = nn.MaxPool2d((2, 4))

        # FC Layer
        # NOTE: This dimension depends on the number of time points per
        # sample; 4*2*7 corresponds to 120 time points.
        self.fc1 = nn.Linear(4 * 2 * 7, out_dim)

    def forward(self, x):
        """Map a ``(batch, 1, 120, 64)`` tensor to ``(batch, out_dim)``."""
        # Layer 1
        x = F.elu(self.conv1(x))
        x = self.batchnorm1(x)
        x = self.dropout(x)
        # Move the 16 feature maps into the height axis for the 2-D convs below.
        x = x.permute(0, 3, 1, 2)

        # Layer 2
        x = self.padding1(x)
        x = F.elu(self.conv2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.pooling2(x)

        # Layer 3
        x = self.padding2(x)
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        x = self.dropout(x)
        x = self.pooling3(x)

        # FC Layer
        # Flatten per sample; a wrong input size then fails in fc1 instead of
        # silently reshuffling values across the batch dimension.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        return torch.sigmoid(x) if self.apply_sigmoid else x