In [55]:
import torch
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
from sklearn.decomposition import PCA

In [56]:
# Directory that is scanned for trained classifier checkpoints and from which
# the selected checkpoint is loaded (relative to the notebook's working directory).
path_mod = '../trained_models/Classification_EEG_Concepts/'

In [57]:
n_classes = 100


def _checkpoint_accuracy(filename, n_classes):
    """Return the accuracy encoded in a checkpoint filename, or None if it does not match.

    Expected layout (as in 'model_100_acc9.38_LSTM.pt'):
    '<prefix>_<n_classes>_acc<accuracy>[_<suffix>].<ext>'.

    Parameters:
        filename: bare file name (no directory part).
        n_classes: class count the checkpoint must have been trained for.

    Returns:
        The accuracy as a float, or None when the name does not follow the
        layout, targets another class count, or has an unparsable accuracy.
    """
    # Drop the extension first so a name ending in '_acc9.38.pt' still parses.
    stem = os.path.splitext(filename)[0]
    parts = stem.split('_')
    if len(parts) < 3 or parts[1] != str(n_classes) or not parts[2].startswith('acc'):
        return None
    try:
        return float(parts[2][len('acc'):])
    except ValueError:
        return None


# Map every file in the checkpoint directory to its parsed accuracy (None = not a match).
accuracy_by_name = {
    name: _checkpoint_accuracy(name, n_classes) for name in os.listdir(path_mod)
}
names = [name for name, acc in accuracy_by_name.items() if acc is not None]
if not names:
    raise FileNotFoundError(
        f'No checkpoint for n_classes={n_classes} found in {path_mod!r}'
    )

# Rank by the numeric accuracy, not by the file name: a lexicographic sort would
# place 'acc9.38' above 'acc10.50'. The name is only a deterministic tie-breaker,
# since os.listdir returns entries in arbitrary order.
best_name = max(names, key=lambda name: (accuracy_by_name[name], name))
print(best_name)

model_100_acc9.38_LSTM.pt


In [58]:
# SECURITY: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects whole-module pickles like this one - confirm against the
# installed version.
checkpoint_path = os.path.join(path_mod, best_name)
model = torch.load(checkpoint_path, map_location='cuda')

# Display the module structure as the cell output.
model

easy(
  (lstm_layer): LSTM(17, 32, bias=False, batch_first=True)
  (block1): Sequential(
    (0): ReLU()
    (1): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
    (2): Dropout(p=0.5, inplace=False)
  )
  (block2): Sequential(
    (0): Conv1d(32, 64, kernel_size=(10,), stride=(1,), padding=(5,), bias=False)
    (1): ReLU()
    (2): AvgPool1d(kernel_size=(5,), stride=(5,), padding=(0,))
  )
  (lin): Linear(in_features=640, out_features=100, bias=False)
)

In [59]:
# Uncomment to list the graph node names that can be used as return nodes:
# print(get_graph_node_names(model), '\n')

# Expose the 'flatten' graph node under the output key 'last_layer'.
# NOTE: `model` is rebound to the extractor here, so this cell should be run
# only once per freshly loaded model.
return_nodes = {'flatten': 'last_layer'}
extractor = create_feature_extractor(model, return_nodes=return_nodes)
model = extractor.to('cuda')

In [60]:
# Root directory of the THINGS-EEG2 data; the preprocessed EEG is read from it
# and the extracted features are written below it.
path_THINGSEEG2 = '../data/THINGS-EEG2/'

In [61]:
class Dataset(torch.utils.data.Dataset):
  """In-memory dataset of the preprocessed THINGS-EEG2 recordings of subject 01.

  The base class is spelled out as `torch.utils.data.Dataset` because this class
  reuses the name `Dataset`: inheriting from the bare name would make every
  re-run of the cell subclass the previous definition of this class.

  Parameters:
    partition: partition name inserted into the file name
      'preprocessed_eeg_{partition}.npy' (the notebook uses 'training' and 'test').
    device: device the whole array is moved to up front (default 'cuda').

  Each item is one 2-D float tensor of shape (shape[2], shape[3]) of the stored
  4-D array; the first two axes are merged into the sample axis.
  """

  def __init__(self, partition, device='cuda'):
    path = os.path.join(path_THINGSEEG2, f'preprocessed_data/sub-01/preprocessed_eeg_{partition}.npy')
    # SECURITY: allow_pickle=True unpickles the file; only load trusted data.
    eeg = np.load(path, allow_pickle=True).item()['preprocessed_eeg_data']
    # Merge the first two axes so every slice becomes its own sample.
    # presumably the axes are (condition, repetition, channel, time) - TODO confirm
    data = eeg.reshape(-1, eeg.shape[2], eeg.shape[3])
    # data = data.mean(axis=1)  # mean or median

    self.X = torch.from_numpy(data).float().to(device)
    self.len = self.X.shape[0]

  def __len__(self):
    """Return the number of samples."""
    return self.len

  def __getitem__(self, index):
    """Return the sample stored at `index`."""
    return self.X[index]

In [62]:
# Sequential, complete passes over both partitions: no shuffling, keep the last batch.
loader_options = dict(shuffle=False, drop_last=False)

train_set = Dataset('training')
train = DataLoader(train_set, batch_size=40, **loader_options)

test_set = Dataset('test')
test = DataLoader(test_set, batch_size=80, **loader_options)

In [63]:
# Number of batches produced by each loader.
len(train), len(test)

(1654, 200)

In [64]:
def _extract_features(extractor, loader, node='last_layer'):
    """Run `extractor` on every batch of `loader` and stack the `node` outputs.

    Parameters:
        extractor: callable returning a dict of tensors keyed by output name.
        loader: iterable of input batches, visited in order.
        node: key of the output to collect from each result dict.

    Returns:
        One NumPy array with the per-batch outputs concatenated along axis 0.
    """
    chunks = []
    # Inference only: without no_grad every forward pass builds an autograd
    # graph, which wastes memory and time for outputs that are never
    # back-propagated. It also makes an explicit .detach() unnecessary.
    with torch.no_grad():
        for batch in loader:
            chunks.append(extractor(batch)[node].cpu().numpy())
    return np.concatenate(chunks)


# Evaluation mode, so layers such as Dropout behave deterministically.
model.eval()

# TODO: ideally the output should keep a shape similar to the original EEG array.
train_feat = _extract_features(model, train)
test_feat = _extract_features(model, test)

train_feat.shape, test_feat.shape

((66160, 640), (16000, 640))

In [65]:
# Mean / min / max of the raw extracted features, training first, then test.
for feats in (train_feat, test_feat):
    print(np.mean(feats), np.min(feats), np.max(feats))

1.1973361 0.0 7.2006836
1.1641643 0.0 7.0202494


### PCA

In [66]:
# Number of principal components kept.
n_components = 100
# Fix the seed: with the default svd_solver='auto', scikit-learn may select the
# randomized SVD for inputs of this size, and without a seed the projected
# features written to disk would differ from run to run.
pca = PCA(n_components=n_components, random_state=0)

In [67]:
# Fit the projection on the training features only, then apply the same fitted
# projection to the test features.
train_feat_tr = pca.fit_transform(train_feat)
test_feat_tr = pca.transform(test_feat)
# Per-component share of the variance, then the singular values of the fit.
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

[1.91264451e-01 1.41673857e-01 8.38659008e-02 5.74373345e-02
 5.59337140e-02 4.48563686e-02 3.50602578e-02 3.19379469e-02
 2.78493296e-02 2.09154884e-02 1.98773949e-02 1.86738221e-02
 1.63519714e-02 1.57672485e-02 1.46367179e-02 1.38969822e-02
 1.24862437e-02 1.19519404e-02 1.13603638e-02 1.04933630e-02
 1.01174563e-02 9.42645416e-03 9.06485380e-03 8.70005990e-03
 8.41165806e-03 7.80425530e-03 6.99391963e-03 6.60153835e-03
 6.42254775e-03 5.52706466e-03 5.21015537e-03 4.87808236e-03
 4.82618202e-03 4.68578364e-03 4.33198143e-03 3.72794361e-03
 3.22517628e-03 3.16299984e-03 3.03335462e-03 2.70434340e-03
 2.60463197e-03 2.41169698e-03 2.39014596e-03 2.31423948e-03
 2.12570964e-03 1.85656889e-03 1.75180016e-03 1.64759575e-03
 1.37149790e-03 1.32966483e-03 1.16607010e-03 1.09162043e-03
 9.75366122e-04 9.29353859e-04 8.80224562e-04 8.40581303e-04
 7.87542776e-04 7.37045756e-04 7.13362099e-04 6.41519368e-04
 6.15154771e-04 5.76827533e-04 5.52506783e-04 4.91083216e-04
 4.85723174e-04 4.416091

In [68]:
# Shapes of the projected training and test features.
tuple(projected.shape for projected in (train_feat_tr, test_feat_tr))

((66160, 100), (16000, 100))

In [69]:
# Mean / min / max of the PCA-projected features, training first, then test.
for feats in (train_feat_tr, test_feat_tr):
    print(np.mean(feats), np.min(feats), np.max(feats))

1.3698315e-07 -17.328005 27.69034
-0.01766345 -16.795797 26.587872


In [70]:
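# Optional max-scaling of the PCA features (currently disabled).
# NOTE: if re-enabled, rename `max` so it does not shadow the builtin, and do
# not run the cell twice, because it rescales the arrays in place.
# max = np.max(train_feat_tr)
# train_feat_tr = train_feat_tr / max
# test_feat_tr = test_feat_tr / max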

In [71]:
# Re-check the statistics after the optional scaling cell; they only change
# if that scaling is enabled.
for feats in (train_feat_tr, test_feat_tr):
    print(np.mean(feats), np.min(feats), np.max(feats))

1.3698315e-07 -17.328005 27.69034
-0.01766345 -16.795797 26.587872


### Save

In [72]:
# Output directory for the projected features; created if it does not exist.
save_dir = os.path.join(path_THINGSEEG2, 'preprocessed_data_features/sub-01/')
os.makedirs(save_dir, exist_ok=True)

# Target file name -> array written to it.
outputs = {
    'preprocessed_eeg_training.npy': train_feat_tr,
    'preprocessed_eeg_test.npy': test_feat_tr,
}
for filename, feats in outputs.items():
    np.save(os.path.join(save_dir, filename), feats)