<a href="https://colab.research.google.com/github/N-Nieto/Inner_Speech_Dataset/blob/master/Database_load_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Set up - Download and import required libraries

In [None]:
!git clone https://github.com/N-Nieto/Inner_Speech_Dataset -q
!pip3 install mne -q

In [None]:
import os 
import sys

# Put the current working directory on the module search path so that
# packages located there can be imported by the cells below.
# NOTE(review): the clone above lands in ./Inner_Speech_Dataset — confirm that
# the Python_Processing package is actually reachable from the working directory.
sys.path.append(os.getcwd())

In [None]:
#@title Imports 
import pickle
from tqdm import tqdm
import random
import warnings
import pandas as pd
import numpy as np
from joblib import Parallel, delayed
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import mne
from mne.decoding import CSP

from Python_Processing.Data_extractions import  Extract_data_from_subject
from Python_Processing.Data_processing import  Select_time_window, Transform_for_classificator, Split_trial_in_time

# Seed NumPy's global random state so repeated runs give the same numbers.
np.random.seed(23)

# Restrict MNE logging to warnings so info messages do not flood the output.
mne.set_log_level('warning')

# Silence deprecation and future-change notices.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

## Data Loading.

In [None]:
### Hyperparameters

# root_dir must point to the folder that contains the database
root_dir = "./"

# Data type to load
datatype = "EEG"

# Sampling rate, used together with t_start/t_end to cut the time window
fs = 256

# Select the useful part of each trial. Times in seconds
t_start = 1.5
t_end = 3.5

# Subjects
# SUBJECTS = list(range(1, 11)) # all subjects (numbered 1 to 10)
SUBJECTS = [1] #to add any number of subjects in any order
# NOTE: the data-loading loop further down rebinds SUBJECTS and walks through
# subjects 1-10 on its own, so the value set here is overridden there.

#Classes
classes = ["Up", "Down", "Right", "Left"]

In [None]:
#Fetches the data of one subject for a single condition, covering all classes
def extract_of_subject(N_S, condition):
    """Load one subject's trials for a single condition across all `classes`.

    Relies on the notebook-level settings `root_dir`, `datatype`, `t_start`,
    `t_end`, `fs` and `classes`.

    Parameters
    ----------
    N_S : int
        Subject number forwarded to `Extract_data_from_subject`.
    condition : str
        Condition name that is paired with every entry of `classes`.

    Returns
    -------
    tuple
        `(X, Y)` exactly as returned by `Transform_for_classificator`.
    """
    # One single-class group per entry of `classes`, each paired with the same
    # condition. Both lists are derived from `classes`, so they always have
    # matching lengths regardless of how many classes are configured.
    Classes = [[class_] for class_ in classes]
    Conditions = [[condition] for _ in classes]

    X, Y = Extract_data_from_subject(root_dir, N_S, datatype)
    # Cut every trial down to the configured time window.
    X = Select_time_window(X=X, t_start=t_start, t_end=t_end, fs=fs)
    X, Y = Transform_for_classificator(X, Y, Classes, Conditions)
    print(f"Data of subject-{N_S} for condition-{condition} extracted")
    return X, Y

In [None]:
#Fetches data pertaining to multiple conditions, multiple classes of multiple subjects
#Y corresponds to conditions
def fetch_cross_cond_data(conditions, subjects=None):
    """Collect the trials of several subjects for several conditions.

    Parameters
    ----------
    conditions : sequence of str
        Condition names to load. The position of a condition in this
        sequence is the label assigned to its trials.
    subjects : iterable of int, optional
        Subject numbers to load. When omitted, the notebook-level `SUBJECTS`
        list is used (looked up at call time).

    Returns
    -------
    X : numpy.ndarray
        The per-subject, per-condition arrays returned by
        `extract_of_subject`, stacked with `np.vstack`.
    Y : numpy.ndarray
        One integer per trial in `X`: the index of the trial's condition.
    """
    if subjects is None:
        # Fall back to the global configuration.
        subjects = SUBJECTS

    X = []
    Y = []

    for label, condition in enumerate(conditions):
        for N_S in subjects:
            # The class labels returned by extract_of_subject are discarded;
            # here the target is the condition, not the class.
            X_S, _ = extract_of_subject(N_S, condition)
            X.append(X_S)
            Y.extend([label] * len(X_S))

    X = np.vstack(X)
    Y = np.array(Y)

    return X, Y

In [None]:
# Conditions to load; a trial's label is the index of its condition in this list.
conditions = ["Inner", "Pronounced", "Visualized"]

# Per-subject results: entry k holds the data of subject k + 1.
X_list, Y_list = [], []

# NOTE: fetch_cross_cond_data reads the global SUBJECTS, so it is rebound to a
# single subject on every pass; it is left as [10] once the loop has finished.
for i in range(1, 11):
    SUBJECTS = [i]
    X, Y = fetch_cross_cond_data(conditions)
    print(X.shape, Y.shape, sep="\n")
    X_list.append(X)
    Y_list.append(Y)

In [None]:
#CSP saving for csp-lda script

def extract_data(i):
    """Write the data stored at position `i` of `X_list` / `Y_list` to disk.

    The arrays are saved in the working directory as `X_{i}.npy` and
    `Y_{i}.npy`; the labels are flattened before saving. `i` is the
    zero-based list position, so it is one less than the subject number
    used while loading.
    """
    print(f"Subject - {i}")

    features = X_list[i]
    labels = Y_list[i].flatten()

    print("X shape: ", features.shape)
    print("Y shape: ", labels.shape)

    # Same save procedure for both arrays; only the file name differs.
    for filename, array in ((f"X_{i}.npy", features), (f"Y_{i}.npy", labels)):
        with open(filename, "wb") as handle:
            np.save(handle, array)
    
# Save every loaded entry rather than a hard-coded ten, so the call stays in
# step with X_list. The trailing semicolon keeps the list of None results out
# of the cell output.
Parallel(n_jobs=-1, verbose=1)(delayed(extract_data)(i) for i in tqdm(range(len(X_list))));

In [None]:
#Saving cross subject data

# Pool all subjects: their trials are stacked along the first axis.
X = np.vstack(X_list)
# Each per-subject label vector is 1-D, so join them end to end. Unlike
# np.vstack, this also works when subjects contribute different numbers of
# trials; for equal lengths the result is identical.
Y = np.concatenate([np.ravel(y) for y in Y_list])

print("X shape: ", X.shape)
print("Y shape: ", Y.shape)

with open("X_cross_subject.npy", "wb") as f:
    np.save(f, X)

with open("Y_cross_subject.npy", "wb") as f:
    np.save(f, Y)