In [1]:
import pandas as pd
import numpy as np
import glob
import os
import mne

# Load data

In [2]:
# example 1_al_ciplv_theta.npy
def parse_filename(filename):
    s = filename.split("_")
    pId = s[0]
    label = s[1]
    method = s[2]
    freq = s[-1].split(".")[0]
    
    return {"pId": pId, "label": label, "method": method, "freq": freq}

def read_file(filename):
    return np.load(filename)

In [3]:
# get all the files in the folder output/
def get_files(folder, filter = None):
    '''
    filter is a dictionary, has 2 keys: method and freq to filter the files
    '''
    files = glob.glob(os.path.join(folder, "*.npy"))
    ret_files = []
    if filter:
        for f in files:
            # get the filename
            filename = os.path.basename(f)
            f_info = parse_filename(filename)
            if (f_info["method"] == filter["method"]) and (f_info["freq"] == filter["freq"]):
                ret_files.append(f)
    else:
        ret_files = files

    return ret_files

In [4]:
# get the labels for electrodes
fs_dir = mne.datasets.fetch_fsaverage(verbose=True)
subjects_dir = os.path.dirname(fs_dir)
labels = mne.read_labels_from_annot('fsaverage', parc='aparc',
                                    subjects_dir=subjects_dir)
labels.pop(-1)
label_colors = [label.color for label in labels]

0 files missing from root.txt in C:\Users\habom\mne_data\MNE-fsaverage-data
0 files missing from bem.txt in C:\Users\habom\mne_data\MNE-fsaverage-data\fsaverage
Reading labels from parcellation...
   read 35 labels from C:\Users\habom\mne_data\MNE-fsaverage-data\fsaverage\label\lh.aparc.annot
   read 34 labels from C:\Users\habom\mne_data\MNE-fsaverage-data\fsaverage\label\rh.aparc.annot


# Preprocessing

In [5]:
filter = {"method": "ciplv", "freq": "theta"}
files = get_files("output", filter)

In [6]:
# take one file and read it
file = files[0]
data = read_file(file)

In [7]:
def feature_extraction(data):
    """
    get lower part of diagonal matrix
    """
    data_lower = data[np.tril_indices(data.shape[0], k=-1)]
    return data_lower

# Data preparation

In [26]:
# get files
methods = ['pli', 'wpli2', 'ciplv']
freqs = ['delta', 'theta', 'alpha', 'beta', 'gamma']

filter = {"method": "wpli2", "freq": "delta"}
files = get_files("output", filter)

In [27]:
X = []
Y = []

In [28]:
labels = {"al": 0, "fa":1}

In [29]:
for f in files:
    data = read_file(f)
    X.append(feature_extraction(data))
    Y.append(labels.get(parse_filename(os.path.basename(f))["label"]))

In [30]:
X = np.array(X)
Y = np.array(Y)

In [31]:
# feature selection
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel

lsvc = LinearSVC(C=0.001, penalty="l2", dual=False).fit(X, Y)
model = SelectFromModel(lsvc, prefit=True)
X_new = model.transform(X)

In [14]:
X_new = X

In [15]:
X_new.shape

(96, 2278)

In [16]:
# split into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X_new, Y, test_size=0.3, random_state=12)

# Training model

In [17]:
# Use classification model
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC
from sklearn.metrics import mean_squared_error

# fit model
model = SVC(kernel='linear')
model.fit(X_train, Y_train)

# evaluate model
Y_pred = model.predict(X_test)
mse = mean_squared_error(Y_test, Y_pred)
print("MSE: %.4f" % mse)

# y predicted to binary
Y_pred_binary = np.where(Y_pred > 0.5, 1, 0)

# print classification report
from sklearn.metrics import classification_report
print(classification_report(Y_test, Y_pred_binary))


MSE: 0.4483
              precision    recall  f1-score   support

           0       0.53      0.57      0.55        14
           1       0.57      0.53      0.55        15

    accuracy                           0.55        29
   macro avg       0.55      0.55      0.55        29
weighted avg       0.55      0.55      0.55        29



In [19]:
## tensoflow neural network
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import SGD

# create model
model = Sequential()
model.add(Dense(units=256, input_dim=X_new.shape[1]))
model.add(Activation('relu'))
model.add(Dropout(0.3))
model.add(Dense(units=128))
model.add(Activation('relu'))
model.add(Dropout(0.3))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.01), metrics=['accuracy'])
# fit model with validation data
model.fit(X_train, Y_train, epochs=100, batch_size=8, verbose=1, validation_data=(X_test, Y_test))

# clasification report
Y_pred = model.predict(X_test)
Y_pred_binary = np.where(Y_pred > 0.5, 1, 0)

Epoch 1/100


  super(SGD, self).__init__(name, **kwargs)


ValueError: in user code:

    File "c:\Users\habom\anaconda3\envs\kia\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\habom\anaconda3\envs\kia\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\habom\anaconda3\envs\kia\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\habom\anaconda3\envs\kia\lib\site-packages\keras\engine\training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\habom\anaconda3\envs\kia\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\habom\anaconda3\envs\kia\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 973), found shape=(None, 2278)


In [69]:
print(classification_report(Y_test, Y_pred_binary))


              precision    recall  f1-score   support

           0       0.92      0.86      0.89        14
           1       0.88      0.93      0.90        15

    accuracy                           0.90        29
   macro avg       0.90      0.90      0.90        29
weighted avg       0.90      0.90      0.90        29



In [72]:
from sklearn.metrics import accuracy_score
acc = accuracy_score(Y_test, Y_pred_binary).round(2)

In [55]:
from airtable.airtable import AirTableClient
from airtable.config import config


In [63]:
atc = AirTableClient(**config["airtable"])
atc

<airtable.airtable.AirTableClient at 0x2a05390c388>

In [76]:
atc.add_row({ "method": "pli", "frequency": "theta", "accuracy": str(acc), "full accuracy report": classification_report(Y_test, Y_pred_binary)})

# Full Pipeline