# Extraemos todas las features de todos los pacientes

In [None]:
# Importación de librerías
# generales
import os
import pickle
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mne import io
# propias
from FVfunctions import getMeData, getMeFeatures
from dataset_reader import get_seizure_events, get_seizure_array
from online_signal_test import online_signal_test
# sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.svm import LinearSVC, SVC
from sklearn import model_selection as ms

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Walk the dataset tree and collect (a) the path of every EDF recording and
# (b) every seizure event returned by get_seizure_events for each .txt file.
DATASET_DIR = r"../eeg_dataset/physionet.org/files/siena-scalp-eeg/1.0.0/"
edf_names = []
seizure_events = []

# sorted() makes the traversal order reproducible; os.listdir returns entries
# in arbitrary, platform-dependent order.
for directory in sorted(os.listdir(DATASET_DIR)):
    patient_dir = os.path.join(DATASET_DIR, directory)
    # The dataset root may also contain plain files; calling os.listdir on
    # one of them would raise NotADirectoryError, so skip them.
    if not os.path.isdir(patient_dir):
        continue

    print('='*50)
    print(f"{directory: ^50}")
    print('='*50)

    for filename in sorted(os.listdir(patient_dir)):
        # Paths are built with '/' on purpose: later cells recover the file
        # name with edf.split('/')[-1].
        file_path = f"{DATASET_DIR}/{directory}/{filename}"
        if filename.endswith('.edf'):
            edf_names.append(file_path)
        elif filename.endswith('.txt'):
            new_seizures = get_seizure_events(file_path)
            seizure_events.extend(new_seizures)
            # Dump every parsed event as "key: value" lines for inspection.
            for event in new_seizures:
                print('\n'.join([f"{key}: {val}" for key, val in event.items()]))
                print()

print(edf_names)

In [None]:
# Build the feature matrix: for every recording that has at least one seizure
# event, read the selected channels, cut them into segments with getMeData and
# turn the segments into feature rows with getMeFeatures.
channels = ['EEG T3', 'EEG T5'] #, 'EEG F7', 'EEG F3', 'EEG C3', 'EEG P3']
feature_vectors = []
labels = []
fs = None

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for edf in edf_names:
        # Step 1: recording time span and seizure intervals
        name = edf.split('/')[-1]

        print('='*50)
        print(f"{name: ^50}")
        print('='*50)

        this_seizure_events = [seiz for seiz in seizure_events if seiz["file_name"]==name]
        if not this_seizure_events:
            print('nada en', name)
            continue

        # Registration start/end are taken from the first event of the file.
        ex_seiz = this_seizure_events[0]
        mtx_t_reg = np.array([ex_seiz['registration_start_time'], ex_seiz['registration_end_time']])
        arr_mtx_t_epi = get_seizure_array(this_seizure_events)

        # Step 2: load the signal
        raw = io.read_raw_edf(edf)
        print('\n\n')
        # Read the sampling rate of *this* recording. Reusing the rate of the
        # first file would be wrong for any recording sampled differently.
        fs = raw.info['sfreq']
        raw_filt = raw.pick(channels)
        # Scaled by 1e6 -- presumably volts to microvolts; confirm against
        # the units expected by getMeData/getMeFeatures.
        data_namefilt = raw_filt.get_data() * 1e6

        # Step 3: segment the signal and extract features
        # NOTE(review): winlen looks like a window length in seconds and
        # proportion like a class-balance ratio -- confirm in FVfunctions.
        new_seg_list, new_label = getMeData(data_namefilt,
                                            mtx_t_reg,
                                            arr_mtx_t_epi,
                                            winlen=2,
                                            proportion=0.5)
        new_features = getMeFeatures(new_seg_list, channels, fs)
        feature_vectors.append(new_features)
        labels.append(new_label)

# Fail with an explicit message instead of numpy's generic
# "need at least one array to concatenate".
if not feature_vectors:
    raise ValueError("No features extracted: no EDF file matched any seizure event")

labels_arr = np.concatenate(labels)
df_fv = pd.concat(feature_vectors)
df_fv

# MACHINE LEARNING

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# identical between runs.
train_x, test_x, train_y, test_y = train_test_split(
    df_fv,
    labels_arr,
    random_state=0,
    test_size=0.2,
)

In [None]:
# Grid-search the LinearSVC regularisation parameter C using the mean
# 5-fold cross-validation score, then plot score versus C.
C = np.arange(1e-15,1,0.005) # Candidate C values: from 1e-15 up to (not including) 1, step 0.005
val_scores = np.zeros((len(C),1)) # Mean validation score for each candidate C


with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for i, c_value in enumerate(C):
        # LinearSVC defaults: squared hinge loss with l2 regularisation.
        # max_iter matches the final model fitted with C_star.
        model = LinearSVC(dual=True, max_iter=2000, C=c_value)
        # Cross-validate on the training split only: including the held-out
        # test rows here would leak them into the choice of C.
        score = ms.cross_val_score(model, train_x, train_y, cv=5)
        val_scores[i] = score.mean() # Mean of the 5 fold scores

# Pick the C that maximises the mean cross-validation score
ind = np.argmax(val_scores)
C_star = C[ind]
print('C_star =', C_star)

# Plot mean CV score versus C, with a dashed vertical marker at C_star
plt.plot(C,val_scores)
plt.plot(np.ones(11)*C_star,np.arange(0,1.1,0.1),'--r')
plt.xlabel('C')
plt.ylabel('Mean Cross-Validation Accuracy')
plt.grid(True)
plt.show()

In [None]:
# Fit a linear SVM with the selected C on the training split and predict the
# held-out test split.
linear_model = LinearSVC(C=C_star, dual=True, max_iter=2000).fit(train_x, train_y)
predictions = linear_model.predict(test_x)

In [None]:
# Accuracy and F1 of the linear model on the test split, printed one per line.
Accuracy, F1_score = accuracy_score(test_y, predictions), f1_score(test_y, predictions)
print(f"{Accuracy=:.3f}", f"{F1_score=:.3f}", sep='\n')

In [None]:
# Confusion matrix of the linear model on the test split, drawn as a heatmap
# (rows: true class, columns: predicted class).
cm = confusion_matrix(test_y, predictions)
clasificaciones = ["No epilepsia","Epilepsia"]
cm_df = pd.DataFrame(data=cm,index=clasificaciones,columns=clasificaciones)
# fmt='d' prints the integer counts as-is; seaborn's default '.2g' would show
# counts of 100 or more in scientific notation (e.g. 1.2e+02).
ax = sns.heatmap(cm_df, cmap='Blues', annot=True, fmt='d')
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label");

In [None]:
# RBF-kernel SVM: fit on the training split, then report accuracy, F1 and the
# confusion matrix on the test split.
rbf_model = SVC(kernel='rbf')
rbf_model.fit(train_x, train_y)
rbf_pred = rbf_model.predict(test_x)
print(f"{accuracy_score(test_y, rbf_pred)=}")
print(f"{f1_score(test_y, rbf_pred)=}")
cm_rbf = confusion_matrix(test_y, rbf_pred)
# fmt='d' prints the integer counts as-is; seaborn's default '.2g' would show
# counts of 100 or more in scientific notation.
ax = sns.heatmap(cm_rbf, annot=True, cmap='Blues', fmt='d')
# Label the axes like the other confusion-matrix plots in this notebook.
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")

In [None]:
# with open('pretrained_models/2-ch-gonza.pkl', 'wb') as file:
#     pickle.dump(linear_model, file)

In [None]:
# Load the previously saved classifier and evaluate it on the test split.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
with open(r'pretrained_models/2-ch-gonza.pkl', 'rb') as file:
    modelo_leido = pickle.load(file)

clasificaciones = ["No epilepsia","Epilepsia"]
predictions = modelo_leido.predict(test_x)
print(accuracy_score(test_y, predictions))

# Confusion matrix as a heatmap (rows: true class, columns: predicted class).
cm = confusion_matrix(test_y, predictions)
cm_df = pd.DataFrame(data=cm,index=clasificaciones,columns=clasificaciones)
# fmt='d' prints the integer counts as-is; seaborn's default '.2g' would show
# counts of 100 or more in scientific notation.
ax = sns.heatmap(cm_df, cmap='Blues', annot=True, fmt='d')
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")

# LUKITESTING

In [None]:
# Fit the same LinearSVC configuration repeatedly and record each model's
# test accuracy, to see how much the solver's randomness moves the result.
# NOTE(review): choosing the "best" of these by test accuracy (done in the
# following cells) selects on the test set, so that score is optimistic.
modelos = []
scores = []

iteraciones = 100

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for i in range(iteraciones):
        # With dual=True the solver shuffles the data randomly. Seeding with
        # the iteration index keeps the runs different from each other while
        # making the whole experiment reproducible.
        linear_model = LinearSVC(dual=True,max_iter=2000,random_state=i)
        linear_model.fit(train_x, train_y)
        predictions = linear_model.predict(test_x)
        score = accuracy_score(test_y, predictions)

        modelos.append(linear_model)
        scores.append(score)

# Iteration indices, used as the x axis when plotting the scores.
arr_n = np.arange(len(scores))

In [None]:
# Plot test accuracy per iteration and mark the best iteration with a dashed
# red vertical line.
ind = np.argmax(scores)
n = arr_n[ind]

score_fig, score_ax = plt.subplots(figsize=(7, 5))
score_ax.plot(arr_n, scores)
score_ax.axvline(x=n, color="red", linestyle="--")
plt.show()



In [None]:
# Take the model with the highest recorded test accuracy and show its
# accuracy and confusion matrix on the test split.
ind = np.argmax(scores)
modelo_star = modelos[ind]

predictions = modelo_star.predict(test_x)
print(accuracy_score(test_y, predictions))
cm = confusion_matrix(test_y, predictions)

clasificaciones = ["No epilepsia","Epilepsia"]
cm_df = pd.DataFrame(data=cm,index=clasificaciones,columns=clasificaciones)

# fmt='d' prints the integer counts as-is; seaborn's default '.2g' would show
# counts of 100 or more in scientific notation.
ax = sns.heatmap(cm_df, cmap='Blues', annot=True, fmt='d')
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")

# LUKITESTING - PT.2 - ML para segmentar EEGs

In [None]:
# Reload the saved classifier from disk.
# NOTE: pickle.load can run arbitrary code; only open trusted model files.
with open('pretrained_models/2-ch-gonza.pkl', mode='rb') as model_file:
    modelo_leido = pickle.load(model_file)

In [None]:
# Switch matplotlib to the 'ggplot' style sheet for the figures drawn after
# this point.
plt.style.use('ggplot')

In [None]:
# Run the loaded classifier over every recording that has seizure events,
# calling online_signal_test once per seizure.
channels = ['EEG T3', 'EEG T5'] #, 'EEG F7', 'EEG F3', 'EEG C3', 'EEG P3']
indexes = []
labels = []
fs = None

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for edf in edf_names:
        # Step 1: find the seizure events that belong to this recording
        name = edf.split('/')[-1]
        this_seizure_events = [seiz for seiz in seizure_events if seiz["file_name"]==name]

        if not this_seizure_events:
            print('nada en', name)
            continue

        # Step 2: load the signal
        raw = io.read_raw_edf(edf)
        print('\n\n')
        # Read the sampling rate of *this* recording. Reusing the rate of the
        # first file would be wrong for any recording sampled differently.
        fs = raw.info['sfreq']
        raw_filt = raw.pick(channels)
        # Scaled by 1e6 -- presumably volts to microvolts; confirm against
        # the units the model's features were computed in.
        data_namefilt = raw_filt.get_data() * 1e6

        # Plot the classification applied "in real time", once per seizure
        for seiz in this_seizure_events:
            online_signal_test(modelo_leido, data_namefilt, channels, seiz, name, fs)
