In [1]:
import pandas as pd
import numpy as np

import scipy.io as sio
from mat4py import loadmat

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

import pickle

# Criando os métodos

In [2]:
def geraDatas(iniPessoas = 1, qtdPessoas = 1):
    """Load the two session recordings of each requested subject.

    For every subject number in ``range(iniPessoas, iniPessoas + qtdPessoas)``
    the files ``NN_session1.mat`` and ``NN_session2.mat`` are read with
    ``loadmat`` (``NN`` is the subject number zero-padded to two digits; the
    paths are relative, so they resolve against the working directory).

    Parameters
    ----------
    iniPessoas : int
        First subject number to load.
    qtdPessoas : int
        How many consecutive subject numbers to try.

    Returns
    -------
    list of tuple
        One ``(session1, session2, subject_number)`` tuple per subject whose
        two files were both read. Subjects that fail are reported on stdout
        and skipped, so the list may be shorter than ``qtdPessoas``.
    """
    #Collecting the data
    datas = []

    for i in range(iniPessoas, iniPessoas + qtdPessoas):
        prefixo = str(i).zfill(2)
        try:
            sessao1 = loadmat(prefixo + "_session1.mat")
            sessao2 = loadmat(prefixo + "_session2.mat")
        except FileNotFoundError:
            # Best-effort loading: a missing subject is reported, not fatal.
            print("Arquivo " + prefixo + " não existe")
            continue
        except Exception as erro:
            # Any other read/parse failure is also skipped, but reported with
            # its real cause instead of being mislabelled as a missing file.
            # KeyboardInterrupt/SystemExit are deliberately NOT caught here.
            print("Arquivo " + prefixo + " não pôde ser lido: " + repr(erro))
            continue

        # The subject number travels with the data so that later stages can
        # name the subject in their own error messages (skipped subjects make
        # list positions unreliable as identifiers).
        datas.append((sessao1, sessao2, i))

    return datas

In [35]:
def geraDf(datas):
    """Build one labelled DataFrame from the loaded session recordings.

    Parameters
    ----------
    datas : list of tuple
        One entry per subject. ``entry[0]`` and ``entry[1]`` are the two
        session mappings; each is indexed with the keys
        ``"dataBeforeSession"``, ``"dataAfterSession"`` and
        ``"dataDuringSession"``. An optional ``entry[2]`` is used as the
        subject identifier in error messages.

    Returns
    -------
    pandas.DataFrame
        Sixteen electrode columns plus a ``State`` column (0 = relaxed,
        1 = concentrated), with a fresh 0..n-1 index. All relaxed rows come
        first, followed by all concentrated rows.

    Raises
    ------
    ValueError
        If no subject in ``datas`` could be converted.
    """
    eletrodos = ["F3", "F4", "AF3", "AF4", "C5", "C6", "C3", "C4", "C1", "C2", "P3", "P4", "P5", "P6", "Fz", "Cz"]

    #Collecting the frames of each class
    lisRes = []
    lisCon = []

    for posicao, pessoa in enumerate(datas, start = 1):
        try:
            # Build every frame for this subject BEFORE touching the shared
            # lists, so a failure halfway through cannot leave a subject
            # contributing relaxed rows without concentrated ones.
            relaxado = [
                pd.DataFrame(pessoa[0]["dataBeforeSession"], columns = eletrodos),
                pd.DataFrame(pessoa[0]["dataAfterSession"], columns = eletrodos),
                pd.DataFrame(pessoa[1]["dataBeforeSession"], columns = eletrodos),
                pd.DataFrame(pessoa[1]["dataAfterSession"], columns = eletrodos),
            ]
            # NOTE(review): only session 1's "during" data is used; session 2's
            # pessoa[1]["dataDuringSession"] is never read — confirm whether
            # that is intentional (e.g. for class balance).
            concentrado = [
                pd.DataFrame(pessoa[0]["dataDuringSession"], columns = eletrodos),
            ]
        except Exception as erro:
            # Prefer the subject number carried in the entry; fall back to the
            # 1-based position in `datas` when the entry has no third element.
            try:
                ident = pessoa[2]
            except (IndexError, KeyError, TypeError):
                ident = posicao
            print("Erro na Pessoa " + str(ident).zfill(2) + ": " + repr(erro))
            continue

        lisRes.extend(relaxado)
        lisCon.extend(concentrado)

    if not lisRes or not lisCon:
        raise ValueError("Nenhum dado válido para montar o DataFrame")

    #Building and labelling the per-class frames
    dfRes = pd.concat(lisRes).assign(State = 0) #Relaxed
    dfCon = pd.concat(lisCon).assign(State = 1) #Concentrated

    #Joining both classes under a single fresh integer index
    return pd.concat([dfRes, dfCon], ignore_index = True)

In [18]:
def testeEspecificidade(modelo, iniPessoas = 1, qtdPessoas = 1):
    df = geraDf(geraDatas(5, 2))
    testePrecisao(modelo, df.drop('State',axis=1), df['State'])

In [19]:
def testePrecisao(modelo, df_x, df_y):
    predict = modelo.predict(df_x)
    print(classification_report(df_y, predict))

In [20]:
def geraModeloML(iniPessoas = 1, qtdPessoas = 1, estimators = 50, random_state = None):
    """Load subject recordings, train a random forest and report its accuracy.

    Parameters
    ----------
    iniPessoas : int
        First subject number to load.
    qtdPessoas : int
        How many consecutive subject numbers to load.
    estimators : int
        Number of trees in the forest.
    random_state : int or None
        Seed forwarded to both the train/test split and the forest. The
        default ``None`` keeps the previous unseeded behaviour; pass an
        integer to make the split and the model reproducible.

    Returns
    -------
    RandomForestClassifier
        The fitted classifier. Progress messages and a classification report
        on the held-out 20% of rows are printed along the way.
    """
    #Collecting the data
    datas = geraDatas(iniPessoas, qtdPessoas)
    print("Dados coletados")

    #Building the df
    df = geraDf(datas)
    print("---------------")
    print("DataFrame criado")
    print("---------------")

    #Splitting the data
    # NOTE(review): this is a row-level shuffle split, so rows of the same
    # subject land in both train and test; the printed score is therefore not
    # a measure of generalisation to unseen subjects.
    x_train, x_test, y_train, y_test = train_test_split(df.drop('State',axis=1),
                                                    df['State'], test_size=0.20,
                                                    random_state=random_state)

    #Training
    dtree = RandomForestClassifier(n_estimators = estimators, verbose = 2, n_jobs = -1,
                                   random_state = random_state)
    dtree.fit(x_train, y_train)
    print("---------------")
    print("Modelo treinado")
    print("---------------")

    #Accuracy test
    testePrecisao(dtree, x_test, y_test)

    return dtree

# Usando ML

In [30]:
# Train a 25-tree forest on subject numbers 1 through 6.
dtree = geraModeloML(iniPessoas = 1, qtdPessoas = 6, estimators = 25)

Arquivo 03 não existe
Dados coletados


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


---------------
DataFrame criado
---------------


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.


building tree 1 of 25
building tree 2 of 25
building tree 3 of 25
building tree 4 of 25
building tree 5 of 25
building tree 6 of 25building tree 7 of 25building tree 8 of 25


building tree 9 of 25
building tree 10 of 25
building tree 11 of 25
building tree 12 of 25
building tree 13 of 25
building tree 14 of 25
building tree 15 of 25
building tree 16 of 25
building tree 17 of 25
building tree 18 of 25
building tree 19 of 25
building tree 20 of 25
building tree 21 of 25
building tree 22 of 25
building tree 23 of 25
building tree 24 of 25
building tree 25 of 25


[Parallel(n_jobs=-1)]: Done  23 out of  25 | elapsed:  1.1min remaining:    5.7s
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed:  1.2min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.


---------------
Modelo treinado
---------------


[Parallel(n_jobs=8)]: Done  23 out of  25 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=8)]: Done  25 out of  25 | elapsed:    0.9s finished


              precision    recall  f1-score   support

           0       0.90      0.88      0.89    122690
           1       0.90      0.92      0.91    137250

    accuracy                           0.90    259940
   macro avg       0.90      0.90      0.90    259940
weighted avg       0.90      0.90      0.90    259940



In [31]:
#Specificity test
testeEspecificidade(modelo = dtree, iniPessoas = 7, qtdPessoas = 5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  23 out of  25 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=8)]: Done  25 out of  25 | elapsed:    1.1s finished


              precision    recall  f1-score   support

           0       0.97      0.96      0.96    245761
           1       0.96      0.97      0.96    244126

    accuracy                           0.96    489887
   macro avg       0.96      0.96      0.96    489887
weighted avg       0.96      0.96      0.96    489887



In [14]:
#Saving the model
nome = 'FRANCESCO_VIRGOLINI'
# The context manager closes (and therefore flushes) the file even if
# pickling fails; a bare open() here would leave the handle dangling.
# NOTE: pickle files can execute arbitrary code when loaded — only load
# model files that come from a trusted source.
with open(nome + '.sat', 'wb') as arquivo:
    pickle.dump(dtree, arquivo)

# Testes de arquivos com erro

In [16]:
# Load both session files of subject 24 as a (session1, session2) pair.
mats = (
    loadmat("24_session1.mat"),
    loadmat("24_session2.mat"),
)

In [17]:
# Print each item yielded by iterating over the first session object.
sessao1 = mats[0]
for i in sessao1:
    print(i)

dataAfterSession
dataBeforeSession
dataDuringSession
date1
date2


In [18]:
# Print each item yielded by iterating over the second session object.
sessao2 = mats[1]
for i in sessao2:
    print(i)

dataAfterSession
dataBeforeSession
dataDuringSession
date1
date2


# Testes de erros

In [108]:
# Display the DataFrame `df` as the cell's rich output.
# NOTE(review): in the cells visible here `df` is only ever assigned as a
# local variable inside functions, never at top level — this cell presumably
# relies on state from a cell that is not shown; confirm it runs on a fresh
# kernel.
df

Unnamed: 0,F3,F4,AF3,AF4,C5,C6,C3,C4,C1,C2,P3,P4,P5,P6,Fz,Cz,State
0,-0.441269,0.097275,-0.392340,0.063542,-0.377676,0.067796,-0.532907,-0.400908,-0.085700,-0.035990,-0.047912,-0.031253,-0.063908,-0.055511,-0.100999,-0.128917,1
1,-0.460934,0.190246,-0.353629,0.119839,-0.359585,0.116139,-0.559365,-0.381257,-0.037594,0.053051,-0.013619,0.035111,-0.025323,0.012902,0.007747,-0.066189,1
2,-0.435201,0.374306,-0.165010,0.240844,-0.215249,0.167403,-0.522595,-0.271136,0.041321,0.240608,0.039850,0.137700,0.085469,0.121966,0.239666,0.067196,1
3,-0.181488,0.482804,0.178046,0.351339,-0.003752,0.093290,-0.232500,-0.022617,0.096894,0.402355,0.080995,0.157154,0.225024,0.159010,0.456962,0.168131,1
4,0.064328,0.467191,0.318048,0.438523,0.021823,-0.020052,0.061440,0.054075,0.076219,0.382023,0.088977,0.061610,0.234157,0.105605,0.484027,0.113281,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
489882,0.115062,0.343209,0.173172,0.334370,0.164720,0.309254,0.142191,0.101277,0.322326,0.250737,0.134285,0.206882,0.176317,0.306857,0.341084,0.323647,0
489883,0.195988,0.326492,0.216806,0.319161,0.208428,0.285438,0.217054,0.177621,0.296716,0.235304,0.124468,0.196079,0.174263,0.282294,0.323843,0.300442,0
489884,0.272219,0.305049,0.244604,0.300065,0.236844,0.263191,0.254883,0.217745,0.273738,0.223791,0.115687,0.185164,0.186798,0.262393,0.304680,0.275629,0
489885,0.288149,0.293758,0.278124,0.289324,0.242794,0.251438,0.261904,0.241958,0.262298,0.218120,0.104999,0.175924,0.213346,0.249173,0.294479,0.260069,0
