In [None]:
# pip install -U scikit-learn

In [2]:
import pandas as pd
import numpy as np

import import_ipynb
import modelofuncoes as mf

from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier

In [3]:
# AdaBoost baseline configuration.
# The explicit `algorithm='SAMME.R'` and `base_estimator=None` arguments only
# restated older library defaults, and recent scikit-learn releases reject them
# (`base_estimator` was renamed to `estimator`; 'SAMME.R' was deprecated and
# later removed), so they are left out to keep this cell runnable after an upgrade.
# NOTE(review): on releases without SAMME.R the library's default algorithm is
# used, so scores may differ from previously recorded runs — confirm the
# installed scikit-learn version.
hiperparametrosAB = AdaBoostClassifier(learning_rate=1.0, n_estimators=50, random_state=410)

# Random forest baseline configuration.
# `max_features='sqrt'` is what the former 'auto' alias meant for classifiers;
# 'auto' was deprecated and then removed from scikit-learn, while 'sqrt' is
# accepted by both old and new releases.
hiperparametrosRF = RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                                           criterion='gini', max_depth=None, max_features='sqrt',
                                           max_leaf_nodes=None, max_samples=None,
                                           min_impurity_decrease=0.0,
                                           min_samples_leaf=1, min_samples_split=2,
                                           min_weight_fraction_leaf=0.0, n_estimators=100,
                                           n_jobs=-1, oob_score=False, random_state=7374, verbose=0,
                                           warm_start=False)

# Logistic regression baseline configuration.
# `multi_class='auto'` only restated the library default and the parameter is
# deprecated in recent scikit-learn releases, so it is no longer passed.
hiperparametrosLR = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                                       intercept_scaling=1, l1_ratio=None, max_iter=1000,
                                       n_jobs=None, penalty='l2',
                                       random_state=2576, solver='lbfgs', tol=0.0001, verbose=0,
                                       warm_start=False)
                    

# Gradient boosting baseline configuration.
# `loss='deviance'` only restated the library default; that spelling was
# renamed to 'log_loss' and later removed from scikit-learn. Omitting the
# argument selects the default loss on old and new releases alike.
hiperparametrosGB = GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None,
                           random_state=2333, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [4]:
# Registry of the configured estimators, keyed by the short code that is used
# as the model name in the evaluation loop and in the results table.
modelos = dict(
    AB=hiperparametrosAB,
    RF=hiperparametrosRF,
    LR=hiperparametrosLR,
    GB=hiperparametrosGB,
)

In [13]:
# Maps a human-readable dataset label to its workbook path: one entry for each
# Sao Paulo batch (1 to 5) in both the 60/40 and the 70/30 variant, ordered by
# batch and then by variant.
bases = {
    "Sao Paulo - Lote {} ({}/{})".format(lote, maior, menor):
        "SP/sp-l{}-{}{}-2022.xlsx".format(lote, maior, menor)
    for lote in range(1, 6)
    for maior, menor in (("60", "40"), ("70", "30"))
}

In [15]:
# Name of the target column; handed to mf.criarTreinamentoTeste and mf.calcularKfolds.
alvo = 'evolucaoCaso'

# Column names handed to mf.carregarBase.
# NOTE(review): presumably these columns are dropped when a base is loaded — confirm in modelofuncoes.
colunasRemovidas = [
    'disturbiosOlfativos',
    'disturbiosGustatorios',
    'puerpera',
    'fragilidadeImuno',
    'gestante',
    'obesidade',
]

In [16]:
def converterMapas(baseMap, metricasMap, kfoldsMap):
    """Merge three mappings into a single-row DataFrame.

    Parameters
    ----------
    baseMap, metricasMap, kfoldsMap : dict
        Mappings whose keys become the columns of the result. When a key
        occurs in more than one mapping, the later mapping wins
        (kfoldsMap over metricasMap over baseMap).

    Returns
    -------
    pandas.DataFrame
        One row holding the merged key/value pairs, with columns in
        first-seen key order.
    """
    # Build a fresh dict instead of calling baseMap.update(...), so the
    # caller's baseMap is left untouched.
    linha = {**baseMap, **metricasMap, **kfoldsMap}
    return pd.DataFrame([linha])

def juntarBases(base_1, base_2):
    """Stack two pandas objects with pd.concat, base_1 first, then base_2.

    Uses pd.concat's defaults, so the rows keep their original index labels.
    """
    partes = [base_1, base_2]
    empilhado = pd.concat(partes)
    return empilhado

In [18]:
# Evaluate every configured model on every dataset, producing one result row
# per (dataset, model) pair.
linhasDesempenho = []
for nomeBase, caminhoBase in bases.items():
    covidData = mf.carregarBase(caminhoBase, True, colunasRemovidas)
    x_train, x_test, y_train, y_test = mf.criarTreinamentoTeste(0.3, covidData, alvo)

    for nomeModelo, estimador in modelos.items():
        # Hand over the estimator registered under this model's own key rather
        # than a fixed "AB" entry, so each model runs with its own configuration.
        modelo = mf.criarModelo(nomeModelo, estimador)
        baseMap = {'Base': nomeBase, 'Modelo': nomeModelo, 'Hiperparâmetros': modelo.get_params()}

        metricasMap = mf.calcularMetricas(x_train, x_test, y_train, y_test, covidData, modelo)
        kfoldsMap = mf.calcularKfolds(covidData, alvo, modelo)
        linhasDesempenho.append(converterMapas(baseMap, metricasMap, kfoldsMap))

# Concatenate once at the end instead of re-copying the growing frame on every
# iteration; ignore_index gives each row a unique 0..n-1 label. pd.concat
# raises on an empty list, hence the guard.
desempenhoDosModelos = (pd.concat(linhasDesempenho, ignore_index=True)
                        if linhasDesempenho else pd.DataFrame())

desempenhoDosModelos

Unnamed: 0,Base,Modelo,Hiperparâmetros,Acurácia,Precisão (0),Recall (0),F1-Score (0),Precisão (1),Recall (1),F1-Score (1),3 Kfolds,5 Kfolds
0,Sao Paulo - Lote 1 (60/40),AB,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",81.9%,74.545%,61.194%,67.213%,84.337%,90.909%,87.5%,82.449%,83.673%
0,Sao Paulo - Lote 1 (60/40),RF,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",82.805%,80.851%,56.716%,66.667%,83.333%,94.156%,88.415%,81.497%,82.449%
0,Sao Paulo - Lote 1 (60/40),LR,"{'C': 1.0, 'class_weight': None, 'dual': False...",83.71%,79.245%,62.687%,70.0%,85.119%,92.857%,88.82%,83.673%,85.034%
0,Sao Paulo - Lote 1 (60/40),GB,"{'ccp_alpha': 0.0, 'criterion': 'friedman_mse'...",83.258%,81.25%,58.209%,67.826%,83.815%,94.156%,88.685%,83.81%,83.946%
0,Sao Paulo - Lote 1 (70/30),AB,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",88.645%,81.579%,56.364%,66.667%,89.787%,96.789%,93.157%,85.167%,85.385%
0,Sao Paulo - Lote 1 (70/30),RF,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",86.447%,70.455%,56.364%,62.626%,89.52%,94.037%,91.723%,83.519%,83.626%
0,Sao Paulo - Lote 1 (70/30),LR,"{'C': 1.0, 'class_weight': None, 'dual': False...",87.912%,80.556%,52.727%,63.736%,89.03%,96.789%,92.747%,85.716%,85.934%
0,Sao Paulo - Lote 1 (70/30),GB,"{'ccp_alpha': 0.0, 'criterion': 'friedman_mse'...",86.447%,73.684%,50.909%,60.215%,88.511%,95.413%,91.832%,85.496%,85.604%
0,Sao Paulo - Lote 2 (60/40),AB,"{'algorithm': 'SAMME.R', 'base_estimator': Non...",82.796%,76.087%,62.5%,68.627%,85.0%,91.538%,88.148%,82.882%,83.681%
0,Sao Paulo - Lote 2 (60/40),RF,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",83.333%,76.596%,64.286%,69.903%,85.612%,91.538%,88.476%,79.971%,78.666%


In [20]:
# Write the results table to the 'dados' sheet of desempenho-modelos.xlsx.
# The context manager saves and closes the workbook on exit (also when an
# exception is raised); the explicit ExcelWriter.save() call is no longer
# available in current pandas. sheet_name is passed by keyword because
# positional use is deprecated.
with pd.ExcelWriter("desempenho-modelos.xlsx") as write:
    desempenhoDosModelos.to_excel(write, sheet_name='dados', index=False)

In [None]:
# NOTE(review): `mapa` is not defined in any cell visible in this notebook, so
# this cell would raise NameError on a fresh kernel. It looks like leftover
# scratch code — confirm, then remove it or define `mapa` first.
pd.DataFrame([mapa])

# minhaBase = pd.DataFrame(mapa ).transpose()