<a href="https://colab.research.google.com/github/gmsj/Projeto-1-Ciencia-de-dados/blob/main/Projeto_2_CD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Projeto 2 Ciência de Dados

### Especificação

1. Escolher uma	das	colunas	dos	dados	utilizados	no	projeto	1	para
predição	(classificação ou regressão)	
2. Separar os	dados	em treinamento,	validação	e	teste
3. Selecionar	4	algoritmos	de	acordo	com	a	tarefa escolhida	no	
passo	1	
4. Adicionar MLFlow	no	treinamento	dos	modelos para
rastreamento
5. Executar uma ferramenta	de	seleção	de	hiper-parâmetros sobre	o	
conjunto	de	validação
1. Selecionar poucos hiper-parâmetros por algoritmo	(max.	3)	
2. Selecionar	o	modelo	(algoritmo+valores	de	hiper-parâmetros)	com	
melhor resultado na métrica	de	avaliação		
3. Executar	o	melhor modelo	de	cada algoritmo	no	conjunto	de	teste	e	
selecionar	o	melhor modelo geral na métrica	de	avaliação		
6. Realizar diagnóstico	do	melhor modelo geral	da	etapa	5	e	
melhorá-lo	a	partir	do	diagnóstico
• Bônus:	Utilizar	auto-sklearn,	clustering	para entendimento	dos	
dados

### Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

!pip install optuna --quiet
!pip install mlflow --quiet
!pip install pyngrok --quiet
!pip install scikit-learn --quiet
!pip install auto-sklearn --quiet

###Coleta dos Dados



In [2]:
# Fonte: http://dados.recife.pe.gov.br/dataset/acidentes-de-transito-com-e-sem-vitimas
#dataframe = pd.read_csv("http://dados.recife.pe.gov.br/dataset/44087d2d-73b5-4ab3-9bd8-78da7436eed1/resource/2caa8f41-ccd9-4ea5-906d-f66017d6e107/download/acidentes2021.csv", encoding = "UTF-8", delimiter=';')

In [3]:
dataframe2018 = pd.read_csv("http://dados.recife.pe.gov.br/dataset/44087d2d-73b5-4ab3-9bd8-78da7436eed1/resource/2485590a-3b35-4ad0-b955-8dfc36b61021/download/acidentes_2018.csv", encoding = "UTF-8", delimiter=';')
dataframe2019 = pd.read_csv("http://dados.recife.pe.gov.br/dataset/44087d2d-73b5-4ab3-9bd8-78da7436eed1/resource/3531bafe-d47d-415e-b154-a881081ac76c/download/acidentes-2019.csv", encoding = "UTF-8", delimiter=';')
dataframe2020 = pd.read_csv("http://dados.recife.pe.gov.br/dataset/44087d2d-73b5-4ab3-9bd8-78da7436eed1/resource/fc1c8460-0406-4fff-b51a-e79205d1f1ab/download/acidentes_2020-novo.csv", encoding = "UTF-8", delimiter=';')
dataframe2021 = pd.read_csv("http://dados.recife.pe.gov.br/dataset/44087d2d-73b5-4ab3-9bd8-78da7436eed1/resource/2caa8f41-ccd9-4ea5-906d-f66017d6e107/download/acidentes2021.csv", encoding = "UTF-8", delimiter=';')

In [4]:
dataframe2018.drop(['endereco_cruzamento', 'numero_cruzamento', 'referencia_cruzamento', 'descricao'], axis=1, inplace=True)
dataframe2019.drop(['endereco_cruzamento', 'numero_cruzamento', 'referencia_cruzamento', 'descricao'], axis=1, inplace=True)
dataframe2020.drop(['descricao'], axis=1, inplace=True)

dataframe2018.rename(columns={'DATA': 'data'}, inplace=True)
dataframe2019.rename(columns={'DATA': 'data'}, inplace=True)

In [5]:
dataframe = pd.concat([dataframe2018, dataframe2019, dataframe2020, dataframe2021], ignore_index=True)

#### Removendo colunas que não possuem informações tão úteis para análise

In [6]:
dataframe.drop(['endereco', 'numero', 'detalhe_endereco_acidente', 'complemento', 'bairro_cruzamento', 'num_semaforo', 'sentido_via', 'velocidade_max_via', 'divisao_via2', 'divisao_via3'], axis=1, inplace=True)

#### Definição de tipos

In [7]:
regex_pattern = '((?<!\d)[01]{1}?[0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]'

filter = dataframe['hora'].str.contains(regex_pattern, na=False)

dataframe = dataframe[filter]

  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
dataframe['data'] = dataframe['data'].astype('datetime64')
dataframe['hora'] = dataframe['hora'].astype('datetime64')
dataframe['natureza_acidente'] = dataframe['natureza_acidente'].astype('category')
dataframe['situacao'] = dataframe['situacao'].astype('category')
dataframe['bairro'] = dataframe['bairro'].astype('category')
dataframe['tipo'] = dataframe['tipo'].astype('category')
dataframe['acidente_verificado'] = dataframe['acidente_verificado'].astype('category')
dataframe['tempo_clima'] = dataframe['tempo_clima'].astype('category')
dataframe['situacao_semaforo'] = dataframe['situacao_semaforo'].astype('category')
dataframe['sinalizacao'] = dataframe['sinalizacao'].astype('category')
dataframe['condicao_via'] = dataframe['condicao_via'].astype('category')
dataframe['conservacao_via'] = dataframe['conservacao_via'].astype('category')
dataframe['ponto_controle'] = dataframe['ponto_controle'].astype('category')
dataframe['situacao_placa'] = dataframe['situacao_placa'].astype('category')
dataframe['mao_direcao'] = dataframe['mao_direcao'].astype('category')
dataframe['divisao_via1'] = dataframe['divisao_via1'].astype('category')

In [9]:
categories = dataframe.select_dtypes('category').apply(lambda col: col.cat.categories)
categories.to_frame().style

Unnamed: 0,0
natureza_acidente,"Index(['APOIO', 'COM VÍTIMA', 'ENTRADA E SAÍDA', 'OUTROS', 'SEM VÍTIMA',  'VÍTIMA FATAL'],  dtype='object')"
situacao,"Index(['CANCELADA', 'DUPLICIDADE', 'EM ABERTO', 'EQUIPE EM DESLOCAMENTO',  'EQUIPE NO LOCAL', 'EVADIU-SE', 'FINALIZADA', 'PENDENTE'],  dtype='object')"
bairro,"Index(['AFLITOS', 'AFOGADOS', 'ALTO DO MANDU', 'ALTO JOSÉ BONIFÁCIO',  'ALTO JOSÉ DO PINHO', 'ALTO SANTA TERESINHA', 'APIPUCOS', 'AREIAS',  'ARRUDA', 'BAIRRO DO RECIFE', 'BARRO', 'BEBERIBE', 'BOA VIAGEM',  'BOA VISTA', 'BOMBA DO HEMETERIO', 'BOMBA DO HEMETÉRIO', 'BONGI',  'BRASÍLIA TEIMOSA', 'BREJO DA GUABIRABA', 'BREJO DE BEBERIBE',  'CABANGA', 'CAJUEIRO', 'CAMPINA DO BARRETO', 'CAMPO GRANDE',  'CASA AMARELA', 'CASA FORTE', 'CAXANGÁ', 'CAÇOTE',  'CIDADE UNIVERSITÁRIA', 'COELHOS', 'COHAB', 'COQUEIRAL', 'CORDEIRO',  'CURADO', 'CÓRREGO DO JENIPAPO', 'DERBY', 'DOIS IRMÃOS', 'DOIS UNIDOS',  'ENCRUZILHADA', 'ENGENHO DO MEIO', 'ESPINHEIRO', 'ESTÂNCIA', 'FUNDÃO',  'GRAÇAS', 'GUABIRABA', 'HIPÓDROMO', 'IBURA', 'ILHA DO LEITE',  'ILHA DO RETIRO', 'ILHA JOANA BEZERRA', 'IMBIRIBEIRA', 'IPESEP',  'IPSEP', 'IPUTINGA', 'JAQUEIRA', 'JARDIM SÃO PAULO', 'JIQUIÁ',  'JOANA BEZERRA', 'JORDÃO', 'LINHA DO TIRO', 'MACAXEIRA', 'MADALENA',  'MANGABEIRA', 'MANGUEIRA', 'MONTEIRO', 'MORRO DA CONCEIÇÃO',  'MUSTARDINHA', 'NOVA DESCOBERTA', 'PAISSANDU', 'PARNAMIRIM',  'PASSARINHO', 'PINA', 'PONTO DE PARADA', 'PORTO DA MADEIRA',  'POÇO DA PANELA', 'PRADO', 'ROSARINHO', 'SAN MARTIN', 'SANCHO',  'SANTANA', 'SANTO AMARO', 'SANTO ANTÔNIO', 'SOLEDADE', 'SÃO JOSÉ',  'SÍTIO DOS PINTOS', 'TAMARINEIRA', 'TEJIPIÓ', 'TORRE', 'TORREÃO',  'TORRÕES', 'TOTÓ', 'VASCO DA GAMA', 'VÁRZEA', 'ZUMBI', 'ÁGUA FRIA'],  dtype='object')"
tipo,"Index(['0', 'ABALROAMENTO LONGITUDINAL', 'ABALROAMENTO TRANSVERSAL',  'ACID. DE PERCURSO', 'ALAGAMENTO', 'APOIO COMPESA', 'ATROPELAMENTO',  'ATROPELAMENTO ANIMAL', 'ATROPELAMENTO DE ANIMAL',  'ATROPELAMENTO DE PESSOA', 'CAPOTAMENTO', 'CHOQUE',  'CHOQUE OBJETO FIXO', 'CHOQUE VEÍCULO PARADO', 'COLISÃO',  'COLISÃO COM CICLISTA', 'COLISÃO FRONTAL', 'COLISÃO LATERAL',  'COLISÃO TRANSVERSAL', 'COLISÃO TRASEIRA', 'ENGAVETAMENTO',  'MONITORAMENTO', 'OUTROS', 'OUTROS APOIOS', 'QUEDA', 'SANTO AMARO',  'SEMÁFORO', 'SUBURBIO', 'TOMBAMENTO'],  dtype='object')"
acidente_verificado,"Index(['APÓS O TREVO', 'Cruzamento', 'Esquina', 'Longo da via',  'Não informado', 'Outros', 'Ponte', 'Rotatória', 'Viaduto'],  dtype='object')"
tempo_clima,"Index(['Bom', 'Chuvoso', 'Nublado'], dtype='object')"
situacao_semaforo,"Index(['Com defeito', 'Desligado', 'Intermitente', 'Não existe',  'Sem defeito'],  dtype='object')"
sinalizacao,"Index(['Ilegível', 'Incompleta', 'Não existente', 'Perfeito estado'], dtype='object')"
condicao_via,"Index(['Molhada', 'Oleosa', 'Outros', 'Seca'], dtype='object')"
conservacao_via,"Index(['Mal conservada', 'Mal iluminada', 'Não há', 'Outros',  'Perfeito estado'],  dtype='object')"


#### Tratamento de dados ausentes

In [10]:
#Verificando a quantidade de nulos
dataframe.isna().sum().to_frame().style

Unnamed: 0,0
data,0
hora,0
natureza_acidente,108
situacao,9
bairro,212
tipo,58
auto,2524
moto,20267
ciclom,27408
ciclista,26992


#### Remoção de colunas com dados ausentes

In [11]:
target_columns = [
      'natureza_acidente', 'situacao', 'bairro', 'tipo', 'acidente_verificado',
      'tempo_clima', 'situacao_semaforo', 'sinalizacao', 'condicao_via', 'conservacao_via', 
      'ponto_controle', 'situacao_placa', 'mao_direcao', 'divisao_via1'
]

In [12]:
clean_dataframe = dataframe.dropna(subset=target_columns).reset_index(drop=True)

In [13]:
results = clean_dataframe.apply(lambda col: col.unique().tolist() if col.name in target_columns else None).dropna()
results.to_frame().style

Unnamed: 0,0
natureza_acidente,"['SEM VÍTIMA', 'COM VÍTIMA', 'VÍTIMA FATAL', 'OUTROS']"
situacao,"['FINALIZADA', 'CANCELADA', 'EQUIPE EM DESLOCAMENTO', 'DUPLICIDADE', 'EQUIPE NO LOCAL', 'EM ABERTO', 'PENDENTE']"
bairro,"['TEJIPIÓ', 'IMBIRIBEIRA', 'CAMPO GRANDE', 'DERBY', 'GRAÇAS', 'SANTO AMARO', 'BOA VIAGEM', 'ÁGUA FRIA', 'IBURA', 'IPSEP', 'ESPINHEIRO', 'JARDIM SÃO PAULO', 'MADALENA', 'CASA FORTE', 'PARNAMIRIM', 'ILHA DO LEITE', 'AFOGADOS', 'PAISSANDU', 'MUSTARDINHA', 'PINA', 'TORRÕES', 'ROSARINHO', 'ILHA DO RETIRO', 'BEBERIBE', 'BARRO', 'CIDADE UNIVERSITÁRIA', 'PRADO', 'ESTÂNCIA', 'AREIAS', 'AFLITOS', 'ARRUDA', 'TAMARINEIRA', 'BAIRRO DO RECIFE', 'JAQUEIRA', 'SAN MARTIN', 'BONGI', 'CASA AMARELA', 'IPUTINGA', 'CAXANGÁ', 'ENCRUZILHADA', 'BOA VISTA', 'SÃO JOSÉ', 'VÁRZEA', 'BOMBA DO HEMETÉRIO', 'ILHA JOANA BEZERRA', 'COELHOS', 'MANGUEIRA', 'ENGENHO DO MEIO', 'NOVA DESCOBERTA', 'SOLEDADE', 'CABANGA', 'SANTO ANTÔNIO', 'DOIS UNIDOS', 'TORRE', 'TORREÃO', 'CORDEIRO', 'MACAXEIRA', 'HIPÓDROMO', 'JIQUIÁ', 'MANGABEIRA', 'DOIS IRMÃOS', 'JORDÃO', 'VASCO DA GAMA', 'LINHA DO TIRO', 'APIPUCOS', 'ALTO JOSÉ DO PINHO', 'BRASÍLIA TEIMOSA', 'COQUEIRAL', 'ZUMBI', 'JOANA BEZERRA', 'CAÇOTE', 'CAJUEIRO', 'FUNDÃO', 'SANCHO', 'MONTEIRO', 'TOTÓ', 'PONTO DE PARADA', 'GUABIRABA', 'ALTO DO MANDU', 'PORTO DA MADEIRA', 'CAMPINA DO BARRETO', 'SANTANA', 'POÇO DA PANELA', 'ALTO JOSÉ BONIFÁCIO', 'MORRO DA CONCEIÇÃO', 'CÓRREGO DO JENIPAPO', 'PASSARINHO', 'BREJO DA GUABIRABA', 'SÍTIO DOS PINTOS', 'CURADO', 'BREJO DE BEBERIBE', 'COHAB', 'ALTO SANTA TERESINHA', 'BOMBA DO HEMETERIO']"
tipo,"['COLISÃO TRASEIRA', 'ABALROAMENTO LONGITUDINAL', 'CHOQUE VEÍCULO PARADO', 'ABALROAMENTO TRANSVERSAL', 'CAPOTAMENTO', 'COLISÃO FRONTAL', 'COLISÃO COM CICLISTA', 'ATROPELAMENTO', 'CHOQUE OBJETO FIXO', 'ACID. DE PERCURSO', 'CHOQUE', 'COLISÃO', 'TOMBAMENTO', 'ATROPELAMENTO ANIMAL', 'ENGAVETAMENTO', 'COLISÃO LATERAL', 'COLISÃO TRANSVERSAL', 'ATROPELAMENTO DE PESSOA', 'QUEDA', 'ATROPELAMENTO DE ANIMAL', 'OUTROS']"
acidente_verificado,"['Longo da via', 'Outros', 'Cruzamento', 'Ponte', 'Viaduto', 'Esquina', 'Não informado', 'Rotatória']"
tempo_clima,"['Bom', 'Chuvoso', 'Nublado']"
situacao_semaforo,"['Não existe', 'Sem defeito', 'Intermitente', 'Com defeito', 'Desligado']"
sinalizacao,"['Não existente', 'Perfeito estado', 'Ilegível', 'Incompleta']"
condicao_via,"['Seca', 'Molhada', 'Outros', 'Oleosa']"
conservacao_via,"['Perfeito estado', 'Mal iluminada', 'Mal conservada', 'Não há', 'Outros']"


In [14]:
clean_dataframe

Unnamed: 0,data,hora,natureza_acidente,situacao,bairro,tipo,auto,moto,ciclom,ciclista,...,acidente_verificado,tempo_clima,situacao_semaforo,sinalizacao,condicao_via,conservacao_via,ponto_controle,situacao_placa,mao_direcao,divisao_via1
0,2018-10-12,2022-10-04 11:32:00,SEM VÍTIMA,FINALIZADA,TEJIPIÓ,COLISÃO TRASEIRA,2.0,,,,...,Longo da via,Bom,Não existe,Não existente,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Faixa contínua
1,2018-10-12,2022-10-04 12:50:00,SEM VÍTIMA,FINALIZADA,IMBIRIBEIRA,ABALROAMENTO LONGITUDINAL,,,,,...,Longo da via,Bom,Não existe,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Canteiro central
2,2018-10-12,2022-10-04 13:23:00,SEM VÍTIMA,FINALIZADA,CAMPO GRANDE,CHOQUE VEÍCULO PARADO,2.0,,,,...,Outros,Bom,Não existe,Não existente,Seca,Perfeito estado,Não existe,Não há placas,Única,Não existe
3,2018-10-12,2022-10-04 17:53:00,SEM VÍTIMA,FINALIZADA,DERBY,ABALROAMENTO LONGITUDINAL,1.0,,,,...,Outros,Bom,Não existe,Perfeito estado,Seca,Perfeito estado,Não existe,"Placas ""Pare""",Única,Não existe
4,2018-10-12,2022-10-04 18:25:00,SEM VÍTIMA,FINALIZADA,GRAÇAS,ABALROAMENTO LONGITUDINAL,1.0,1.0,,,...,Cruzamento,Bom,Sem defeito,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Única,Faixa seccionada
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12260,2021-12-30,2022-10-04 12:34:00,COM VÍTIMA,FINALIZADA,PINA,COLISÃO LATERAL,1.0,1.0,0.0,0.0,...,Longo da via,Bom,Sem defeito,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Única,Não existe
12261,2021-12-31,2022-10-04 10:13:00,COM VÍTIMA,FINALIZADA,SAN MARTIN,COLISÃO COM CICLISTA,1.0,0.0,0.0,1.0,...,Longo da via,Bom,Sem defeito,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Não existe
12262,2021-12-31,2022-10-04 11:36:00,COM VÍTIMA,FINALIZADA,ESTÂNCIA,COLISÃO TRASEIRA,1.0,1.0,0.0,0.0,...,Longo da via,Bom,Sem defeito,Perfeito estado,Seca,Perfeito estado,Não existe,R-6a,Dupla,Faixa contínua
12263,2021-12-31,2022-10-04 12:56:00,COM VÍTIMA,FINALIZADA,VÁRZEA,COLISÃO FRONTAL,0.0,1.0,0.0,0.0,...,Cruzamento,Bom,Não existe,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Faixa seccionada


#### Imputação de dados ausentes

In [15]:
clean_dataframe.isna().sum().to_frame().style

Unnamed: 0,0
data,0
hora,0
natureza_acidente,0
situacao,0
bairro,0
tipo,0
auto,1007
moto,7614
ciclom,10814
ciclista,10572


In [16]:
#clean_dataframe['auto'] = clean_dataframe['auto'].fillna(0)
results = clean_dataframe.apply(lambda col: col.fillna(0) if col.dtype == float else col)
results.isna().sum().to_frame().style
#clean_dataframe = clean_dataframe.select_dtypes(float).apply(lambda col: col.fillna(0))

Unnamed: 0,0
data,0
hora,0
natureza_acidente,0
situacao,0
bairro,0
tipo,0
auto,0
moto,0
ciclom,0
ciclista,0


In [17]:
results.head().style

Unnamed: 0,data,hora,natureza_acidente,situacao,bairro,tipo,auto,moto,ciclom,ciclista,pedestre,onibus,caminhao,viatura,outros,vitimas,vitimasfatais,acidente_verificado,tempo_clima,situacao_semaforo,sinalizacao,condicao_via,conservacao_via,ponto_controle,situacao_placa,mao_direcao,divisao_via1
0,2018-10-12 00:00:00,2022-10-04 11:32:00,SEM VÍTIMA,FINALIZADA,TEJIPIÓ,COLISÃO TRASEIRA,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Longo da via,Bom,Não existe,Não existente,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Faixa contínua
1,2018-10-12 00:00:00,2022-10-04 12:50:00,SEM VÍTIMA,FINALIZADA,IMBIRIBEIRA,ABALROAMENTO LONGITUDINAL,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,Longo da via,Bom,Não existe,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Canteiro central
2,2018-10-12 00:00:00,2022-10-04 13:23:00,SEM VÍTIMA,FINALIZADA,CAMPO GRANDE,CHOQUE VEÍCULO PARADO,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Outros,Bom,Não existe,Não existente,Seca,Perfeito estado,Não existe,Não há placas,Única,Não existe
3,2018-10-12 00:00:00,2022-10-04 17:53:00,SEM VÍTIMA,FINALIZADA,DERBY,ABALROAMENTO LONGITUDINAL,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Outros,Bom,Não existe,Perfeito estado,Seca,Perfeito estado,Não existe,"Placas ""Pare""",Única,Não existe
4,2018-10-12 00:00:00,2022-10-04 18:25:00,SEM VÍTIMA,FINALIZADA,GRAÇAS,ABALROAMENTO LONGITUDINAL,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cruzamento,Bom,Sem defeito,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Única,Faixa seccionada


#### Remoção de outliers

#####Tratamento do dataset para poder aplicar os métodos

In [18]:
dataframeCodes = results.copy()

In [19]:
dataframeCodes.drop(['data', 'hora'], axis=1, inplace=True)

In [20]:
dataframeCodes.head().style

Unnamed: 0,natureza_acidente,situacao,bairro,tipo,auto,moto,ciclom,ciclista,pedestre,onibus,caminhao,viatura,outros,vitimas,vitimasfatais,acidente_verificado,tempo_clima,situacao_semaforo,sinalizacao,condicao_via,conservacao_via,ponto_controle,situacao_placa,mao_direcao,divisao_via1
0,SEM VÍTIMA,FINALIZADA,TEJIPIÓ,COLISÃO TRASEIRA,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Longo da via,Bom,Não existe,Não existente,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Faixa contínua
1,SEM VÍTIMA,FINALIZADA,IMBIRIBEIRA,ABALROAMENTO LONGITUDINAL,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,Longo da via,Bom,Não existe,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Dupla,Canteiro central
2,SEM VÍTIMA,FINALIZADA,CAMPO GRANDE,CHOQUE VEÍCULO PARADO,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Outros,Bom,Não existe,Não existente,Seca,Perfeito estado,Não existe,Não há placas,Única,Não existe
3,SEM VÍTIMA,FINALIZADA,DERBY,ABALROAMENTO LONGITUDINAL,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Outros,Bom,Não existe,Perfeito estado,Seca,Perfeito estado,Não existe,"Placas ""Pare""",Única,Não existe
4,SEM VÍTIMA,FINALIZADA,GRAÇAS,ABALROAMENTO LONGITUDINAL,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cruzamento,Bom,Sem defeito,Perfeito estado,Seca,Perfeito estado,Não existe,Não há placas,Única,Faixa seccionada


In [21]:
dataframeCodes['natureza_acidente'] = dataframeCodes['natureza_acidente'].cat.codes
dataframeCodes['situacao'] = dataframeCodes['situacao'].cat.codes
dataframeCodes['bairro'] = dataframeCodes['bairro'].cat.codes
dataframeCodes['tipo'] = dataframeCodes['tipo'].cat.codes
dataframeCodes['acidente_verificado'] = dataframeCodes['acidente_verificado'].cat.codes
dataframeCodes['tempo_clima'] = dataframeCodes['tempo_clima'].cat.codes
dataframeCodes['situacao_semaforo'] = dataframeCodes['situacao_semaforo'].cat.codes
dataframeCodes['sinalizacao'] = dataframeCodes['sinalizacao'].cat.codes
dataframeCodes['condicao_via'] = dataframeCodes['condicao_via'].cat.codes
dataframeCodes['conservacao_via'] = dataframeCodes['conservacao_via'].cat.codes
dataframeCodes['ponto_controle'] = dataframeCodes['ponto_controle'].cat.codes
dataframeCodes['situacao_placa'] = dataframeCodes['situacao_placa'].cat.codes
dataframeCodes['mao_direcao'] = dataframeCodes['mao_direcao'].cat.codes
dataframeCodes['divisao_via1'] = dataframeCodes['divisao_via1'].cat.codes

##### Isolation Forests

In [22]:
dataframeTest = dataframeCodes.copy()

In [23]:
from sklearn.ensemble import IsolationForest
rng = np.random.RandomState(42)
clf = IsolationForest(max_samples=100, random_state=rng)
clf.fit(dataframeTest)

IsolationForest(max_samples=100,
                random_state=RandomState(MT19937) at 0x7F4CC731CC00)

In [24]:
scores = clf.predict(dataframeTest)
print(scores)

[1 1 1 ... 1 1 1]


In [25]:
dataframeTest['outlier'] = scores
dataframeTest.head().style

Unnamed: 0,natureza_acidente,situacao,bairro,tipo,auto,moto,ciclom,ciclista,pedestre,onibus,caminhao,viatura,outros,vitimas,vitimasfatais,acidente_verificado,tempo_clima,situacao_semaforo,sinalizacao,condicao_via,conservacao_via,ponto_controle,situacao_placa,mao_direcao,divisao_via1,outlier
0,4,6,86,19,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,0,3,2,3,4,3,1,0,6,1
1,4,6,50,1,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,3,0,3,3,3,4,3,1,0,3,1
2,4,6,23,13,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0,3,2,3,4,3,1,2,8,1
3,4,6,35,1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,5,0,3,3,3,4,3,6,2,8,1
4,4,6,43,1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,4,3,3,4,3,1,2,7,1


In [26]:
dataframeTest[dataframeTest['outlier'] == -1]

Unnamed: 0,natureza_acidente,situacao,bairro,tipo,auto,moto,ciclom,ciclista,pedestre,onibus,...,tempo_clima,situacao_semaforo,sinalizacao,condicao_via,conservacao_via,ponto_controle,situacao_placa,mao_direcao,divisao_via1,outlier
29,4,6,71,19,2.0,0.0,0.0,0.0,0.0,0.0,...,0,4,0,3,4,1,1,2,3,-1
64,4,6,12,19,1.0,1.0,0.0,0.0,0.0,1.0,...,0,3,3,3,4,3,1,2,7,-1
80,1,6,7,15,1.0,0.0,0.0,1.0,0.0,0.0,...,0,3,3,3,4,3,4,0,3,-1
86,1,6,26,6,0.0,1.0,0.0,0.0,1.0,0.0,...,0,4,3,3,4,1,4,0,3,-1
89,1,6,40,15,1.0,0.0,0.0,1.0,0.0,0.0,...,0,0,3,3,4,3,36,0,3,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12249,1,6,12,18,1.0,1.0,0.0,0.0,0.0,0.0,...,1,4,3,0,4,2,1,2,7,-1
12250,1,6,85,18,1.0,0.0,0.0,0.0,0.0,1.0,...,1,4,3,0,4,1,4,0,3,-1
12251,1,6,90,16,1.0,1.0,0.0,0.0,0.0,0.0,...,1,3,1,0,4,3,1,0,8,-1
12254,1,6,94,9,0.0,0.0,0.0,0.0,1.0,1.0,...,0,3,3,3,4,1,24,0,6,-1


In [27]:
dataframeTest = dataframeTest[dataframeTest['outlier'] != -1]
len(dataframeTest)

10102

#### Ajustes no dataset para implementação dos Algoritmos

Vamos partir da seguinte legenda para criamos uma nova coluna (gravidade) a partir dos dados presentes na coluna "vitimas"
- 0: Sem Gravidade
- 1: Gravidade Moderada
- 2: Muito Grave

##### Remoção da coluna "outlier" gerada no passo anterior e geração da coluna "gravidade"

In [28]:
dataframeTest.drop(['outlier'], axis=1, inplace=True)
dataframeTest.head().style

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,natureza_acidente,situacao,bairro,tipo,auto,moto,ciclom,ciclista,pedestre,onibus,caminhao,viatura,outros,vitimas,vitimasfatais,acidente_verificado,tempo_clima,situacao_semaforo,sinalizacao,condicao_via,conservacao_via,ponto_controle,situacao_placa,mao_direcao,divisao_via1
0,4,6,86,19,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,0,3,2,3,4,3,1,0,6
1,4,6,50,1,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,3,0,3,3,3,4,3,1,0,3
2,4,6,23,13,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0,3,2,3,4,3,1,2,8
3,4,6,35,1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,5,0,3,3,3,4,3,6,2,8
4,4,6,43,1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,4,3,3,4,3,1,2,7


In [29]:
# Criação da lista com as condições especificas
conditions = [
    (dataframeTest['vitimas'] == 0),
    (dataframeTest['vitimas'] == 1),
    (dataframeTest['vitimas'] >= 2)
    ]

# Criação de uma lista dos valores que queremos associar para cada condição
# values = ['Sem Gravidade', 'Gravidade Moderada', 'Muito Grave']
values = [    0   ,     1   ,     2   ]

# Criação de uma nova coluna usando o np.select para associar os valores com a lista de argumentos
dataframeTest['gravidade'] = np.select(conditions, values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [30]:
dataframeTest.head().style

Unnamed: 0,natureza_acidente,situacao,bairro,tipo,auto,moto,ciclom,ciclista,pedestre,onibus,caminhao,viatura,outros,vitimas,vitimasfatais,acidente_verificado,tempo_clima,situacao_semaforo,sinalizacao,condicao_via,conservacao_via,ponto_controle,situacao_placa,mao_direcao,divisao_via1,gravidade
0,4,6,86,19,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,0,3,2,3,4,3,1,0,6,0
1,4,6,50,1,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,3,0,3,3,3,4,3,1,0,3,0
2,4,6,23,13,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0,3,2,3,4,3,1,2,8,0
3,4,6,35,1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,5,0,3,3,3,4,3,6,2,8,0
4,4,6,43,1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,4,3,3,4,3,1,2,7,0


### Implementação dos modelos



#### Separação do dataset em treinamento (60%), validação (20%) e testes (20%) 

In [31]:
from sklearn.model_selection import train_test_split

x = dataframeTest.drop(['vitimas', 'gravidade'], axis=1)
y = dataframeTest['gravidade']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=7)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=7)

#### Imports usados nos modelos

In [32]:
import optuna
import mlflow
from pyngrok import ngrok

In [33]:
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

from optuna.integration.mlflow import MLflowCallback
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [34]:
def eval_metrics(actual, pred):
  accuracy = accuracy_score(actual, pred)
  f1score = f1_score(actual, pred, average='macro')
  recallscore = recall_score(actual, pred, average='macro')
  precisionscore = precision_score(actual, pred, average='macro')

  return accuracy, f1score, recallscore, precisionscore

In [35]:
get_ipython().system_raw("mlflow ui --port 5000 &")
ngrok.kill()
NGROK_AUTH_TOKEN = ''
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

MLflow Tracking UI: https://0762-34-143-128-204.ngrok.io


#### Algoritmos utilizados:



- Naive Bayes
- MLP (Multilayer Perceptron)
- SMV (Support vector machines)
- Logistic regression

#### Naive Bayes

In [36]:
def gaussianNaiveBayes(trial):
  var_smoothing = trial.suggest_float('var_smoothing', 1e-11, 1e-8)

  with mlflow.start_run(run_name = str('gaussianBayes')):
    model = GaussianNB(var_smoothing = var_smoothing)
    model.fit(x_train, y_train)

    y_model = model.predict(x_val)

    (accuracy, f1score, recallscore, precisionscore) = eval_metrics(y_val, y_model)

    print('GaussianBayes model (var_smoothing=%s):'%(var_smoothing))
    print('accuracy: %s'%(accuracy))
    print('f1 score: %s'%(f1score))
    print('recall: %s'%(recallscore))
    print('precision: %s'%(precisionscore))

    mlflow.log_param("var_smoothing", var_smoothing)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1 score", f1score)
    mlflow.log_metric("recall", recallscore)
    mlflow.log_metric("precision", precisionscore)

    mlflow.end_run()

  return accuracy

In [37]:
studyNB = optuna.create_study(direction="maximize")
studyNB.optimize(gaussianNaiveBayes, n_trials=50)

[32m[I 2022-10-04 00:01:18,788][0m A new study created in memory with name: no-name-3fbd22a6-d284-4ba2-adbf-38acf4bc06ae[0m


GaussianBayes model (var_smoothing=6.703694781102883e-09):
accuracy: 0.7723899059871351
f1 score: 0.4758132721515555
recall: 0.6757251056476873
precision: 0.6651780698797888


[32m[I 2022-10-04 00:01:18,949][0m Trial 0 finished with value: 0.7723899059871351 and parameters: {'var_smoothing': 6.703694781102883e-09}. Best is trial 0 with value: 0.7723899059871351.[0m
[32m[I 2022-10-04 00:01:19,010][0m Trial 1 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.900601574850796e-09}. Best is trial 1 with value: 0.7738743196437408.[0m
[32m[I 2022-10-04 00:01:19,058][0m Trial 2 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 2.8029126713899185e-09}. Best is trial 1 with value: 0.7738743196437408.[0m
[32m[I 2022-10-04 00:01:19,101][0m Trial 3 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.011412110684684e-09}. Best is trial 1 with value: 0.7738743196437408.[0m


GaussianBayes model (var_smoothing=8.900601574850796e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155
GaussianBayes model (var_smoothing=2.8029126713899185e-09):
accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503
precision: 0.6650159476246432
GaussianBayes model (var_smoothing=8.011412110684684e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155


[32m[I 2022-10-04 00:01:19,163][0m Trial 4 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 3.4700249997113263e-09}. Best is trial 1 with value: 0.7738743196437408.[0m


GaussianBayes model (var_smoothing=3.4700249997113263e-09):
accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503
precision: 0.6650159476246432
GaussianBayes model (var_smoothing=2.6517160560409724e-09):

[32m[I 2022-10-04 00:01:19,216][0m Trial 5 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 2.6517160560409724e-09}. Best is trial 1 with value: 0.7738743196437408.[0m
[32m[I 2022-10-04 00:01:19,263][0m Trial 6 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.391968669081178e-09}. Best is trial 1 with value: 0.7738743196437408.[0m
[32m[I 2022-10-04 00:01:19,314][0m Trial 7 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 4.0280429192957226e-09}. Best is trial 1 with value: 0.7738743196437408.[0m



accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503
precision: 0.6650159476246432
GaussianBayes model (var_smoothing=8.391968669081178e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155
GaussianBayes model (var_smoothing=4.0280429192957226e-09):
accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503
precision: 0.6650159476246432
GaussianBayes model (var_smoothing=1.1366147336077812e-09):
accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503


[32m[I 2022-10-04 00:01:19,365][0m Trial 8 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 1.1366147336077812e-09}. Best is trial 1 with value: 0.7738743196437408.[0m


precision: 0.6650159476246432
GaussianBayes model (var_smoothing=2.194275428472088e-09):
accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503
precision: 0.6650159476246432


[32m[I 2022-10-04 00:01:19,417][0m Trial 9 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 2.194275428472088e-09}. Best is trial 1 with value: 0.7738743196437408.[0m
[32m[I 2022-10-04 00:01:19,474][0m Trial 10 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.589617519348033e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:19,526][0m Trial 11 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.797522289196476e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.589617519348033e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347
GaussianBayes model (var_smoothing=9.797522289196476e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:19,587][0m Trial 12 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.61641507637299e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:19,642][0m Trial 13 finished with value: 0.7718951014349332 and parameters: {'var_smoothing': 5.905667660346748e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.61641507637299e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347
GaussianBayes model (var_smoothing=5.905667660346748e-09):
accuracy: 0.7718951014349332
f1 score: 0.47557162333607267
recall: 0.6754947439798688
precision: 0.6650968519608668


[32m[I 2022-10-04 00:01:19,699][0m Trial 14 finished with value: 0.7728847105393369 and parameters: {'var_smoothing': 7.292923384873077e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:19,752][0m Trial 15 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.900170433440881e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=7.292923384873077e-09):
accuracy: 0.7728847105393369
f1 score: 0.47605526894105044
recall: 0.6759554673155056
precision: 0.6652596032081046
GaussianBayes model (var_smoothing=9.900170433440881e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:19,809][0m Trial 16 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 5.23927614553305e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:19,858][0m Trial 17 finished with value: 0.7728847105393369 and parameters: {'var_smoothing': 7.2351491776413225e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=5.23927614553305e-09):
accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503
precision: 0.6650159476246432
GaussianBayes model (var_smoothing=7.2351491776413225e-09):
accuracy: 0.7728847105393369
f1 score: 0.47605526894105044
recall: 0.6759554673155056
precision: 0.6652596032081046


[32m[I 2022-10-04 00:01:19,921][0m Trial 18 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.711901597318052e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:19,973][0m Trial 19 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.602465688545164e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.711901597318052e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347
GaussianBayes model (var_smoothing=8.602465688545164e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155


[32m[I 2022-10-04 00:01:20,026][0m Trial 20 finished with value: 0.7718951014349332 and parameters: {'var_smoothing': 5.824071866038026e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:20,080][0m Trial 21 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.98032852462582e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=5.824071866038026e-09):
accuracy: 0.7718951014349332
f1 score: 0.47557162333607267
recall: 0.6754947439798688
precision: 0.6650968519608668
GaussianBayes model (var_smoothing=9.98032852462582e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:20,140][0m Trial 22 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.203534932498801e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:20,194][0m Trial 23 finished with value: 0.7733795150915388 and parameters: {'var_smoothing': 7.570715775105289e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.203534932498801e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347
GaussianBayes model (var_smoothing=7.570715775105289e-09):
accuracy: 0.7733795150915388
f1 score: 0.47629761598565756
recall: 0.6761858289833241
precision: 0.6653414537867253


[32m[I 2022-10-04 00:01:20,258][0m Trial 24 finished with value: 0.7060860959920832 and parameters: {'var_smoothing': 9.854060651230915e-11}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:20,320][0m Trial 25 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.971356503346539e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.854060651230915e-11):
accuracy: 0.7060860959920832
f1 score: 0.4457829627653076
recall: 0.6448566421600116
precision: 0.6565217391304348
GaussianBayes model (var_smoothing=8.971356503346539e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155


[32m[I 2022-10-04 00:01:20,382][0m Trial 26 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 9.05029521324717e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:20,435][0m Trial 27 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.050082312548901e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.05029521324717e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155
GaussianBayes model (var_smoothing=8.050082312548901e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155


[32m[I 2022-10-04 00:01:20,494][0m Trial 28 finished with value: 0.7718951014349332 and parameters: {'var_smoothing': 6.461573505227886e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:20,563][0m Trial 29 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.967702130890684e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=6.461573505227886e-09):
accuracy: 0.7718951014349332
f1 score: 0.47557162333607267
recall: 0.6754947439798688
precision: 0.6650968519608668
GaussianBayes model (var_smoothing=9.967702130890684e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:20,630][0m Trial 30 finished with value: 0.7723899059871351 and parameters: {'var_smoothing': 6.84142100940648e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=6.84142100940648e-09):
accuracy: 0.7723899059871351
f1 score: 0.4758132721515555
recall: 0.6757251056476873
precision: 0.6651780698797888
GaussianBayes model (var_smoothing=9.65521587168594e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:20,690][0m Trial 31 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.65521587168594e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:20,783][0m Trial 32 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.958594213022407e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.958594213022407e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:20,846][0m Trial 33 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.345099394819336e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:20,913][0m Trial 34 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.127417749978898e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=8.345099394819336e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155
GaussianBayes model (var_smoothing=9.127417749978898e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:21,011][0m Trial 35 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 9.07443170898216e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.07443170898216e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155


[32m[I 2022-10-04 00:01:21,101][0m Trial 36 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.244271735919796e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.244271735919796e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347
GaussianBayes model (var_smoothing=7.818580646338563e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155


[32m[I 2022-10-04 00:01:21,205][0m Trial 37 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 7.818580646338563e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:21,274][0m Trial 38 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.672518868893259e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=8.672518868893259e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155


[32m[I 2022-10-04 00:01:21,330][0m Trial 39 finished with value: 0.7714002968827314 and parameters: {'var_smoothing': 4.415443405110991e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:21,378][0m Trial 40 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.176883345820719e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:21,431][0m Trial 41 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.099752663688845e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=4.415443405110991e-09):
accuracy: 0.7714002968827314
f1 score: 0.47533032022830835
recall: 0.6752643823120503
precision: 0.6650159476246432
GaussianBayes model (var_smoothing=8.176883345820719e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155
GaussianBayes model (var_smoothing=9.099752663688845e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:21,494][0m Trial 42 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.351793496140263e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.351793496140263e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:21,550][0m Trial 43 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.714050849835403e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:21,596][0m Trial 44 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.879577323887923e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:21,647][0m Trial 45 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.317765598442688e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=8.714050849835403e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155
GaussianBayes model (var_smoothing=9.879577323887923e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347
GaussianBayes model (var_smoothing=9.317765598442688e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:21,702][0m Trial 46 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.278072525842196e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=9.278072525842196e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


[32m[I 2022-10-04 00:01:21,764][0m Trial 47 finished with value: 0.7738743196437408 and parameters: {'var_smoothing': 8.316715480056127e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:21,816][0m Trial 48 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.918121797923147e-09}. Best is trial 10 with value: 0.7743691241959426.[0m
[32m[I 2022-10-04 00:01:21,867][0m Trial 49 finished with value: 0.7743691241959426 and parameters: {'var_smoothing': 9.937654125786925e-09}. Best is trial 10 with value: 0.7743691241959426.[0m


GaussianBayes model (var_smoothing=8.316715480056127e-09):
accuracy: 0.7738743196437408
f1 score: 0.47654031558141147
recall: 0.6764161906511427
precision: 0.6654236234709155
GaussianBayes model (var_smoothing=9.918121797923147e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347
GaussianBayes model (var_smoothing=9.937654125786925e-09):
accuracy: 0.7743691241959426
f1 score: 0.4767833700394104
recall: 0.6766465523189611
precision: 0.6655061141304347


#### MLP - Multilayer Perceptron

In [38]:
def MLP(trial):
  lr = trial.suggest_categorical('learning_rate', ['constant', 'invscaling', 'adaptive'])

  max_iter = trial.suggest_int('max_iter', 200, 300, 50)

  with mlflow.start_run(run_name = str('Multilayer Perceptron')):
    model = MLPClassifier(max_iter=max_iter, learning_rate=lr, random_state=1)
    model.fit(x_train, y_train)

    y_pred = model.predict(x_val)

    (accuracy, f1score, recallscore, precisionscore) = eval_metrics(y_val, y_pred)

    print('Multilayer Perceptron model (lr=%s):'%(lr))
    print('Multilayer Perceptron model (max_iter=%f):'%(max_iter))

    print('accuracy: %s'%(accuracy))
    print('f1 score: %s'%(f1score))
    print('recall: %s'%(recallscore))
    print('precision: %s'%(precisionscore))

    mlflow.log_param("lr", lr)
    mlflow.log_param("max_iter", max_iter)

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1 score", f1score)
    mlflow.log_metric("recall", recallscore)
    mlflow.log_metric("precision", precisionscore)

    mlflow.end_run()

  return accuracy

In [39]:
studyPerceptron = optuna.create_study(direction='maximize')
studyPerceptron.optimize(MLP, n_trials=50)

[32m[I 2022-10-04 00:01:21,897][0m A new study created in memory with name: no-name-56beec29-ae03-424b-84fd-36479b40418e[0m
[32m[I 2022-10-04 00:01:24,859][0m Trial 0 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:01:30,183][0m Trial 1 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:01:37,757][0m Trial 2 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:01:45,145][0m Trial 3 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:01:52,322][0m Trial 4 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:01:55,233][0m Trial 5 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:01:58,150][0m Trial 6 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:01,079][0m Trial 7 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:03,996][0m Trial 8 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:06,890][0m Trial 9 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:09,872][0m Trial 10 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:12,812][0m Trial 11 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:15,709][0m Trial 12 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:18,661][0m Trial 13 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:21,551][0m Trial 14 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:24,454][0m Trial 15 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:27,302][0m Trial 16 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:30,247][0m Trial 17 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:33,139][0m Trial 18 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:36,036][0m Trial 19 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:38,954][0m Trial 20 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:41,877][0m Trial 21 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:44,759][0m Trial 22 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:47,591][0m Trial 23 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:50,568][0m Trial 24 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:53,522][0m Trial 25 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:56,429][0m Trial 26 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:02:59,283][0m Trial 27 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:02,281][0m Trial 28 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:09,079][0m Trial 29 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:11,934][0m Trial 30 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:14,828][0m Trial 31 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:17,697][0m Trial 32 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:20,602][0m Trial 33 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:23,501][0m Trial 34 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:26,338][0m Trial 35 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:29,179][0m Trial 36 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:31,996][0m Trial 37 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 200}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=200.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:34,882][0m Trial 38 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:37,705][0m Trial 39 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:40,553][0m Trial 40 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:43,402][0m Trial 41 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:46,240][0m Trial 42 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:49,093][0m Trial 43 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:51,999][0m Trial 44 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:54,877][0m Trial 45 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 250}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=250.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:03:57,767][0m Trial 46 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'constant', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=constant):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:04:00,693][0m Trial 47 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'adaptive', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=adaptive):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:04:03,651][0m Trial 48 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


[32m[I 2022-10-04 00:04:06,615][0m Trial 49 finished with value: 0.9638792676892627 and parameters: {'learning_rate': 'invscaling', 'max_iter': 300}. Best is trial 0 with value: 0.9638792676892627.[0m


Multilayer Perceptron model (lr=invscaling):
Multilayer Perceptron model (max_iter=300.000000):
accuracy: 0.9638792676892627
f1 score: 0.6439158667041543
recall: 0.6653412856196156
precision: 0.6250611727131065


#### SVM - Support vector machines

In [40]:
def SVM(trial):
  c = trial.suggest_int('c', 1, 101, 10)
  max_iter = trial.suggest_int('max_iter', 1, 101, 10)

  with mlflow.start_run(run_name = str('SMV')):
    model = SVC(C = c, max_iter = max_iter)
    model.fit(x_train, y_train)

    y_model = model.predict(x_val)

    (accuracy, f1score, recallscore, precisionscore) = eval_metrics(y_val, y_model)

    print('SVM model (c=%f):'%(c))
    print('SMV model (max_iter=%f):'%(max_iter))

    print('accuracy: %s'%(accuracy))
    print('f1 score: %s'%(f1score))
    print('recall: %s'%(recallscore))
    print('precision: %s'%(precisionscore))

    mlflow.log_param("c", c)
    mlflow.log_param("max_iter", max_iter)

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1 score", f1score)
    mlflow.log_metric("recall", recallscore)
    mlflow.log_metric("precision", precisionscore)

    mlflow.end_run()

  return accuracy

In [41]:
studySVM = optuna.create_study(direction="maximize")
studySVM.optimize(SVM, n_trials=50)

[32m[I 2022-10-04 00:04:06,651][0m A new study created in memory with name: no-name-96932e58-7768-4d35-800f-64c88516e411[0m
[32m[I 2022-10-04 00:04:06,822][0m Trial 0 finished with value: 0.8941118258287977 and parameters: {'c': 71, 'max_iter': 61}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:06,896][0m Trial 1 finished with value: 0.5710044532409698 and parameters: {'c': 71, 'max_iter': 21}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:06,932][0m Trial 2 finished with value: 0.5809005442850074 and parameters: {'c': 101, 'max_iter': 1}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=71.000000):
SMV model (max_iter=61.000000):
accuracy: 0.8941118258287977
f1 score: 0.6551415047543342
recall: 0.6819256421588699
precision: 0.6712006214842218
SVM model (c=71.000000):
SMV model (max_iter=21.000000):
accuracy: 0.5710044532409698
f1 score: 0.40003538642122827
recall: 0.44259845922985824
precision: 0.40451758257072506
SVM model (c=101.000000):
SMV model (max_iter=1.000000):
accuracy: 0.5809005442850074
f1 score: 0.33204470276853404
recall: 0.3930146276788655
precision: 0.3610033495861394


[32m[I 2022-10-04 00:04:07,103][0m Trial 3 finished with value: 0.8545274616526473 and parameters: {'c': 41, 'max_iter': 91}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:07,163][0m Trial 4 finished with value: 0.627906976744186 and parameters: {'c': 41, 'max_iter': 11}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=41.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8545274616526473
f1 score: 0.6103437283587613
recall: 0.6761403410522778
precision: 0.670697758349692
SVM model (c=41.000000):
SMV model (max_iter=11.000000):
accuracy: 0.627906976744186
f1 score: 0.43554414177133965
recall: 0.5019207265983575
precision: 0.45305193298062624


[32m[I 2022-10-04 00:04:07,311][0m Trial 5 finished with value: 0.5502226620484908 and parameters: {'c': 11, 'max_iter': 51}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:07,350][0m Trial 6 finished with value: 0.5809005442850074 and parameters: {'c': 21, 'max_iter': 1}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:07,498][0m Trial 7 finished with value: 0.8827313211281543 and parameters: {'c': 61, 'max_iter': 71}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=11.000000):
SMV model (max_iter=51.000000):
accuracy: 0.5502226620484908
f1 score: 0.40482386756308736
recall: 0.4856322996252606
precision: 0.4276667279637889
SVM model (c=21.000000):
SMV model (max_iter=1.000000):
accuracy: 0.5809005442850074
f1 score: 0.33204470276853404
recall: 0.3930146276788655
precision: 0.3610033495861394
SVM model (c=61.000000):
SMV model (max_iter=71.000000):
accuracy: 0.8827313211281543
f1 score: 0.638080805439439
recall: 0.6695601041637499
precision: 0.6679490446450961


[32m[I 2022-10-04 00:04:07,577][0m Trial 8 finished with value: 0.636318654131618 and parameters: {'c': 51, 'max_iter': 21}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:07,613][0m Trial 9 finished with value: 0.5809005442850074 and parameters: {'c': 91, 'max_iter': 1}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:07,739][0m Trial 10 finished with value: 0.822365165759525 and parameters: {'c': 81, 'max_iter': 51}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=51.000000):
SMV model (max_iter=21.000000):
accuracy: 0.636318654131618
f1 score: 0.42221998850058834
recall: 0.4263585454061373
precision: 0.4292812632095668
SVM model (c=91.000000):
SMV model (max_iter=1.000000):
accuracy: 0.5809005442850074
f1 score: 0.33204470276853404
recall: 0.3930146276788655
precision: 0.3610033495861394
SVM model (c=81.000000):
SMV model (max_iter=51.000000):
accuracy: 0.822365165759525
f1 score: 0.5907561206728229
recall: 0.651955513706487
precision: 0.6059241641885715


[32m[I 2022-10-04 00:04:07,908][0m Trial 11 finished with value: 0.8189015338941118 and parameters: {'c': 71, 'max_iter': 81}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:08,062][0m Trial 12 finished with value: 0.8827313211281543 and parameters: {'c': 61, 'max_iter': 71}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=71.000000):
SMV model (max_iter=81.000000):
accuracy: 0.8189015338941118
f1 score: 0.557490286997681
recall: 0.6606837846162462
precision: 0.6635712277180068
SVM model (c=61.000000):
SMV model (max_iter=71.000000):
accuracy: 0.8827313211281543
f1 score: 0.638080805439439
recall: 0.6695601041637499
precision: 0.6679490446450961


[32m[I 2022-10-04 00:04:08,201][0m Trial 13 finished with value: 0.8322612568035626 and parameters: {'c': 51, 'max_iter': 61}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=51.000000):
SMV model (max_iter=61.000000):
accuracy: 0.8322612568035626
f1 score: 0.5810404317744559
recall: 0.678576428751435
precision: 0.6720985836375188
SVM model (c=31.000000):
SMV model (max_iter=101.000000):
accuracy: 0.8367144977733795
f1 score: 0.5870516698660441
recall: 0.6764763531487133
precision: 0.6710213938457413


[32m[I 2022-10-04 00:04:08,396][0m Trial 14 finished with value: 0.8367144977733795 and parameters: {'c': 31, 'max_iter': 101}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:08,516][0m Trial 15 finished with value: 0.7961405244928254 and parameters: {'c': 71, 'max_iter': 41}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=71.000000):
SMV model (max_iter=41.000000):
accuracy: 0.7961405244928254
f1 score: 0.5389717908943051
recall: 0.6749947186073819
precision: 0.6724235477546735


[32m[I 2022-10-04 00:04:08,721][0m Trial 16 finished with value: 0.293419099455715 and parameters: {'c': 1, 'max_iter': 71}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:08,860][0m Trial 17 finished with value: 0.830776843146957 and parameters: {'c': 91, 'max_iter': 41}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=1.000000):
SMV model (max_iter=71.000000):
accuracy: 0.293419099455715
f1 score: 0.2666236963174911
recall: 0.41446033591418
precision: 0.4348764646340564
SVM model (c=91.000000):
SMV model (max_iter=41.000000):
accuracy: 0.830776843146957
f1 score: 0.580466627314663
recall: 0.6493779567356753
precision: 0.6383799653231342


[32m[I 2022-10-04 00:04:09,033][0m Trial 18 finished with value: 0.793666501731816 and parameters: {'c': 61, 'max_iter': 81}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:09,169][0m Trial 19 finished with value: 0.830776843146957 and parameters: {'c': 81, 'max_iter': 61}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=61.000000):
SMV model (max_iter=81.000000):
accuracy: 0.793666501731816
f1 score: 0.512966008356414
recall: 0.6672080194887017
precision: 0.6668951270857605
SVM model (c=81.000000):
SMV model (max_iter=61.000000):
accuracy: 0.830776843146957
f1 score: 0.5742725051021064
recall: 0.6528278150932115
precision: 0.6602170175295607


[32m[I 2022-10-04 00:04:09,356][0m Trial 20 finished with value: 0.813953488372093 and parameters: {'c': 61, 'max_iter': 101}. Best is trial 0 with value: 0.8941118258287977.[0m
[32m[I 2022-10-04 00:04:09,521][0m Trial 21 finished with value: 0.8827313211281543 and parameters: {'c': 61, 'max_iter': 71}. Best is trial 0 with value: 0.8941118258287977.[0m


SVM model (c=61.000000):
SMV model (max_iter=101.000000):
accuracy: 0.813953488372093
f1 score: 0.5507656749977748
recall: 0.6701854600098938
precision: 0.6683882255900251
SVM model (c=61.000000):
SMV model (max_iter=71.000000):
accuracy: 0.8827313211281543
f1 score: 0.638080805439439
recall: 0.6695601041637499
precision: 0.6679490446450961


[32m[I 2022-10-04 00:04:09,676][0m Trial 22 finished with value: 0.901039089559624 and parameters: {'c': 41, 'max_iter': 71}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:09,840][0m Trial 23 finished with value: 0.7941613062840178 and parameters: {'c': 41, 'max_iter': 81}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=41.000000):
SMV model (max_iter=71.000000):
accuracy: 0.901039089559624
f1 score: 0.6510592946338105
recall: 0.6658546374336143
precision: 0.6662564417072369
SVM model (c=41.000000):
SMV model (max_iter=81.000000):
accuracy: 0.7941613062840178
f1 score: 0.5585582398709914
recall: 0.651909632303547
precision: 0.6604876589074834


[32m[I 2022-10-04 00:04:09,995][0m Trial 24 finished with value: 0.8446313706086096 and parameters: {'c': 31, 'max_iter': 61}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:10,116][0m Trial 25 finished with value: 0.863928748144483 and parameters: {'c': 31, 'max_iter': 41}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=31.000000):
SMV model (max_iter=61.000000):
accuracy: 0.8446313706086096
f1 score: 0.5965654406920494
recall: 0.6709508208962189
precision: 0.6684568914769643
SVM model (c=31.000000):
SMV model (max_iter=41.000000):
accuracy: 0.863928748144483
f1 score: 0.6189566931270254
recall: 0.6685707352131343
precision: 0.6674180301954497


[32m[I 2022-10-04 00:04:10,256][0m Trial 26 finished with value: 0.8322612568035626 and parameters: {'c': 51, 'max_iter': 61}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:10,429][0m Trial 27 finished with value: 0.8985650667986146 and parameters: {'c': 81, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=51.000000):
SMV model (max_iter=61.000000):
accuracy: 0.8322612568035626
f1 score: 0.5810404317744559
recall: 0.678576428751435
precision: 0.6720985836375188
SVM model (c=81.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8985650667986146
f1 score: 0.6455226540372093
recall: 0.6585090396587611
precision: 0.6624721095604594


[32m[I 2022-10-04 00:04:10,606][0m Trial 28 finished with value: 0.8451261751608115 and parameters: {'c': 101, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:10,777][0m Trial 29 finished with value: 0.8985650667986146 and parameters: {'c': 81, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=101.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8451261751608115
f1 score: 0.5943820495946334
recall: 0.6595170759480674
precision: 0.6634256530999646
SVM model (c=81.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8985650667986146
f1 score: 0.6455226540372093
recall: 0.6585090396587611
precision: 0.6624721095604594


[32m[I 2022-10-04 00:04:10,952][0m Trial 30 finished with value: 0.8985650667986146 and parameters: {'c': 81, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:11,115][0m Trial 31 finished with value: 0.8985650667986146 and parameters: {'c': 81, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=81.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8985650667986146
f1 score: 0.6455226540372093
recall: 0.6585090396587611
precision: 0.6624721095604594
SVM model (c=81.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8985650667986146
f1 score: 0.6455226540372093
recall: 0.6585090396587611
precision: 0.6624721095604594


[32m[I 2022-10-04 00:04:11,296][0m Trial 32 finished with value: 0.8861949529935675 and parameters: {'c': 91, 'max_iter': 101}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:11,469][0m Trial 33 finished with value: 0.8985650667986146 and parameters: {'c': 81, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=91.000000):
SMV model (max_iter=101.000000):
accuracy: 0.8861949529935675
f1 score: 0.6379630292884512
recall: 0.6621025023567515
precision: 0.664491434232409
SVM model (c=81.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8985650667986146
f1 score: 0.6455226540372093
recall: 0.6585090396587611
precision: 0.6624721095604594


[32m[I 2022-10-04 00:04:11,638][0m Trial 34 finished with value: 0.8787728847105394 and parameters: {'c': 91, 'max_iter': 81}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:11,809][0m Trial 35 finished with value: 0.8451261751608115 and parameters: {'c': 101, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=91.000000):
SMV model (max_iter=81.000000):
accuracy: 0.8787728847105394
f1 score: 0.6258294769660485
recall: 0.6481299993466432
precision: 0.6581279833534465
SVM model (c=101.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8451261751608115
f1 score: 0.5943820495946334
recall: 0.6595170759480674
precision: 0.6634256530999646


[32m[I 2022-10-04 00:04:11,992][0m Trial 36 finished with value: 0.8367144977733795 and parameters: {'c': 71, 'max_iter': 101}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:12,151][0m Trial 37 finished with value: 0.8476001979218208 and parameters: {'c': 81, 'max_iter': 81}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=71.000000):
SMV model (max_iter=101.000000):
accuracy: 0.8367144977733795
f1 score: 0.5870516698660441
recall: 0.6764763531487133
precision: 0.6710213938457413
SVM model (c=81.000000):
SMV model (max_iter=81.000000):
accuracy: 0.8476001979218208
f1 score: 0.5983155453218799
recall: 0.662830528565695
precision: 0.6648891608169566


[32m[I 2022-10-04 00:04:12,348][0m Trial 38 finished with value: 0.8476001979218208 and parameters: {'c': 21, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=21.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8476001979218208
f1 score: 0.601373012732629
recall: 0.674926964037372
precision: 0.6701977170172423
SVM model (c=71.000000):
SMV model (max_iter=101.000000):
accuracy: 0.8367144977733795
f1 score: 0.5870516698660441
recall: 0.6764763531487133
precision: 0.6710213938457413


[32m[I 2022-10-04 00:04:12,543][0m Trial 39 finished with value: 0.8367144977733795 and parameters: {'c': 71, 'max_iter': 101}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:12,710][0m Trial 40 finished with value: 0.7941613062840178 and parameters: {'c': 41, 'max_iter': 81}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:12,881][0m Trial 41 finished with value: 0.8985650667986146 and parameters: {'c': 81, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=41.000000):
SMV model (max_iter=81.000000):
accuracy: 0.7941613062840178
f1 score: 0.5585582398709914
recall: 0.651909632303547
precision: 0.6604876589074834
SVM model (c=81.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8985650667986146
f1 score: 0.6455226540372093
recall: 0.6585090396587611
precision: 0.6624721095604594


[32m[I 2022-10-04 00:04:13,055][0m Trial 42 finished with value: 0.8198911429985156 and parameters: {'c': 91, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:13,206][0m Trial 43 finished with value: 0.8421573478476002 and parameters: {'c': 101, 'max_iter': 71}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=91.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8198911429985156
f1 score: 0.5611578518496266
recall: 0.6741056011349742
precision: 0.6702794377419475
SVM model (c=101.000000):
SMV model (max_iter=71.000000):
accuracy: 0.8421573478476002
f1 score: 0.5961632658524477
recall: 0.6742634865700593
precision: 0.6634827842176924


[32m[I 2022-10-04 00:04:13,384][0m Trial 44 finished with value: 0.8985650667986146 and parameters: {'c': 81, 'max_iter': 91}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:13,556][0m Trial 45 finished with value: 0.8189015338941118 and parameters: {'c': 71, 'max_iter': 81}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=81.000000):
SMV model (max_iter=91.000000):
accuracy: 0.8985650667986146
f1 score: 0.6455226540372093
recall: 0.6585090396587611
precision: 0.6624721095604594
SVM model (c=71.000000):
SMV model (max_iter=81.000000):
accuracy: 0.8189015338941118
f1 score: 0.557490286997681
recall: 0.6606837846162462
precision: 0.6635712277180068


[32m[I 2022-10-04 00:04:13,745][0m Trial 46 finished with value: 0.8861949529935675 and parameters: {'c': 91, 'max_iter': 101}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:13,897][0m Trial 47 finished with value: 0.847105393369619 and parameters: {'c': 51, 'max_iter': 71}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=91.000000):
SMV model (max_iter=101.000000):
accuracy: 0.8861949529935675
f1 score: 0.6379630292884512
recall: 0.6621025023567515
precision: 0.664491434232409
SVM model (c=51.000000):
SMV model (max_iter=71.000000):
accuracy: 0.847105393369619
f1 score: 0.5985608125538542
recall: 0.6661999831993951
precision: 0.666362636107593


[32m[I 2022-10-04 00:04:13,986][0m Trial 48 finished with value: 0.8535378525482434 and parameters: {'c': 41, 'max_iter': 21}. Best is trial 22 with value: 0.901039089559624.[0m
[32m[I 2022-10-04 00:04:14,148][0m Trial 49 finished with value: 0.8189015338941118 and parameters: {'c': 71, 'max_iter': 81}. Best is trial 22 with value: 0.901039089559624.[0m


SVM model (c=41.000000):
SMV model (max_iter=21.000000):
accuracy: 0.8535378525482434
f1 score: 0.5971317868225084
recall: 0.5874310705777438
precision: 0.6199089095568705
SVM model (c=71.000000):
SMV model (max_iter=81.000000):
accuracy: 0.8189015338941118
f1 score: 0.557490286997681
recall: 0.6606837846162462
precision: 0.6635712277180068


#### Logistic regression

In [42]:
def Logistic(trial):
  c = trial.suggest_int('c', 1, 2001, 100)
  max_iter = trial.suggest_int('max_iter', 1, 1001, 100)
  solver = trial.suggest_categorical('solver', ['sag', 'saga', 'lbfgs', 'liblinear'])

  with mlflow.start_run(run_name = str('Logistic Regression')):
    model = LogisticRegression(C = c, max_iter = max_iter, solver = solver)
    model.fit(x_train, y_train)

    y_model = model.predict(x_val)

    (accuracy, f1score, recallscore, precisionscore) = eval_metrics(y_val, y_model)

    print('Logistic model (c=%f):'%(c))
    print('Logistic model (max_iter=%f):'%(max_iter))
    print('Logistic model (solver=%s):'%(solver))    

    print('accuracy: %s'%(accuracy))
    print('f1 score: %s'%(f1score))
    print('recall: %s'%(recallscore))
    print('precision: %s'%(precisionscore))

    mlflow.log_param("c", c)
    mlflow.log_param("max_iter", max_iter)
    mlflow.log_param("solver", solver)

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1 score", f1score)
    mlflow.log_metric("recall", recallscore)
    mlflow.log_metric("precision", precisionscore)

    mlflow.end_run()

  return accuracy

In [43]:
studyLR = optuna.create_study(direction="maximize")
studyLR.optimize(Logistic, n_trials=50)

[32m[I 2022-10-04 00:04:14,176][0m A new study created in memory with name: no-name-649f790d-109b-4b19-81ac-5514ddba4cb4[0m
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:15,962][0m Trial 0 finished with value: 0.9648688767936665 and parameters: {'c': 501, 'max_iter': 501, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=501.000000):
Logistic model (max_iter=501.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.6448662715916217
recall: 0.6666666666666666
precision: 0.6257151144665246


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:18,107][0m Trial 1 finished with value: 0.9643740722414647 and parameters: {'c': 1801, 'max_iter': 601, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1801.000000):
Logistic model (max_iter=601.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6444209602124288
recall: 0.6660039761431412
precision: 0.6254140384575168


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:19,882][0m Trial 2 finished with value: 0.9648688767936665 and parameters: {'c': 501, 'max_iter': 501, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=501.000000):
Logistic model (max_iter=501.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.6448662715916217
recall: 0.6666666666666666
precision: 0.6257151144665246


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:20,165][0m Trial 3 finished with value: 0.9648688767936665 and parameters: {'c': 901, 'max_iter': 901, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=901.000000):
Logistic model (max_iter=901.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
[32m[I 2022-10-04 00:04:20,559][0m Trial 4 finished with value: 0.9648688767936665 and parameters: {'c': 301, 'max_iter': 101, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=301.000000):
Logistic model (max_iter=101.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.6537952810021733
recall: 0.6706988118238923
precision: 0.9589772566291903


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
[32m[I 2022-10-04 00:04:20,973][0m Trial 5 finished with value: 0.9643740722414647 and parameters: {'c': 901, 'max_iter': 101, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:21,036][0m Trial 6 finished with value: 0.7159821870361207 and parameters: {'c': 801, 'max_iter': 1, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=901.000000):
Logistic model (max_iter=101.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6533375769951235
recall: 0.6700361213003668
precision: 0.7922391494357416
Logistic model (c=801.000000):
Logistic model (max_iter=1.000000):
Logistic model (solver=liblinear):
accuracy: 0.7159821870361207
f1 score: 0.27816224529027295
recall: 0.3333333333333333
precision: 0.23866072901204025


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:21,297][0m Trial 7 finished with value: 0.9648688767936665 and parameters: {'c': 301, 'max_iter': 301, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=301.000000):
Logistic model (max_iter=301.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:21,653][0m Trial 8 finished with value: 0.9643740722414647 and parameters: {'c': 101, 'max_iter': 101, 'solver': 'sag'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=101.000000):
Logistic model (max_iter=101.000000):
Logistic model (solver=sag):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:24,199][0m Trial 9 finished with value: 0.9643740722414647 and parameters: {'c': 1801, 'max_iter': 801, 'solver': 'sag'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1801.000000):
Logistic model (max_iter=801.000000):
Logistic model (solver=sag):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:26,041][0m Trial 10 finished with value: 0.9643740722414647 and parameters: {'c': 1401, 'max_iter': 501, 'solver': 'saga'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1401.000000):
Logistic model (max_iter=501.000000):
Logistic model (solver=saga):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:27,794][0m Trial 11 finished with value: 0.9643740722414647 and parameters: {'c': 601, 'max_iter': 501, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=601.000000):
Logistic model (max_iter=501.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6444209602124288
recall: 0.6660039761431412
precision: 0.6254140384575168


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:30,230][0m Trial 12 finished with value: 0.9643740722414647 and parameters: {'c': 501, 'max_iter': 701, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=501.000000):
Logistic model (max_iter=701.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6444209602124288
recall: 0.6660039761431412
precision: 0.6254140384575168


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:31,747][0m Trial 13 finished with value: 0.9643740722414647 and parameters: {'c': 1301, 'max_iter': 401, 'solver': 'saga'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1301.000000):
Logistic model (max_iter=401.000000):
Logistic model (solver=saga):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:32,819][0m Trial 14 finished with value: 0.9643740722414647 and parameters: {'c': 1, 'max_iter': 301, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1.000000):
Logistic model (max_iter=301.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:36,311][0m Trial 15 finished with value: 0.9648688767936665 and parameters: {'c': 601, 'max_iter': 1001, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=601.000000):
Logistic model (max_iter=1001.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.6448662715916217
recall: 0.6666666666666666
precision: 0.6257151144665246


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:38,793][0m Trial 16 finished with value: 0.9643740722414647 and parameters: {'c': 1201, 'max_iter': 701, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1201.000000):
Logistic model (max_iter=701.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6444209602124288
recall: 0.6660039761431412
precision: 0.6254140384575168


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:40,001][0m Trial 17 finished with value: 0.9643740722414647 and parameters: {'c': 301, 'max_iter': 301, 'solver': 'saga'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=301.000000):
Logistic model (max_iter=301.000000):
Logistic model (solver=saga):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:43,138][0m Trial 18 finished with value: 0.9643740722414647 and parameters: {'c': 701, 'max_iter': 1001, 'solver': 'sag'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=701.000000):
Logistic model (max_iter=1001.000000):
Logistic model (solver=sag):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:44,904][0m Trial 19 finished with value: 0.9643740722414647 and parameters: {'c': 1101, 'max_iter': 501, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1101.000000):
Logistic model (max_iter=501.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6444209602124288
recall: 0.6660039761431412
precision: 0.6254140384575168


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:47,383][0m Trial 20 finished with value: 0.9648688767936665 and parameters: {'c': 1501, 'max_iter': 701, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1501.000000):
Logistic model (max_iter=701.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:47,666][0m Trial 21 finished with value: 0.9648688767936665 and parameters: {'c': 301, 'max_iter': 301, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=301.000000):
Logistic model (max_iter=301.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:51,206][0m Trial 22 finished with value: 0.9648688767936665 and parameters: {'c': 1501, 'max_iter': 1001, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1501.000000):
Logistic model (max_iter=1001.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:54,009][0m Trial 23 finished with value: 0.9643740722414647 and parameters: {'c': 1001, 'max_iter': 801, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1001.000000):
Logistic model (max_iter=801.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:54,295][0m Trial 24 finished with value: 0.9648688767936665 and parameters: {'c': 401, 'max_iter': 401, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=401.000000):
Logistic model (max_iter=401.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:54,549][0m Trial 25 finished with value: 0.9648688767936665 and parameters: {'c': 1501, 'max_iter': 201, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1501.000000):
Logistic model (max_iter=201.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:54,808][0m Trial 26 finished with value: 0.9648688767936665 and parameters: {'c': 1901, 'max_iter': 601, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1901.000000):
Logistic model (max_iter=601.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:55,078][0m Trial 27 finished with value: 0.9648688767936665 and parameters: {'c': 1601, 'max_iter': 401, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1601.000000):
Logistic model (max_iter=401.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:55,329][0m Trial 28 finished with value: 0.9648688767936665 and parameters: {'c': 101, 'max_iter': 201, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=101.000000):
Logistic model (max_iter=201.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:55,599][0m Trial 29 finished with value: 0.9648688767936665 and parameters: {'c': 1901, 'max_iter': 601, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1901.000000):
Logistic model (max_iter=601.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:55,866][0m Trial 30 finished with value: 0.9648688767936665 and parameters: {'c': 2001, 'max_iter': 601, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=2001.000000):
Logistic model (max_iter=601.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:56,145][0m Trial 31 finished with value: 0.9648688767936665 and parameters: {'c': 1801, 'max_iter': 401, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1801.000000):
Logistic model (max_iter=401.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:56,421][0m Trial 32 finished with value: 0.9648688767936665 and parameters: {'c': 1701, 'max_iter': 201, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1701.000000):
Logistic model (max_iter=201.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:56,685][0m Trial 33 finished with value: 0.9648688767936665 and parameters: {'c': 2001, 'max_iter': 601, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=2001.000000):
Logistic model (max_iter=601.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:56,944][0m Trial 34 finished with value: 0.9648688767936665 and parameters: {'c': 2001, 'max_iter': 601, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=2001.000000):
Logistic model (max_iter=601.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:57,221][0m Trial 35 finished with value: 0.9648688767936665 and parameters: {'c': 1701, 'max_iter': 201, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1701.000000):
Logistic model (max_iter=201.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:57,498][0m Trial 36 finished with value: 0.9648688767936665 and parameters: {'c': 1701, 'max_iter': 401, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1701.000000):
Logistic model (max_iter=401.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:04:57,756][0m Trial 37 finished with value: 0.9648688767936665 and parameters: {'c': 2001, 'max_iter': 801, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=2001.000000):
Logistic model (max_iter=801.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
[32m[I 2022-10-04 00:04:58,838][0m Trial 38 finished with value: 0.9633844631370608 and parameters: {'c': 401, 'max_iter': 301, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=401.000000):
Logistic model (max_iter=301.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9633844631370608
f1 score: 0.6436436317288091
recall: 0.6646785950960902
precision: 0.625041338979633


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:01,415][0m Trial 39 finished with value: 0.9643740722414647 and parameters: {'c': 801, 'max_iter': 801, 'solver': 'sag'}. Best is trial 0 with value: 0.9648688767936665.[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:01,464][0m Trial 40 finished with value: 0.9614052449282533 and parameters: {'c': 1401, 'max_iter': 1, 'solver': 'saga'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=801.000000):
Logistic model (max_iter=801.000000):
Logistic model (solver=sag):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979
Logistic model (c=1401.000000):
Logistic model (max_iter=1.000000):
Logistic model (solver=saga):
accuracy: 0.9614052449282533
f1 score: 0.641563280896801
recall: 0.6620278330019881
precision: 0.6233217453605655


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:01,742][0m Trial 41 finished with value: 0.9648688767936665 and parameters: {'c': 1601, 'max_iter': 501, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1601.000000):
Logistic model (max_iter=501.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:01,997][0m Trial 42 finished with value: 0.9648688767936665 and parameters: {'c': 1501, 'max_iter': 401, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1501.000000):
Logistic model (max_iter=401.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:05,140][0m Trial 43 finished with value: 0.9648688767936665 and parameters: {'c': 1201, 'max_iter': 901, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1201.000000):
Logistic model (max_iter=901.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:05,543][0m Trial 44 finished with value: 0.9643740722414647 and parameters: {'c': 901, 'max_iter': 101, 'solver': 'sag'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=901.000000):
Logistic model (max_iter=101.000000):
Logistic model (solver=sag):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:08,640][0m Trial 45 finished with value: 0.9648688767936665 and parameters: {'c': 1301, 'max_iter': 901, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1301.000000):
Logistic model (max_iter=901.000000):
Logistic model (solver=lbfgs):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:08,941][0m Trial 46 finished with value: 0.9648688767936665 and parameters: {'c': 201, 'max_iter': 501, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=201.000000):
Logistic model (max_iter=501.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:12,303][0m Trial 47 finished with value: 0.9643740722414647 and parameters: {'c': 501, 'max_iter': 901, 'solver': 'saga'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=501.000000):
Logistic model (max_iter=901.000000):
Logistic model (solver=saga):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:12,563][0m Trial 48 finished with value: 0.9648688767936665 and parameters: {'c': 1901, 'max_iter': 401, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-10-04 00:05:12,730][0m Trial 49 finished with value: 0.9643740722414647 and parameters: {'c': 1, 'max_iter': 201, 'solver': 'liblinear'}. Best is trial 0 with value: 0.9648688767936665.[0m


Logistic model (c=1901.000000):
Logistic model (max_iter=401.000000):
Logistic model (solver=liblinear):
accuracy: 0.9648688767936665
f1 score: 0.644692045806252
recall: 0.6666666666666666
precision: 0.6254355400696864
Logistic model (c=1.000000):
Logistic model (max_iter=201.000000):
Logistic model (solver=liblinear):
accuracy: 0.9643740722414647
f1 score: 0.6442466929051284
recall: 0.6660039761431412
precision: 0.6251333809004979


### Seleção dos hiperparâmetros e escolha do melhor modelo geral

In [53]:
modelGaussian = GaussianNB(var_smoothing = 9.589617519348033e-09)
modelGaussian.fit(x_train, y_train)
y_modelGaussian = modelGaussian.predict(x_test)

(accuracyNaive, f1scoreNaive, recallNaive, precisionNaive) = eval_metrics(y_test, y_modelGaussian)

print(accuracyNaive)
print(f1scoreNaive)
print(recallNaive)
print(precisionNaive)

0.7742574257425743
0.4615657820335682
0.6813021310453916
0.6639003648136068


In [57]:
modelMLP = MLPClassifier(learning_rate = 'invscaling', max_iter = 300)
modelMLP.fit(x_train, y_train)
y_modelMLP = modelMLP.predict(x_test)

(accuracyMLP, f1scoreMLP, recallMLP, precisionMLP) = eval_metrics(y_test, y_modelMLP)

print(accuracyMLP)
print(f1scoreMLP)
print(recallMLP)
print(precisionMLP)

0.9663366336633663
0.6546465363532699
0.6699841735785254
0.7935369898335777


In [58]:
modelSVM = SVC(C = 41, max_iter = 71)
modelSVM.fit(x_train, y_train)
y_modelSVM = modelSVM.predict(x_test)

(accuracySVM, f1scoreSVM, recallSVM, precisionSVM) = eval_metrics(y_test, y_modelSVM)

print(accuracySVM)
print(f1scoreSVM)
print(recallSVM)
print(precisionSVM)

0.9153465346534654
0.6596994466490304
0.6709624849547828
0.6685199237997389




In [59]:
modelLR = LogisticRegression(C = 501, max_iter = 501, solver = 'lbfgs')
modelLR.fit(x_train, y_train)
y_modelLR = modelLR.predict(x_test)

(accuracyLR, f1scoreLR, recallLR, precisionLR) = eval_metrics(y_test, y_modelLR)

print(accuracyLR)
print(f1scoreLR)
print(recallLR)
print(precisionLR)

0.9663366336633663
0.6447374847374848
0.665533475225388
0.6264147736280475


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))


### Diagnóstico e Aprimoramento

Como podemos notar com base nos testes acima, o melhor modelo foi o MLP

In [48]:
from sklearn.neural_network import MLPClassifier
model = MLPClassifier(learning_rate = 'constant', max_iter = 300)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
y_pred_train = model.predict(x_train)

In [49]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

print(10*'-','Train',10*'-')

accuracy_treino =accuracy_score(y_train, y_pred_train)
precision_treino =precision_score(y_train, y_pred_train, average='macro')
recall_treino =recall_score(y_train, y_pred_train, average='macro')
f1_treino = f1_score(y_train, y_pred_train,average='macro')

print("Accuracy", accuracy_treino)
print("Precision", precision_treino)
print("Recall ", recall_treino)
print("F1 ", f1_treino,'\n')

print(10*'-','Test',10*'-')

accuracy_teste =accuracy_score(y_test, y_pred)
precision_teste =precision_score(y_test, y_pred, average='macro')
recall_teste =recall_score(y_test, y_pred, average='macro')
f1_teste = f1_score(y_test, y_pred,average='macro')

print("Accuracy", accuracy_teste)
print("Precision", precision_teste)
print("Recall ", recall_teste)
print("F1 ", f1_teste,'\n')

print(40*'-')
print('Vies Accuracy:',(1-accuracy_treino)*100-5)
print('Variancia Accuracy:',(1-accuracy_teste)*100-(1-accuracy_treino)*100)
print(40*'-')
print('Vies Precision:',(1-precision_treino)*100-5)
print('Variancia Precision:',(1-precision_teste)*100-(1-precision_treino)*100)

---------- Train ----------
Accuracy 0.9686520376175548
Precision 0.8208910494980405
Recall  0.6724430757745429
F1  0.6606053367381944 

---------- Test ----------
Accuracy 0.9663366336633663
Precision 0.7935369898335777
Recall  0.6699841735785254
F1  0.6546465363532699 

----------------------------------------
Vies Accuracy: -1.8652037617554806
Variancia Accuracy: 0.23154039541884996
----------------------------------------
Vies Precision: 12.91089505019595
Variancia Precision: 2.735405966446283


In [50]:
from sklearn.neural_network import MLPClassifier
# model = MLPClassifier(learning_rate = 'constant', max_iter = 300)
model = MLPClassifier()

model.fit(x_train, y_train)

y_pred = model.predict(x_test)
y_pred_train = model.predict(x_train)

In [51]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

print(10*'-','Train',10*'-')

accuracy_treino =accuracy_score(y_train, y_pred_train)
precision_treino =precision_score(y_train, y_pred_train, average='macro')
recall_treino =recall_score(y_train, y_pred_train, average='macro')
f1_treino = f1_score(y_train, y_pred_train,average='macro')

print("Accuracy", accuracy_treino)
print("Precision", precision_treino)
print("Recall ", recall_treino)
print("F1 ", f1_treino,'\n')

print(10*'-','Test',10*'-')

accuracy_teste =accuracy_score(y_test, y_pred)
precision_teste =precision_score(y_test, y_pred, average='macro')
recall_teste =recall_score(y_test, y_pred, average='macro')
f1_teste = f1_score(y_test, y_pred,average='macro')

print("Accuracy", accuracy_teste)
print("Precision", precision_teste)
print("Recall ", recall_teste)
print("F1 ", f1_teste,'\n')

print(40*'-')
print('Vies Accuracy:',(1-accuracy_treino)*100-5)
print('Variancia Accuracy:',(1-accuracy_teste)*100-(1-accuracy_treino)*100)
print(40*'-')
print('Vies Precision:',(1-precision_treino)*100-5)
print('Variancia Precision:',(1-precision_teste)*100-(1-precision_treino)*100)

---------- Train ----------
Accuracy 0.968982016168949
Precision 0.897123839739938
Recall  0.6728816722657709
F1  0.6609663198726102 

---------- Test ----------
Accuracy 0.9663366336633663
Precision 0.6264147736280475
Recall  0.665533475225388
F1  0.6447374847374848 

----------------------------------------
Vies Accuracy: -1.8982016168948967
Variancia Accuracy: 0.2645382505582661
----------------------------------------
Vies Precision: 5.287616026006196
Variancia Precision: 27.070906611189045


  _warn_prf(average, modifier, msg_start, len(result))


### BÔNUS

In [52]:
import autosklearn.classification
cls = autosklearn.classification.AutoSklearnClassifier()
cls.fit(x_train, y_train)
predictions = cls.predict(x_test)

accuracy, f1score, recall, precision = eval_metrics(y_test, predictions)

print(accuracy)
print(f1score)
print(recall)
print(precision)

0.9658415841584158
0.6444406323243838
0.6653056328922825
0.6261180679785331


In [62]:
print(cls.sprint_statistics())

auto-sklearn results:
  Dataset name: 41dc3fc0-4378-11ed-8130-0242ac1c0002
  Metric: accuracy
  Best validation score: 0.969515
  Number of target algorithm runs: 271
  Number of successful target algorithm runs: 265
  Number of crashed target algorithm runs: 3
  Number of target algorithms that exceeded the time limit: 3
  Number of target algorithms that exceeded the memory limit: 0



In [60]:
cls.show_models()

{9: {'model_id': 9,
  'rank': 1,
  'cost': 0.031984007996001984,
  'ensemble_weight': 0.22,
  'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice at 0x7f4cbee61d10>,
  'balancing': Balancing(random_state=1),
  'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice at 0x7f4cbee65750>,
  'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice at 0x7f4cbed1df50>,
  'sklearn_classifier': AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                     learning_rate=0.03743735372990651, n_estimators=475,
                     random_state=1)},
 87: {'model_id': 87,
  'rank': 2,
  'cost': 0.031984007996001984,
  'ensemble_weight': 0.02,
  'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice at 0x7f4cbedc4890>,
  'balancing': Balancing(random_state=1, strategy='weighting'),
  'feature_preprocessor': <autosklea