In [71]:
# importando pacotes
from ucimlrepo import fetch_ucirepo 
import pandas
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score
import numpy

In [72]:
# CARREGANDO DADOS

# importando dataset
dataset = fetch_ucirepo(id=763) # 338 registros e 4 atributos
  
# coletando as informações
data_frame = dataset.data.original

In [None]:
# TRATANDO DADOS

print("PRÉ TRATAMENTO: ", len(data_frame))

# removendo colunas com muitos nulos
tolerancia = len(data_frame) * 0.7
data_frame = data_frame.dropna(axis=1, thresh=tolerancia)

# removendo duplicados
print("VALORES DUPLICADOS: ", data_frame.duplicated().sum())
data_frame = data_frame.drop_duplicates()

# convertendo colunas categóricas em valores inteiros
conversores = {}
for coluna in data_frame.columns:
    if (data_frame[coluna].dtype == type(object)):
        conversor = LabelEncoder()
        data_frame[coluna] = conversor.fit_transform(data_frame[coluna])
        conversores[coluna] = conversor

print("PÓS TRATAMENTO: ", len(data_frame))

PRÉ TRATAMENTO:  338
VALORES DUPLICADOS:  0
PÓS TRATAMENTO:  338


In [74]:
print(data_frame.columns)

Index(['V', 'H', 'S', 'M'], dtype='object')


In [75]:
# DIVIDINDO O DATASET TRATADO

atributos = data_frame.drop(["M"], axis=1)
respostas = data_frame[["M"]]

a_treino, a_teste, r_treino, r_teste = train_test_split(atributos, respostas, test_size=0.3, random_state=42)

# convertendo de (N, 1) para (N,)
r_treino = numpy.ravel(r_treino)
r_teste = numpy.ravel(r_teste)


In [76]:
print("TAMANHO DO DATASET TRATADO: ", len(data_frame))
print("CONJUNTO DE TREINO: ", a_treino.shape, " e ", r_treino.shape)
print("CONJUNTO DE TESTE: ", a_teste.shape, " e ", r_teste.shape)

TAMANHO DO DATASET TRATADO:  338
CONJUNTO DE TREINO:  (236, 3)  e  (236,)
CONJUNTO DE TESTE:  (102, 3)  e  (102,)


In [77]:
# CRIANDO PARÂMETROS PARA OS MODELOS

lista_parametros = [
    {"id": "1", "hidden_layer_sizes": (100, 50), "activation": "relu", "solver": "adam", "max_iter": 800},
    {"id": "2", "hidden_layer_sizes": (100, 50), "activation": "relu", "solver": "adam", "max_iter": 900},
    {"id": "3", "hidden_layer_sizes": (100, 50), "activation": "relu", "solver": "adam", "max_iter": 1000},
    {"id": "4", "hidden_layer_sizes": (100, 50), "activation": "relu", "solver": "adam", "max_iter": 1100},
]

In [78]:
# APLICANDO MODELO

# criando a lista de resultados finais
lista_resultados = []

for parametros in lista_parametros:
    # criando classificador
    classificador = MLPClassifier(
        hidden_layer_sizes=parametros["hidden_layer_sizes"],
        activation=parametros["activation"],
        solver=parametros["solver"],
        max_iter=parametros["max_iter"],
        random_state=42
    )
    # treinando o modelo
    classificador.fit(a_treino, r_treino)

    # prevendo respostas
    r_previsao = classificador.predict(a_teste)

    # calculando métricas
    acuracia = accuracy_score(r_teste, r_previsao)
    f1 = f1_score(r_teste, r_previsao, average="weighted")

    # salvando resultados
    lista_resultados.append([parametros["id"], acuracia, f1])

ValueError: Unknown label type: (array([0.  , 0.5 , 1.  , 0.25, 0.75, 0.  , 0.  , 0.75, 0.  , 0.25, 0.  ,
       0.  , 0.5 , 0.75, 0.25, 0.75, 0.  , 1.  , 0.25, 0.25, 0.75, 0.  ,
       0.75, 0.5 , 0.75, 1.  , 0.25, 1.  , 0.75, 0.25, 0.25, 0.75, 0.5 ,
       0.5 , 0.  , 0.25, 0.  , 0.  , 0.25, 0.25, 0.5 , 0.75, 0.5 , 1.  ,
       0.25, 0.5 , 0.25, 0.5 , 0.75, 1.  , 0.  , 0.  , 0.  , 1.  , 0.5 ,
       0.  , 0.75, 0.5 , 0.  , 0.5 , 0.5 , 1.  , 0.5 , 0.75, 1.  , 0.75,
       0.75, 0.5 , 1.  , 0.75, 1.  , 0.  , 0.5 , 0.75, 0.  , 0.5 , 0.25,
       1.  , 0.5 , 0.25, 0.  , 0.25, 0.5 , 0.5 , 0.25, 1.  , 0.5 , 1.  ,
       0.  , 0.  , 0.75, 0.25, 0.  , 0.25, 1.  , 0.25, 0.75, 0.  , 0.  ,
       0.  , 0.75, 0.75, 0.  , 0.5 , 1.  , 0.25, 0.5 , 0.5 , 0.75, 1.  ,
       0.5 , 0.  , 0.25, 0.75, 1.  , 0.5 , 0.  , 1.  , 0.75, 0.5 , 0.75,
       0.  , 0.75, 0.5 , 0.75, 0.5 , 0.  , 1.  , 0.5 , 1.  , 1.  , 0.5 ,
       0.75, 0.5 , 0.  , 1.  , 0.25, 0.75, 0.25, 0.25, 0.5 , 0.  , 1.  ,
       1.  , 0.75, 0.  , 0.25, 0.25, 0.  , 0.  , 0.  , 0.5 , 0.  , 0.25,
       0.75, 0.5 , 0.75, 0.25, 0.5 , 0.5 , 0.  , 0.25, 0.  , 0.75, 0.  ,
       0.75, 0.5 , 1.  , 0.75, 0.  , 1.  , 1.  , 1.  , 0.25, 1.  , 0.75,
       0.25, 1.  , 0.75, 0.  , 1.  , 1.  , 0.25, 0.5 , 0.25, 0.  , 0.25,
       0.25, 1.  , 0.  , 0.25, 0.25, 0.25, 0.25, 0.  , 0.  , 0.25, 0.5 ,
       0.75, 1.  , 0.5 , 1.  , 0.  , 0.25, 0.25, 0.75, 1.  , 1.  , 1.  ,
       0.75, 0.25, 0.25, 0.  , 0.25, 0.  , 0.75, 0.75, 0.5 , 1.  , 0.5 ,
       0.25, 0.75, 0.75, 0.5 , 0.75, 0.5 , 0.25, 1.  , 0.5 , 1.  , 0.  ,
       1.  , 0.25, 0.5 , 0.25, 0.5 ]),)

In [62]:
# mostrando resultados
tabela_resultados = pandas.DataFrame(lista_resultados, columns=["Identificador", "Acurácia", "F1-score"])
print(tabela_resultados)

  Identificador  Acurácia  F1-score
0             1  0.607843  0.583351
1             2  0.607843  0.583351
2             3  0.607843  0.583351
3             4  0.607843  0.583351
