#### **Import Libs**

In [19]:
import pandas as pd
import numpy as np
import optuna
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#### **Getting training && validation data**

In [20]:
table = pd.read_csv('./databases/dermatology.csv')

data = np.array(table)
# Remove a primeira coluna dos dados, que não contém informações relevantes para o modelo.
data = data[:, 1:]

labels = []
for line in range(data.shape[0]):
  if(labels.count(data[line, data.shape[1]-1])==0):
    labels.append(data[line, data.shape[1]-1])

scaler = StandardScaler()
# Extrai as classes dos dados e converte-as em variáveis dummy, usando a função pd.get_dummies do Pandas.
y = np.array(pd.get_dummies(data[:, data.shape[1]-1])).astype(np.float32)
# O resultado é um array binário para cada classe, indicando se uma amostra pertence ou não a essa classe.

# Escala os dados usando StandardScaler (garantir que todas as variáveis tenham a mesma ordem de grandeza)
X = (data[:, :(data.shape[1]-1)]).astype(np.float32)

scaler.fit(X) 
# Divide os dados em três subconjuntos de treinamento, validação e teste, usando a função train_test_split do Scikit-Learn. A divisão é feita na proporção de 60% para treinamento, 20% para validação e 20% para teste.
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25)

Conjuntos de treinamento e teste separados!


#### **Parameters selection**

In [22]:
def objective(trial):
    model = MLPClassifier(
        hidden_layer_sizes=(trial.suggest_int('hidden_layer_sizes', 10, 100),)*trial.suggest_int('n_layers', 1, 5),
        activation=trial.suggest_categorical('activation', ['logistic', 'tanh', 'relu']),
        alpha=trial.suggest_loguniform('alpha', 1e-5, 1e-1),
        solver=trial.suggest_categorical('solver', ['lbfgs', 'sgd', 'adam']),
        learning_rate=trial.suggest_categorical('learning_rate', ['constant', 'invscaling', 'adaptive']),
        max_iter=trial.suggest_int('max_iter', 100, 1000)
    )

    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)

    accuracy = accuracy_score(y_val, y_pred)

    return 1 - accuracy


In [23]:
n_trials = 150

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=n_trials)

[32m[I 2023-04-18 23:18:20,795][0m A new study created in memory with name: no-name-16258938-1fbc-4459-a22e-83720212139f[0m
  alpha=trial.suggest_loguniform('alpha', 1e-5, 1e-1),
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
[32m[I 2023-04-18 23:18:21,506][0m Trial 0 finished with value: 0.013698630136986356 and parameters: {'hidden_layer_sizes': 14, 'n_layers': 5, 'activation': 'tanh', 'alpha': 0.024408255128290483, 'solver': 'lbfgs', 'learning_rate': 'adaptive', 'max_iter': 722}. Best is trial 0 with value: 0.013698630136986356.[0m
  alpha=trial.suggest_loguniform('alpha', 1e-5, 1e-1),
[32m[I 2023-04-18 23:18:23,100][0m Trial 1 finished with value: 0.04109589041095896 and parameters: {'hidden_layer_sizes': 77, 'n_layers': 4, 'activation': 'tanh', 'alpha': 4.6616588513

#### **Saving study**

In [24]:
save_path = './optuna_studies/mlp_study.pkl'

joblib.dump(study, save_path)

['./optuna_studies/mlp_study.pkl']