#### **Import Libs**

In [4]:
import pandas as pd
import numpy as np

from keras import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import SGD, Adam

import optuna

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#### **Getting training && validation data**

In [5]:
table = pd.read_csv('./databases/dermatology.csv')

data = np.array(table)

data = data[:, 1:]

labels = []
for line in range(data.shape[0]):
  if(labels.count(data[line, data.shape[1]-1])==0):
    labels.append(data[line, data.shape[1]-1])

scaler = StandardScaler()

y = np.array(pd.get_dummies(data[:, data.shape[1]-1])).astype(np.float32)
X = (data[:, :(data.shape[1]-1)]).astype(np.float32)

scaler.fit(X) 

X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25)
print('Conjuntos de treinamento e teste separados!')

Conjuntos de treinamento e teste separados!


#### **Getting training && validation data**

In [6]:
table = pd.read_csv('databases/dermatology.csv')

data = np.array(table)

data = data[:, 1:]

labels = []
for line in range(data.shape[0]):
  if(labels.count(data[line, data.shape[1]-1])==0):
    labels.append(data[line, data.shape[1]-1])

scaler = StandardScaler()

y = np.array(pd.get_dummies(data[:, data.shape[1]-1])).astype(np.float32)
x = (data[:, :(data.shape[1]-1)]).astype(np.float32)

scaler.fit(x) 

train_x, test_x, train_y, test_y = train_test_split(x,y, test_size=0.25) 

print('Conjuntos de treinamento e teste separados!')

Conjuntos de treinamento e teste separados!


#### **Parameters selection**

In [7]:
def objective(trial):
    model = RandomForestClassifier(
        n_estimators=trial.suggest_int('n_estimators', 10, 100),
        max_depth=trial.suggest_int('max_depth', 5, 50),
        max_features=trial.suggest_categorical('max_features', ['sqrt', 'log2']),
        criterion=trial.suggest_categorical('criterion', ['gini', 'entropy']),
        min_samples_split=trial.suggest_int('min_samples_split', 2, 10),
        min_samples_leaf=trial.suggest_int('min_samples_leaf', 1, 10)
    )

    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)

    accuracy = accuracy_score(y_val, y_pred)

    return 1 - accuracy

In [8]:
n_trials = 150

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=n_trials)

[32m[I 2023-04-19 00:17:43,343][0m A new study created in memory with name: no-name-e9948058-a934-4365-ab8a-1e65196352f9[0m
[32m[I 2023-04-19 00:17:43,458][0m Trial 0 finished with value: 0.26027397260273977 and parameters: {'n_estimators': 60, 'max_depth': 29, 'max_features': 'sqrt', 'criterion': 'entropy', 'min_samples_split': 10, 'min_samples_leaf': 10}. Best is trial 0 with value: 0.26027397260273977.[0m
[32m[I 2023-04-19 00:17:43,560][0m Trial 1 finished with value: 0.15068493150684936 and parameters: {'n_estimators': 71, 'max_depth': 30, 'max_features': 'sqrt', 'criterion': 'gini', 'min_samples_split': 5, 'min_samples_leaf': 6}. Best is trial 1 with value: 0.15068493150684936.[0m
[32m[I 2023-04-19 00:17:43,701][0m Trial 2 finished with value: 0.2191780821917808 and parameters: {'n_estimators': 87, 'max_depth': 48, 'max_features': 'log2', 'criterion': 'gini', 'min_samples_split': 9, 'min_samples_leaf': 10}. Best is trial 1 with value: 0.15068493150684936.[0m
[32m[I 20

#### **Saving study**

In [9]:
save_path = './optuna_studies/randomF_study.pkl'

joblib.dump(study, save_path)

['./optuna_studies/randomF_study.pkl']