In [19]:
import pandas as pd 
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
import matplotlib.pyplot as plt

import mlflow 
import mlflow.sklearn


In [3]:
# Read ../docs/Credit.csv into the `credit` DataFrame (the path is relative to
# the kernel's working directory).
credit = pd.read_csv(filepath_or_buffer='../docs/Credit.csv')

In [5]:
# Label-encode every text column as integer category codes so that `credit`
# becomes a purely numeric frame.
# Text can be stored either as `object` or as pandas' dedicated string dtype;
# a plain `dtype == 'object'` comparison does not match the latter, so both
# are checked here.
# NOTE: `cat.codes` represents missing values as -1.
for col in credit.columns:
    col_dtype = credit[col].dtype
    if pd.api.types.is_object_dtype(col_dtype) or pd.api.types.is_string_dtype(col_dtype):
        credit[col] = credit[col].astype('category').cat.codes

In [6]:
# Preview the first five rows of the encoded frame.
credit.head(n=5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,2,6,1,7,1169,0,3,4,3,2,...,2,67,1,1,2,3,1,1,1,1
1,1,48,3,7,5951,3,0,2,0,2,...,2,22,1,1,1,3,1,0,1,0
2,0,12,1,4,2096,3,1,2,3,2,...,2,49,1,1,1,2,2,0,1,1
3,2,42,3,5,7882,3,1,2,3,1,...,0,45,1,0,1,3,2,0,1,1
4,2,24,2,1,4870,3,0,3,3,2,...,1,53,1,0,2,3,2,0,1,0


In [7]:
# Positional split of the frame into NumPy arrays: columns 0-19 become the
# predictors and column 20 the target (the preview shows `class` as the last
# column -- confirm if the CSV layout changes).
previsores = credit.iloc[:, :20].to_numpy()
classe = credit.iloc[:, 20].to_numpy()

In [8]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
x_train, x_test, y_train, y_test = train_test_split(
    previsores,
    classe,
    test_size=0.3,
    random_state=123,
)

In [11]:
# Baseline: fit a single 10-tree forest and predict on the held-out rows.
# The forest is seeded (same seed as the train/test split) so that re-running
# the notebook from a fresh kernel reproduces the same model and predictions.
modelorf = RandomForestClassifier(n_estimators=10, random_state=123)
modelorf.fit(x_train, y_train)
previsoes = modelorf.predict(x_test)


In [22]:
def treina_rf(n_estimators, random_state=123):
    """Train a random forest and record the run in MLflow.

    Fits a ``RandomForestClassifier`` on the notebook-level ``x_train`` /
    ``y_train`` arrays, evaluates it on ``x_test`` / ``y_test`` and logs the
    hyper-parameters, the evaluation metrics, a confusion-matrix image and the
    fitted model under the ``rfexperiment`` MLflow experiment.

    Parameters
    ----------
    n_estimators : int
        Number of trees in the forest.
    random_state : int or None, default 123
        Seed passed to the forest so a run can be reproduced. Pass ``None``
        to get an unseeded (non-reproducible) forest.

    Returns
    -------
    None

    Notes
    -----
    * Reads the globals ``x_train``, ``x_test``, ``y_train`` and ``y_test``.
    * Writes ``confusion.png`` to the current working directory (overwritten
      on every call) before uploading it as an artifact.
    * Assumes a binary target: recall, precision and F1 use scikit-learn's
      default binary averaging, and the AUC is computed from the predicted
      probability of the second entry of ``classes_``.
    """
    mlflow.set_experiment('rfexperiment')

    # The context manager closes the run on exit (also when an exception is
    # raised), so no explicit mlflow.end_run() call is needed afterwards.
    with mlflow.start_run() as run:
        modelorf = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=random_state,
        )
        modelorf.fit(x_train, y_train)
        prev = modelorf.predict(x_test)
        # ROC AUC needs a continuous score; feeding it hard 0/1 predictions
        # collapses the ROC curve to a single operating point.
        prob_positiva = modelorf.predict_proba(x_test)[:, 1]

        mlflow.log_param('n_estimators', n_estimators)
        mlflow.log_param('random_state', random_state)

        # Metrics
        metricas = {
            'acuracia': accuracy_score(y_test, prev),
            'recall': recall_score(y_test, prev),
            'precision': precision_score(y_test, prev),
            'f1': f1_score(y_test, prev),
            'auc': roc_auc_score(y_test, prob_positiva),
        }

        # Log the metrics
        for nome, valor in metricas.items():
            mlflow.log_metric(nome, valor)

        # Plots
        # Use one explicit label set for both the matrix and the display so
        # they cannot disagree when a label occurs only in the predictions.
        class_names = np.union1d(y_test, prev)
        confusion = confusion_matrix(y_test, prev, labels=class_names)
        disp = ConfusionMatrixDisplay(confusion_matrix=confusion, display_labels=class_names)
        disp.plot(cmap=plt.cm.Blues, values_format=".0f")

        # Save and close the figure created by disp.plot() explicitly rather
        # than relying on pyplot's notion of the "current" figure.
        disp.figure_.savefig('confusion.png')
        plt.close(disp.figure_)

        mlflow.log_artifact('confusion.png')

        # Model
        mlflow.sklearn.log_model(modelorf, 'modelorf')

        # Run information
        print("Modelo: ", run.info.run_id)

In [23]:
# Forest sizes to try: 10, 20, ..., 100 trees. Each call logs one MLflow run.
tree = list(range(10, 101, 10))

for t in tree:
    treina_rf(t)
  

Modelo:  ee2e7b2e1f6f4fa68215aac2d744bb06
Modelo:  7e820909b0204226b203dbfeb3f8af88
Modelo:  f98a3b86bd3c4a53add9efb64b3878ce
Modelo:  8a1b986b2d3747ddb79815a8123b9cd2
Modelo:  d7c337793fdb4760ab0fa940d249dcff
Modelo:  5a67d9b5f6e042b29938507b27a22ef7
Modelo:  155e8f884be04175bab3288799cdbda7
Modelo:  250e96e48eb040f5b7e27de160c3ff0b
Modelo:  0547e432e5be4ba6a30e5f3672e82a78
Modelo:  e93cd00f153c4cfcb2b9510c58253a26
