# Avaliação do modelo

In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Testes para modelos
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

In [None]:
# Load the pre-split training and test sets
df_train = pd.read_csv(
    '/content/drive/MyDrive/IA/Machine Learning/_focos_treino.csv'
)
df_test = pd.read_csv(
    '/content/drive/MyDrive/IA/Machine Learning/_focos_teste.csv'
)

# Feature matrices: every column except the 'alvo' label
X_train = df_train.drop(columns='alvo')
X_test = df_test.drop(columns='alvo')

# Target vectors: the 'alvo' label column
y_train = df_train['alvo']
y_test = df_test['alvo']

In [None]:
# Preview the training features (pandas elides the middle rows of a large frame)
X_train

Unnamed: 0,ano,mes,dia,hora,minuto,diasemchuva_norm,precipitacao_norm,frp_norm,num_bioma,num_estado,latitude,longitude
0,2017,8,19,17,47,0.0,1,19.00,0,10,-9.340,-60.494
1,2016,9,8,17,54,5.0,0,26.95,0,3,-9.099,-67.465
2,2014,9,4,16,59,9.0,0,41.10,0,26,-6.533,-48.029
3,2019,10,31,16,40,9.0,0,73.05,1,17,-6.226,-41.162
4,2020,9,10,16,20,0.0,0,20.50,0,9,-2.844,-44.070
...,...,...,...,...,...,...,...,...,...,...,...,...
1420030,2017,10,7,16,52,0.0,1,31.30,0,13,-8.511,-49.567
1420031,2019,8,15,17,5,14.0,0,73.05,2,10,-15.394,-53.887
1420032,2017,9,28,16,58,0.0,0,32.80,2,9,-7.480,-47.055
1420033,2020,1,14,16,20,8.0,0,73.05,3,25,-10.730,-37.091


In [None]:
# Preview the test features (pandas elides the middle rows of a large frame)
X_test

Unnamed: 0,ano,mes,dia,hora,minuto,diasemchuva_norm,precipitacao_norm,frp_norm,num_bioma,num_estado,latitude,longitude
0,2020,9,10,18,0,5.0,1,11.9,0,3,-7.60400,-64.56300
1,2022,6,16,17,20,2.0,0,14.5,3,15,-26.59944,-51.33309
2,2018,6,5,15,55,27.0,0,8.2,3,1,-9.52200,-36.01100
3,2021,9,29,18,0,3.0,1,12.7,0,0,-8.53786,-70.61274
4,2015,9,11,16,33,10.0,0,41.1,2,4,-11.37200,-43.60600
...,...,...,...,...,...,...,...,...,...,...,...,...
608582,2022,9,5,17,49,7.0,0,12.6,0,21,-8.75692,-62.84147
608583,2019,9,7,17,15,10.0,0,18.8,2,10,-15.54500,-53.08300
608584,2015,12,15,17,28,0.0,1,20.9,0,10,-12.08700,-57.44900
608585,2020,5,23,16,10,5.0,0,21.3,2,9,-5.89500,-43.48000


## Modelo

In [None]:
# Random forest: fits many decision trees on random sub-samples of the data
# and aggregates their predictions, which improves predictive accuracy and
# helps control overfitting.
from sklearn.ensemble import RandomForestClassifier

# Hyper-parameters are derived from the data: the depth limit is twice the
# maximum of the `num_estado` column and the minimum leaf size is the maximum
# of the `num_bioma` column plus one.
# NOTE(review): this looks like a heuristic — confirm it is intentional.
model = RandomForestClassifier(
    criterion='entropy',
    # Cast to plain int so the estimator never receives a NumPy/float scalar
    max_depth=int(X_train['num_estado'].max()) * 2,
    min_samples_leaf=int(X_train['num_bioma'].max()) + 1,
    min_samples_split=2,
    # Fixed seed so that Restart & Run All reproduces the reported metrics
    random_state=42,
).fit(X_train, y_train)

# Predict the labels of the held-out test set
model_predict = model.predict(X_test)

# accuracy_score expects (y_true, y_pred), in that order
model_acc = accuracy_score(y_test, model_predict)
print("Modelo Acurácia:", model_acc)

Modelo Acurácia: 0.9480583712764157


In [None]:
#@title Precisão e Revocação
from sklearn.metrics import precision_recall_fscore_support

# The first two entries of the result are the precision and recall values
pr = precision_recall_fscore_support(y_true=y_test, y_pred=model_predict)
precisao, revocacao = pr[:2]

print('Precisão: ', precisao)
print('Revocação: ', revocacao)

Precisão:  [0.94024424 0.95615488]
Revocação:  [0.95693296 0.93918382]


In [None]:
#@title Matriz de Confusão

from sklearn.metrics import confusion_matrix

# Raw counts of true labels versus predicted labels
mc = confusion_matrix(y_true=y_test, y_pred=model_predict)

# Wrap the counts in a labelled frame so the table is readable on display
row_labels = ['Real 0', 'Real 1']
col_labels = ['Predito 0', 'Predito 1']
df_mc = pd.DataFrame(data=mc, index=row_labels, columns=col_labels)
df_mc

Unnamed: 0,Predito 0,Predito 1
Real 0,291188,13105
Real 1,18506,285788


In [None]:
#@title Acurácia
from sklearn.metrics import accuracy_score

# Accuracy of the test-set predictions against the true labels
acc = accuracy_score(y_true=y_test, y_pred=model_predict)
acc

0.9480583712764157

In [None]:
#@title Sensibilidade e Especificidades
from sklearn.metrics import recall_score

# Sensitivity is the recall with class 1 as the positive label;
# specificity is the recall with class 0 as the positive label.
s, e = (
    recall_score(y_test, model_predict, pos_label=positive_class)
    for positive_class in (1, 0)
)

print("Sensibilidade", s)
print("Especificidade", e)

Sensibilidade 0.9391838156519682
Especificidade 0.9569329560653712


In [None]:
#@title Resumo dos Testes
from sklearn.metrics import classification_report

# Text summary of the test-set metrics
cr = classification_report(y_true=y_test, y_pred=model_predict)
print(cr)

              precision    recall  f1-score   support

           0       0.94      0.96      0.95    304293
           1       0.96      0.94      0.95    304294

    accuracy                           0.95    608587
   macro avg       0.95      0.95      0.95    608587
weighted avg       0.95      0.95      0.95    608587



In [None]:
#@title Cross-validation
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report

# 10-fold cross-validation; `cv` is the number of folds.
# cross_val_predict clones `model` and refits it on each fold, so it has to
# run on the training split: running it on X_test/y_test would train on the
# held-out data and would not validate the model fitted on the training set.
y_predict = cross_val_predict(model, X_train, y_train, cv=10)

# Each training row is predicted by the fold model that did not see it
report = classification_report(y_train, y_predict)

print(report)

              precision    recall  f1-score   support

           0       0.93      0.94      0.94    304293
           1       0.94      0.93      0.93    304294

    accuracy                           0.93    608587
   macro avg       0.94      0.93      0.93    608587
weighted avg       0.94      0.93      0.93    608587

