# Ensemble: RandomForest & Boosting

Avalia a performance da classificação da base de dados sonar com os métodos de combinação de classificadores.

Este notebook foi desenvolvido para o ambiente GOOGLE COLAB ([colab.research.google.com](https://colab.research.google.com)).

Prof. Hugo de Paula

-------------------------------------------------------------------------------

### Base de dados: Sonar, Mines vs. Rocks

https://archive.ics.uci.edu/ml/datasets/Connectionist+Bench+%28Sonar,+Mines+vs.+Rocks%29

208 instâncias

60 atributos

2 classes (rocha, mina)


In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import LabelEncoder

### Carga de dados

90% da base para treinamento (187 registros)

10% da base para teste (21 registros)

In [None]:
# Mount Google Drive so files stored there are reachable under /content/drive.
from google.colab import drive

drive.mount('/content/drive')

# Read the first worksheet of the Sonar workbook into a DataFrame.
sonar = pd.read_excel(
    '/content/drive/My Drive/PUC/ML/Datasets/sonar.xlsx',
    sheet_name=0,
)



In [None]:
# Every column except the last one is used as a feature.
X = sonar.iloc[:, :-1]

# The last column holds the class label; encode it as integers and keep the
# original label names for the reports printed further down.
le = LabelEncoder()
y = le.fit_transform(sonar.iloc[:, -1])
class_names = le.classes_

# Hold out 10% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)


### Árvore de decisão tradicional

In [None]:

# Baseline: a single decision tree with default hyper-parameters.
# random_state is fixed (same seed used by train_test_split) so that
# Restart & Run All reproduces the same tree and the same metrics.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Classificador Árvore de Decisão:\n")
print("--------------------------------------")
# Accuracy on the training split, shown to contrast with the test-set report.
print("Acurácia da base de treinamento: {:.2f}".format(clf.score(X_train, y_train)))
print("--------------------------------------")

# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix: rows are true classes, columns are predicted classes.
cnf_matrix = confusion_matrix(y_test, y_pred)
cnf_table = pd.DataFrame(data=cnf_matrix, index=class_names, columns=[x + "(prev)" for x in class_names])
print(cnf_table)


### Random Forest e Extra Trees

Número de estimadores: 10

In [None]:
# Random forest with 10 trees.
# random_state is fixed (same seed used by train_test_split): without it the
# forest is resampled on every run and the reported metrics change each time.
clr = RandomForestClassifier(n_estimators=10, random_state=42)
clr = clr.fit(X_train, y_train)
y_pred = clr.predict(X_test)


print("Classificador Random Forest:\n RandomForestClassifier(n_estimators=10)\n")
print("--------------------------------------")
# Accuracy on the training split, shown to contrast with the test-set report.
print("Acurácia da base de treinamento: {:.2f}".format(clr.score(X_train, y_train)))
print("--------------------------------------")

# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix: rows are true classes, columns are predicted classes.
cnf_matrix = confusion_matrix(y_test, y_pred)
cnf_table = pd.DataFrame(data=cnf_matrix, index=class_names, columns=[x + "(prev)" for x in class_names])
print(cnf_table)


In [None]:
# Extremely randomized trees (ExtraTrees) with 10 estimators.
# random_state is fixed (same seed used by train_test_split): the estimator is
# randomized, so without a seed the reported metrics change on every run.
cle = ExtraTreesClassifier(n_estimators=10, random_state=42)
cle = cle.fit(X_train, y_train)
y_pred = cle.predict(X_test)

print("Classificador Extreme Tree:\n ExtraTreesClassifier(n_estimators=10)\n")
print("--------------------------------------")
# Accuracy on the training split, shown to contrast with the test-set report.
print("Acurácia da base de treinamento: {:.2f}".format(cle.score(X_train, y_train)))
print("--------------------------------------")

# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix: rows are true classes, columns are predicted classes.
cnf_matrix = confusion_matrix(y_test, y_pred)
cnf_table = pd.DataFrame(data=cnf_matrix, index=class_names, columns=[x + "(prev)" for x in class_names])
print(cnf_table)


### AdaBoost

Estimador: DecisionTreeClassifier

Número de estimadores: 10.

Perceba o overfitting na base de treinamento.


In [None]:
# AdaBoost over shallow decision trees (depth 2), 10 boosting rounds.
# The original cell called AdaBoostRegression / tree.DecisionTreeRegression,
# which do not exist; this is a classification task, so the classifier
# classes imported at the top of the notebook are used.
# random_state is fixed (same seed used by train_test_split) for reproducibility.
# NOTE(review): `algorithm="SAMME"` is kept from the original call; recent
# scikit-learn releases deprecate this parameter — confirm against the
# installed version.
ada = AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth=2),
                         algorithm="SAMME", n_estimators=10, random_state=42)
ada.fit(X_train, y_train)
y_pred = ada.predict(X_test)

# The printed configuration now matches the estimator actually built above.
print("Classificador AdaBoost:\n AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth=2), algorithm=\"SAMME\", n_estimators=10)\n")
print("--------------------------------------")
# Accuracy on the training split, shown to contrast with the test-set report.
print("Acurácia da base de treinamento: {:.2f}".format(ada.score(X_train, y_train)))
print("--------------------------------------")

# Per-class precision / recall / F1 on the held-out test split, reported the
# same way as for the other classifiers in this notebook.
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix: rows are true classes, columns are predicted classes.
cnf_matrix = confusion_matrix(y_test, y_pred)
cnf_table = pd.DataFrame(data=cnf_matrix, index=class_names, columns=[x + "(prev)" for x in class_names])
print(cnf_table)