# Especialização em Inteligência Artificial

**Aprendizado de Máquina - Aula 4.4: Algoritmos de comitê**

Código de exemplo desenvolvido pelo docente [Adriano Rivolli](mailto:rivolli@utfpr.edu.br)

*O código apresenta como usar os algoritmos de comitês*

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

from sklearn.datasets import load_digits
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

import matplotlib.pyplot as plt

# Comparação dos comitês

In [None]:
# Breast cancer dataset: the full feature matrix and labels are handed to
# cross-validation, so no explicit train/test split is made in this cell.
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# Three ensemble classifiers with the same number of estimators and seed.
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
et_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
ada_clf = AdaBoostClassifier(n_estimators=100, random_state=42)

# 5-fold cross-validation, evaluated in the order RF -> Extra Trees -> AdaBoost.
rf_scores, et_scores, ada_scores = (
    cross_val_score(clf, X, y, cv=5) for clf in (rf_clf, et_clf, ada_clf)
)

# Report the mean score across the folds for each ensemble.
for caption, scores in (
    ("Random Forest Average Accuracy:", rf_scores),
    ("Extra Trees Average Accuracy:", et_scores),
    ("AdaBoost Average Accuracy:", ada_scores),
):
    print(caption, scores.mean())

In [None]:
# Digits dataset: same comparison as before, now on a multi-class problem.
digits = load_digits()
X = digits.data
y = digits.target

# Three ensemble classifiers with the same number of estimators and seed.
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
et_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
ada_clf = AdaBoostClassifier(n_estimators=100, random_state=42)

# 5-fold cross-validation, evaluated in the order RF -> Extra Trees -> AdaBoost.
rf_scores, et_scores, ada_scores = (
    cross_val_score(clf, X, y, cv=5) for clf in (rf_clf, et_clf, ada_clf)
)

# Report the mean score across the folds for each ensemble.
for caption, scores in (
    ("Random Forest Average Accuracy:", rf_scores),
    ("Extra Trees Average Accuracy:", et_scores),
    ("AdaBoost Average Accuracy:", ada_scores),
):
    print(caption, scores.mean())

## Random Forest e importância dos atributos

In [None]:
# Fit a random forest on the complete breast cancer dataset and read back
# the per-feature importances it computed.
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target
feature_names = cancer.feature_names

# fit() returns the estimator itself, so construction and training chain.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=2).fit(X, y)

feature_importances = rf_classifier.feature_importances_
# Bare last expression so the notebook displays the importance array.
feature_importances

In [None]:
# Map each feature name to its importance, then list the features from the
# most to the least important (importances rounded to two decimals).
rk_features = dict(zip(feature_names, feature_importances))

ranked = sorted(rk_features.items(), key=lambda pair: pair[1], reverse=True)
for feature, importance in ranked:
    print(feature + ':', importance.round(2))

# XGBoost

Parte do código foi adaptado de: https://medium.com/@24littledino/xgboost-classification-in-python-f29cc2c50a9b

In [None]:
# Import
import xgboost as xgb

In [None]:
# Hold-out evaluation of a default XGBoost classifier on the breast cancer data.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# A fixed random_state keeps the 80/20 split (and therefore the reported
# accuracy) identical across "Restart & Run All" executions.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_classifier = xgb.XGBClassifier()
xgb_classifier.fit(X_train, y_train)

# Score the model on the 20% of samples it has not seen during training.
y_pred = xgb_classifier.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)

print("XGBoost Accuracy:", accuracy)

In [None]:
# Hold-out evaluation of a tuned XGBoost classifier on the 10-class digits data.
# The digits dataset is the one that matches the multi-class configuration
# below (objective='multi:softmax', num_class=10).
digits = load_digits()
X, y = digits.data, digits.target

# A fixed random_state keeps the 70/30 split (and therefore the reported
# accuracy) identical across "Restart & Run All" executions.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

# 'mlogloss' is the multi-class log loss; plain 'logloss' is the binary metric.
xgb_classifier = xgb.XGBClassifier(eta=0.5, gamma=0.05, objective='multi:softmax', eval_metric='mlogloss', num_class=10)
xgb_classifier.fit(X_train, y_train)

# Score the model on the 30% of samples it has not seen during training.
y_pred = xgb_classifier.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)

print("XGBoost Accuracy:", accuracy)

In [None]:
from sklearn.datasets import fetch_openml

# Load the Titanic dataset from OpenML. `version` is passed as an integer,
# which is the documented type (an int or the string 'active').
titanic = fetch_openml(name='titanic', version=1, parser='auto')
X, y = titanic.data, titanic.target

# Keep only the columns used by the XGBoost experiments that follow.
features = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']
x_train, x_test, y_train, y_test = train_test_split(X[features], y, test_size=0.2, random_state=65)

# Replace the categorical 'sex' column with its integer category codes.
# assign() builds new frames instead of writing into the frames returned by
# train_test_split, which avoids pandas' SettingWithCopyWarning.
x_train = x_train.assign(sex=x_train['sex'].cat.codes)
x_test = x_test.assign(sex=x_test['sex'].cat.codes)

# Encode the categorical target as integer category codes as well.
y_train = y_train.cat.codes
y_test = y_test.cat.codes

In [None]:
# Sweep the XGBoost learning rate (eta) and record, for every value, the
# score obtained on the training split and on the test split.
learning_rate_range = np.arange(0.01, 1, 0.05)
train_XG, test_XG = [], []

for lr in learning_rate_range:
    # A fresh classifier is trained for each learning rate under test.
    xgb_classifier = xgb.XGBClassifier(eta=lr)
    xgb_classifier.fit(x_train, y_train)

    train_score = xgb_classifier.score(x_train, y_train)
    test_score = xgb_classifier.score(x_test, y_test)
    train_XG.append(train_score)
    test_XG.append(test_score)

In [None]:
# Train vs. test score as a function of the learning rate, drawn on a single
# set of axes through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(10, 7))

ax.plot(learning_rate_range, train_XG, c='orange', label='Train')
ax.plot(learning_rate_range, test_XG, c='m', label='Test')

ax.set_xlabel('Learning rate')
ax.set_xticks(learning_rate_range)  # one tick per evaluated learning rate
ax.set_ylabel('Accuracy score')
ax.set_ylim(0.6, 1)

ax.legend(prop={'size': 12}, loc=3)
ax.set_title('Accuracy score vs. Learning rate of XGBoost', size=14)
plt.show()

In [None]:
# For each min_child_weight value, sweep the learning rate and draw the
# train/test score curves in their own panel of a 3x3 grid.
learning_rate_range = np.arange(0.01, 0.5, 0.05)
weight_range = np.arange(0, 4.5, 0.5)  # nine values, one per panel

fig = plt.figure(figsize=(19, 17))
for idx, weight in enumerate(weight_range, start=1):
    train, test = [], []
    for lr in learning_rate_range:
        # A fresh classifier per (min_child_weight, learning rate) pair.
        xgb_classifier = xgb.XGBClassifier(eta=lr, reg_lambda=1, min_child_weight=weight)
        xgb_classifier.fit(x_train, y_train)
        train.append(xgb_classifier.score(x_train, y_train))
        test.append(xgb_classifier.score(x_test, y_test))

    # Subplot positions are 1-based, matching the enumerate start value.
    ax = fig.add_subplot(3, 3, idx)
    ax.plot(learning_rate_range, train, c='orange', label='Training')
    ax.plot(learning_rate_range, test, c='m', label='Testing')
    ax.set_xlabel('Learning rate')
    ax.set_xticks(learning_rate_range)
    ax.set_ylabel('Accuracy score')
    ax.set_ylim(0.6, 1)
    ax.legend(prop={'size': 12}, loc=3)
    title = "Min child weight:" + str(weight)
    ax.set_title(title, size=16)
plt.show()