# Especialização em Inteligência Artificial

**Aprendizado de Máquina - Aula 4.3: Comitês**

Código de exemplo desenvolvido pelo docente [Adriano Rivolli](mailto:rivolli@utfpr.edu.br)

*O código apresenta como criar comitês usando diferentes abordagens*

In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

In [2]:
# Load the breast-cancer dataset and hold out 20% of the samples for testing.
# The fixed random_state makes the split identical on every run.
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Votação

In [3]:
# Hard-voting committee: five heterogeneous base classifiers are combined
# with voting='hard' and evaluated on the held-out test split.
clf1 = DecisionTreeClassifier(random_state=1)
clf2 = LogisticRegression(random_state=1, max_iter=6000)
clf3 = KNeighborsClassifier()
clf4 = SVC(random_state=1)
clf5 = RandomForestClassifier(random_state=1)

bases = [('dt', clf1), ('lr', clf2), ('knn', clf3), ('svc', clf4), ('rf', clf5)]
comite = VotingClassifier(estimators=bases, voting='hard')
comite.fit(X_train, y_train)
y_pred = comite.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Comitê Accuracy:", accuracy)

# 5-fold cross-validation over the full dataset (X, y), separate from the
# train/test split used above. The score is computed before the f-string
# because reusing the f-string's own quote character inside a replacement
# field is a SyntaxError on Python versions older than 3.12.
comite_cv_accuracy = cross_val_score(comite, X, y, cv=5, scoring='accuracy').mean()
print(f'Cross Val Score Comitê: {comite_cv_accuracy}')

# Baseline for comparison: a single decision tree configured like the
# committee's 'dt' member, trained and evaluated on the same split.
test_dt = DecisionTreeClassifier(random_state=1)
test_dt.fit(X_train, y_train)

# NOTE: despite its name, test_acc holds the predicted labels, not an accuracy.
test_acc = test_dt.predict(X_test)
print(f'Accuracy DecisionTreeClassifier alone: {accuracy_score(y_test, test_acc)}')

dt_cv_accuracy = cross_val_score(test_dt, X, y, cv=5, scoring='accuracy').mean()
print(f'Cross Val Score DecisionTreeClassifier: {dt_cv_accuracy}')

Comitê Accuracy: 0.9649122807017544
Cross Val Score Comitê: 0.956078248719143
Accuracy DecisionTreeClassifier alone: 0.9385964912280702
Cross Val Score DecisionTreeClassifier: 0.9173886042539978


In [4]:
# Soft-voting committee: same five base classifiers as the hard-voting cell,
# combined with voting='soft'.
clf1 = DecisionTreeClassifier(random_state=1)
clf2 = LogisticRegression(random_state=1, max_iter=6000)
clf3 = KNeighborsClassifier()
# probability=True enables SVC.predict_proba, which soft voting relies on.
clf4 = SVC(random_state=1, probability=True)
clf5 = RandomForestClassifier(random_state=1)

bases = [('dt', clf1), ('lr', clf2), ('knn', clf3), ('svc', clf4), ('rf', clf5)]
comite = VotingClassifier(estimators=bases, voting='soft')
comite.fit(X_train, y_train)
y_pred = comite.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Comitê Accuracy:", accuracy)

# 5-fold cross-validation over the full dataset (X, y). The score is computed
# before the f-string because reusing the f-string's own quote character
# inside a replacement field is a SyntaxError on Python versions older than 3.12.
comite_cv_accuracy = cross_val_score(comite, X, y, cv=5, scoring='accuracy').mean()
print(f'Cross Val Score Comitê: {comite_cv_accuracy}')

Comitê Accuracy: 0.9736842105263158
Cross Val Score Comitê: 0.956078248719143


In [5]:
# Weighted voting committee: the five base classifiers receive the weights
# 5, 4, 3, 2, 1 in the order they appear in `bases`. No `voting` argument is
# passed, so VotingClassifier's default voting mode applies.
clf1, clf2, clf3, clf4, clf5 = (
    DecisionTreeClassifier(random_state=1),
    LogisticRegression(random_state=1, max_iter=6000),
    KNeighborsClassifier(),
    SVC(random_state=1),
    RandomForestClassifier(random_state=1),
)

bases = list(zip(
    ('dt', 'lr', 'knn', 'svc', 'rf'),
    (clf1, clf2, clf3, clf4, clf5),
))
comite = VotingClassifier(
    estimators=bases,
    weights=(5, 4, 3, 2, 1),
)
comite.fit(X_train, y_train)

# Score the committee on the held-out test split.
y_pred = comite.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Comitê Accuracy:", accuracy)

Comitê Accuracy: 0.956140350877193


## Bagging

In [6]:
# Bagging of 15 decision trees, scored with 5-fold cross-validation on the
# full dataset. The ensemble is seeded through random_state=42.
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=15,
    random_state=42,
)
fold_scores = cross_val_score(bagging, X, y, cv=5, scoring='accuracy')
accuracy = fold_scores.mean()

print("Bagging Accuracy:", accuracy)

Bagging Accuracy: 0.9526005278683435


In [7]:
# Bagging of 100 k-NN classifiers; each one is trained on a random 50% of the
# samples (max_samples) and a random 80% of the features (max_features).
# random_state is fixed so the random subsets, and therefore the reported
# score, are reproducible across runs (the original call was unseeded).
bagging = BaggingClassifier(
    estimator=KNeighborsClassifier(),
    n_estimators=100,
    max_features=0.8,
    max_samples=0.5,
    random_state=42,
)
# Mean accuracy over 5 cross-validation folds of the full dataset.
accuracy = cross_val_score(bagging, X, y, cv=5, scoring='accuracy').mean()

print("Bagging Accuracy:", accuracy)

Bagging Accuracy: 0.9279770222015216


## Stacking

In [8]:
# Stacking: the five base classifiers feed a LogisticRegression
# meta-classifier (final_estimator), evaluated on the held-out test split.
clf1 = DecisionTreeClassifier(random_state=1)
clf2 = LogisticRegression(random_state=1, max_iter=6000)
clf3 = KNeighborsClassifier()
clf4 = SVC(random_state=1)
clf5 = RandomForestClassifier(random_state=1)

bases = [('dt', clf1), ('lr', clf2), ('knn', clf3), ('svc', clf4), ('rf', clf5)]
stacking = StackingClassifier(estimators=bases, final_estimator=LogisticRegression())
stacking.fit(X_train, y_train)
y_pred = stacking.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Stacking Accuracy:", accuracy)

# 5-fold cross-validation over the full dataset (X, y). The score is computed
# before the f-string because reusing the f-string's own quote character
# inside a replacement field is a SyntaxError on Python versions older than 3.12.
stacking_cv_accuracy = cross_val_score(stacking, X, y, cv=5, scoring='accuracy').mean()
print(f'Accuracy Stacking with Cross Val: {stacking_cv_accuracy}')

Stacking Accuracy: 0.9736842105263158
Accuracy Stacking with Cross Val: 0.95960254618848


In [9]:
# Stacking with a k-NN meta-classifier fed by the base models' predict_proba
# outputs (stack_method='predict_proba'), using cv=5 inside the stacker.
# SVC is created with probability=True so that it exposes predict_proba.
clf1, clf2, clf3, clf4, clf5 = (
    DecisionTreeClassifier(random_state=1),
    LogisticRegression(random_state=1, max_iter=6000),
    KNeighborsClassifier(),
    SVC(random_state=1, probability=True),
    RandomForestClassifier(random_state=1),
)

bases = list(zip(
    ('dt', 'lr', 'knn', 'svc', 'rf'),
    (clf1, clf2, clf3, clf4, clf5),
))
stacking = StackingClassifier(
    estimators=bases,
    final_estimator=KNeighborsClassifier(),
    cv=5,
    stack_method='predict_proba',
)
stacking.fit(X_train, y_train)

# Score the stacked model on the held-out test split.
y_pred = stacking.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Stacking Accuracy:", accuracy)

Stacking Accuracy: 0.9736842105263158
