In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier, VotingClassifier, AdaBoostClassifier

dados = pd.read_csv("diabetes.csv")

In [3]:
feature_cols = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
X = dados[feature_cols]
y = dados.label

In [4]:
#RandomForestClassifier
clfRFC = RandomForestClassifier(n_estimators= 10, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state= 42)

clfRFC.fit(X_train, y_train).score(X_test, y_test)
y_predRFC = clfRFC.predict(X_test)

In [5]:
#StackingClassifier
estimators = [('rf', RandomForestClassifier(n_estimators= 10, random_state= 42)), ('svr', make_pipeline(StandardScaler(), LinearSVC(random_state= 42)))]

clfSC = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
clfSC.fit(X_train, y_train).score(X_test, y_test)
y_predSC = clfSC.predict(X_test)




In [6]:
#VotingClassifier
clf1 = LogisticRegression(multi_class= 'multinomial', random_state= 1)
clf2 = RandomForestClassifier(n_estimators= 50, random_state= 1)
clf3 = GaussianNB()

eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
eclf1 = eclf1.fit(X, y)
y_predVC = eclf1.predict(X_test)

In [7]:
#AdaBoostClassifier
clfABC = AdaBoostClassifier(n_estimators= 1000, random_state= 0)
clfABC.fit(X_train, y_train).score(X_test, y_test)
y_predABC = clfABC.predict(X_test)

In [8]:
print("Accuracy RandomForestClassifier: ", metrics.accuracy_score(y_test, y_predRFC))
print("Accuracy StackingClassifier: ", metrics.accuracy_score(y_test, y_predSC))
print("Accuracy VotingClassifier: ", metrics.accuracy_score(y_test, y_predVC))
print("Accuracy AdaBoostClassifier: ", metrics.accuracy_score(y_test, y_predABC))

print("\nConfusionMatrix RandomForestClassifier:\n", metrics.confusion_matrix(y_test, y_predRFC))
print("\nConfusionMatrix StackingClassifier:\n", metrics.confusion_matrix(y_test, y_predSC))
print("\nConfusionMatrix VotingClassifier:\n", metrics.confusion_matrix(y_test, y_predVC))
print("\nConfusionMatrix AdaBoostClassifier:\n", metrics.confusion_matrix(y_test, y_predABC))

#Foi visto com base nos resultados que dos métodos analisados, o VotingClassifier foi o melhor para melhorar a acurácia e para ajuste da Matriz de Confusão

Accuracy RandomForestClassifier:  0.7552083333333334
Accuracy StackingClassifier:  0.7552083333333334
Accuracy VotingClassifier:  0.7916666666666666
Accuracy AdaBoostClassifier:  0.734375

ConfusionMatrix RandomForestClassifier:
 [[110  15]
 [ 32  35]]

ConfusionMatrix StackingClassifier:
 [[109  16]
 [ 31  36]]

ConfusionMatrix VotingClassifier:
 [[108  17]
 [ 23  44]]

ConfusionMatrix AdaBoostClassifier:
 [[104  21]
 [ 30  37]]
