In [1]:
import numpy as np

<h2>Carregando dados</h2>

In [2]:
# Parse the comma-separated file into a NumPy array (genfromtxt converts
# fields to float by default; fields it cannot parse become NaN).
data = np.genfromtxt(fname='diabetes.csv', delimiter=',')

In [3]:
# Split the table column-wise: the last column becomes the labels and the
# remaining columns stay in `data` as the feature matrix.
# NOTE: `data` is rebound here, so re-running this cell on its own strips
# one more column; re-run the loading cell first.
labels, data = data[:, -1], data[:, :-1]

<h2>Normalização</h2>

In [4]:
from sklearn.preprocessing import MinMaxScaler

In [5]:
# Rescale every feature column with MinMaxScaler's default settings.
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# cross-validation split further down, so test-fold minima/maxima take part
# in the scaling.
scaler = MinMaxScaler()
data = scaler.fit(data).transform(data)

In [6]:
# Sanity check: minimum and maximum of the first feature column after scaling.
data[:, 0].min(), data[:, 0].max()

(0.0, 1.0)

<h2>Treinamento</h2>

In [7]:
# Modelos
# from sklearn.neural_network import MLPClassifier
# from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# K-fold CrossValidation
from sklearn.model_selection import KFold

# Métricas
from sklearn.metrics import accuracy_score, recall_score, precision_score

In [8]:
# Names of the models under evaluation; they double as keys of `performance`.
modelos = ['decision_tree', 'random_forest', 'gradient_boosting']

# One independent list of per-fold scores for each (model, metric) pair.
performance = {
    modelo: {'acuracia': [], 'recall': [], 'precisao': []}
    for modelo in modelos
}

In [9]:
# 10-fold cross-validation of the three tree-based classifiers.
# For every fold each model is trained on the training split, evaluated on
# the held-out split, and its accuracy / recall / precision are appended to
# the matching list in `performance`.

# Fixed seed for the estimators so that Restart & Run All reproduces the
# same scores (all three classifiers are randomised by default).
SEED = 42

# Start from empty score lists: re-running this cell must not append a second
# batch of fold scores on top of the ones already stored in `performance`.
for nome in ('decision_tree', 'random_forest', 'gradient_boosting'):
    for metrica in ('acuracia', 'recall', 'precisao'):
        performance[nome][metrica].clear()

# KFold without shuffling yields consecutive folds, so the split itself is
# already deterministic and needs no seed.
kf = KFold(n_splits=10)

for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

    # Fresh, seeded estimators for every fold so no state carries over.
    dt = DecisionTreeClassifier(random_state=SEED)
    rf = RandomForestClassifier(random_state=SEED)
    bt = GradientBoostingClassifier(n_estimators=10, random_state=SEED)

    # Train and evaluate each model with the same code path.
    for nome, clf in (
        ('decision_tree', dt),
        ('random_forest', rf),
        ('gradient_boosting', bt),
    ):
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        scores = performance[nome]
        scores['acuracia'].append(accuracy_score(y_test, y_pred))
        scores['recall'].append(recall_score(y_test, y_pred))
        scores['precisao'].append(precision_score(y_test, y_pred))

In [11]:
# Report each model's accuracy averaged over the folds, rounded to 2 decimals.
print("Resultados - Acurácia")
for linha, chave in [
    ("Decision Tree: {}", 'decision_tree'),
    ("Random Forest: {}", 'random_forest'),
    ("Gradient Boosting Tree: {}", 'gradient_boosting'),
]:
    media = np.mean(performance[chave]['acuracia'])
    print(linha.format(np.round(media, 2)))

Resultados - Acurácia
Decision Tree: 0.69
Random Forest: 0.72
Gradient Boosting Tree: 0.75


In [12]:
# Report each model's recall averaged over the folds, rounded to 2 decimals.
print("Resultados - Recall")
for linha, chave in [
    ("Decision Tree: {}", 'decision_tree'),
    ("Random Forest: {}", 'random_forest'),
    ("Gradient Boosting Tree: {}", 'gradient_boosting'),
]:
    media = np.mean(performance[chave]['recall'])
    print(linha.format(np.round(media, 2)))

Resultados - Recall
Decision Tree: 0.56
Random Forest: 0.5
Gradient Boosting Tree: 0.45


In [13]:
# Report each model's precision averaged over the folds, rounded to 2 decimals.
print("Resultados - Precision")
for linha, chave in [
    ("Decision Tree: {}", 'decision_tree'),
    ("Random Forest: {}", 'random_forest'),
    ("Gradient Boosting Tree: {}", 'gradient_boosting'),
]:
    media = np.mean(performance[chave]['precisao'])
    print(linha.format(np.round(media, 2)))

Resultados - Precision
Decision Tree: 0.55
Random Forest: 0.63
Gradient Boosting Tree: 0.75
