<a href="https://colab.research.google.com/github/carvalheirafc/deep_learning_stuff/blob/master/mlp/diabetes/diabetes_mlp_k-fold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Import Section
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.model_selection import StratifiedShuffleSplit



In [3]:
# Location of the raw CSV inside the project's GitHub repository.
data_url = 'https://raw.githubusercontent.com/carvalheirafc/deep_learning_stuff/master/mlp/diabetes/diabetes.csv'

# Read the whole file into a DataFrame; the header row supplies the column names.
diabetes = pd.read_csv(data_url)

# Summary statistics with one row per column (describe() transposed) so the
# table stays readable.
diabetes.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A,768.0,3.845052,3.369578,0.0,1.0,3.0,6.0,17.0
B,768.0,70.178819,15.006546,22.666667,60.666667,69.666667,79.666667,122.0
C,768.0,120.894531,31.972618,0.0,99.0,117.0,140.25,199.0
D,768.0,69.105469,19.355807,0.0,62.0,72.0,80.0,122.0
E,768.0,20.536458,15.952218,0.0,0.0,23.0,32.0,99.0
F,768.0,79.799479,115.244002,0.0,0.0,30.5,127.25,846.0
G,768.0,31.992578,7.88416,0.0,27.3,32.0,36.6,67.1
J,768.0,0.472266,0.332348,0.1,0.2,0.4,0.6,2.4
M,768.0,33.240885,11.760232,21.0,24.0,29.0,41.0,81.0
Diagnóstico,768.0,1.348958,0.476951,1.0,1.0,1.0,2.0,2.0


In [4]:
# Display the frame's dimensions as a (rows, columns) tuple.
diabetes.shape

(768, 10)

In [0]:
import warnings
warnings.filterwarnings("ignore")

def _take_rows(data, positions):
    """Return the rows of ``data`` found at the given integer positions."""
    # StratifiedShuffleSplit yields positional indices, so pandas objects are
    # sliced with .iloc; label-based .loc only works for a default RangeIndex.
    if hasattr(data, "iloc"):
        return data.iloc[positions]
    return np.asarray(data)[positions]


def split_data(clf, features, labels, folds=150, test_size=0.3,
               random_state=42, pos_label=1):
    """Evaluate ``clf`` over repeated stratified shuffle splits and print mean scores.

    For each of ``folds`` random stratified train/test splits the classifier
    is re-fitted on the training part and scored on the held-out part.  The
    mean accuracy, precision, recall and F1 over all successful splits are
    printed, rounded to three decimals.

    Parameters
    ----------
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.  It is re-fitted in
        place on every split.
    features : pandas.DataFrame or array-like
        Feature matrix, one row per sample.
    labels : pandas.Series or array-like
        Class label of each sample; also used for stratification.
    folds : int, default 150
        Number of shuffle splits to run.
    test_size : float, default 0.3
        Share of the samples held out for testing in each split.
    random_state : int, default 42
        Seed for the splitter, so the same splits are produced on every run.
    pos_label : default 1
        Label treated as the positive class by precision, recall and F1.
        NOTE(review): 1 is scikit-learn's default and is kept for backward
        compatibility -- confirm it is the class of interest in ``labels``.

    Returns
    -------
    None
        Results are only printed.
    """
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    sss = StratifiedShuffleSplit(n_splits=folds, random_state=random_state,
                                 test_size=test_size)

    print("Making predictions... \n")
    for fold_no, (train_idx, test_idx) in enumerate(sss.split(features, labels), start=1):
        X_train = _take_rows(features, train_idx)
        y_train = _take_rows(labels, train_idx)
        X_test = _take_rows(features, test_idx)
        y_test = _take_rows(labels, test_idx)

        try:
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)

            # Compute every metric before storing any, so a failure part-way
            # through cannot leave the four lists with different lengths.
            fold_accuracy = accuracy_score(y_test, y_pred)
            fold_precision = precision_score(y_test, y_pred, pos_label=pos_label)
            fold_recall = recall_score(y_test, y_pred, pos_label=pos_label)
            fold_f1 = f1_score(y_test, y_pred, pos_label=pos_label)
        except Exception as e:
            # Best effort: report the failing split and carry on with the rest.
            print('Fold', fold_no, 'failed:', repr(e))
            continue

        accuracy_scores.append(fold_accuracy)
        precision_scores.append(fold_precision)
        recall_scores.append(fold_recall)
        f1_scores.append(fold_f1)

    if not accuracy_scores:
        # Averaging empty lists would only print NaN; say what happened instead.
        print('No split completed successfully; no scores to report.')
        return

    print('Accuracy', np.round( np.mean(accuracy_scores) ,3) )
    print('Precision', np.round( np.mean(precision_scores) ,3) )
    print('Recall', np.round( np.mean(recall_scores), 3) )
    print('F1-measure', np.round( np.mean(f1_scores), 3) )

In [0]:
# Target vector: the 'Diagnóstico' column.
Y = diabetes['Diagnóstico']
# Feature matrix: every remaining column (drop returns a new frame, so
# `diabetes` itself is left untouched).
X = diabetes.drop(columns='Diagnóstico')

In [0]:
# Multi-layer perceptron with a single hidden layer of 9 units, trained with
# the 'sgd' solver for at most 500 iterations; the seed is fixed so weight
# initialisation is repeatable.
# NOTE(review): StandardScaler is imported but never applied in the visible
# cells, so the network sees unscaled features -- confirm this is intended.
mlp = MLPClassifier(
    hidden_layer_sizes=(9,),
    solver='sgd',
    max_iter=500,
    random_state=1,
)

In [13]:
# Evaluate the MLP over the default 150 stratified shuffle splits and print
# the averaged accuracy, precision, recall and F1.
split_data(mlp, X, Y)

Making predictions... 

Accuracy 0.663
Precision 0.706
Recall 0.835
F1-measure 0.761
