# diabetes classfication using neural network

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

## read dataset

In [2]:
df = pd.read_csv('./diabetes-dataset.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


## normalize dataset between 0 and 1

In [3]:
df.loc[:, df.columns != 'Outcome'] = MinMaxScaler().fit_transform(df.loc[:, df.columns != 'Outcome'])
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333,1
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667,0
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333,1
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0,0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2,1


## split feature and label

In [4]:
X = df.drop('Outcome',axis=1)
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2


In [5]:
y = df.loc[:,'Outcome']
y.head()

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

## split dataset into data train and data test

In [6]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=15)

In [7]:
X_train.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
304,0.176471,0.753769,0.622951,0.0,0.0,0.312966,0.055081,0.266667
297,0.0,0.633166,0.688525,0.292929,0.254137,0.457526,0.188728,0.05
522,0.352941,0.572864,0.0,0.0,0.0,0.0,0.047395,0.083333
618,0.529412,0.562814,0.672131,0.242424,0.0,0.420268,0.514091,0.483333
501,0.176471,0.422111,0.590164,0.323232,0.0,0.554396,0.0807,0.116667


In [8]:
y_train.head()

304    0
297    0
522    0
618    1
501    0
Name: Outcome, dtype: int64

In [9]:
X_test.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
343,0.294118,0.613065,0.704918,0.0,0.0,0.517139,0.090521,0.2
728,0.117647,0.879397,0.721311,0.0,0.0,0.341282,0.105892,0.016667
73,0.235294,0.648241,0.704918,0.20202,0.319149,0.5231,0.065329,0.033333
254,0.705882,0.462312,0.508197,0.070707,0.304965,0.411326,0.362084,0.383333
741,0.176471,0.512563,0.360656,0.20202,0.111111,0.459016,0.137489,0.083333


In [10]:
y_test.head()

343    0
728    0
73     0
254    1
741    0
Name: Outcome, dtype: int64

## make sklearn neural network model

In [11]:
nn_clf = MLPClassifier(hidden_layer_sizes=3,learning_rate_init=0.1,max_iter=100)
nn_clf.fit(X_train,y_train)

MLPClassifier(hidden_layer_sizes=3, learning_rate_init=0.1, max_iter=100)

In [12]:
y_pred = pd.Series(nn_clf.predict(X_test))
y_pred.head()

0    0
1    0
2    0
3    0
4    0
dtype: int64

## calculate accuracy from our neural network model

In [13]:
accuracy_score(y_test,y_pred)

0.7467532467532467