In [1]:
%matplotlib inline

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split

from sklearn.neural_network import MLPClassifier

from sklearn.metrics import classification_report

In [3]:
diabetes_data = pd.read_csv('data/dataset_diabetes/diabetic_data.csv')

In [4]:
diabetes_data.head()

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,...,No,No,No,No,No,No,No,No,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,...,No,Up,No,No,No,No,No,Ch,Yes,>30
2,64410,86047875,AfricanAmerican,Female,[20-30),?,1,1,7,2,...,No,No,No,No,No,No,No,No,Yes,NO
3,500364,82442376,Caucasian,Male,[30-40),?,1,1,7,2,...,No,Up,No,No,No,No,No,Ch,Yes,NO
4,16680,42519267,Caucasian,Male,[40-50),?,1,1,7,1,...,No,Steady,No,No,No,No,No,Ch,Yes,NO


In [7]:
diabetes_data.dtypes

encounter_id                 int64
patient_nbr                  int64
race                        object
gender                      object
age                         object
weight                      object
admission_type_id            int64
discharge_disposition_id     int64
admission_source_id          int64
time_in_hospital             int64
payer_code                  object
medical_specialty           object
num_lab_procedures           int64
num_procedures               int64
num_medications              int64
number_outpatient            int64
number_emergency             int64
number_inpatient             int64
diag_1                      object
diag_2                      object
diag_3                      object
number_diagnoses             int64
max_glu_serum               object
A1Cresult                   object
metformin                   object
repaglinide                 object
nateglinide                 object
chlorpropamide              object
glimepiride         

In [8]:
diabetes_data.shape

(101766, 50)

In [9]:
diabetes_target = diabetes_data['readmitted']
diabetes_attributes = diabetes_data.drop('readmitted', axis = 1)

In [10]:
diabetes_attributes = pd.get_dummies(diabetes_attributes)

In [11]:
diabetes_attributes.shape

(101766, 2472)

In [12]:
diabetes_attributes_scaled = MinMaxScaler().fit_transform(diabetes_attributes)

In [18]:
all_data, _, all_targets,_ = train_test_split(
    diabetes_attributes_scaled, 
    diabetes_target, train_size = 0.1, 
    stratify = diabetes_target)

In [14]:
len(all_data)

10176

In [16]:
attributes_train, attributes_test, targets_train, targets_test = train_test_split(all_data, all_targets, test_size = 0.2, stratify = all_targets)

In [20]:
nn = MLPClassifier()

In [21]:
nn.fit(attributes_train, targets_train)



MLPClassifier()

In [23]:
nn.score(attributes_train, targets_train)

1.0

In [24]:
nn.score(attributes_test, targets_test)

0.4916502946954813

In [28]:
print(classification_report(targets_train, nn.predict(attributes_train)))

              precision    recall  f1-score   support

         <30       1.00      1.00      1.00       894
         >30       1.00      1.00      1.00      2877
          NO       1.00      1.00      1.00      4369

    accuracy                           1.00      8140
   macro avg       1.00      1.00      1.00      8140
weighted avg       1.00      1.00      1.00      8140



In [29]:
print(classification_report(targets_test, nn.predict(attributes_test)))

              precision    recall  f1-score   support

         <30       0.10      0.08      0.09       224
         >30       0.42      0.40      0.41       719
          NO       0.59      0.64      0.61      1093

    accuracy                           0.49      2036
   macro avg       0.37      0.37      0.37      2036
weighted avg       0.48      0.49      0.48      2036



In [33]:
def train_and_save_results(model):
    model.fit(attributes_train, targets_train)
    train_report  = classification_report(targets_train, model.predict(attributes_train))
    test_report  = classification_report(targets_test, model.predict(attributes_test))
    
    return train_report, test_report

In [34]:
shallow_nn = MLPClassifier(hidden_layer_sizes = (20, ))
train_report_shallow, test_report_shallow = train_and_save_results(shallow_nn)



In [35]:
print(train_report_shallow)

              precision    recall  f1-score   support

         <30       0.93      0.67      0.78       894
         >30       0.86      0.86      0.86      2877
          NO       0.88      0.93      0.91      4369

    accuracy                           0.88      8140
   macro avg       0.89      0.82      0.85      8140
weighted avg       0.88      0.88      0.88      8140



In [36]:
print(test_report_shallow)

              precision    recall  f1-score   support

         <30       0.11      0.08      0.09       224
         >30       0.44      0.46      0.45       719
          NO       0.61      0.64      0.63      1093

    accuracy                           0.51      2036
   macro avg       0.39      0.39      0.39      2036
weighted avg       0.50      0.51      0.51      2036



In [38]:
deep_nn = MLPClassifier(hidden_layer_sizes = (3, 4, 5, 4, 3))
train_report_deep, test_report_deep = train_and_save_results(deep_nn)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [39]:
print(train_report_deep)

              precision    recall  f1-score   support

         <30       0.00      0.00      0.00       894
         >30       0.00      0.00      0.00      2877
          NO       0.54      1.00      0.70      4369

    accuracy                           0.54      8140
   macro avg       0.18      0.33      0.23      8140
weighted avg       0.29      0.54      0.37      8140



In [40]:
nn_middle_size = MLPClassifier(hidden_layer_sizes = (10, 10, 10))
train_report_middle, test_report_middle = train_and_save_results(nn_middle_size)



In [41]:
print(train_report_middle)

              precision    recall  f1-score   support

         <30       0.76      0.52      0.62       894
         >30       0.83      0.87      0.85      2877
          NO       0.87      0.91      0.89      4369

    accuracy                           0.85      8140
   macro avg       0.82      0.77      0.79      8140
weighted avg       0.85      0.85      0.85      8140



In [42]:
print(test_report_middle)

              precision    recall  f1-score   support

         <30       0.18      0.11      0.13       224
         >30       0.42      0.46      0.44       719
          NO       0.60      0.61      0.60      1093

    accuracy                           0.50      2036
   macro avg       0.40      0.39      0.39      2036
weighted avg       0.49      0.50      0.49      2036



## Implementation Notes

In [43]:
scalar = 42

In [45]:
vector = np.array([2, 3, 5, -10, 4.2])
vector

array([  2. ,   3. ,   5. , -10. ,   4.2])

In [46]:
vector.T

array([  2. ,   3. ,   5. , -10. ,   4.2])

In [47]:
matrix = np.eye(3)
matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [48]:
row_vector = np.array([[2, 3, 5, -10, 4.2]])
row_vector

array([[  2. ,   3. ,   5. , -10. ,   4.2]])

In [49]:
row_vector.T

array([[  2. ],
       [  3. ],
       [  5. ],
       [-10. ],
       [  4.2]])

In [50]:
row_vector @ row_vector.T

array([[155.64]])

In [51]:
row_vector @ row_vector

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 5)

In [52]:
vector @ vector

155.64

In [53]:
np.squeeze(row_vector @ row_vector.T)

array(155.64)

In [55]:
row_vector.T @ row_vector

array([[  4.  ,   6.  ,  10.  , -20.  ,   8.4 ],
       [  6.  ,   9.  ,  15.  , -30.  ,  12.6 ],
       [ 10.  ,  15.  ,  25.  , -50.  ,  21.  ],
       [-20.  , -30.  , -50.  , 100.  , -42.  ],
       [  8.4 ,  12.6 ,  21.  , -42.  ,  17.64]])