In [3]:
%matplotlib inline

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense

from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split

In [5]:
# Read the raw diabetes encounter CSV into a DataFrame (path is relative to the notebook).
diabetes_data = pd.read_csv(
    filepath_or_buffer = 'data/dataset_diabetes/diabetic_data.csv'
)

In [6]:
# Preview the first rows of the raw frame to sanity-check the load.
diabetes_data.head()

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,...,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,...,No,No,No,No,No,No,No,No,No,NO
1,149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,...,No,Up,No,No,No,No,No,Ch,Yes,>30
2,64410,86047875,AfricanAmerican,Female,[20-30),?,1,1,7,2,...,No,No,No,No,No,No,No,No,Yes,NO
3,500364,82442376,Caucasian,Male,[30-40),?,1,1,7,2,...,No,Up,No,No,No,No,No,Ch,Yes,NO
4,16680,42519267,Caucasian,Male,[40-50),?,1,1,7,1,...,No,Steady,No,No,No,No,No,Ch,Yes,NO


In [7]:
# List the dtype pandas inferred for every column (object columns hold text values).
diabetes_data.dtypes

encounter_id                 int64
patient_nbr                  int64
race                        object
gender                      object
age                         object
weight                      object
admission_type_id            int64
discharge_disposition_id     int64
admission_source_id          int64
time_in_hospital             int64
payer_code                  object
medical_specialty           object
num_lab_procedures           int64
num_procedures               int64
num_medications              int64
number_outpatient            int64
number_emergency             int64
number_inpatient             int64
diag_1                      object
diag_2                      object
diag_3                      object
number_diagnoses             int64
max_glu_serum               object
A1Cresult                   object
metformin                   object
repaglinide                 object
nateglinide                 object
chlorpropamide              object
glimepiride         

In [8]:
# (rows, columns) of the raw frame.
diabetes_data.shape

(101766, 50)

In [9]:
# Separate the label column from the predictors.
# `readmitted` is the label; every other column stays in the feature frame.
# NOTE(review): encounter_id and patient_nbr are kept as features here —
# confirm that these identifier-like columns are meant to be predictors.
diabetes_attributes = diabetes_data.drop(columns = ['readmitted'])
diabetes_target = diabetes_data['readmitted']

In [10]:
# One-hot encode the feature frame with pandas' default column selection,
# so the network receives a purely numeric table.
diabetes_attributes = pd.get_dummies(data = diabetes_attributes)

In [11]:
# (rows, columns) of the feature frame after one-hot encoding.
diabetes_attributes.shape

(101766, 2472)

In [12]:
# Rescale the feature columns with a min-max scaler.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed further down, so test rows influence the scaling — confirm this
# is acceptable or fit on the training rows only.
attribute_scaler = MinMaxScaler()
diabetes_attributes_scaled = attribute_scaler.fit_transform(diabetes_attributes)

In [13]:
# Keep a stratified 10% subsample of the data to work with; the remaining
# 90% is discarded into the throwaway `_` names.
# `random_state` pins the shuffle so the same rows are selected on every
# run; without it the subsample (and every result derived from it) changes
# each time the notebook is executed.
all_data, _, all_targets, _ = train_test_split(
    diabetes_attributes_scaled,
    diabetes_target,
    train_size = 0.1,
    stratify = diabetes_target,
    random_state = 42,
)

In [14]:
# Number of rows kept in the subsample.
len(all_data)

10176

In [15]:
# Split the subsample 80/20 into training and test sets, stratified on the
# label so both parts keep the same class proportions.
# `random_state` fixes the shuffle so the split is identical on every run;
# without it the train/test membership changes each time the cell executes.
attributes_train, attributes_test, targets_train, targets_test = train_test_split(
    all_data,
    all_targets,
    test_size = 0.2,
    stratify = all_targets,
    random_state = 42,
)

In [16]:
# num_columns: width of the training matrix, used as the network's input size.
# num_results: NOTE(review) not referenced by any cell visible in this
# notebook (the network below hard-codes its output width) — confirm it is
# still needed.
num_columns, num_results = attributes_train.shape[1], 1

In [17]:
# Seed TensorFlow's global random generator before any layer is created so
# the initial weights are the same on every fresh run of the notebook;
# previously they were drawn from an unseeded generator.
tf.random.set_seed(42)

# Small fully connected classifier: input -> 5 -> 6 -> 7 (ReLU) -> 3 (softmax).
nn = Sequential([
    Input(shape = (num_columns, )), # input layer: one value per feature column
    
    # Hidden layers
    Dense(units = 5, activation = "relu"),
    Dense(units = 6, activation = "relu"),
    Dense(units = 7, activation = "relu"),
    
    # Softmax turns the three outputs into class probabilities.
    Dense(units = 3, activation = "softmax") # output layer
])

In [18]:
# Print each layer's output shape and parameter count.
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 5)                 12365     
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 36        
_________________________________________________________________
dense_2 (Dense)              (None, 7)                 49        
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 24        
Total params: 12,474
Trainable params: 12,474
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Configure training. The sparse categorical loss takes integer class ids
# as labels (no one-hot encoding needed). The optimizer is spelled out
# explicitly; "rmsprop" is what Keras uses when none is given.
nn.compile(
    optimizer = "rmsprop",
    loss = "sparse_categorical_crossentropy",
    metrics = ["accuracy"],
)

In [20]:
# Integer class id for each readmission label.
READMITTED_CODES = {"NO": 0, "<30": 1, ">30": 2}


def encode_readmitted(labels):
    """Convert readmission labels to integer class ids.

    Parameters
    ----------
    labels : pandas.Series
        Either the raw string labels ("NO", "<30", ">30") or labels that
        this function has already encoded.

    Returns
    -------
    pandas.Series
        int64 class ids, indexed like `labels`.

    Raises
    ------
    ValueError
        If any label is not a key of READMITTED_CODES.
    """
    # Already encoded: return unchanged so that re-running this cell is
    # harmless. Mapping integer ids through the dict again would turn every
    # label into NaN.
    if pd.api.types.is_integer_dtype(labels):
        return labels

    encoded = labels.map(READMITTED_CODES)
    # `.map` silently yields NaN for unknown labels; fail loudly instead of
    # feeding NaN targets to the loss function.
    if encoded.isna().any():
        unknown = sorted(set(labels[encoded.isna()].astype(str)))
        raise ValueError(f"Unexpected readmitted labels: {unknown}")
    return encoded.astype("int64")


targets_train = encode_readmitted(targets_train)
targets_test = encode_readmitted(targets_test)

In [21]:
# Inspect the integer-encoded training labels.
targets_train

28856    0
39518    0
26074    0
44442    0
46747    0
        ..
64314    0
84481    0
24196    0
65979    0
36177    0
Name: readmitted, Length: 8140, dtype: int64

In [22]:
# Train for 20 epochs; the held-out test split is passed as validation data
# so Keras reports its loss/accuracy after every epoch.
nn.fit(
    x = attributes_train,
    y = targets_train,
    epochs = 20,
    validation_data = (attributes_test, targets_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x23883018f40>

In [23]:
# Loss and accuracy of the trained network on the *training* split
# (returned in the order [loss, accuracy], matching the compile() metrics).
nn.evaluate(x = attributes_train, y = targets_train)



[0.7841567993164062, 0.6542997360229492]

In [None]:
# Sizes shared by the baseline models below:
# three output classes, and one input per column of the training matrix.
num_classes = 3
num_columns = attributes_train.shape[1]

In [None]:
# Baseline with no hidden layers: a single softmax layer directly on the
# inputs, i.e. multinomial logistic regression expressed as a Keras model.
log_regr = Sequential()
log_regr.add(Input(shape = (num_columns, )))  # input layer
log_regr.add(Dense(units = num_classes, activation = "softmax"))  # output layer

In [None]:
# Baseline with no hidden layers and no activation: the output is a plain
# linear combination of the inputs.
# NOTE(review): named as a regression model but it emits `num_classes`
# outputs — confirm the intended target and output width.
lin_regr = Sequential()
lin_regr.add(Input(shape = (num_columns, )))  # input layer
lin_regr.add(Dense(units = num_classes, activation = None))  # output layer