In [41]:
import numpy as np
import pandas as pd

# Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

# Evaluation metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# Resampling
from collections import Counter
from imblearn.over_sampling import SMOTE

# Train test split
from sklearn.model_selection import train_test_split

# Pickle to save models
import pickle

In [42]:
df = pd.read_csv('Final_NHANES_2013_2014.csv', dtype = object)
df.head()

Unnamed: 0,Gender,Age,Poverty,Weight,Height,BMI,Pulse,BPSysAve,BPDiaAve,DirectChol,...,SleepTrouble,PhysActive,WalkBic,VigActiv,ModActiv,TVHrsDay,AlcoholDay,AlcoholYear,Smoke100,mortstat
0,1,69.0,0.84,78.3,171.3,26.7,86.0,122.0,72.0,1.68,...,Yes,0,0,0,0,2.0,1.0,1.0,1,1
1,1,54.0,1.78,89.5,176.8,28.6,74.0,156.0,62.0,1.29,...,No,1,0,0,0,4.0,4.0,7.0,1,1
2,1,72.0,4.51,88.9,175.3,28.9,68.0,140.0,90.0,1.55,...,No,1,0,0,1,4.0,2.0,0.0,1,1
3,0,73.0,5.0,52.0,162.4,19.7,92.0,136.0,86.0,2.2,...,No,1,0,0,0,1.0,2.0,0.0,0,0
4,1,56.0,4.79,105.0,158.7,41.7,60.0,160.0,84.0,0.98,...,No,0,0,0,0,5.0,1.0,5.0,1,1


In [43]:
cols = ['Weight', 'BMI', 'Pulse', 'TotChol', 'Diabetes', 'SleepHrsNight', 'Age', 'Smoke100', 'PhysActive', 'WalkBic', 'VigActiv', 'ModActiv', 'TVHrsDay', 'FatFoods', 'AlcoholDay', 'AlcoholYear', 'mortstat']
df = df[cols]
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
df.head()

Unnamed: 0,Weight,BMI,Pulse,TotChol,Diabetes,SleepHrsNight,Age,Smoke100,PhysActive,WalkBic,VigActiv,ModActiv,TVHrsDay,FatFoods,AlcoholDay,AlcoholYear,mortstat
0,78.3,26.7,86.0,4.32,1,7.0,69.0,1,0,0,0,0,2.0,8.0,1.0,1.0,1
1,89.5,28.6,74.0,4.4,1,9.0,54.0,1,1,0,0,0,4.0,0.0,4.0,7.0,1
2,88.9,28.9,68.0,3.26,1,8.0,72.0,1,1,0,0,1,4.0,1.0,2.0,0.0,1
3,52.0,19.7,92.0,5.2,0,9.0,73.0,0,1,0,0,0,1.0,0.0,2.0,0.0,0
4,105.0,41.7,60.0,5.84,0,5.0,56.0,1,0,0,0,0,5.0,14.0,1.0,5.0,1


In [44]:
X = df.drop(columns=['mortstat'])
y= df['mortstat']

In [45]:
sm = SMOTE(random_state=42)

In [46]:
X_res, y_res = sm.fit_resample(X, y)

In [47]:
df = X_res

In [48]:
df = pd.concat([df, y_res], axis=1)

In [49]:
df = df[df['mortstat'] == 0]

In [50]:
df.drop(columns = ['mortstat'], inplace = True)

In [51]:
df.head()

Unnamed: 0,Weight,BMI,Pulse,TotChol,Diabetes,SleepHrsNight,Age,Smoke100,PhysActive,WalkBic,VigActiv,ModActiv,TVHrsDay,FatFoods,AlcoholDay,AlcoholYear
3,52.0,19.7,92.0,5.2,0,9.0,73.0,0,1,0,0,0,1.0,0.0,2.0,0.0
28,111.9,38.9,60.0,4.16,0,6.0,75.0,1,1,0,0,0,4.0,3.0,2.0,0.0
95,71.2,25.9,84.0,4.55,1,6.0,69.0,1,0,0,0,0,4.0,2.0,1.0,5.0
108,56.4,25.1,68.0,5.15,0,9.0,80.0,0,0,0,0,0,5.0,10.0,2.0,2.0
151,180.1,64.7,98.0,3.05,1,7.0,57.0,1,0,0,0,0,5.0,5.0,2.0,0.0


In [52]:
df['Age'] = round(df['Age'])

In [37]:
# X and y
X = df.drop(columns=['Age'])
y= df['Age']

In [38]:
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)
y = np_utils.to_categorical(y)

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, 
                                                    random_state=21, stratify=y)

ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [51]:
input_dim = len(X.columns)

model = Sequential()
model.add(Dense(8, input_dim = input_dim , activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(64, activation = 'softmax'))

model.compile(loss = 'categorical_crossentropy' , optimizer = 'adam' , metrics = ['accuracy'] )

model.fit(X_train, y_train, epochs = 10, batch_size = 2)

scores = model.evaluate(X_test, y_test)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

accuracy: 75.78%


In [53]:
# save the model to disk
filename = 'Keras_model.sav'
pickle.dump(model, open(filename, 'wb'))