# BRFSS Playground - ANN

Build an artificial neural network (ANN) with TensorFlow/Keras to predict mental health issues from the normalized BRFSS data.

In [117]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score

In [2]:
# Load the BRFSS table from a gzip-compressed CSV; the path is relative to the
# notebook's working directory.
brfss_path = '../data/brfss_normed.csv.gz'
brfss_norm = pd.read_csv(brfss_path)

In [31]:
# (rows, columns) of the loaded table.
brfss_norm.shape

(96986, 63)

### Minor cleaning: select features and binarize the target

In [77]:
# Columns kept out of the feature matrix. 'MNTL_HLTH_LEV_BRFSS' is the target
# (see y below); the others are presumably an identifier, a survey weight and a
# second mental-health measure that would leak the target -- TODO confirm.
drop_vars = ['B_ID', 'SMP_WGHT', 'MNTL_HLTH_LEV_BRFSS', 'MENTAL_HEALTH_30_BRFSS']

# Every remaining column, in its original order, is used as a feature.
excluded = set(drop_vars)
brfss_vars = [name for name in brfss_norm.columns if name not in excluded]

# Copies, so later edits to X / y never write back into brfss_norm.
X = brfss_norm.loc[:, brfss_vars].copy()
y = brfss_norm.loc[:, 'MNTL_HLTH_LEV_BRFSS'].copy()

In [78]:
# Distinct values of the raw target column.
y.unique()

array([1., 2., 0.])

In [79]:
# Collapse every level above 0 into a single positive class (1), leaving a
# binary 0/1 target.
is_positive = y > 0
y.loc[is_positive] = 1

In [80]:
# Share of each class in the target (normalize=True returns fractions, not counts).
y.value_counts(normalize=True)

0.0    0.68413
1.0    0.31587
Name: MNTL_HLTH_LEV_BRFSS, dtype: float64

In [81]:
# One indicator column per distinct value of y, used as the training target for
# the two-unit softmax output layer.
y_dummy = pd.get_dummies(y)

Split the data into training (70%) and test (30%) sets.

In [88]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_dummy,
    test_size=0.3,
    random_state=42,
)

In [89]:
# (rows, features) of the training split.
X_train.shape

(67890, 59)

### Training - All Variables

In [94]:
# One hidden ReLU layer followed by a two-way softmax.
# The hidden width is taken from the number of input features (59 for the
# current X_train) instead of being hard-coded, so the cell keeps matching the
# data if the feature selection changes.
n_features = X_train.shape[1]
model = keras.Sequential([
    keras.layers.Dense(n_features, activation=tf.nn.relu),
    # Two outputs: one probability per column of the one-hot encoded target.
    keras.layers.Dense(2, activation=tf.nn.softmax)
])

In [95]:
# 'adam' resolves to the Keras Adam optimizer under both TF 1.x and TF 2.x;
# tf.train.AdamOptimizer only exists in TF 1.x (tf.compat.v1.train in TF 2).
# categorical_crossentropy is the loss that matches a softmax output trained on
# one-hot targets. With exactly two classes it yields the same value as the
# previously used binary_crossentropy, so reported numbers stay comparable.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [96]:
# Column sums of the one-hot target = number of training rows in each class.
y_train.sum()

0.0    46275
1.0    21615
dtype: int64

In [97]:
# .values yields the underlying NumPy arrays; it replaces DataFrame.as_matrix(),
# which was deprecated and later removed from pandas.
model.fit(X_train.values, y_train.values, epochs=5)

  if __name__ == '__main__':


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x12596dcc0>

In [98]:
# Feed plain NumPy arrays, the same input type the model was fitted on.
# evaluate returns the loss followed by the compiled metrics (accuracy here).
test_loss, test_acc = model.evaluate(X_test.values, y_test.values)



In [125]:
# Accuracy on the held-out test split, as returned by model.evaluate.
test_acc

0.7655347814132527

In [104]:
# Per-class softmax probabilities for every test row; plain NumPy input, the
# same input type the model was fitted on.
y_pred = model.predict(X_test.values)

In [119]:
# Reduce one-hot labels and predicted probabilities to class indices, then score.
# .values replaces DataFrame.as_matrix(), which was deprecated and later removed
# from pandas.
true_labels = np.argmax(y_test.values, axis=1)
pred_labels = np.argmax(y_pred, axis=1)
f1_score(true_labels, pred_labels)

  if __name__ == '__main__':


0.5309405940594059

### Training - Just Behaviors

In [120]:
# Feature columns used for the behaviors-only model. Order is significant: it
# fixes the column order of the matrices built from X_train[behaviors] and
# X_test[behaviors].
behaviors = [
    'ROUTINE_CHECK_BRFSS', 'INTERNET_USE_BRFSS', 'SMK_NOW_BRFSS',
    'TRY_QUIT_SMK_BRFSS', 'SNUFF_BRFSS', 'CNSM_FT_DAY_BRFSS',
    'PA_CAT_BRFSS', 'AER_STRNGH_GUIDE_BRFSS', 'PHYS_HLTH_LEV_BRFSS',
    'HVY_DRNKR_BRFSS', 'AVG_NUM_DRNK_30_BRFSS', 'BINGE_DRNK_30_BRFSS',
    'DRNK_PER_DAY_BRFSS', 'DLY_FF_SERVE_BRFSS', 'DLY_FT_SERVE_BRFSS',
    'DLY_FJ_SERVE_BRFSS', 'DLY_GRN_VEG_SERVE_BRFSS', 'LARGE_NUM_DRNK_30_BRFSS',
    'MET_VAL_BRFSS', 'MET_VAL_OTHR_BRFSS', 'TTL_MIN_OF_PA_WEEK_BRFSS',
    'TTL_MIN_OF_VIG_WEEK_BRFSS', 'MIN_OF_PA_WEEK_BRFSS', 'MIN_OF_PA_WEEK_OTHR_BRFSS',
    'MIN_OF_VIG_WEEK_BRFSS', 'MIN_OF_VIG_WEEK_OTHR_BRFSS', 'DLY_POTATO_SERVE_BRFSS',
    'DLY_OTHR_VEG_SERVE_BRFSS', 'NUM_DRNKS_PER_WEEK_BRFSS', 'TOTAL_FT_DAY_BRFSS',
    'MIN_OF_EX_WEEK_BRFSS', 'MIN_OF_EX_WEEK_OTHR_BRFSS', 'TOTAL_VEG_DAY_BRFSS',
]

In [123]:
# Deeper network trained on the behavior columns only.
# The first layer's width follows len(behaviors) (33 for the current list)
# instead of a hard-coded number, so it stays in step if the list is edited.
model_behaviors = keras.Sequential([
    keras.layers.Dense(len(behaviors), activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.relu),
    # Two outputs: one probability per column of the one-hot encoded target.
    keras.layers.Dense(2, activation=tf.nn.softmax)
])

# 'adam' resolves to the Keras Adam optimizer under both TF 1.x and TF 2.x;
# tf.train.AdamOptimizer only exists in TF 1.x (tf.compat.v1.train in TF 2).
# categorical_crossentropy matches a softmax output trained on one-hot targets;
# with exactly two classes it yields the same value as binary_crossentropy.
model_behaviors.compile(optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])

In [124]:
# Train on the behavior columns only. .values replaces DataFrame.as_matrix(),
# which was deprecated and later removed from pandas.
model_behaviors.fit(X_train[behaviors].values, y_train.values, epochs=10)

  if __name__ == '__main__':


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x123f224a8>

In [129]:
# Per-class probabilities from the behaviors-only model; plain NumPy input, the
# same input type the model was fitted on.
# NOTE: this rebinds y_pred, replacing the predictions of the all-variables model.
y_pred = model_behaviors.predict(X_test[behaviors].values)

In [131]:
# Reduce one-hot labels and predicted probabilities to class indices, then score.
# .values replaces DataFrame.as_matrix(), which was deprecated and later removed
# from pandas.
true_labels = np.argmax(y_test.values, axis=1)
pred_labels = np.argmax(y_pred, axis=1)
f1_score(true_labels, pred_labels)

  if __name__ == '__main__':


0.37584971858782257