In [None]:


#importing dependencies
import pandas as pd
from pathlib import Path
import seaborn as sns
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the source table, then keep only the target ('Diabetes') and the nine
# predictor columns used for modelling.
medical_df = pd.read_csv("/content/medical_df.csv")

model_columns = [
    'Diabetes',
    'Total Income per Individual',
    'Lacking Health Insurance',
    'Binge Drinking',
    'High Blood Pressure',
    'Routine Health Checkups',
    'Currently Smoking',
    'Depression',
    'No Leisure-Time Physical Activity',
    "Less than 7 Hours of Sleep",
]
diabetes_df = medical_df[model_columns]

diabetes_df

In [None]:
# Randomly assign 80% of the rows to training (fixed seed so the split is
# repeatable); every row that was not sampled becomes the test set.
train_dataset = diabetes_df.sample(frac=0.8, random_state=1)
test_dataset = diabetes_df.drop(index=train_dataset.index)

In [None]:
# Split each dataset into the target column ('Diabetes') and the predictors.
# The original train/test frames are left untouched.
train_labels = train_dataset['Diabetes'].copy()
test_labels = test_dataset['Diabetes'].copy()

train_features = train_dataset.drop(columns='Diabetes')
test_features = test_dataset.drop(columns='Diabetes')

In [None]:
# Per-column mean and standard deviation of the training split (one row per column).
# NOTE(review): these statistics are only displayed; no normalization step is
# applied to the features in the cells visible here.
train_dataset.describe().T[["mean", "std"]]

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# First training row as a 2-D array (one row, one column per feature).
# There must be no trailing comma here: it would wrap the array in a
# one-element tuple instead of assigning the array itself.
first = np.array(train_features[:1])

# Linear regression expressed as a Keras model: a single Dense unit computes
# y = x . kernel + bias. Its weights are created the first time the model is
# called on data.
linear_model = tf.keras.Sequential([
    layers.Dense(units=1)
])

In [None]:
# When Model.predict is called on a batch of inputs, it produces units=1 outputs
# for each example. Show the first ten.
untrained_predictions = linear_model.predict(train_features)
untrained_predictions[:10]

In [None]:
# Once the model has been called, its weight matrices are built. Check that the
# kernel weights (the m in y = mx + b) have a shape of (9, 1): one weight per
# input feature.
# The Sequential model holds a single Dense layer, so it is at index 0;
# index 1 would raise IndexError.
linear_model.layers[0].kernel

In [None]:
# Configure the model with Keras Model.compile, then train it with Model.fit
# for 100 epochs.
linear_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

# Without this fit call the model would be evaluated with its randomly
# initialised weights. 20% of the training rows are held out for validation.
linear_history = linear_model.fit(
    train_features,
    train_labels,
    epochs=100,
    verbose=0,
    validation_split=0.2)

In [None]:
# Start the results collection: each model's test-set loss (mean absolute error,
# as configured in compile) is stored under the model's name.
test_results = {
    'linear_model': linear_model.evaluate(test_features, test_labels, verbose=0),
}


In [None]:
def build_and_compile_model(num_features=None, learning_rate=0.001):
  """Build and compile a small fully connected regression network.

  The network has two hidden Dense layers of 64 ReLU units followed by a
  single linear output unit. It is compiled with mean-absolute-error loss
  and the Adam optimizer.

  Args:
    num_features: Number of input columns. When None (the default) it is
      taken from the notebook-level `train_features` frame, which matches
      the previous zero-argument behaviour.
    learning_rate: Step size passed to Adam. Defaults to 0.001.

  Returns:
    The compiled (untrained) Keras model.
  """
  if num_features is None:
    # Fall back to the training frame so existing zero-argument calls still work.
    num_features = train_features.shape[1]

  model = keras.Sequential([
      layers.Dense(64, activation='relu', input_shape=(num_features,)),
      layers.Dense(64, activation='relu'),
      layers.Dense(1)
  ])

  model.compile(loss='mean_absolute_error',
                optimizer=tf.keras.optimizers.Adam(learning_rate))
  return model

In [None]:
# Build the compiled DNN regressor and print its layer-by-layer summary.
dnn_model = build_and_compile_model()
dnn_model.summary()

In [None]:
%%time
history = dnn_model.fit(
    train_features,
    train_labels,
    validation_split=0.2,
    verbose=0, epochs=100)

In [None]:
# Add the DNN's test-set loss to the results gathered so far.
# `test_results` is deliberately NOT re-created here: resetting it to {} would
# throw away the 'linear_model' entry and leave nothing to compare against.
test_results['dnn_model'] = dnn_model.evaluate(test_features, test_labels, verbose=0)

In [None]:
# One row per model, a single column holding its test-set mean absolute error.
pd.Series(test_results, name='Mean absolute error [diabetes]').to_frame()

In [None]:
# Flatten the model output to a 1-D array so it pairs element-wise with the labels.
test_predictions = dnn_model.predict(test_features).flatten()

# Predicted vs. true values on square axes; the diagonal marks perfect predictions.
lims = [0, 50]
a = plt.axes(aspect='equal')
a.scatter(test_labels, test_predictions)
a.set_xlabel('True Values [Diabetes]')
a.set_ylabel('Predictions [Diabetes]')
a.set_xlim(lims)
a.set_ylim(lims)
_ = a.plot(lims, lims)

In [None]:
# Error distribution: signed difference (prediction minus true value) on the
# test set, shown as a 25-bin histogram.
error = test_predictions - test_labels

ax = plt.gca()
ax.hist(error, bins=25)
ax.set_xlabel('Prediction Error ')
ax.set_ylabel('Count')

In [None]:
# Write the trained model to 'dnn_model.keras' (path is relative to the working directory).
dnn_model.save('dnn_model.keras')


In [None]:
# Model.predict returns one column per output unit, i.e. shape (n, 1), while the
# labels are 1-D. Flatten first: mixing the two shapes can broadcast to an
# (n, n) matrix and yield per-row vectors instead of a single score. This also
# keeps `test_predictions` 1-D, as the plotting and error cells expect.
test_predictions = dnn_model.predict(test_features).flatten()
true_values = tf.convert_to_tensor(test_labels.to_numpy(), dtype=test_predictions.dtype)
residuals = true_values - test_predictions

# Scalar metrics over the whole test set.
mae = tf.reduce_mean(tf.abs(residuals))
mse = tf.reduce_mean(tf.square(residuals))
rmse = tf.sqrt(mse)

print(f'MAE: {mae.numpy()}')
print(f'MSE: {mse.numpy()}')
print(f'RMSE: {rmse.numpy()}')

In [None]:
# Scatter of model predictions against the true test labels.
ax = plt.gca()
ax.scatter(test_labels, test_predictions)
ax.set_xlabel('True Values')
ax.set_ylabel('Predictions')
plt.show()

In [None]:
# Testing predictions: run the model on the first five rows of the data,
# with the target column removed.
first = diabetes_df.drop(columns=["Diabetes"]).head(5)

first_predictions = dnn_model.predict(first)

In [None]:
# One-column frame of the predictions, indexed 0..n-1.
first_df = pd.DataFrame(first_predictions.ravel(), columns=['Predicted_Values'])

In [None]:
# Merging dataframes to compare results: true 'Diabetes' values for the first
# five rows next to the model's predictions (aligned on the row index).
first_original = diabetes_df["Diabetes"].head()
merge_df_1 = pd.concat([first_original, first_df], axis="columns")
merge_df_1

In [None]:
# Adding ZIP codes to the comparison table for the first five rows.
# The series is named `first_zip` (mirroring `last_zip`) rather than `zip`,
# which would shadow Python's built-in zip() for the rest of the session.
first_zip = medical_df['ZIPCODE'].head()
merge_1 = pd.concat([first_zip, merge_df_1], axis=1)

merge_1

In [None]:
# True vs. predicted values for the first five rows on square axes;
# the diagonal marks perfect predictions.
lims = [0, 20]
a = plt.axes(aspect='equal')
a.scatter(first_original, first_df)
a.set_xlabel('True Values [Diabetes]')
a.set_ylabel('Predictions [Diabetes]')
a.set_title('Diabetes vs. Predicted Values')
a.set_xlim(lims)
a.set_ylim(lims)
_ = a.plot(lims, lims)

In [None]:
# Run the model on the last five rows of the data, with the target column removed.
last = diabetes_df.drop(columns=["Diabetes"]).tail(5)

last_predictions = dnn_model.predict(last)

In [None]:
# One-column frame of the predictions for the last five rows, indexed 0..n-1.
last_df = pd.DataFrame(last_predictions.ravel(), columns=["Predicted_Values"])

last_df

In [None]:
# ZIP code and true 'Diabetes' value for the last five rows.
last_zip = medical_df["ZIPCODE"].tail()
last_original = diabetes_df["Diabetes"].tail()
zip_merge = pd.concat([last_zip, last_original], axis="columns")

# Renumber the rows from 0 so they line up with the 0-based index of `last_df`.
z = zip_merge.reset_index(drop=True)

final_merge = pd.concat([z, last_df], axis="columns")

final_merge

In [None]:
# `final` is not assigned in any earlier cell, so on a fresh kernel this cell
# raised NameError. Build it here: ZIP code, every modelling column, and the
# DNN's prediction for each row.
# NOTE(review): contents inferred from the columns selected from `final` in the
# following cell; confirm this matches the table that was originally intended.
final = pd.concat([medical_df['ZIPCODE'], diabetes_df], axis=1)
final['Predicted_Diabetes'] = dnn_model.predict(
    diabetes_df.drop(columns=['Diabetes'])).flatten()

final.columns

In [None]:
# Display `final` with ZIP code first, then the actual and predicted diabetes
# values, then each predictor column.
# NOTE(review): relies on `final` already existing in the kernel; confirm where it is built.
report_columns = [
    'ZIPCODE',
    'Diabetes',
    'Predicted_Diabetes',
    'Total Income per Individual',
    'Lacking Health Insurance',
    'Binge Drinking',
    'High Blood Pressure',
    'Routine Health Checkups',
    'Currently Smoking',
    'Depression',
    'No Leisure-Time Physical Activity',
    'Less than 7 Hours of Sleep',
]
final[report_columns]