In [None]:
#importing dependencies
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

In [None]:
# Load the medical dataset from the project's resources folder.
medical_df = pd.read_csv(Path("resources") / "medical_df.csv")

In [None]:
# Columns kept for the diabetes regression: the 'Diabetes' target first,
# followed by the candidate predictor columns.
diabetes_columns = [
    'Diabetes',
    'Total Income per Individual',
    'Lacking Health Insurance',
    'Binge Drinking',
    'High Blood Pressure',
    'Routine Health Checkups',
    'Currently Smoking',
    'Dentist Visits',
    'Depression',
    'High Cholesterol',
    'No Leisure-Time Physical Activity',
    "Less than 7 Hours of Sleep",
]
diabetes_df = medical_df[diabetes_columns]

diabetes_df

## Splitting data into training and test sets

In [None]:
# 80/20 train/test split of the diabetes frame. random_state pins the sample
# so the split is reproducible; the test set is every row not drawn for training.
train_dataset = diabetes_df.sample(frac=0.8, random_state=1)
test_dataset = diabetes_df.drop(train_dataset.index)

In [None]:
# Inspecting data: pairwise scatter plots of the target and the first four
# predictors, with kernel-density estimates on the diagonal.
pairplot_columns = [
    'Diabetes',
    'Total Income per Individual',
    'Lacking Health Insurance',
    'Binge Drinking',
    'High Blood Pressure',
]
sns.pairplot(train_dataset[pairplot_columns], diag_kind='kde')

In [None]:
# Summary statistics of the training set, one row per column.
train_dataset.describe().T

## Splitting features from labels

In [None]:
# Separate the 'Diabetes' target from the predictors. The split frames
# themselves are left untouched; features and labels are new objects.
train_labels = train_dataset['Diabetes'].copy()
test_labels = test_dataset['Diabetes'].copy()

train_features = train_dataset.drop(columns='Diabetes')
test_features = test_dataset.drop(columns='Diabetes')

In [None]:
# Per-column mean and standard deviation of the training set, shown to
# compare the scales of the columns before normalizing.
train_dataset.describe().loc[["mean", "std"]].T

In [None]:
#Data Normalization
# Create the Keras preprocessing layer that will normalize the input features.
# It holds no statistics yet; they are computed when adapt() is called on it.
normalizer = tf.keras.layers.Normalization(axis=-1)

In [None]:
# Fit the state of the preprocessing layer to the training features by
# calling Normalization.adapt on their array form.
normalizer.adapt(train_features.to_numpy())

In [None]:
# adapt() calculates the mean and variance of the data and stores them in the
# layer. When the layer is called, it returns the input data with each feature
# independently normalized. Print the stored means.
print(normalizer.mean.numpy())

In [None]:
# Show the first training row before and after it passes through the
# normalization layer.
first = np.array(train_features[:1])

# Limit float formatting only inside this block so later output is unaffected.
with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized: ', normalizer(first).numpy())

## Linear Regression 

In [None]:
# Single-feature input for the one-variable linear regression.
total_income_per_individual_lr = np.array(train_features['Total Income per Individual'])

# Dedicated normalization layer for this one feature. axis=None makes the layer
# compute a single scalar mean/variance over the whole input.
total_income_per_individual_lr_normalizer = tf.keras.layers.Normalization(input_shape=[1,], axis=None)
# adapt() must receive the feature values themselves, not the column name.
total_income_per_individual_lr_normalizer.adapt(total_income_per_individual_lr)

In [None]:
# Build the Keras Sequential model: it predicts the 'Diabetes' value from
# total income per individual. The input is normalized, then passed through a
# single Dense unit, i.e. a linear y = m*x + b.

total_income_per_individual_lr_model = tf.keras.Sequential([
    total_income_per_individual_lr_normalizer,
    tf.keras.layers.Dense(units=1)
])
total_income_per_individual_lr_model.summary()

In [None]:
# Sanity check: run the model on the first 10 income values. This happens
# before the model is compiled or fitted in the cells below.
total_income_per_individual_lr_model.predict(total_income_per_individual_lr[:10])

In [None]:
# Configure training: Adam optimizer with a 0.1 learning rate, minimizing the
# mean absolute error between predictions and labels.
total_income_per_individual_lr_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')


In [None]:
# Fit the single-input model on the income column of the training features.
history = total_income_per_individual_lr_model.fit(
    train_features['Total Income per Individual'],
    train_labels,
    epochs=100,
    # Suppress per-epoch logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split=0.2)


In [None]:
# Tabulate the per-epoch values recorded during training, add the epoch number
# as a column, and show the last few epochs.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [None]:
def plot_loss(history):
  """Plot training and validation loss per epoch.

  Args:
    history: object returned by ``Model.fit``; its ``history`` dict must hold
      the ``'loss'`` and ``'val_loss'`` series (``val_loss`` is present when
      the fit used validation data).
  """
  plt.plot(history.history['loss'], label='loss')
  plt.plot(history.history['val_loss'], label='val_loss')
  # Fixed y-range so plots from different models are directly comparable.
  plt.ylim([0, 10])
  plt.xlabel('Epoch')
  # The models in this notebook predict the 'Diabetes' column.
  plt.ylabel('Error [Diabetes]')
  plt.legend()
  plt.grid(True)

In [None]:
# Loss curves for the single-input linear model fitted above.
plot_loss(history)

In [None]:
# Collect test-set results in one dict so the models can be compared later.
# The first entry is the single-input linear model scored on the test income column.
test_results = {
    'total_income_per_individual_lr_model': total_income_per_individual_lr_model.evaluate(
        test_features["Total Income per Individual"],
        test_labels,
        verbose=0,
    ),
}

In [None]:
# Evenly spaced income values spanning the observed training range, so the
# fitted line is drawn over exactly the region the data covers.
x = tf.linspace(
    float(train_features['Total Income per Individual'].min()),
    float(train_features['Total Income per Individual'].max()),
    251)
y = total_income_per_individual_lr_model.predict(x)

In [None]:
def plot_total_income_per_individual_lr(x,y):
    """Overlay the model's prediction line on the training data.

    Args:
        x: income values at which the model was evaluated.
        y: model predictions for ``x``.
    """
    plt.scatter(train_features['Total Income per Individual'], train_labels, label='Data')
    plt.plot(x, y, color='k', label='Predictions')
    plt.xlabel('Total Income Per Individual')
    plt.ylabel('Diabetes')
    # Without a legend the 'Data' / 'Predictions' labels are never shown.
    plt.legend()

In [None]:
# Draw the training scatter with the single-input model's prediction line.
plot_total_income_per_individual_lr(x, y)

## Linear Regression with multiple inputs 

In [None]:
# Multi-input linear regression: the all-feature normalization layer feeds a
# single Dense unit.
linear_model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=1)
])

In [None]:
# When Model.predict is called on a batch of inputs, it produces units=1
# outputs for each example. Only the first 10 rows are passed in, so the model
# does not run over the whole training set just to display 10 predictions.
linear_model.predict(train_features[:10])

In [None]:
# Once the model has been called, its weight matrices are built. Check that the
# kernel weights (the m in y = mx + b) have shape (number_of_features, 1).
linear_model.layers[1].kernel

In [None]:
# Configure the model with Keras Model.compile and train it with Model.fit for
# 100 epochs.
linear_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

# Train before evaluating; otherwise the test score below would be that of
# randomly initialized weights. Stored under its own name so the single-input
# model's `history` is not overwritten.
linear_history = linear_model.fit(
    train_features,
    train_labels,
    epochs=100,
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split=0.2)

In [None]:
# Score the multi-input linear model on the held-out test set and record it.
linear_test_loss = linear_model.evaluate(x=test_features, y=test_labels, verbose=0)
test_results['linear_model'] = linear_test_loss

## Regression with a deep neural network (DNN)


In [None]:
def build_and_compile_model(norm):
  """Build and compile a small fully connected regression network.

  The network is: ``norm`` -> Dense(64, relu) -> Dense(64, relu) -> Dense(1).

  Args:
    norm: layer placed first in the model (a normalization layer in this
      notebook).

  Returns:
    A compiled ``tf.keras.Sequential`` model that minimizes mean absolute
    error with Adam at a 0.001 learning rate.
  """
  model = tf.keras.Sequential([
      norm,
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dense(64, activation='relu'),
      # Single linear output unit for the regression target.
      tf.keras.layers.Dense(1)
  ])

  model.compile(loss='mean_absolute_error',
                optimizer=tf.keras.optimizers.Adam(0.001))
  return model

## Regression using a DNN and multiple inputs

In [None]:
# Build the DNN on top of the all-feature normalization layer and print its
# layer summary.
dnn_model = build_and_compile_model(normalizer)
dnn_model.summary()

In [None]:
%%time
history = dnn_model.fit(
    train_features,
    train_labels,
    validation_split=0.2,
    verbose=0, epochs=100)


In [None]:
# Loss curves for the DNN fit above.
plot_loss(history)

In [None]:
# Score the DNN on the held-out test set and record the result. The model is
# compiled with mean absolute error as its loss, so that is what evaluate reports.
test_results['dnn_model'] = dnn_model.evaluate(test_features, test_labels, verbose=0)


In [None]:
# Compare the test-set error of every model collected so far, one row per
# model. The label names the 'Diabetes' column the models predict.
pd.DataFrame(test_results, index=['Mean absolute error [Diabetes]']).T


In [None]:
# Predicted vs. true values on the test set, flattened to a 1-D array.
test_predictions = dnn_model.predict(test_features).flatten()

lims = [0, 50]
# Equal aspect ratio so the identity line is drawn at 45 degrees.
a = plt.axes(aspect='equal')
a.scatter(test_labels, test_predictions)
a.set_xlabel('True Values ')
a.set_ylabel('Predictions ')
a.set_xlim(lims)
a.set_ylim(lims)
# Identity line: points on it are predicted exactly.
_ = a.plot(lims, lims)

In [None]:
#Error Distribution
# Signed prediction error (prediction minus true label) for each test row,
# shown as a 25-bin histogram.
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel('Prediction Error ')
plt.ylabel('Count')

In [None]:
# Persist the trained DNN so the reload in the next cell works on a fresh
# kernel (Restart & Run All).
# NOTE(review): an extension-less path may be rejected by newer Keras releases,
# which expect '.keras' or '.h5' - confirm against the installed version and
# rename here and in the load_model call together if needed.
dnn_model.save('dnn_model')


In [None]:
# Load the saved DNN back from disk and score it on the same test set, to
# compare against the in-memory model's result.
reloaded = tf.keras.models.load_model('dnn_model')

reloaded_test_loss = reloaded.evaluate(x=test_features, y=test_labels, verbose=0)
test_results['reloaded'] = reloaded_test_loss

In [None]:
# Final comparison table: one row per entry in test_results, including the
# reloaded model.
pd.DataFrame(test_results, index=['Mean absolute error ']).T