# Problem Definition
Given clinical parameters about a patient, can we predict whether or not they have heart disease?

# Features
Explanation of fields in dataset

## Data Dictionary
1. `age` - age in years
2. `sex` - (1 = male; 0 = female)
3. `cp` - chest pain type
    * 0: Typical angina
    * 1: Atypical angina
    * 2: Non-anginal pain
    * 3: Asymptomatic
4. `trestbps` - resting blood pressure (in mm Hg on admission to the hospital) 
5. `chol` - Serum cholesterol in mg/dl
6. `fbs` - (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)
7. `restecg` - resting electrocardiographic results
    * 0: Nothing to note
    * 1: ST-T Wave abnormality
    * 2: Possible or definite left ventricular hypertrophy
8. `thalach` - maximum heart rate achieved
9. `exang` - exercise induced angina (1 = yes; 0 = no)
10. `oldpeak` - ST depression induced by exercise relative to rest; reflects the stress on the heart during exercise (an unhealthy heart is stressed more)
11. `slope` - the slope of the peak exercise ST segment
    * 0: Upsloping: better heart rate with exercise (uncommon)
    * 1: Flatsloping: minimal change (typical healthy heart)
    * 2: Downsloping: signs of unhealthy heart
12. `ca` - number of major vessels (0-3) colored by fluoroscopy
    * colored vessel means the doctor can see the blood passing through
    * the more blood movement the better (no clots)
13. `thal` - thallium stress test result
    * 1,3: normal
    * 6: fixed defect: used to be a defect but OK now
    * 7: reversible defect: no proper blood movement when exercising
14. `target` - have disease or not (1=yes, 0=no) (= the predicted attribute)

# Introduction
First, load the appropriate libraries.

In [1]:

!pip install -q seaborn

!pip install -q git+https://github.com/tensorflow/docs

import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from keras.utils.generic_utils import get_custom_objects

print("Tensorflow sürümü:",tf.__version__)

import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

[33m  DEPRECATION: tensorflow-docs was installed using the legacy 'setup.py install' method, because a wheel could not be built for it. A possible replacement is to fix the wheel build issue reported above. You can find discussion regarding this at https://github.com/pypa/pip/issues/8368.[0m


ModuleNotFoundError: No module named 'keras'

In [None]:
dataset_path = "../data/heart.csv"

# Column labels applied to the CSV, in file order; "Diagnose" is the target.
column_names = [
    "Age", "Gender", "Angina", "Rest_BP", "Cholesterole", "Fasting_BS", "ECG",
    "Stress_BPM", "SI_Angina", "Stress_STDep", "Slope", "Colored_Vessels",
    "Thalium", "Diagnose",
]

# NOTE(review): `names=` without `header=0` treats the first line of the file
# as data. If heart.csv ships with a header row, that row ends up in the frame
# and every column is read as strings — confirm against the actual file.
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    sep=",",
    comment='\t',
    skipinitialspace=True,
)

# Work on a copy so the frame as loaded stays available in `raw_dataset`.
df = raw_dataset.copy()
df.head()

In [None]:
# Print column dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Readable labels for the integer-coded categorical columns. A code that is not
# listed here maps to None, gets no dummy column below, and is therefore
# represented by all zeros across that column's dummies.
CATEGORY_LABELS = {
    'Gender': {0: 'Female', 1: 'Male'},
    'Angina': {0: 'Angina', 1: 'Atypical_Angina', 2: 'Non-Anginal'},
    'Slope': {0: 'Upsloping', 1: 'Flatsloping', 2: 'Downsloping'},
    'Thalium': {6: 'Thalium_Fixed', 7: 'Thalium_Reversable'},
}

# Start again from the raw frame so this cell can be re-run safely: once the
# columns are one-hot encoded, 'Gender' etc. no longer exist in `df`.
df = raw_dataset.copy()
for column, labels in CATEGORY_LABELS.items():
    df[column] = df[column].map(labels.get)

# dtype=float keeps the dummy columns numeric. pandas 2.x emits bool dummies by
# default, which `DataFrame.describe()` skips, so they would have no mean/std
# and would turn into NaN when the features are normalized.
df = pd.get_dummies(df, prefix='', prefix_sep='', dtype=float)
df.head()

In [None]:
# Reproducible 80/20 split: sample the training rows with a fixed seed and use
# every row that was not sampled as the test set.
TRAIN_FRACTION = 0.8
SPLIT_SEED = 0

train_dataset = df.sample(frac=TRAIN_FRACTION, random_state=SPLIT_SEED)
test_dataset = df.drop(index=train_dataset.index)

In [None]:
# Pairwise relationships between the continuous features of the training split,
# with kernel-density estimates on the diagonal.
pairplot_columns = ["Age", "Cholesterole", "Stress_BPM", "Rest_BP"]
sns.pairplot(train_dataset[pairplot_columns], diag_kind="kde")

In [None]:
# Summary statistics of the training features, one row per feature. The target
# column is removed because it must not be normalized.
train_stats = (
    train_dataset
    .describe()
    .drop(columns="Diagnose")
    .transpose()
)
train_stats

In [None]:
# Detach the target column from both splits (train first, then test). `pop`
# removes 'Diagnose' from the frames in place, leaving only the features.
train_labels, test_labels = (
    split.pop('Diagnose') for split in (train_dataset, test_dataset)
)

# Normalize Data
def norm(x, stats=None):
  """Standardize the columns of `x` to zero mean and unit variance.

  Args:
    x: DataFrame of features to normalize.
    stats: Frame indexed by feature name with 'mean' and 'std' columns.
      Defaults to the notebook-level `train_stats`, so both splits are scaled
      with the training statistics.

  Returns:
    A DataFrame with `(x - mean) / std` computed per column.
  """
  if stats is None:
    stats = train_stats
  # A constant column has std == 0; divide by 1 instead so it maps to 0.0
  # rather than NaN/inf, which would poison training.
  safe_std = stats['std'].replace(0, 1)
  return (x - stats['mean']) / safe_std

# Scale both splits with the same normalization function.
normed_train_data, normed_test_data = (
    norm(split) for split in (train_dataset, test_dataset)
)

# Building Model

In [None]:
def build_model():
  """Create and compile the fully connected network used in this notebook.

  The input width is the number of columns currently in `train_dataset`, so
  the target column must already have been removed from it. Four tanh hidden
  layers (64, 32, 16, 16 units) feed a single sigmoid output unit.

  Returns:
    A compiled `keras.Sequential` model using MSE loss, RMSprop with a 0.001
    learning rate, and MAE/MSE metrics.
  """
  n_features = len(train_dataset.keys())

  stack = [layers.Dense(64, activation='tanh', input_shape=[n_features])]
  stack.extend(layers.Dense(width, activation='tanh') for width in (32, 16, 16))
  stack.append(layers.Dense(1, activation='sigmoid'))
  net = keras.Sequential(stack)

  # NOTE(review): MSE on a sigmoid output is kept as in the original; binary
  # cross-entropy is the more usual loss for a 0/1 target — consider switching.
  net.compile(
      loss='mse',
      optimizer=tf.keras.optimizers.RMSprop(0.001),
      metrics=['mae', 'mse'],
  )
  return net

model = build_model()
model.summary()

Try the untrained model on a short batch of training data.

In [None]:
# Run the first ten normalized training rows through the model and display the
# resulting predictions.
example_batch = normed_train_data.iloc[:10]
example_result = model.predict(example_batch)
example_result

In [None]:
EPOCHS = 500

# Keras' own logging is disabled (verbose=0); the EpochDots callback is
# attached instead. 20% of the training rows are held out for validation.
fit_options = dict(
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)
history = model.fit(normed_train_data, train_labels, **fit_options)

In [None]:
# Per-epoch training/validation metrics as a table, with the epoch number
# appended as the last column.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [None]:
# Plotter reused by the training-history plots in the following cells.
# NOTE(review): smoothing_std presumably controls how strongly the curves are
# smoothed — confirm against the tensorflow_docs documentation.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# Training vs. validation mean absolute error per epoch. The target is the 0/1
# `Diagnose` label, so the axis is labelled with it (the earlier "MPG" label
# was a leftover from a fuel-economy example).
plotter.plot({'Basic': history}, metric = "mae")
plt.ylim([0, 1])
plt.ylabel('MAE [Diagnose]');

In [None]:
# Training vs. validation mean squared error per epoch. The target is the 0/1
# `Diagnose` label, so the axis is labelled with it (the earlier "MPG^2" label
# was a leftover from a fuel-economy example).
plotter.plot({'Basic': history}, metric = "mse")
plt.ylim([0, 1])
plt.ylabel('MSE [Diagnose^2]');

In [None]:
# Retrain from scratch, this time stopping early.
model = build_model()

# Halt training once val_loss has gone `patience` epochs without improving.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)
training_callbacks = [early_stop, tfdocs.modeling.EpochDots()]

early_history = model.fit(
    normed_train_data,
    train_labels,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=training_callbacks,
)

In [None]:
# MAE curves for the early-stopped run. The target is the 0/1 `Diagnose`
# label, so the axis is labelled with it rather than "MPG".
plotter.plot({'Early Stopping': early_history}, metric = "mae")
plt.ylim([0, 1])
plt.ylabel('MAE [Diagnose]');

In [None]:
# Score the model on the held-out test split. The three return values follow
# the compile() configuration: the loss, then the 'mae' and 'mse' metrics.
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)

# The error is measured on the 0/1 Diagnose label, so the unit is not "MPG".
print("Testing set Mean Abs Error: {:5.2f} Diagnose".format(mae))

In [None]:
# Model outputs for the test split, flattened to a 1-D array.
test_predictions = model.predict(normed_test_data).flatten()

# Predicted score against the true 0/1 label, on a square [0, 1] plot with the
# identity line for reference. The axes are labelled with the real target
# (`Diagnose`) rather than the leftover "MPG".
lims = [0, 1]
ax = plt.axes(aspect='equal')
ax.scatter(test_labels, test_predictions)
ax.set_xlabel('True Values [Diagnose]')
ax.set_ylabel('Predictions [Diagnose]')
ax.set_xlim(lims)
ax.set_ylim(lims)
ax.plot(lims, lims);


In [None]:

# Distribution of the signed prediction error (prediction minus true label).
error = test_predictions - test_labels
plt.hist(error, bins = 100)

# The error is measured on the 0/1 Diagnose label, so the unit is not "MPG".
plt.xlabel("Prediction Error [Diagnose]")
plt.ylabel("Count");

In [None]:

# Mean, standard deviation, and number of test-set prediction errors.
for summary_value in (np.mean(error), np.std(error), len(error)):
    print(summary_value)

# Persist the trained model to an HDF5 file.
model.save('trained_model.h5')

In [None]:
# True label and model output side by side, indexed like the test split.
test_dataset_merged = pd.DataFrame(
    {'Diagnose': test_labels, 'Prediction': test_predictions}
)

# Lift the display limits temporarily so every test row is printed.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(test_dataset_merged)