# Notebook for Model Training

## Importing modules

In [1]:
import pandas as pd
import numpy as np

# For visualizations
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

from mlxtend.preprocessing import minmax_scaling

## Importing Dataset

In [2]:
# Load the preprocessed CSV from the working directory and preview the
# first five rows.
dataset = pd.read_csv(filepath_or_buffer="preprocessed_data.csv")
dataset.head(n=5)

Unnamed: 0.1,Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0,53,0,4.0,130.0,264.0,0.0,2.0,143.0,0.0,0.4,2.0,0.0,3.0,0
1,1,56,1,4.0,132.0,184.0,0.0,2.0,105.0,1.0,2.1,2.0,1.0,6.0,1
2,2,58,1,4.0,130.0,0.0,0.0,1.0,100.0,1.0,1.0,2.0,0.676375,6.0,1
3,3,44,1,2.0,130.0,215.0,0.0,0.0,135.0,0.0,0.0,2.0,0.676375,5.087558,0
4,4,53,1,4.0,142.0,226.0,0.0,2.0,111.0,1.0,0.0,1.0,0.0,7.0,0


In [3]:
# Show each column's dtype. Per the output below every column is numeric
# (int64 / float64), and the frame carries an extra `Unnamed: 0` column in
# addition to the features and `target`.
dataset.dtypes

Unnamed: 0      int64
age             int64
sex             int64
cp            float64
trestbps      float64
chol          float64
fbs           float64
restecg       float64
thalach       float64
exang         float64
oldpeak       float64
slope         float64
ca            float64
thal          float64
target          int64
dtype: object

## Scaling and Normalization

#### Extracting columns from the DataFrame

In [6]:
# Pull each feature to be scaled out of `dataset` as its own single-column
# DataFrame (not a Series). `sex` and `target` are not extracted here.
age = dataset['age'].to_frame()
cp = dataset['cp'].to_frame()
trestbps = dataset['trestbps'].to_frame()
chol = dataset['chol'].to_frame()
fbs = dataset['fbs'].to_frame()
restecg = dataset['restecg'].to_frame()
thalach = dataset['thalach'].to_frame()
exang = dataset['exang'].to_frame()
oldpeak = dataset['oldpeak'].to_frame()
slope = dataset['slope'].to_frame()
ca = dataset['ca'].to_frame()
thal = dataset['thal'].to_frame()

#### Applying min-max scaling: each feature is rescaled to the range 0 to 1

In [7]:
# Min-max scale every feature except `sex` with a single call rather than one
# call per column. `minmax_scaling` rescales each listed column independently,
# using that column's own minimum and maximum.
columns_to_scale = ['age', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
scaled_features = minmax_scaling(dataset[columns_to_scale], columns=columns_to_scale)

# `sex` and `target` are copied through unscaled. Selecting the output columns
# first fixes the column order and leaves out the stray `Unnamed: 0` column;
# `assign` then overwrites each feature column, in place, with its scaled values.
output_columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                  'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']
scaled_dataset = dataset[output_columns].assign(
    **{name: scaled_features[name] for name in columns_to_scale}
)
scaled_dataset

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0.510204,0,1.000000,0.650,0.437811,0.000000,1.0,0.584507,0.0,0.340909,0.5,0.000000,0.00000,0
1,0.571429,1,1.000000,0.660,0.305141,0.000000,1.0,0.316901,1.0,0.534091,0.5,0.333333,0.75000,1
2,0.612245,1,1.000000,0.650,0.000000,0.000000,0.5,0.281690,1.0,0.409091,0.5,0.225458,0.75000,1
3,0.326531,1,0.333333,0.650,0.356551,0.000000,0.0,0.528169,0.0,0.295455,0.5,0.225458,0.52189,0
4,0.510204,1,1.000000,0.710,0.374793,0.000000,1.0,0.359155,1.0,0.295455,0.0,0.000000,1.00000,0
5,0.673469,1,0.000000,0.710,0.331675,1.000000,0.5,0.281690,0.0,0.465909,1.0,0.225458,0.52189,1
6,0.387755,1,1.000000,0.550,0.000000,0.166265,0.5,0.626761,0.0,0.534091,0.0,0.225458,0.52189,1
7,0.836735,1,1.000000,0.675,0.000000,0.000000,0.0,0.492958,0.0,0.295455,0.5,0.225458,0.75000,1
8,0.693878,1,1.000000,0.790,0.348259,1.000000,0.0,0.366197,1.0,0.636364,1.0,0.225458,0.52189,1
9,0.367347,1,0.333333,0.700,0.456053,0.000000,0.0,0.739437,1.0,0.295455,0.5,0.225458,0.52189,0


## Saving the scaled dataframe to disk

In [10]:
# Write the scaled frame without its row index. With the default
# (`index=True`) the index is saved as an unnamed first column, which comes
# back as a stray `Unnamed: 0` column on a plain `pd.read_csv` -- the same
# artifact already present in the data loaded from `preprocessed_data.csv`.
# NOTE(review): any existing reader of `scaled_data.csv` that relies on that
# index column (e.g. `index_col=0`) must be updated accordingly.
scaled_dataset.to_csv("scaled_data.csv", index=False)

## Starting model training

In [8]:
import tensorflow as tf

In [9]:
# Start from an empty Sequential model; layers are appended in the cells below.
model = tf.keras.Sequential()

In [10]:
# Hidden layer: 13 ReLU units fed by 13 inputs (13 * 13 weights + 13 biases =
# 182 parameters). Presumably the 13 inputs are the non-target columns of
# `scaled_dataset` -- confirm where the model is fitted.
hidden_layer = tf.keras.layers.Dense(units=13, activation='relu', input_dim=13)
model.add(hidden_layer)

In [11]:
# Output layer: a single sigmoid unit, i.e. one value in (0, 1) per sample.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
model.add(output_layer)

In [14]:
# Configure training: binary cross-entropy loss (matching the single sigmoid
# output), the Adam optimizer, and accuracy reported under the name "acc".
model.compile(
    loss="binary_crossentropy",
    optimizer="Adam",
    metrics=["acc"],
)

In [15]:
# Print the layer-by-layer summary. The output below reports 182 parameters
# for the hidden layer and 14 for the output layer (196 total, all trainable).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 13)                182       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 14        
Total params: 196
Trainable params: 196
Non-trainable params: 0
_________________________________________________________________
