## Notebook for Model Training

#### Load dataset

In [2]:
import pandas as pd
import numpy as np

#For visualizations
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

from mlxtend.preprocessing import minmax_scaling


In [4]:
# Read the preprocessed table from the working directory and preview its first rows.
dataset = pd.read_csv(filepath_or_buffer="preprocessed_data.csv")
dataset.head(n=5)


Unnamed: 0.1,Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0,53,0,4.0,130.0,264.0,0.0,2.0,143.0,0.0,0.4,2.0,0.0,3.0,0
1,1,56,1,4.0,132.0,184.0,0.0,2.0,105.0,1.0,2.1,2.0,1.0,6.0,1
2,2,58,1,4.0,130.0,0.0,0.0,1.0,100.0,1.0,1.0,2.0,0.676375,6.0,1
3,3,44,1,2.0,130.0,215.0,0.0,0.0,135.0,0.0,0.0,2.0,0.676375,5.087558,0
4,4,53,1,4.0,142.0,226.0,0.0,2.0,111.0,1.0,0.0,1.0,0.0,7.0,0


In [6]:
# Show the dtype pandas inferred for each column of `dataset`.
dataset.dtypes

Unnamed: 0      int64
age             int64
sex             int64
cp            float64
trestbps      float64
chol          float64
fbs           float64
restecg       float64
thalach       float64
exang         float64
oldpeak       float64
slope         float64
ca            float64
thal          float64
target          int64
dtype: object

#### Scaling and Normalization

In [7]:
# Wrap each feature column of `dataset` in its own single-column DataFrame.
# `sex` and `target` are intentionally not wrapped here.
(age, cp, trestbps, chol, fbs, restecg,
 thalach, exang, oldpeak, slope, ca, thal) = (
    pd.DataFrame(dataset[column])
    for column in ('age', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                   'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal')
)

In [9]:
# Features rescaled to the [0, 1] range; `sex` and `target` are passed through unchanged.
scaled_columns = ['age', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                  'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']

# minmax_scaling rescales every listed column independently (per-column min/max),
# so one call over all features yields the same values as one call per column.
scaled_features = minmax_scaling(dataset[scaled_columns], columns=scaled_columns)

# Reassemble in the original column order: age, sex, <remaining features>, target.
output_columns = ['age', 'sex'] + scaled_columns[1:] + ['target']
scaled_dataset = pd.concat(
    [scaled_features, dataset[['sex', 'target']]], axis=1
)[output_columns]
scaled_dataset

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0.510204,0,1.000000,0.65,0.437811,0.0,1.0,0.584507,0.0,0.340909,0.5,0.000000,0.000000,0
1,0.571429,1,1.000000,0.66,0.305141,0.0,1.0,0.316901,1.0,0.534091,0.5,0.333333,0.750000,1
2,0.612245,1,1.000000,0.65,0.000000,0.0,0.5,0.281690,1.0,0.409091,0.5,0.225458,0.750000,1
3,0.326531,1,0.333333,0.65,0.356551,0.0,0.0,0.528169,0.0,0.295455,0.5,0.225458,0.521889,0
4,0.510204,1,1.000000,0.71,0.374793,0.0,1.0,0.359155,1.0,0.295455,0.0,0.000000,1.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5515,0.734694,1,1.000000,0.67,0.452736,0.0,0.0,0.295775,1.0,0.750000,1.0,0.225458,0.521889,1
5516,0.367347,0,1.000000,0.65,0.394693,0.0,0.0,0.211268,0.0,0.295455,0.5,0.225458,0.521889,0
5517,0.775510,1,1.000000,0.80,0.378109,0.0,1.0,0.549296,0.0,0.556818,0.0,0.000000,0.750000,0
5518,0.510204,0,0.666667,0.60,0.454395,0.0,0.0,0.492958,0.0,0.295455,0.5,0.225458,0.521889,0


In [10]:
# Persist the scaled table without the row index: saving the index adds an
# unnamed leading column that reappears as `Unnamed: 0` when the CSV is read back.
scaled_dataset.to_csv("scaled_data.csv", index=False)