In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
from dnn_app_utils_v4 import *

In [3]:
# Load the raw dataset from heart.csv in the current working directory and
# display it (pandas truncates the rendering of long frames on its own).
df = pd.read_csv("heart.csv")
df

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [4]:
# One-hot encode the categorical columns; every other column passes through
# unchanged.
# dtype is pinned to uint8 (the default before pandas 2.0) because newer pandas
# returns bool indicator columns unless told otherwise; pinning keeps the
# indicators as numeric 0/1 regardless of the installed pandas version.
# NOTE: this rebinds `df`, so re-running the cell on the already-encoded frame
# raises a KeyError (the source columns no longer exist).
df = pd.get_dummies(
    df,
    columns=['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope'],
    dtype=np.uint8,
)
df

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,Sex_F,Sex_M,ChestPainType_ASY,...,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,40,140,289,0,172,0.0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,1
1,49,160,180,0,156,1.0,1,1,0,0,...,1,0,0,1,0,1,0,0,1,0
2,37,130,283,0,98,0.0,0,0,1,0,...,0,0,0,0,1,1,0,0,0,1
3,48,138,214,0,108,1.5,1,1,0,1,...,0,0,0,1,0,0,1,0,1,0
4,54,150,195,0,122,0.0,0,0,1,0,...,1,0,0,1,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,110,264,0,132,1.2,1,0,1,0,...,0,1,0,1,0,1,0,0,1,0
914,68,144,193,1,141,3.4,1,0,1,1,...,0,0,0,1,0,1,0,0,1,0
915,57,130,131,0,115,1.2,1,0,1,1,...,0,0,0,1,0,0,1,0,1,0
916,57,130,236,0,174,0.0,1,1,0,0,...,0,0,1,0,0,1,0,0,1,0


In [5]:
# Columns that get z-score standardisation: (value - column mean) / column std.
num_list = ["Age", "RestingBP", "Cholesterol", "FastingBS", "MaxHR", "Oldpeak"]

# NOTE(review): the mean/std are taken over the whole frame, i.e. before the
# train/test split performed further down, so test rows influence the scaling.
df = df.assign(
    **{col: (df[col] - df[col].mean()) / df[col].std() for col in num_list}
)
df.head()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,Sex_F,Sex_M,ChestPainType_ASY,...,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,-1.432359,0.410685,0.824621,-0.551041,1.382175,-0.831979,0,0,1,0,...,0,0,0,1,0,1,0,0,0,1
1,-0.478223,1.49094,-0.171867,-0.551041,0.753746,0.105606,1,1,0,0,...,1,0,0,1,0,1,0,0,1,0
2,-1.750404,-0.129442,0.769768,-0.551041,-1.524307,-0.831979,0,0,1,0,...,0,0,0,0,1,1,0,0,0,1
3,-0.584238,0.30266,0.138964,-0.551041,-1.131539,0.574398,1,1,0,1,...,0,0,0,1,0,0,1,0,1,0
4,0.051853,0.950812,-0.034736,-0.551041,-0.581664,-0.831979,0,0,1,0,...,1,0,0,1,0,1,0,0,0,1


In [6]:
# Separate the frame into the feature matrix (every column except the label)
# and the HeartDisease label column.
X = df.loc[:, df.columns != "HeartDisease"]
Y = df.loc[:, "HeartDisease"]

In [7]:
# Stratified 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=123
)

# Re-orient everything to (rows = features / label, columns = examples):
# features are transposed, labels become (1, m) row vectors.
X_train, X_test = X_train.T, X_test.T
Y_train = np.asarray(y_train).reshape(1, -1)
Y_test = np.asarray(y_test).reshape(1, -1)

In [8]:
# Sanity check: report the shape of each split, one per line.
print(
    *(
        f"{label} {data.shape}"
        for label, data in (
            ("Training X: ", X_train),
            ("Training Y: ", Y_train),
            ("Testing X: ", X_test),
            ("Testing Y: ", Y_test),
        )
    ),
    sep="\n",
)

Training X:  (20, 734)
Training Y:  (1, 734)
Testing X:  (20, 184)
Testing Y:  (1, 184)


In [9]:
def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, lamb= 0.5, print_cost=False):
    """
    Train an L-layer neural network with batch gradient descent.

    Every iteration runs forward propagation, computes the cost, runs backward
    propagation and applies one parameter update. All five steps are delegated
    to helpers that are not defined in this notebook (presumably supplied by
    the dnn_app_utils_v4 star import); the architecture is described there as
    [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    X -- input features, laid out as (number of features, number of examples)
    Y -- true binary label vector, of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    lamb -- forwarded unchanged to compute_cost and L_model_backward;
            presumably the L2 regularization strength (confirm in the helper module)
    print_cost -- if True, prints the cost at every recorded checkpoint
                  (every 100 iterations and at the final iteration)

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    costs -- list of the costs recorded every 100 iterations and at the final iteration
    """

    # Seeds numpy's global RNG so that parameter initialization is repeatable.
    np.random.seed(3)
    costs = []                         # cost recorded at each checkpoint

    # Parameters initialization.
    parameters = initialize_parameters_deep(layers_dims)

    # Loop (gradient descent)
    for i in range(num_iterations):
        # Forward propagation.
        AL, caches = L_model_forward(X, parameters)
        # Cost of the parameters *before* this iteration's update.
        cost = compute_cost(AL, Y, parameters, lamb)
        # Backward propagation.
        grads = L_model_backward(AL, Y, caches, lamb)
        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # A checkpoint is every 100th iteration plus the very last one. One
        # shared condition keeps printing and recording in sync: nothing is
        # printed unless print_cost is set, and the final cost is recorded
        # (the loop index never reaches num_iterations itself).
        is_checkpoint = i % 100 == 0 or i == num_iterations - 1
        if is_checkpoint:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))

    return parameters, costs

In [10]:
# Width of the network's input layer: the number of rows of the transposed
# feature matrix, i.e. one per feature.
input_size = len(X_train)
input_size

20

In [11]:
# Layer sizes: input layer -> hidden layers of 16 and 4 units -> one output unit.
# The input width comes from input_size (X_train.shape[0], 20 for this data)
# instead of a hard-coded literal, so it follows the encoded feature count.
layers_dims = [input_size, 16, 4, 1]

parameters, costs = L_layer_model(
    X_train,
    Y_train,
    layers_dims,
    learning_rate=0.01,
    num_iterations=15000,
    lamb=0.9,
    print_cost=True,
)

Cost after iteration 0: 0.8345876085585237
Cost after iteration 100: 0.72914025192822
Cost after iteration 200: 0.6967096062396183
Cost after iteration 300: 0.6618054142676575
Cost after iteration 400: 0.6236663243504347
Cost after iteration 500: 0.5883915836269332
Cost after iteration 600: 0.5574175828283977
Cost after iteration 700: 0.5299329274579415
Cost after iteration 800: 0.5061759970665944
Cost after iteration 900: 0.4859857479121588
Cost after iteration 1000: 0.4685670848519583
Cost after iteration 1100: 0.45372138540993906
Cost after iteration 1200: 0.44122212282973383
Cost after iteration 1300: 0.4306739624627124
Cost after iteration 1400: 0.4216647839571669
Cost after iteration 1500: 0.4139757024459162
Cost after iteration 1600: 0.4073320167827372
Cost after iteration 1700: 0.4014938672186916
Cost after iteration 1800: 0.3963847613125429
Cost after iteration 1900: 0.3918101857290017
Cost after iteration 2000: 0.3876305839627428
Cost after iteration 2100: 0.38382823308776415

In [12]:
# Evaluate the trained parameters on the training split. `predict` is not
# defined in this notebook (presumably from the dnn_app_utils_v4 star import);
# the recorded output shows that it prints an accuracy figure.
# NOTE(review): the return value is presumably the predicted labels - confirm.
pred_train = predict(X_train,Y_train, parameters)

Accuracy: 0.9318801089918254


In [13]:
# Evaluate the same parameters on the held-out test split; as on the training
# split, the recorded output shows `predict` printing an accuracy figure.
# NOTE(review): the return value is presumably the predicted labels - confirm.
pred_test = predict(X_test,Y_test, parameters)

Accuracy: 0.8804347826086956
