In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the raw table, turn the two binary string columns into 0/1 codes, and
# one-hot encode the multi-valued categorical columns (every level is kept).
df = pd.get_dummies(
    pd.read_csv("heart.csv").assign(
        Sex=lambda frame: frame['Sex'].map({'M': 1, 'F': 0}),
        ExerciseAngina=lambda frame: frame['ExerciseAngina'].map({'N': 0, 'Y': 1}),
    ),
    columns=['ST_Slope', 'ChestPainType', 'RestingECG'],
    drop_first=False,
)

In [3]:
# Shuffle row positions with a fixed seed so that the 80/20 split, and every
# number computed from it, is identical after a kernel restart.
SPLIT_SEED = 42
rng = np.random.default_rng(SPLIT_SEED)

n_entries = len(df)
shuffled_indices = rng.permutation(n_entries)

# The first 80% of the shuffled positions form the training split; the
# remainder is held out for testing.
train_size = int(n_entries * 0.8)
train_indices = shuffled_indices[:train_size]
test_indices = shuffled_indices[train_size:]

# Positions come from a permutation of range(n_entries), so select with iloc.
train_df = df.iloc[train_indices]
test_df = df.iloc[test_indices]

In [4]:
# Display the training split; rows appear in the shuffled order chosen by the split.
train_df

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,HeartDisease,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST
36,65,1,140,306,1,87,1,1.5,1,False,True,False,True,False,False,False,False,True,False
312,41,1,125,0,1,176,0,1.6,1,False,False,True,True,False,False,False,False,True,False
647,37,0,120,215,0,170,0,0.0,0,False,False,True,False,False,True,False,False,True,False
292,53,1,130,182,0,148,0,0.0,0,False,False,True,True,False,False,False,False,True,False
493,51,1,137,339,0,127,1,1.7,1,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
484,65,1,134,0,0,112,1,1.1,1,False,True,False,True,False,False,False,False,True,False
605,51,0,114,258,1,96,0,1.0,0,False,False,True,True,False,False,False,True,False,False
443,60,1,130,186,1,140,1,0.5,1,False,True,False,True,False,False,False,False,False,True
250,44,1,135,491,0,135,0,0.0,1,False,True,False,True,False,False,False,False,True,False


In [5]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), applied element-wise.

    Parameters
    ----------
    z : scalar or array-like of numbers
        Input value(s).

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``z``.
    """
    # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))). np.logaddexp(0, -z) evaluates
    # log(exp(0) + exp(-z)) without ever forming exp(-z) directly, so large
    # negative inputs no longer overflow np.exp.
    y_head = np.exp(-np.logaddexp(0.0, -z))
    return y_head

In [6]:
def Cost(x,y,theta):
    """Mean binary cross-entropy of the logistic model ``sigmoid(x @ theta)``.

    A small constant (1e-5) is added inside each logarithm so that a
    probability of exactly 0 or 1 does not produce ``log(0)``.

    Parameters
    ----------
    x : feature matrix multiplied by ``theta``.
    y : labels; the final ``[0][0]`` indexing requires the summed terms to be
        2-D, so ``y`` is expected to be a column vector.
    theta : weight column.

    Returns
    -------
    The scalar cost taken from the 1x1 result.
    """
    n_rows = len(y)
    probs = sigmoid(x @ theta)
    positive_term = y.T @ np.log(probs + 1e-5)
    negative_term = (1 - y).T @ np.log(1 - probs + 1e-5)
    total = -(1 / n_rows) * (positive_term + negative_term)
    return total[0][0]

In [7]:
def gradientDes(x,y,alpha,iteration):
    """Fit logistic-regression weights with batch gradient descent.

    Starts from an all-zero weight column and, on every iteration, steps the
    weights against the gradient of the mean cross-entropy, then records the
    cost evaluated at the updated weights.

    Parameters
    ----------
    x : 2-D feature matrix; its shape is unpacked as (rows, columns).
    y : labels, subtracted from the (rows, 1) predictions.
    alpha : learning rate multiplying each gradient step.
    iteration : number of update steps to run.

    Returns
    -------
    (theta, prevcosts) : the final weight column of shape (columns, 1) and the
    list of costs, one per iteration.
    """
    n_samples, n_features = x.shape
    weights = np.zeros((n_features, 1))
    cost_trace = []

    for _ in range(iteration):
        residual = sigmoid(x @ weights) - y
        weights -= alpha * ((1/n_samples) * (x.T @ residual))
        # The cost is logged after the step, so entry k reflects k+1 updates.
        cost_trace.append(Cost(x, y, weights))
    return weights, cost_trace

In [8]:
# Separate the training frame into a float feature matrix and an (n, 1) float
# label column.
x = train_df.drop(columns='HeartDisease').values
y = train_df['HeartDisease'].values.reshape(-1, 1)
y = y.astype(float)
x = x.astype(float)

# Per-column statistics of the training split; kept in xmean/xstd so the same
# scaling can be applied to other data later.
xmean = x.mean(axis=0)
xstd = x.std(axis=0)
# A column that is constant within this split has std 0; dividing by it would
# fill the column with NaN and contaminate every gradient step. Using 1 instead
# leaves the centred column as all zeros.
xstd[xstd == 0] = 1.0
x = (x - xmean) / xstd

# Prepend a column of ones so the first weight acts as the intercept.
bias = np.ones((x.shape[0], 1))
x_scaled = np.concatenate([bias, x], axis=1)
x = x_scaled

In [9]:
# Train with a learning rate of 0.1 for 5000 gradient-descent iterations.
theta, cost_history = gradientDes(x, y, alpha=0.1, iteration=5000)

In [12]:
def predict(x, theta):
    """Return hard 0/1 class predictions for the rows of ``x``.

    The model probability ``sigmoid(x @ theta)`` is thresholded at 0.5;
    probabilities equal to 0.5 are assigned to class 1.
    """
    is_positive = sigmoid(x @ theta) >= 0.5
    return is_positive.astype(int)
def TrainingAcc(x,y,label="Training"):
    """Print the accuracy, in percent, of the fitted model on ``(x, y)``.

    Predictions are made with the module-level ``theta``, so training must
    have been run before this is called.

    Parameters
    ----------
    x : feature matrix in the same layout used for training.
    y : true labels, compared element-wise with the predictions.
    label : word printed before "Accuracy". Defaults to "Training" for
        backward compatibility; pass e.g. "Test" when scoring held-out data
        so the printed line is not misleading.
    """
    y_pred = predict(x, theta)
    accuracy = (y_pred == y).mean() * 100
    print(f"{label} Accuracy: {accuracy:.2f}%")

In [14]:
# Labels and features of the held-out split, cast to float.
y_test = test_df['HeartDisease'].values.reshape(-1, 1).astype(float)
x_test = test_df.drop(columns='HeartDisease').values.astype(float)

# Scale with the statistics computed on the training split (xmean, xstd), so
# no information from the test rows enters the scaling.
x_test = (x_test - xmean) / xstd

# Prepend the column of ones that pairs with the intercept weight.
bias = np.ones((x_test.shape[0], 1))
x_test_scaled = np.concatenate([bias, x_test], axis=1)
x_test = x_test_scaled

In [15]:
# Score the model on the held-out test split. TrainingAcc prints a
# "Training Accuracy" label by default, so the figure shown here is test accuracy.
TrainingAcc(x_test,y_test)

Training Accuracy: 85.33%
