In [28]:
import pandas as pd
import numpy as np

In [30]:
# Load the cardiovascular dataset from the CSV file in the working directory
# and print its first five rows as a quick look at the available columns.
df = pd.read_csv(filepath_or_buffer="Cardiovascular_Disease_Dataset.csv")
print(df.head(n=5))

   patientid  age  gender  chestpain  restingBP  serumcholestrol  \
0     103368   53       1          2        171                0   
1     119250   40       1          0         94              229   
2     119372   49       1          2        133              142   
3     132514   43       1          0        138              295   
4     146211   31       1          1        199                0   

   fastingbloodsugar  restingrelectro  maxheartrate  exerciseangia  oldpeak  \
0                  0                1           147              0      5.3   
1                  0                1           115              0      3.7   
2                  0                0           202              1      5.0   
3                  1                1           153              0      3.2   
4                  0                2           136              0      5.3   

   slope  noofmajorvessels  target  
0      3                 3       1  
1      1                 1       0  
2    

In [32]:
# Features: every column except the label ("target") and the row identifier
# ("patientid"), which carries no predictive signal of its own.
x_train = df.drop(columns=["target", "patientid"])
# Labels: the "target" column.
y_train = df["target"]
# Show both objects, one after the other.
print(x_train, y_train, sep="\n")

     age  gender  chestpain  restingBP  serumcholestrol  fastingbloodsugar  \
0     53       1          2        171                0                  0   
1     40       1          0         94              229                  0   
2     49       1          2        133              142                  0   
3     43       1          0        138              295                  1   
4     31       1          1        199                0                  0   
..   ...     ...        ...        ...              ...                ...   
995   48       1          2        139              349                  0   
996   47       1          3        143              258                  1   
997   69       1          0        156              434                  1   
998   45       1          1        186              417                  0   
999   25       1          0        158              270                  0   

     restingrelectro  maxheartrate  exerciseangia  oldpeak  slo

In [34]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like of numbers
        Input value(s); converted to a float NumPy array internally.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``z``. Every value lies in [0, 1].
    """
    z = np.asarray(z, dtype=float)
    # exp() is only ever applied to a non-positive number, so the result is in
    # (0, 1] and cannot overflow, unlike exp(-z) for a large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with an empty tuple turns a 0-d array back into a NumPy scalar
    # and leaves arrays with one or more dimensions unchanged.
    return out[()]

In [36]:
def compute_logistic_cost(x,y,w,b):
    """Mean binary cross-entropy of a logistic model over all rows of ``x``.

    Parameters
    ----------
    x : array-like, shape (m, n)
        Feature matrix, one example per row.
    y : array-like, shape (m,)
        Labels. Read positionally, so a pandas Series is accepted whatever
        its index looks like.
    w : array-like, shape (n,)
        Weight vector.
    b : scalar
        Bias term.

    Returns
    -------
    numpy.float64
        Average over the m examples of
        ``-y*log(sigmoid(z)) - (1-y)*log(1-sigmoid(z))`` with ``z = x @ w + b``.
    """
    x = np.asarray(x, dtype=float)
    # Converting to an array makes label access positional rather than
    # index-label based.
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    m = x.shape[0]

    z = x @ w + b
    # -log(sigmoid(z)) = log(1 + exp(-z)) and -log(1 - sigmoid(z)) = log(1 + exp(z)).
    # logaddexp evaluates both without ever forming log(0), so saturated
    # predictions give a finite cost instead of nan.
    per_example = y * np.logaddexp(0.0, -z) + (1.0 - y) * np.logaddexp(0.0, z)
    return per_example.sum() / m
        

In [38]:
def compute_gradient(x,y,w,b):
    """Gradient of the mean logistic cost with respect to ``w`` and ``b``.

    Uses the ``sigmoid`` function defined elsewhere in this notebook.

    Parameters
    ----------
    x : array-like, shape (m, n)
        Feature matrix, one example per row.
    y : array-like, shape (m,)
        Labels. Read positionally, so a pandas Series is accepted whatever
        its index looks like.
    w : array-like, shape (n,)
        Weight vector.
    b : scalar
        Bias term.

    Returns
    -------
    dj_dw : numpy.ndarray, shape (n,)
        Mean over the examples of ``(sigmoid(x_i . w + b) - y_i) * x_i``.
    dj_db : numpy.float64
        Mean over the examples of ``sigmoid(x_i . w + b) - y_i``.
    """
    x = np.asarray(x, dtype=float)
    # Converting to an array makes label access positional rather than
    # index-label based.
    y = np.asarray(y, dtype=float)
    m, _ = x.shape  # also enforces that x is two-dimensional

    # Prediction error for every example at once.
    err = sigmoid(x @ w + b) - y
    # x.T @ err sums err_i * x[i, j] over i for each feature j, which is what
    # the nested Python loops used to accumulate one element at a time.
    dj_dw = x.T @ err / m
    dj_db = err.sum() / m
    return dj_dw, dj_db

In [68]:
def gradient_descent(x,y,w_in,b_in,num_iters,alpha,print_every=1000):
    """Fit ``w`` and ``b`` by batch gradient descent, printing the cost periodically.

    Relies on ``compute_gradient`` and ``compute_logistic_cost`` defined
    elsewhere in this notebook.

    Parameters
    ----------
    x, y
        Training features and labels, passed through unchanged to
        ``compute_gradient`` and ``compute_logistic_cost``.
    w_in
        Initial weights. Not modified: every update builds a new value.
    b_in
        Initial bias.
    num_iters : int
        Number of update steps to run.
    alpha : float
        Learning rate that scales each gradient step.
    print_every : int, optional
        Print the cost on every iteration whose index is a multiple of this
        value (default 1000). ``0`` or ``None`` disables printing.

    Returns
    -------
    (w, b)
        Parameters after ``num_iters`` updates.
    """
    w = w_in
    b = b_in
    for i in range(num_iters):
        dj_dw, dj_db = compute_gradient(x, y, w, b)

        # Both gradients come from the same (w, b), so the update is simultaneous.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # The cost is only needed for the progress line, so evaluate it just
        # for the iterations that are reported. This avoids a full pass over
        # the data on every step and means the printed value is always the
        # cost at the current parameters.
        if print_every and i % print_every == 0:
            cost = compute_logistic_cost(x, y, w, b)
            print(f"Iteration : {i} - Cost : {cost}")

    return w, b

In [112]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The fitted scaler is kept so that new samples can
# later be transformed with the same statistics.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)

# Hyperparameters for gradient descent.
alpha, num_iters = 0.1, 5000

# Start from all-zero parameters: one weight per feature column plus a bias.
n = x_train.shape[1]
w, b = np.zeros(n), 0

w, b = gradient_descent(
    x_train_scaled,
    y_train,
    w_in=w,
    b_in=b,
    num_iters=num_iters,
    alpha=alpha,
)
print(f"w : {w}, b: {b}")

Iteration : 0 - Cost : 0.6502780338088572
Iteration : 1000 - Cost : 0.10984134672046524
Iteration : 2000 - Cost : 0.09795571702663604
Iteration : 3000 - Cost : 0.0935133342600591
Iteration : 4000 - Cost : 0.09130713738592948
w : [-0.05393373  1.05896925  0.97800913  1.01480582  0.07969583  0.37143673
  0.84281971  0.45919117 -0.03776919 -1.60341286  5.68918438  0.16442502], b: 1.5980909328308992


In [154]:
# A single hand-entered sample, listed in the column order of x_train.
x_test = [[53, 1, 2, 171, 0, 0, 1, 147, 0, 5.3, 3, 3]]
# Wrap it in a DataFrame carrying the training column names before scaling.
x_test_df = pd.DataFrame(data=x_test, columns=x_train.columns)
# Reuse the scaler fitted on the training features.
x_test_scaled = scaler.transform(x_test_df)
# Predicted probability for the sample under the learned parameters.
print(sigmoid(x_test_scaled.dot(w) + b))

[0.99995184]


In [156]:
from sklearn.metrics import accuracy_score

# Probability at or above which a sample is labelled 1.
# NOTE(review): 0.4 rather than the conventional 0.5 -- presumably a deliberate
# choice; confirm.
DECISION_THRESHOLD = 0.4

m = x_train.shape[0]

# Predicted probability for every training row in one matrix-vector product,
# instead of a Python loop over the rows.
f_wb = sigmoid(np.dot(x_train_scaled, w) + b)
assert f_wb.shape == (m,), "expected one probability per training row"

# Convert probabilities to 0/1 labels before accuracy
pred_labels = (f_wb >= DECISION_THRESHOLD).astype(int)

# Scored against y_train, i.e. the same rows the parameters were fitted on,
# so this is a training accuracy.
acc = accuracy_score(y_train, pred_labels)
print(f"Accuracy: {acc*100:.2f}%")



Accuracy: 97.20%
