In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [17]:

data = pd.read_csv('../data/framingham.csv')
data.head()


Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [3]:
print(data.isnull().sum())

male                 0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
TenYearCHD           0
dtype: int64


In [4]:
data.fillna(data.mean(), inplace=True)

In [18]:
X = data.drop('TenYearCHD', axis=1)  # Assuming 'TenYearCHD' is the target column
y = data['TenYearCHD']

In [19]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [32]:
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)



Shape of X_train: (2966, 15)
Shape of y_train: (2966,)


Step 1: Initialize Parameters

In [39]:
def initialize_parameters(n_features):
    weights = np.zeros(n_features)  # Shape: (15,)
    bias = 0.0  # Scalar
    return weights, bias


Step 2: Sigmoid Function

In [21]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


Step 3: Model Prediction

In [40]:
def predict(X, weights, bias):
    z = np.dot(X, weights) + bias  # z has shape (2966,)
    return sigmoid(z)  # y_pred has shape (2966,)


Step 4: Cost Function

In [41]:
def compute_cost(y_true, y_pred):
    m = y_true.shape[0]  # m = 2966
    y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)  # Ensure y_pred is in range (0, 1)
    cost = -1/m * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))  # Cost is scalar
    return cost


Step 5: Gradient Descent

In [42]:
def gradient_descent(X, y_true, y_pred, weights, bias, learning_rate):
    m = X.shape[0]  # m = 2966
    dw = 1/m * np.dot(X.T, (y_pred - y_true))  # dw shape: (15,)
    db = 1/m * np.sum(y_pred - y_true)  # db is scalar
    weights -= learning_rate * dw  # Update weights, shape (15,)
    bias -= learning_rate * db  # Update bias, scalar
    return weights, bias


Step 6: Training the Model

In [43]:
def train(X, y, learning_rate=0.01, epochs=1000):
    n_features = X.shape[1]
    weights, bias = initialize_parameters(n_features)
    
    for i in range(epochs):
        # Get predictions
        y_pred = predict(X, weights, bias)
        
        # Compute cost
        cost = compute_cost(y, y_pred)
        
        # Update parameters
        weights, bias = gradient_descent(X, y, y_pred, weights, bias, learning_rate)
        
        # Print the cost every 100 iterations
        if i % 100 == 0:
            print(f"Epoch {i}: Cost = {cost}")
    
    return weights, bias


In [36]:
def predict_class(X, weights, bias, threshold=0.5):
    y_pred = predict(X, weights, bias)
    return (y_pred >= threshold).astype(int)


In [44]:
def evaluate(y_true, y_pred):
    accuracy = np.mean(y_true == y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")


In [45]:
# Training the model
weights, bias = train(X_train, y_train, learning_rate=0.01, epochs=1000)

# Making predictions
y_pred_test = predict_class(X_test, weights, bias)

# Evaluating accuracy
evaluate(y_test, y_pred_test)


Epoch 0: Cost = 0.592423500579724
Epoch 100: Cost = -0.0
Epoch 200: Cost = -0.0
Epoch 300: Cost = -0.0
Epoch 400: Cost = -0.0
Epoch 500: Cost = -0.0
Epoch 600: Cost = -0.0
Epoch 700: Cost = -0.0
Epoch 800: Cost = -0.0
Epoch 900: Cost = -0.0
Accuracy: 85.22%
