<h2> Multivariate Logistic Regression Implementation </h2>

In [6]:
import numpy as np
import pandas as pd
import kagglehub
kagglehub.dataset_download("dileep070/heart-disease-prediction-using-logistic-regression")
# Load dataset
path = '/kaggle/input/heart-disease-prediction-using-logistic-regression/framingham.csv'
df = pd.read_csv(path)

Using Colab cache for faster access to the 'heart-disease-prediction-using-logistic-regression' dataset.


'/kaggle/input/heart-disease-prediction-using-logistic-regression'

In [111]:
# Target variable
target = 'TenYearCHD'
X = df.drop(target, axis = 1)
y = df[target].copy()

# Seperate train and test sets
X_train = X[:int(0.9 * len(X))].to_numpy()
X_test = X[int(0.9 * len(X)):].to_numpy()
y_train = y[:int(0.9 * len(y))].to_numpy()
y_test = y[int(0.9 * len(y)):].to_numpy()

# Standerdize X
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [115]:
class LogisticRegression:
    def __init__(self, lr : float = 0.01, input_dim : int = 1) -> None:
        self.lr = lr
        self.input_dim = input_dim
        self.w = np.random.rand(input_dim)
        self.b = np.random.rand()
        return

    def __sigmoid__(self, x : np.ndarray) -> np.ndarray:
        x = np.clip(x, -250, 250)
        return 1 / (1 + np.exp(-x))

    def forward(self, X : np.ndarray) -> np.ndarray:
        y = self.__sigmoid__(X @ self.w.T + self.b)
        return y

    def __optimize__(self, grad_w : np.ndarray, grad_b : np.float64):
        self.b -= self.lr * grad_b
        self.w -= self.lr * grad_w

    def fit(self, X : np.ndarray, y : np.ndarray, epochs : int, X_test : np.ndarray, y_test : np.ndarray) -> None:
        # Avoid nans can be difficult to debug
        X = np.nan_to_num(X)
        for epoch in range(1, epochs + 1):
            # predict train data
            preds = self.forward(X)
            # get gradients
            grad_w = (1/len(X)) * X.T @ (preds - y)
            grad_b = (preds - y).mean()
            # get loss
            loss = -(y * np.log((preds >= 0.5) + 1e-9) + (1-y)*np.log(1 - (preds >= 0.5) + 1e-9)).mean()
            pred_test = self.forward(X_test)
            # get accuracy on test data
            acc_test = ((pred_test >= 0.5) == y_test).mean()
            # tune parameters
            self.__optimize__(grad_w, grad_b)
            if epoch % 20 == 0:
                print(f'Epoch {epoch} / {epochs} Train: {loss} Accuracy: {acc_test}')

model = LogisticRegression(input_dim = X_train.shape[1])
model.fit(X_train, y_train, 1000, X_test, y_test)

Epoch 20 / 1000 Train: 9.50314419159787 Accuracy: 0.5259433962264151
Epoch 40 / 1000 Train: 9.34014000304559 Accuracy: 0.5330188679245284
Epoch 60 / 1000 Train: 9.204303179252024 Accuracy: 0.5377358490566038
Epoch 80 / 1000 Train: 9.101067193168916 Accuracy: 0.5377358490566038
Epoch 100 / 1000 Train: 8.88916174805095 Accuracy: 0.5400943396226415
Epoch 120 / 1000 Train: 8.69899019473996 Accuracy: 0.5471698113207547
Epoch 140 / 1000 Train: 8.574020316849879 Accuracy: 0.5518867924528302
Epoch 160 / 1000 Train: 8.449050438959798 Accuracy: 0.5566037735849056
Epoch 180 / 1000 Train: 8.324080561069717 Accuracy: 0.5613207547169812
Epoch 200 / 1000 Train: 8.150209426613953 Accuracy: 0.5683962264150944
Epoch 220 / 1000 Train: 7.905703143785534 Accuracy: 0.5754716981132075
Epoch 240 / 1000 Train: 7.704664644571057 Accuracy: 0.5943396226415094
Epoch 260 / 1000 Train: 7.492759199453094 Accuracy: 0.6084905660377359
Epoch 280 / 1000 Train: 7.2482529166246765 Accuracy: 0.625
Epoch 300 / 1000 Train: 7.