<a href="https://colab.research.google.com/github/ishanjabade/BML_ISHAN/blob/main/Experiment_4_Logisitic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Experiment 4: Build a Logisitic Regression Model on suitable dataset.**

**Dataset Used: Heart Disease Dataset**

Step 1: Load And Prepare Dataset

In [4]:
import pandas as pd
import numpy as np

df = pd.read_csv('/content/heart.csv')

print(df.head())
print(df.shape)

# Check columns
print(df.columns)

# Target column is "target", convert to 0/1 (already 0/1)
X = df.drop("target", axis=1)
y = df["target"]

# Scale
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Training shape:", X_train.shape)
print("Test shape:", X_test.shape)


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  
(1025, 14)
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')
Training shape: (820, 13)
Test shape: (205, 13)


**Step 2: Manual Implementation**

In [7]:
import pandas as pd
import numpy as np

# Load Kaggle heart.csv
df = pd.read_csv('/content/heart.csv')

# Separate features and target
X = df.drop("target", axis=1).values
y = df["target"].values.reshape(-1, 1)

# Scale features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic Regression from scratch
def train_logistic_regression(X, y, lr=0.01, epochs=2000):
    m, n = X.shape
    W = np.zeros((n, 1))
    b = 0

    for i in range(epochs):
        # Forward pass
        z = np.dot(X, W) + b
        y_pred = sigmoid(z)

        # Loss: binary cross entropy
        cost = -(1/m) * np.sum(y * np.log(y_pred + 1e-9) +
                               (1-y) * np.log(1-y_pred + 1e-9))

        # Gradients
        dW = (1/m) * np.dot(X.T, (y_pred - y))
        db = (1/m) * np.sum(y_pred - y)

        # Update
        W -= lr * dW
        b -= lr * db

        if i % 200 == 0:
            print(f"Epoch {i}, Loss = {cost:.4f}")

    return W, b

# Train
W, b = train_logistic_regression(X_train, y_train)

# Prediction
def predict(X, W, b):
    z = np.dot(X, W) + b
    y_pred = sigmoid(z)
    return (y_pred >= 0.5).astype(int)

# Test accuracy
y_pred_test = predict(X_test, W, b)
accuracy = np.mean(y_pred_test == y_test)

print("Manual Logistic Regression Accuracy:", accuracy)


Epoch 0, Loss = 0.6931
Epoch 200, Loss = 0.4337
Epoch 400, Loss = 0.3826
Epoch 600, Loss = 0.3625
Epoch 800, Loss = 0.3520
Epoch 1000, Loss = 0.3458
Epoch 1200, Loss = 0.3417
Epoch 1400, Loss = 0.3388
Epoch 1600, Loss = 0.3368
Epoch 1800, Loss = 0.3353
Manual Logistic Regression Accuracy: 0.7902439024390244
