Logistic Regression Using NumPy

In [1]:
import numpy as np

class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    lr : float, default=0.1
        Step size applied to every gradient update.
    epochs : int, default=1000
        Number of full passes over the training data.

    Attributes
    ----------
    w : numpy.ndarray of shape (n_features,)
        Learned weights; set by ``fit``.
    b : float
        Learned intercept; set by ``fit``.
    """

    def __init__(self, lr=0.1, epochs=1000):
        self.lr = lr
        self.epochs = epochs

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Only ``exp(-|z|)`` is ever evaluated, so the exponent is never
        positive and the computation cannot overflow for large ``|z|``.

        Parameters
        ----------
        z : float or array-like
            Linear scores.

        Returns
        -------
        numpy.float64 or numpy.ndarray
            Values in ``[0, 1]`` with the same shape as ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always in (0, 1]
        probs = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # arrays of any other rank untouched.
        return probs[()]

    def fit(self, X, y):
        """Learn ``w`` and ``b`` by gradient descent on the mean log-loss.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets encoded as 0 / 1.

        Returns
        -------
        LogisticRegressionScratch
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from the
            length of ``y``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y so a column vector cannot broadcast against the 1-D
        # predictions into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim} dimension(s)"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.epochs):
            linear_output = np.dot(X, self.w) + self.b
            residual = self.sigmoid(linear_output) - y

            # Gradients of the mean log-loss with respect to w and b.
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            self.w -= self.lr * dw
            self.b -= self.lr * db

        return self

    def predict_proba(self, X):
        """Return the estimated probability of class 1 for each row of ``X``.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            A single probability per sample (class 1 only).
        """
        X = np.asarray(X, dtype=float)
        return self.sigmoid(np.dot(X, self.w) + self.b)

    def predict(self, X):
        """Return hard 0 / 1 labels by thresholding ``predict_proba`` at 0.5."""
        proba = self.predict_proba(X)
        return np.where(proba >= 0.5, 1, 0)


In [2]:
import numpy as np

# Feature: Age (one column, one row per customer)
X = np.array([[22], [25], [47], [52], [46], [56], [48], [29]])

# Target: Purchased (0 = No, 1 = Yes)
y = np.array([0, 0, 1, 1, 1, 1, 1, 0])

# Create and train the from-scratch model with its default lr / epochs
model = LogisticRegressionScratch()
model.fit(X, y)

# Hard labels (threshold 0.5) and the underlying probabilities
y_pred = model.predict(X)
y_proba = model.predict_proba(X)

# Output
print("Predicted Labels (0 or 1):", y_pred)
# The scratch predict_proba returns a single 1-D vector of P(class 1), not a
# two-column (class 0, class 1) matrix, so the label says exactly that.
print("\nPredicted Probabilities (class 1):\n", y_proba)

Predicted Labels (0 or 1): [0 0 1 1 1 1 1 0]

Predicted Probabilities (class 0, class 1):
 [0.00505827 0.01398256 0.96326658 0.99314776 0.94905457 0.99824588
 0.97362412 0.05274324]


Using Scikit-learn

In [3]:
from sklearn.linear_model import LogisticRegression

# Fit scikit-learn's reference implementation on the same X / y.
# Estimator.fit returns the estimator itself, so construction and fitting chain.
model = LogisticRegression().fit(X, y)

# One row per sample: column 0 is P(class 0), column 1 is P(class 1).
y_proba = model.predict_proba(X)
# Hard 0 / 1 class predictions.
y_pred = model.predict(X)

print("Predicted Labels:", y_pred)
print("Predicted Probabilities:", y_proba)

Predicted Labels: [0 0 1 1 1 1 1 0]
Predicted Probabilities: [[9.99225286e-01 7.74714393e-04]
 [9.96733566e-01 3.26643440e-03]
 [7.76836203e-03 9.92231638e-01]
 [7.08023554e-04 9.99291976e-01]
 [1.25004461e-02 9.87499554e-01]
 [1.03662438e-04 9.99896338e-01]
 [4.81888206e-03 9.95181118e-01]
 [9.78093939e-01 2.19060609e-02]]


Metrics

In [4]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# y_pred is whatever the most recently executed cell assigned; when the
# notebook is run top to bottom that is the scikit-learn model's predictions.
accuracy = accuracy_score(y, y_pred)
conf_matrix = confusion_matrix(y, y_pred)
report = classification_report(y, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)


Accuracy: 1.0
Confusion Matrix:
 [[3 0]
 [0 5]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         5

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8

