# Day 2 — Logistic Regression (NumPy from scratch)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as SkLogReg
np.random.seed(42)
# Data: synthetic binary classification problem with 2 informative features.
X, y = make_classification(n_samples=600, n_features=2, n_redundant=0, n_informative=2,
                           n_clusters_per_class=1, class_sep=1.5, random_state=42)
# Split BEFORE scaling: fitting the scaler on all rows would let test-set
# statistics (mean/std) leak into the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler().fit(X_train)  # statistics come from the train split only
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Keep X in the same scaled space; the decision-boundary plot uses it for axis bounds.
X = scaler.transform(X)
# Helpers
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Evaluated in a numerically stable way: ``exp`` is only ever applied to
    non-positive values, so large-magnitude negative inputs cannot overflow
    (the naive form evaluates ``exp(-z)``, which overflows for very
    negative ``z``).

    Args:
        z: Scalar or array-like of real values.

    Returns:
        The sigmoid of ``z``, in [0, 1], with the same shape as ``z``
        (a NumPy scalar when ``z`` is a scalar).
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # argument is <= 0, so the result never exceeds 1
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    return out[()]  # unwrap 0-d results to a scalar; n-d arrays pass through
def predict_proba(W,b,X):
    """Return the sigmoid of the linear score ``X @ W + b``.

    Args:
        W: Weight vector (``fit`` in this file builds it with one entry per
            column of ``X``).
        b: Scalar bias added to every score.
        X: Feature matrix, one sample per row.

    Returns:
        Predicted probability of the positive class for each row of ``X``.
    """
    logits = np.matmul(X, W) + b
    return sigmoid(logits)
def predict(W,b,X):
    """Return hard 0/1 labels by thresholding ``predict_proba`` at 0.5.

    Args:
        W: Weight vector passed through to ``predict_proba``.
        b: Scalar bias passed through to ``predict_proba``.
        X: Feature matrix, one sample per row.

    Returns:
        Integer array with 1 where the predicted probability is >= 0.5,
        else 0.
    """
    probs = predict_proba(W, b, X)
    is_positive = probs >= 0.5
    return is_positive.astype(int)
def bce_loss(y_hat,y):
    """Return the mean binary cross-entropy between predictions and labels.

    Probabilities are clipped into ``[eps, 1 - eps]`` before taking logs so
    that ``log(0)`` never occurs. Clipping (rather than adding ``eps`` inside
    the log) keeps every log argument <= 1, so the loss is never negative.

    Args:
        y_hat: Predicted probabilities of the positive class.
        y: True labels (0 or 1), broadcastable against ``y_hat``.

    Returns:
        The mean of ``-(y*log(p) + (1-y)*log(1-p))`` over all elements,
        where ``p`` is the clipped ``y_hat``.
    """
    eps = 1e-9
    y = np.asarray(y)
    p = np.clip(y_hat, eps, 1.0 - eps)
    per_sample = y * np.log(p) + (1 - y) * np.log(1.0 - p)
    return -per_sample.mean()
# Train (batch GD)
def fit(X,y,lr=0.1,epochs=2000):
    """Train logistic regression with full-batch gradient descent.

    Weights and bias start at zero. Each epoch computes predictions on all of
    ``X``, takes one gradient step, and every 400th epoch (starting at 0)
    prints the loss of the predictions made before that step.

    Args:
        X: Feature matrix of shape (n, d).
        y: Labels, one per row of ``X``.
        lr: Learning rate (step size).
        epochs: Number of gradient steps to take.

    Returns:
        Tuple ``(W, b)``: the learned length-d weight vector and scalar bias.
    """
    n_samples, n_features = X.shape
    W = np.zeros(n_features)
    b = 0.0
    for t in range(epochs):
        y_hat = predict_proba(W, b, X)
        residual = y_hat - y  # shared by both gradients
        grad_W = X.T @ residual / n_samples
        grad_b = residual.mean()
        W -= lr * grad_W
        b -= lr * grad_b
        if not t % 400:
            loss = bce_loss(y_hat, y)
            print(f'epoch {t}: loss={loss:.4f}')
    return W, b
W, b = fit(X_train, y_train, lr=0.1, epochs=2000)
# Accuracy = fraction of rows whose predicted label equals the true label.
train_acc = np.mean(predict(W, b, X_train) == y_train)
test_acc = np.mean(predict(W, b, X_test) == y_test)
print('NumPy train acc:', round(train_acc,3), ' test acc:', round(test_acc,3))
# Sklearn parity: fit scikit-learn's LogisticRegression (default settings) on
# the same split and report its accuracies next to the NumPy ones.
sk = SkLogReg()
sk.fit(X_train, y_train)
sk_train_acc = sk.score(X_train, y_train)
sk_test_acc = sk.score(X_test, y_test)
print('sklearn train acc:', round(sk_train_acc,3), ' test acc:', round(sk_test_acc,3))
# Decision boundary: classify every point of a 200x200 grid that covers the
# data range (padded by 1 on each side) and shade the predicted regions.
pad = 1
x0_axis = np.linspace(X[:, 0].min() - pad, X[:, 0].max() + pad, 200)
x1_axis = np.linspace(X[:, 1].min() - pad, X[:, 1].max() + pad, 200)
xx, yy = np.meshgrid(x0_axis, x1_axis)
grid = np.column_stack((xx.ravel(), yy.ravel()))  # one (x0, x1) row per grid point
zz = predict(W, b, grid).reshape(xx.shape)
plt.contourf(xx, yy, zz, alpha=0.2)
# Overlay the held-out test points, coloured by their true label.
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, edgecolor='k', alpha=0.8)
plt.title('NumPy logistic regression — decision boundary')
plt.show()