In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's legacy global RNG so that the shuffle in train_test_split
# (np.random.permutation) and the random weights drawn in grad_check
# (np.random.randn) are reproducible after Restart & Run All.
np.random.seed(42)

def train_test_split(X, y, test_size=0.25, rng=None):
    """Randomly split paired samples and labels into train and test subsets.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis. Converted with ``np.asarray``
        so lists and other array-likes are accepted.
    y : array-like
        Labels aligned with ``X`` along the first axis.
    test_size : float, default 0.25
        Fraction of samples placed in the test subset; must lie in [0, 1].
        The test subset holds ``int(n * test_size)`` samples.
    rng : None, int or numpy.random.Generator, default None
        ``None`` shuffles with NumPy's legacy global RNG (so a prior
        ``np.random.seed`` call controls the split). Any other value is
        passed to ``np.random.default_rng`` for an independent stream.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``test_size`` is outside [0, 1].
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n = len(X)
    if len(y) != n:
        raise ValueError(
            f"X and y must have the same number of samples, got {n} and {len(y)}"
        )
    # Values outside [0, 1] would otherwise slice the permutation in
    # surprising ways (e.g. a negative size yields a nearly-full test set).
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be in [0, 1], got {test_size!r}")

    if rng is None:
        idx = np.random.permutation(n)
    else:
        idx = np.random.default_rng(rng).permutation(n)

    n_te = int(n * test_size)
    te, tr = idx[:n_te], idx[n_te:]
    return X[tr], X[te], y[tr], y[te]

def standardize(X):
    """Scale every column of ``X`` to zero mean and (approximately) unit variance.

    Returns a tuple ``(X_scaled, mu, std)`` where ``mu`` and ``std`` are the
    per-column mean and standard deviation, each kept with a leading axis of
    length 1 so they broadcast against other arrays with the same columns.
    ``std`` already includes a 1e-12 offset, which keeps the division finite
    for constant columns.
    """
    col_mean = X.mean(axis=0, keepdims=True)
    col_scale = X.std(axis=0, keepdims=True) + 1e-12
    centered = X - col_mean
    return centered / col_scale, col_mean, col_scale

In [None]:
from sklearn.datasets import make_classification

# n_redundant must be given explicitly: its default (2) plus n_informative=3
# exceeds n_features=4, which make_classification rejects with a ValueError.
X, y = make_classification(n_samples=800, n_features=4, n_informative=3,
                           n_redundant=1, class_sep=1.5, random_state=42)

# Split first, then fit the scaling statistics on the training rows only and
# reuse them for the test rows, so no test information leaks into training.
# X itself is left unscaled.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25)
X_tr, mu, std = standardize(X_tr)
X_te = (X_te - mu) / std
X_tr.shape, X_te.shape

In [None]:
def add_bias(X):
    """Return a copy of the 2-D array ``X`` with a column of ones prepended.

    The ones column lets the first weight of a linear model act as the
    intercept.
    """
    ones_column = np.ones((X.shape[0], 1))
    return np.concatenate((ones_column, X), axis=1)

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))`` applied element-wise.

    The input is first clipped to [-20, 20], so ``exp`` never overflows and
    the output stays strictly inside (0, 1).
    """
    bounded = np.clip(z, -20, 20)
    denominator = 1. + np.exp(-bounded)
    return 1. / denominator

def loss_and_grad(w, X, y, l2=0.0):
    """Regularised logistic loss and its gradient with respect to ``w``.

    Parameters
    ----------
    w : array of shape [d]
        Weights; ``w[0]`` is excluded from the L2 penalty (it is treated as
        the bias, i.e. ``X`` is expected to carry its bias column first).
    X : array of shape [N, d]
        Design matrix.
    y : array of shape [N]
        Targets used in the cross-entropy.
    l2 : float, default 0.0
        L2 penalty strength.

    Returns
    -------
    loss : float
        Mean cross-entropy plus ``0.5 * l2 * ||w[1:]||^2``.
    grad : array of shape [d]
        Gradient of that loss.
    """
    n_samples = X.shape[0]
    probs = sigmoid(X @ w)
    coef = w[1:]

    # Cross-entropy + L2 (the bias term is not penalised); the 1e-12 offsets
    # keep the logarithms finite.
    log_lik = y * np.log(probs + 1e-12) + (1 - y) * np.log(1 - probs + 1e-12)
    penalty = 0.5 * l2 * (coef @ coef)
    loss = -log_lik.mean() + penalty

    grad = (X.T @ (probs - y)) / n_samples
    grad[1:] += l2 * coef
    return loss, grad

def fit_logreg(X, y, lr=0.1, epochs=300, l2=0.0):
    """Fit logistic regression by full-batch gradient descent.

    A bias column is added to ``X`` internally and the weights start at zero.

    Parameters
    ----------
    X, y : arrays
        Features (without bias column) and targets.
    lr : float, default 0.1
        Step size.
    epochs : int, default 300
        Number of gradient steps.
    l2 : float, default 0.0
        L2 penalty strength forwarded to ``loss_and_grad``.

    Returns
    -------
    w : array
        Learned weights, bias first.
    hist : array
        Loss evaluated just before the update of the first epoch and of every
        20th epoch.
    """
    design = add_bias(X)
    w = np.zeros(design.shape[1])
    checkpoints = []
    for epoch in range(1, epochs + 1):
        loss, grad = loss_and_grad(w, design, y, l2=l2)
        w -= lr * grad
        # Keep the history short: first epoch plus every 20th one.
        if epoch == 1 or epoch % 20 == 0:
            checkpoints.append(loss)
    return w, np.array(checkpoints)

def predict_proba(X, w):
    """Return ``sigmoid(add_bias(X) @ w)``: the model's probability for each row of ``X``.

    ``X`` is given without a bias column; ``w`` holds the bias weight first.
    """
    logits = add_bias(X) @ w
    return sigmoid(logits)

def predict(X, w, thresh=0.5):
    """Return 0/1 integer predictions: 1 where the predicted probability is at least ``thresh``."""
    probs = predict_proba(X, w)
    is_positive = probs >= thresh
    return is_positive.astype(int)

In [None]:
def grad_check(X, y, l2=0.0, eps=1e-5):
    """Compare the analytic gradient of ``loss_and_grad`` with central differences.

    The comparison is made at a random weight vector drawn from NumPy's
    global RNG (standard normal scaled by 0.1).

    Returns the relative error
    ``||g - g_num|| / (||g|| + ||g_num|| + 1e-12)``.
    """
    design = add_bias(X)
    w_ref = np.random.randn(design.shape[1]) * 0.1
    analytic = loss_and_grad(w_ref, design, y, l2=l2)[1]

    def loss_at(index, delta):
        # Loss with a single coordinate of w_ref shifted by delta.
        shifted = w_ref.copy()
        shifted[index] += delta
        return loss_and_grad(shifted, design, y, l2=l2)[0]

    numeric = np.zeros_like(w_ref)
    for k in range(w_ref.size):
        numeric[k] = (loss_at(k, eps) - loss_at(k, -eps)) / (2 * eps)

    gap = np.linalg.norm(analytic - numeric)
    scale = np.linalg.norm(analytic) + np.linalg.norm(numeric) + 1e-12
    return gap / scale

# Validate the analytic gradient on the first 64 training rows, with a
# non-zero L2 strength so the penalty term is exercised as well.
rel = grad_check(X_tr[:64], y_tr[:64], l2=0.1)
print("gradient check relative error:", rel)
# Stop the notebook here if the analytic and numerical gradients disagree.
assert rel < 1e-6

In [None]:
# Train on the training split.
w, hist = fit_logreg(X_tr, y_tr, lr=0.2, epochs=400, l2=0.1)

# Accuracy = fraction of predictions that match the labels.
y_hat_tr = predict(X_tr, w)
y_hat_te = predict(X_te, w)
acc_tr = (y_hat_tr == y_tr).mean()
acc_te = (y_hat_te == y_te).mean()
print(f"train acc={acc_tr:.3f}, test acc={acc_te:.3f}")

# hist holds one loss value per recorded checkpoint, not one per epoch.
plt.plot(hist)
plt.title("Training loss")
plt.xlabel("checkpoints")
plt.ylabel("loss")
plt.show()