# 极简逻辑回归：Python实现

In [None]:
import numpy as np
from sklearn.datasets import make_classification
import plotly.graph_objects as go

# Generate a synthetic two-feature binary classification dataset
X, y = make_classification(
    n_samples=200,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)
y = y.reshape(-1, 1)  # keep the labels as an (m, 1) column vector

# Scatter plot of the samples, one trace per class
fig = go.Figure()
for label, legend in ((0, '类别0'), (1, '类别1')):
    mask = y[:, 0] == label
    fig.add_trace(go.Scatter(x=X[mask, 0], y=X[mask, 1], mode='markers', name=legend))
fig.update_layout(
    title='模拟二分类数据',
    title_x=0.5,
    xaxis_title='x1',
    yaxis_title='x2',
)
fig.show()

In [None]:
# Sigmoid function
def sigmoid(z):
    """Return the element-wise logistic function 1 / (1 + exp(-z)).

    The naive formula overflows in ``exp(-z)`` for large negative ``z``
    (emitting a RuntimeWarning).  This version only ever exponentiates
    ``-|z|``, which lies in (0, 1], so it cannot overflow.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        NumPy array with the same shape as ``z`` holding values in [0, 1].
    """
    z = np.asarray(z, dtype=float)
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # Both are the same function, written so the exponent is never positive.
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

In [None]:
# Logistic regression trained with batch gradient descent
def logistic_regression(X, y, lr=0.1, n_iters=200):
    """Fit a binary logistic-regression model with full-batch gradient descent.

    Args:
        X: Feature matrix of shape (m, n).
        y: Binary labels (0/1) of shape (m,) or (m, 1).
        lr: Learning rate (step size of each update).
        n_iters: Number of gradient-descent iterations.

    Returns:
        A tuple ``(w, losses)``.  ``w`` has shape (n + 1, 1) with the bias
        weight in row 0.  ``losses`` is a list with one mean cross-entropy
        value per iteration, computed before that iteration's update.

    Raises:
        ValueError: If the number of labels differs from the number of rows
            in ``X``.
    """
    X = np.asarray(X, dtype=float)
    m, n = X.shape
    # Force labels into a column vector: a 1-D y would broadcast against the
    # (m, 1) predictions into an (m, m) matrix instead of subtracting row-wise.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    if y.shape[0] != m:
        raise ValueError(
            f"X has {m} samples but y has {y.shape[0]} labels"
        )

    X_b = np.c_[np.ones((m, 1)), X]  # prepend a column of ones for the bias
    w = np.zeros((n + 1, 1))
    losses = []
    for _ in range(n_iters):
        y_pred = sigmoid(X_b @ w)
        # The small epsilon keeps log() finite when a prediction saturates.
        loss = -np.mean(y * np.log(y_pred + 1e-8) + (1 - y) * np.log(1 - y_pred + 1e-8))
        grad = X_b.T @ (y_pred - y) / m
        w -= lr * grad
        losses.append(loss)
    return w, losses

# Train on (X, y) and report the last recorded loss
w, losses = logistic_regression(X, y, n_iters=200, lr=0.2)
print(f'训练结束，最终损失：{losses[-1]:.4f}')

In [None]:
# Plot the loss curve: one point per training iteration
fig_loss = go.Figure(
    data=[go.Scatter(y=losses, mode='lines', name='训练损失')],
)
fig_loss.update_layout(
    title='逻辑回归损失曲线',
    title_x=0.5,
    xaxis_title='迭代次数',
    yaxis_title='损失',
)
fig_loss.show()

In [None]:
# Plot the decision boundary: evaluate the model on a 200x200 grid that
# extends one unit beyond the data range in each direction
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
x_axis = np.linspace(x_min, x_max, 200)
y_axis = np.linspace(y_min, y_max, 200)
xx, yy = np.meshgrid(x_axis, y_axis)

# Each grid point becomes a row [1, x1, x2]; the leading 1 multiplies the bias
grid = np.c_[np.ones(xx.ravel().shape), xx.ravel(), yy.ravel()]
probs = sigmoid(grid @ w).reshape(xx.shape)

# Probability contours first, then the samples of each class on top
fig = go.Figure()
fig.add_trace(
    go.Contour(
        z=probs,
        x=x_axis,
        y=y_axis,
        showscale=False,
        contours_coloring='lines',
        line_width=2,
        name='决策边界',
    )
)
for label, legend in ((0, '类别0'), (1, '类别1')):
    mask = y[:, 0] == label
    fig.add_trace(go.Scatter(x=X[mask, 0], y=X[mask, 1], mode='markers', name=legend))
fig.update_layout(
    title='逻辑回归决策边界',
    title_x=0.5,
    xaxis_title='x1',
    yaxis_title='x2',
)
fig.show()

In [None]:
# Compute training-set accuracy, thresholding predicted probabilities at 0.5
from sklearn.metrics import accuracy_score

X_b = np.c_[np.ones((len(X), 1)), X]  # same bias column as used in training
train_probs = sigmoid(X_b @ w)
y_pred = train_probs >= 0.5
acc = accuracy_score(y_true=y, y_pred=y_pred)
print(f'训练集准确率：{acc:.2%}')