数据预处理

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt

# Load the Titanic dataset (fetched over the network from the URL below).
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
data = pd.read_csv(url)

# Minimal preprocessing: keep the label plus four features.
# Work on an explicit copy so the column assignment below modifies an
# independent frame rather than a slice of the raw data (this avoids pandas'
# SettingWithCopyWarning).
data = data[['Survived', 'Pclass', 'Sex', 'Age', 'Fare']].copy()
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})
# Drop incomplete rows AFTER encoding, so a row whose 'Sex' value is not in
# the mapping (and therefore became NaN) is removed instead of leaking NaN
# into the feature matrix.
data = data.dropna()

# Feature matrix and label column vector of shape (n_samples, 1).
X = data[['Pclass', 'Sex', 'Age', 'Fare']].values
y = data['Survived'].values.reshape(-1, 1)

# Split into train/test BEFORE computing any statistics, so that the test
# rows do not influence the standardisation parameters (no data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise every split with the mean/std of the training rows only.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0)
# A constant column has std 0; divide by 1 instead to avoid NaN/inf.
feature_std[feature_std == 0] = 1.0

X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std
# Keep X standardised as well, since later cells may still refer to it.
X = (X - feature_mean) / feature_std


逻辑回归模型（含 Sigmoid、Loss、梯度下降）

In [None]:
class LogisticRegressionCustom:
    """Binary logistic regression trained with full-batch gradient descent.

    The prediction is ``sigmoid(X @ theta)``. There is no intercept term:
    ``theta`` has exactly one entry per feature column, so append a constant
    column to ``X`` if a bias is required.

    Args:
        lr: Gradient-descent step size.
        epochs: Number of gradient-descent updates performed by ``fit``.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        # Loss value recorded before each update of the most recent fit().
        self.losses = []
        # Copy of theta recorded before each update of the most recent fit().
        self.weights = []

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Only ``exp`` of a non-positive number is ever evaluated, so large
        ``|z|`` cannot overflow.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always in (0, 1]
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def compute_loss(self, y, y_hat):
        """Return the mean binary cross-entropy between ``y`` and ``y_hat``.

        A small constant (1e-8) is added inside each logarithm so that
        probabilities of exactly 0 or 1 do not produce ``log(0)``.
        """
        return -np.mean(y * np.log(y_hat + 1e-8) + (1 - y) * np.log(1 - y_hat + 1e-8))

    def fit(self, X, y):
        """Fit ``theta`` on ``X`` / ``y`` starting from all-zero weights.

        Args:
            X: Array-like of shape (m, n).
            y: Array-like of 0/1 labels with m entries; 1-D input is
                reshaped to a column vector.

        Raises:
            ValueError: If ``X`` and ``y`` have a different number of rows.
        """
        X = np.asarray(X, dtype=float)
        # Force a column vector: with a 1-D y, ``y_hat - y`` would broadcast
        # to an (m, m) matrix instead of an element-wise difference.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        m, n = X.shape
        if y.shape[0] != m:
            raise ValueError(
                f"X has {m} rows but y has {y.shape[0]} labels"
            )

        self.theta = np.zeros((n, 1))
        # Start a fresh history so repeated fits do not accumulate entries.
        self.losses = []
        self.weights = []

        for _ in range(self.epochs):
            y_hat = self.sigmoid(X @ self.theta)
            self.losses.append(self.compute_loss(y, y_hat))
            self.weights.append(self.theta.copy())
            grad = X.T @ (y_hat - y) / m
            self.theta -= self.lr * grad

    def predict(self, X):
        """Return 0/1 predictions of shape (m, 1), thresholding at 0.5."""
        return (self.sigmoid(X @ self.theta) >= 0.5).astype(int)


训练模型、输出训练曲线 & 权重变化

In [None]:
# Train the custom classifier on the training split.
model = LogisticRegressionCustom(lr=0.1, epochs=300)
model.fit(X_train, y_train)

# Plot the per-epoch training loss on an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(model.losses)
ax.set_title("Training Loss Curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("Binary Cross-Entropy Loss")
ax.grid()
plt.show()

# Show the weight vector recorded at the start of each of the first 5 epochs.
first_snapshots = model.weights[:5]
for i, w in enumerate(first_snapshots):
    print(f"Epoch {i+1}: {w.ravel()}")


模型评估

In [None]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

# Both metrics are computed from the same (truth, prediction) pair.
acc, f1 = (metric(y_test, y_pred) for metric in (accuracy_score, f1_score))

print(f"Accuracy: {acc:.4f}")
print(f"F1 Score: {f1:.4f}")
