In [1]:
import numpy as np

In [3]:
# Sigmoid 函数
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# 损失函数：交叉熵
def compute_loss(y, y_hat):
    m = len(y)
    loss = -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / m
    return loss

# 梯度下降
def logistic_regression(X, y, learning_rate = 0.01, num_iteration = 1000):
    m, n = X.shape
    w = np.zeros(n)
    b = 0
    loss_history = []

    for i in range(num_iteration):
        # 计算线性模型输出
        z = np.dot(X, w) + b
        y_hat = sigmoid(z)

        # 计算损失
        loss = compute_loss(y, y_hat)
        loss_history.append(loss)

        # 计算梯度,注意sigmoid导数为y_hat * (1 - y_hat)，然后链式法则可以得到
        dw = np.dot(X.T, (y_hat - y)) / m 
        db = np.sum(y_hat - y) / m 

        # 更新参数
        w -= learning_rate * dw
        b -= learning_rate * db

        if i % 100 == 0:
            print(f"Iteration {i}, loss: {loss}")
    
    return w, b, loss_history

X = np.array([[1, 2], [1, 3], [2, 3], [4, 5], [6, 7]])    # 输入2个特征
y = np.array([0, 0, 0, 1, 1])   # 真实标签

w, b, loss_history = logistic_regression(X, y)

print(w, b)

Iteration 0, loss: 0.6931471805599453
Iteration 100, loss: 0.5785085057822918
Iteration 200, loss: 0.5101523134039823
Iteration 300, loss: 0.45569542496216126
Iteration 400, loss: 0.41178408708668
Iteration 500, loss: 0.375887072920645
Iteration 600, loss: 0.3461301007463976
Iteration 700, loss: 0.32113124406621973
Iteration 800, loss: 0.2998672766461169
Iteration 900, loss: 0.28157381490682754
[ 1.30487443 -0.58558045] -1.2931549586737172


### 交叉熵

In [4]:
import numpy as np
def binary_cross_entropy(y_true, y_pred):
    # 方法1
    m = y_true.shape[0]
    loss = - np.sum((y_true * np.log(y_pred)) + (1- y_true) * np.log(1- y_pred)) / m
    return loss

    # 方法2
    loss = - (y_true * np.log(y_pred)) + (1- y_true) * np.log(1 - y_pred)
    return np.mean(loss)

# 示例：真实标签和预测概率
y_true = np.array([1, 0, 1, 1, 0])
y_pred = np.array([0.9, 0.1, 0.8, 0.7, 0.2])

# 计算交叉熵损失
loss = binary_cross_entropy(y_true, y_pred)
print(f"Binary Cross-Entropy Loss: {loss}")

Binary Cross-Entropy Loss: 0.20273661557656092


### 多分类交叉熵

In [None]:
import numpy as np
def categorical_cross_entropy(y_true, y_pred):
    # shape (batch_size, num_classes)
    # 防止log(0)的情况，对预测概率做裁剪
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1-epsilon)

    loss = -np.sum(y_true * np.log(y_pred), axis=1)
    return np.mean(loss)    # 注意这里还是要用mean！因为上面的np.sum只是二维转一维

# 三个类别，one-hot编码
y_true = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]])
y_pred = np.array([[0.1, 0.8, 0.1], [0.9, 0.05, 0.05], [0.1, 0.1, 0.8], [0.3, 0.6, 0.1], [0.8, 0.1, 0.1]])

print(y_true.shape, y_pred.shape)

loss = categorical_cross_entropy(y_true, y_pred)
print(f"Categorical Cross-Entropy Loss: {loss}")

(5, 3) (5, 3)
Categorical Cross-Entropy Loss: 0.2571233586732892


### Softmax

In [6]:
import numpy as np
# 注意softmax是归一化，sigmoid是激活函数
def softmax(z):
    exp_z = np.exp(z - np.max(z))   # 减去最大值防止溢出
    return np.exp(z) / np.sum(exp_z)

def softmax_derivative(z):
    p = softmax(z)
    S = np.diag(p) - np.outer(p,p)
    return S

# 示例

z = np.array([2.0, 1.0, 0.1])  # 模型的输出（未归一化的得分）
softmax_output = softmax(z)
softmax_derivative_output = softmax_derivative(z)

print("Softmax Output:", softmax_output)
print("Softmax Derivative Matrix:\n", softmax_derivative_output)

Softmax Output: [4.86939638 1.79135082 0.72830889]
Softmax Derivative Matrix:
 [[-18.84162476  -8.72279721  -3.54642469]
 [ -8.72279721  -1.41758694  -1.30465673]
 [ -3.54642469  -1.30465673   0.19787505]]
