手撕线性回归

In [6]:
import numpy as np

# Generate random data (seeded so every run produces the same values)
np.random.seed(0)
X = 2 * np.random.rand(100, 2)              # features: 100 rows, 2 columns, uniform in [0, 2)
y = 4 + 3 * X + np.random.randn(100, 1)     # targets: 4 + 3 * X plus Gaussian noise; the (100, 1) noise broadcasts over both columns, so y is (100, 2)

# Batch gradient descent (every step uses all samples; this is not stochastic GD)
def compute_cost(X, y, theta):
    """Return the squared-error cost of the linear model ``X.dot(theta)``.

    The squared residuals are summed over every element and scaled by
    ``1 / (2 * len(y))``.

    Args:
        X: Design matrix.
        y: Target values; ``len(y)`` is used as the sample count.
        theta: Model parameters.

    Returns:
        The scalar cost.
    """
    residuals = X.dot(theta) - y
    scale = 1 / (2 * len(y))
    return scale * np.sum(np.square(residuals))

def gradient_descent(X, y, theta, alpha, num_iters, verbose=True):
    """Fit linear-regression parameters with full-batch gradient descent.

    Args:
        X: Design matrix (one row per sample).
        y: Target values; ``len(y)`` is used as the sample count.
        theta: Initial parameters. The input is never modified; the
            optimisation runs on a float copy.
        alpha: Learning rate.
        num_iters: Number of update steps to perform.
        verbose: When true (the default), print the cost after every step.

    Returns:
        A new float array holding the parameters after ``num_iters`` steps.
    """
    m = len(y)
    # Copy as float: the caller's array stays untouched, and an integer-typed
    # initial guess no longer fails on the in-place float update below.
    theta = np.array(theta, dtype=float)
    for _ in range(num_iters):
        residuals = X.dot(theta) - y
        step = alpha * (1 / m) * np.dot(X.transpose(), residuals)
        theta -= step
        if verbose:
            print(compute_cost(X, y, theta))
    return theta

# Hyperparameters
alpha, num_iters = 0.01, 1000

# Prepend the bias column x0 = 1 to the features
ones = np.ones((X.shape[0], 1))
X_b = np.hstack((ones, X))

# Random initial parameters
theta = np.random.randn(3, 2)

# Run gradient descent
theta = gradient_descent(X_b, y, theta, alpha, num_iters)

88.30505828335492
82.96349608904917
77.95845445474822
73.2686742924841
68.87423989204105
64.7564940293976
60.89795843970571
57.28225931580641
53.89405751470427
50.718983174491136
47.74357446301042
44.95522019716567
42.342106088276125
39.89316438433919
37.59802669453978
35.446979794910995
33.43092422675893
31.541335511368946
29.77022781566295
28.110119913925324
26.55400330050232
25.095312317548796
23.727896170485764
22.445992711878944
21.24420388198674
20.117472701288026
19.06106171691581
18.07053281112016
17.14172828569009
16.270753141702812
15.453958479064305
14.687925945078364
13.969453165753098
13.295540097742856
12.66337624274806
12.070328669871783
11.513930794876002
10.991871868506841
10.501987129080867
10.042248577355688
9.610756334361177
9.205730545352239
8.825503795372216
8.4685140040968
8.133297769671382
7.8184841331686314
7.5227887370861986
7.245008352983956
6.984015754933873
6.738754916929615
6.508236513783965
6.291533706335793
6.087778193000282
5.8961565108314105
5.71590657

手撕逻辑回归

In [None]:
import numpy as np

# Sigmoid function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so large
    negative inputs no longer overflow inside ``np.exp``.

    Args:
        x: Scalar or array-like. Non-floating input is converted to float64.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float64)
    decay = np.exp(-np.abs(x))
    # Both branches are algebraically 1 / (1 + exp(-x)); the second is the
    # form that stays finite when x is negative.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()] if result.ndim == 0 else result

# Logistic regression classifier
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Attributes:
        learning_rate: Step size of each gradient update.
        num_iterations: Number of gradient steps performed by ``fit``.
        weights: 1-D weight vector, ``None`` until ``fit`` is called.
        bias: Scalar intercept, ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    # Training (gradient descent)
    def fit(self, X, y):
        """Fit the weights and bias to the data.

        Args:
            X: 2-D array of shape ``(num_samples, num_features)``.
            y: Labels, one per sample. Any shape with ``num_samples``
                elements is accepted, including ``(n,)`` and ``(n, 1)``.

        Raises:
            ValueError: If ``y`` does not contain one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the labels: a column vector of shape (n, 1) would otherwise
        # broadcast against the (n,) predictions into an (n, n) matrix.
        y = np.asarray(y, dtype=float).reshape(-1)
        num_samples, num_features = X.shape
        if y.shape[0] != num_samples:
            raise ValueError(
                f"y has {y.shape[0]} labels but X has {num_samples} samples"
            )

        self.weights = np.zeros(num_features)
        self.bias = 0.0

        for _ in range(self.num_iterations):
            # Forward pass
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = sigmoid(linear_model)

            # Gradients of the mean log-loss
            error = y_pred - y
            dw = (1 / num_samples) * np.dot(X.T, error)
            db = (1 / num_samples) * np.sum(error)

            # Parameter update
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    # Probability prediction
    def predict_prob(self, X):
        """Return the predicted probability of the positive class for ``X``."""
        linear_model = np.dot(X, self.weights) + self.bias
        return sigmoid(linear_model)

    # Class prediction
    def predict(self, X, threshold=0.5):
        """Return 1.0 where the predicted probability is >= ``threshold``, else 0.0."""
        y_pred_prob = self.predict_prob(X)
        y_pred = np.zeros_like(y_pred_prob)
        y_pred[y_pred_prob >= threshold] = 1
        return y_pred

手撕交叉熵损失梯度回传

In [None]:
import numpy as np

# Sigmoid and its derivative
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)``, which never exceeds 1, so large
    negative inputs no longer overflow inside ``np.exp``.

    Args:
        z: Scalar or array-like. Non-floating input is converted to float64.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(np.float64)
    decay = np.exp(-np.abs(z))
    # Both branches are algebraically 1 / (1 + exp(-z)); the second is the
    # form that stays finite when z is negative.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()] if result.ndim == 0 else result

def sigmoid_derivative(z):
    """Return the derivative of the sigmoid at ``z``, i.e. ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    s = sigmoid(z)  # evaluate the sigmoid once and reuse it for both factors
    return s * (1 - s)

# A simple two-layer neural network
class SimpleNN:
    """Two-layer sigmoid network trained with binary cross-entropy.

    ``forward`` caches the pre-activations (``Z1``, ``Z2``) and activations
    (``A1``, ``A2``) on the instance; ``backward`` reads those cached values,
    so it must be called after ``forward`` on the same batch.
    """

    def __init__(self, input_size, hidden_size, output_size):
        # Random normal weights, zero biases
        self.weights1 = np.random.randn(input_size, hidden_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = np.random.randn(hidden_size, output_size)
        self.bias2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run the forward pass on ``X``, cache the activations, return ``A2``."""
        self.Z1 = np.dot(X, self.weights1) + self.bias1
        self.A1 = sigmoid(self.Z1)
        self.Z2 = np.dot(self.A1, self.weights2) + self.bias2
        self.A2 = sigmoid(self.Z2)
        return self.A2

    def backward(self, X, y, learning_rate):
        """Back-propagate the cross-entropy loss and update all parameters.

        Args:
            X: Input batch that was passed to the preceding ``forward`` call.
            y: Targets with the same shape as the cached output ``A2``.
            learning_rate: Step size of the gradient update.
        """
        m = X.shape[0]  # number of samples

        # Output layer: with a sigmoid output and cross-entropy loss the
        # sigmoid derivative cancels, leaving dL/dZ2 = A2 - y. Multiplying by
        # sigmoid'(Z2) again would be the gradient of squared error instead.
        dZ2 = self.A2 - y
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # Hidden layer: sigmoid'(Z1) = A1 * (1 - A1), taken from the cached
        # activation rather than recomputing the sigmoid.
        dA1 = np.dot(dZ2, self.weights2.T)
        dZ1 = dA1 * self.A1 * (1 - self.A1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # Update weights and biases
        self.weights1 -= learning_rate * dW1
        self.bias1 -= learning_rate * db1
        self.weights2 -= learning_rate * dW2
        self.bias2 -= learning_rate * db2

    def compute_loss(self, X, y):
        """Return the mean binary cross-entropy of the network on ``(X, y)``.

        Runs ``forward`` and therefore overwrites the cached activations.
        """
        # Keep probabilities strictly inside (0, 1) so log never sees 0.
        eps = 1e-12
        A2 = np.clip(self.forward(X), eps, 1 - eps)
        return -np.mean(y * np.log(A2) + (1 - y) * np.log(1 - A2))

    def train(self, X_train, y_train, epochs, learning_rate):
        """Run ``epochs`` forward/backward steps, printing the loss every 100 epochs."""
        for epoch in range(epochs):
            self.forward(X_train)  # forward pass
            self.backward(X_train, y_train, learning_rate)  # backward pass
            if epoch % 100 == 0:
                loss = self.compute_loss(X_train, y_train)
                print(f"Epoch {epoch}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return 1 where the network output exceeds 0.5, else 0 (integer array)."""
        A2 = self.forward(X)
        return (A2 > 0.5).astype(int)

手撕对比学习

In [None]:
# Contrastive loss
import torch
import torch.nn.functional as F

# -------- 1 Pair-wise Contrastive Loss --------
def contrastive_loss(feat1, feat2, label, margin=1.0):
    """Return the mean pair-wise contrastive loss.

    Each pair contributes ``label * d**2 + (1 - label) * relu(margin - d)**2``
    where ``d`` is the pairwise distance between ``feat1`` and ``feat2``.

    Args:
        feat1: First batch of features.
        feat2: Second batch of features, paired row by row with ``feat1``.
        label: Per-pair weight of the squared-distance term; ``1 - label``
            weights the margin term.
        margin: Distance below which the margin term is non-zero.

    Returns:
        Scalar tensor with the loss averaged over all pairs.
    """
    distance = F.pairwise_distance(feat1, feat2)
    hinge = F.relu(margin - distance)
    per_pair = label * distance.pow(2) + (1 - label) * hinge.pow(2)
    return torch.mean(per_pair)


# Equivalent of F.pairwise_distance, written without calling it
def pairwise_distance_impl(x1, x2, p=2.0, eps=1e-6, keepdim=False):
    """Return the p-norm of ``x1 - x2 + eps`` taken over the last dimension.

    Args:
        x1: First tensor.
        x2: Second tensor, broadcastable against ``x1``.
        p: Order of the norm. ``float("inf")`` and ``0`` are supported in
            addition to finite positive orders.
        eps: Small constant added to the difference before taking the norm.
        keepdim: Whether to keep the reduced last dimension with size 1.

    Returns:
        Tensor of distances.
    """
    diff = x1 - x2 + eps
    # vector_norm handles the inf and 0 orders, for which the manual
    # abs().pow(p).sum().pow(1 / p) formula does not yield the norm.
    return torch.linalg.vector_norm(diff, ord=p, dim=-1, keepdim=keepdim)


# -------- 2 Triplet Loss --------
def triplet_loss(anchor, positive, negative, margin=1.0):
    """Return the mean triplet margin loss.

    Each triplet contributes ``relu(d(anchor, positive) - d(anchor, negative)
    + margin)``, where ``d`` is ``F.pairwise_distance``.

    Args:
        anchor: Anchor features.
        positive: Features paired with the anchor as positives.
        negative: Features paired with the anchor as negatives.
        margin: Offset added to the distance difference before the ReLU.

    Returns:
        Scalar tensor with the loss averaged over all triplets.
    """
    dist_to_pos, dist_to_neg = (
        F.pairwise_distance(anchor, other) for other in (positive, negative)
    )
    violation = dist_to_pos - dist_to_neg + margin
    return F.relu(violation).mean()


# -------- 3 InfoNCE Loss --------
def infonce_loss(embeddings, temperature=0.1):
    """Return the InfoNCE loss for a batch of positive pairs.

    The first half of ``embeddings`` is treated as anchors and the second
    half as their positives, paired by row index. Every other row of the
    second half acts as a negative for a given anchor.

    Args:
        embeddings: Tensor of shape ``(2N, D)`` holding N positive pairs.
        temperature: Divisor applied to the similarity logits.

    Returns:
        Scalar tensor: cross-entropy of the ``(N, N)`` similarity matrix
        against its diagonal.

    Raises:
        ValueError: If the number of rows is zero or odd.
    """
    total = embeddings.shape[0]
    if total == 0 or total % 2 != 0:
        raise ValueError(
            f"embeddings must hold a positive, even number of rows, got {total}"
        )
    # Derive the pair count from the input; the name N was otherwise
    # undefined inside this function.
    N = total // 2

    features1 = embeddings[:N]   # anchors
    features2 = embeddings[N:]   # positives

    # Similarity matrix (N, N), scaled by the temperature
    logits = torch.matmul(features1, features2.T) / temperature

    # The positive for row i sits on the diagonal; build the labels on the
    # same device as the logits so GPU inputs work.
    labels = torch.arange(N, device=embeddings.device)
    return F.cross_entropy(logits, labels)