In [2]:
!pip install numpy
!pip install loguru



In [3]:
import numpy as np
from loguru import logger

ModuleNotFoundError: No module named 'loguru'

# Lora for Linear
实现对于 linear 层的 lora，包括前向传播，反向传播，训练API，以及推理。以 交叉熵 损失为例。

In [6]:
def binary_crossentropy_loss(y_true, y_pred):
    # 防止 log(0) 出现
    epsilon = 1e-7
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # clip的作用：限制 y_pred 的范围
    # loss
    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return loss


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(fx):
    return fx * (1 - fx)

In [4]:
class LoraLinear:
    def __init__(self, in_dim, out_dim, rank=8, lr=0.01):
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.rank = rank
        self.lr = lr

        # original linear layer
        self.W = np.random.randn(in_dim, out_dim)
        self.b = np.random.randn(1, out_dim)

        # lora
        self.A = np.random.randn(in_dim, rank)
        self.B = np.zeros((rank, out_dim))

    def forward(self, x):
        # x: [bs, in_dim]
        self.linear_output = np.dot(x, self.W) + self.b
        self.xA = np.dot(x, self.A)  # [bs, rank]
        self.xB = np.dot(self.xA, self.B)  # [bs, out_dim]
        self.output = self.linear_output + self.xB
        self.a = sigmoid(self.output)
        return self.a

    def backward(self, x, y_true, y_pred):
        # x: [bs, in_dim]
        error = y_pred - y_true
        # error: [bs, out_dim]
        self.dB = np.dot(self.xA.T, error)  # [rank, out_dim]
        error_a = np.dot(error, self.B.T)  # [bs, rank]
        self.dA = np.dot(x.T, error_a)  # [in_dim, rank]
        # update
        self.A = self.A - self.lr * self.dA
        self.B = self.B - self.lr * self.dB

    def calculate_loss(self, x, y_true):
        y_pred = self.forward(x)
        loss = binary_crossentropy_loss(y_true=y_true, y_pred=y_pred)
        self.backward(x, y_true=y_true, y_pred=y_pred)
        return loss

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            loss = self.calculate_loss(X, y)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}/{epochs}, Loss: {loss:.4f}")

In [7]:
# 示例数据
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # 输入
y = np.array([[0], [1], [1], [0]])  # 二分类目标

# 创建神经网络
input_size = X.shape[1]
hidden_size = 4
output_size = 1
rank = 1

nn = LoraLinear(in_dim=input_size, out_dim=hidden_size, rank=rank, lr=0.01)

# 训练网络
nn.train(X, y, epochs=1000)

# 测试
print("Testing the network:")
for i in range(len(X)):
    output = nn.forward(X[i:i + 1])
    pred = 1 if output[0][0] > 0.5 else 0
    res = bool(X[i][1] == int(pred))
    print(f"Input: {X[i]}, Predicted Output: {output[0][0]:.4f}, Result: {res}")

Epoch 0/1000, Loss: 0.8927
Epoch 100/1000, Loss: 0.8876
Epoch 200/1000, Loss: 0.8375
Epoch 300/1000, Loss: 0.7707
Epoch 400/1000, Loss: 0.7664
Epoch 500/1000, Loss: 0.7649
Epoch 600/1000, Loss: 0.7642
Epoch 700/1000, Loss: 0.7639
Epoch 800/1000, Loss: 0.7637
Epoch 900/1000, Loss: 0.7637
Testing the network:
Input: [0 0], Predicted Output: 0.3985, Result: True
Input: [0 1], Predicted Output: 0.2771, Result: False
Input: [1 0], Predicted Output: 0.6671, Result: False
Input: [1 1], Predicted Output: 0.5369, Result: True
