In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms


# 定义受限玻尔兹曼机（RBM）模块
class RBM(nn.Module):
    def __init__(self, visible_units, hidden_units):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(hidden_units, visible_units) * 0.1)
        self.h_bias = nn.Parameter(torch.zeros(hidden_units))
        self.v_bias = nn.Parameter(torch.zeros(visible_units))

    def forward(self, v):
        h_prob = torch.sigmoid(torch.matmul(v, self.W.t()) + self.h_bias)
        h_sample = torch.bernoulli(h_prob)
        return h_sample

    def backward(self, h):
        v_prob = torch.sigmoid(torch.matmul(h, self.W) + self.v_bias)
        v_sample = torch.bernoulli(v_prob)
        return v_sample

    def contrastive_divergence(self, v, k=1):
        v0 = v
        for _ in range(k):
            h = self.forward(v0)
            v0 = self.backward(h)
        h0 = self.forward(v)
        hk = self.forward(v0)

        positive_grad = torch.matmul(h0.t(), v)
        negative_grad = torch.matmul(hk.t(), v0)

        self.W.grad = (positive_grad - negative_grad) / v.size(0)
        self.h_bias.grad = torch.mean(h0 - hk, dim=0)
        self.v_bias.grad = torch.mean(v - v0, dim=0)


# 定义深度信念网络（DBN）
class DBN(nn.Module):
    def __init__(self, layer_sizes):
        super(DBN, self).__init__()
        self.rbm_layers = nn.ModuleList()
        for i in range(len(layer_sizes) - 1):
            rbm = RBM(layer_sizes[i], layer_sizes[i + 1])
            self.rbm_layers.append(rbm)
        self.classifier = nn.Linear(layer_sizes[-1], 10)  # 假设有10个分类

    def pretrain(self, train_loader, epochs=5, lr=0.1):
        for idx, rbm in enumerate(self.rbm_layers):
            optimizer = optim.SGD(rbm.parameters(), lr=lr)
            for epoch in range(epochs):
                for data, _ in train_loader:
                    data = data.view(data.size(0), -1)
                    # 通过前面的RBM获取输入
                    for prev_rbm in self.rbm_layers[:idx]:
                        data = prev_rbm.forward(data)
                    optimizer.zero_grad()
                    rbm.contrastive_divergence(data)
                    optimizer.step()
                print(f"RBM层 {idx + 1} 训练完成，第 {epoch + 1} 轮")

    def forward(self, x):
        x = x.view(x.size(0), -1)
        for rbm in self.rbm_layers:
            x = rbm.forward(x)
        x = self.classifier(x)
        return x


# 加载数据集
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# 初始化DBN
dbn = DBN([784, 500, 200])

# 预训练
dbn.pretrain(train_loader)

# 微调
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(dbn.parameters(), lr=0.001)

for epoch in range(5):
    for data, target in train_loader:
        optimizer.zero_grad()
        output = dbn(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    print(f"微调完成，第 {epoch + 1} 轮，损失：{loss.item()}")


RBM层 1 训练完成，第 1 轮
RBM层 1 训练完成，第 2 轮
RBM层 1 训练完成，第 3 轮
RBM层 1 训练完成，第 4 轮
RBM层 1 训练完成，第 5 轮
RBM层 2 训练完成，第 1 轮
RBM层 2 训练完成，第 2 轮
RBM层 2 训练完成，第 3 轮
RBM层 2 训练完成，第 4 轮
RBM层 2 训练完成，第 5 轮
微调完成，第 1 轮，损失：2.3055920600891113
微调完成，第 2 轮，损失：2.2768776416778564
微调完成，第 3 轮，损失：2.327281951904297
微调完成，第 4 轮，损失：2.279536247253418
微调完成，第 5 轮，损失：2.2749054431915283
