In [1]:
##################################################################
#           《Python人工智能编程实践（2024年度版）》开源代码
#-----------------------------------------------------------------
#            @章节号：6.8.1（变换模型的PyTorch实践）                                                  
#            @作者：范淼、徐晟桐 
#            @购书链接：暂无
#            @电子邮箱：fm12@tsinghua.org.cn             
#            @官方交流QQ群号：561500762                        
##################################################################

In [2]:
from torch import nn, optim, randn


# Hyperparameters.

# Model architecture. Each sample is reshaped to (TIME_STEPS, INPUT_UNITS)
# before it is passed to the model.
TIME_STEPS = 14
INPUT_UNITS = 56
NUM_HEADS = 8
HIDDEN_UNITS = 256
NUM_CLASSES = 10

# Optimisation settings.
LEARNING_RATE = 1e-3
BATCH_SIZE = 64
EPOCHS = 5


class Transformer_Encoder(nn.Module):
    '''
    Single-block Transformer encoder followed by a two-layer classification head.

    The input is treated as a sequence of ``time_steps`` vectors, each of width
    ``input_units``. A learned positional embedding is added, then multi-head
    self-attention and a position-wise linear layer are applied, each wrapped in
    a residual connection followed by layer normalisation. The result is
    flattened per sample and fed through two linear layers to produce logits.

    Args:
        input_units: width of every sequence element (the attention embedding
            size). nn.MultiheadAttention requires it to be divisible by
            ``num_heads``.
        time_steps: sequence length.
        num_heads: number of attention heads.
        hidden_units: width of the hidden layer in the classification head.
        num_classes: number of output logits.
    '''

    def __init__(self, input_units, time_steps, num_heads, hidden_units, num_classes):

        super(Transformer_Encoder, self).__init__()

        # Learned positional embedding; the leading dimension of 1 broadcasts
        # over the batch dimension.
        self.pos_emb = nn.Parameter(randn(1, time_steps, input_units))

        # batch_first=True: tensors are laid out as (batch, time_steps, input_units).
        self.m_attn = nn.MultiheadAttention(embed_dim=input_units, num_heads=num_heads, batch_first=True)

        # NOTE: this single LayerNorm instance (and therefore one set of affine
        # parameters) is applied after both residual connections in forward().
        self.ln = nn.LayerNorm(input_units)

        # Position-wise linear layer applied to every time step.
        self.l1 = nn.Linear(input_units, input_units)

        self.relu = nn.ReLU()

        # Classification head operating on the flattened sequence.
        self.l2 = nn.Linear(input_units * time_steps, hidden_units)

        self.l3 = nn.Linear(hidden_units, num_classes)


    def forward(self, input_tensor):
        '''
        Compute class logits for a batch of sequences.

        Args:
            input_tensor: tensor of shape (batch, time_steps, input_units).
                It is not modified.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        '''

        # Out-of-place addition: an in-place `+=` would silently overwrite the
        # caller's tensor, so repeated calls on the same input would differ.
        embedded = input_tensor + self.pos_emb

        # Self-attention: query, key and value are all the embedded input.
        # The second return value (attention weights) is not needed.
        attn_out, _ = self.m_attn(embedded, embedded, embedded)

        # Residual connection + layer normalisation around the attention.
        out = self.ln(attn_out + embedded)

        out_l1 = self.relu(self.l1(out))

        # Residual connection + layer normalisation around the linear layer.
        out = self.ln(out + out_l1)

        # Flatten each sample to (time_steps * input_units,). The size is
        # derived from the tensor itself instead of a hard-coded 784, so the
        # model works for any (time_steps, input_units) configuration.
        out = out.reshape(out.shape[0], -1)

        out_l2 = self.relu(self.l2(out))

        out_l3 = self.l3(out_l2)

        return out_l3


# Instantiate the Transformer encoder classifier from the hyperparameters.
model = Transformer_Encoder(
    input_units=INPUT_UNITS,
    time_steps=TIME_STEPS,
    num_heads=NUM_HEADS,
    hidden_units=HIDDEN_UNITS,
    num_classes=NUM_CLASSES,
)

# Loss function: cross-entropy between the model's logits and the class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [3]:
import pandas as pd


# Read the fashion_mnist training and test CSV files with pandas.
train_data = pd.read_csv('../datasets/fashion_mnist/fashion_mnist_train.csv')
test_data = pd.read_csv('../datasets/fashion_mnist/fashion_mnist_test.csv')

# Select the feature columns by name rather than by position, so the 'label'
# column can never end up among the features, wherever it sits in the file.
feature_columns = train_data.columns.drop('label')

# Split the training data into features and class labels.
X_train = train_data[feature_columns]
y_train = train_data['label']

# Split the test data into features and class labels. Indexing with the
# training feature columns keeps the column order identical to X_train and
# raises a KeyError if the test file lacks any of them.
X_test = test_data[feature_columns]
y_test = test_data['label']

In [4]:
from sklearn.preprocessing import StandardScaler


# Create the standardisation transformer and fit it on the training features only.
ss = StandardScaler().fit(X_train)

# Apply the fitted transformer to the training and the test features, so both
# are scaled with the statistics learned from the training data.
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

In [5]:
import torch
from torch.utils.data import TensorDataset, DataLoader


# Wrap the training features and labels into a PyTorch dataset.
train_features = torch.tensor(X_train.astype('float32'))
train_labels = torch.tensor(y_train.values)
train_tensor = TensorDataset(train_features, train_labels)

# Data loader that yields shuffled mini-batches for training.
train_loader = DataLoader(train_tensor, batch_size=BATCH_SIZE, shuffle=True)

# Number of mini-batches per epoch; only used in the progress message.
n_total_steps = len(train_loader)

# Put the model into training mode.
model.train()

# Print the loss once every this many steps.
log_interval = 300

for epoch in range(EPOCHS):
    for i, (images, labels) in enumerate(train_loader):
        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass: view each flat sample as a (TIME_STEPS, INPUT_UNITS) sequence.
        images = images.reshape(-1, TIME_STEPS, INPUT_UNITS)
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        if (i+1) % log_interval == 0:
            print (f'Epoch [{epoch+1}/{EPOCHS}], Step[{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/5], Step[300/938], Loss: 0.3184
Epoch [1/5], Step[600/938], Loss: 0.3566
Epoch [1/5], Step[900/938], Loss: 0.3689
Epoch [2/5], Step[300/938], Loss: 0.3626
Epoch [2/5], Step[600/938], Loss: 0.2422
Epoch [2/5], Step[900/938], Loss: 0.2480
Epoch [3/5], Step[300/938], Loss: 0.2653
Epoch [3/5], Step[600/938], Loss: 0.3076
Epoch [3/5], Step[900/938], Loss: 0.2986
Epoch [4/5], Step[300/938], Loss: 0.1940
Epoch [4/5], Step[600/938], Loss: 0.2684
Epoch [4/5], Step[900/938], Loss: 0.2933
Epoch [5/5], Step[300/938], Loss: 0.1847
Epoch [5/5], Step[600/938], Loss: 0.2781
Epoch [5/5], Step[900/938], Loss: 0.1804


In [6]:
# Wrap the test features and labels into a PyTorch dataset.
test_tensor = TensorDataset(torch.tensor(X_test.astype('float32')), torch.tensor(y_test.values))

# Data loader for evaluation; no shuffling.
test_loader = DataLoader(dataset = test_tensor, batch_size = BATCH_SIZE, shuffle = False)

# Put the model into evaluation mode.
model.eval()

n_correct = 0
n_samples = 0

# Inference needs no gradients: disabling autograd avoids building a
# computation graph for every batch and replaces the legacy `.data` access.
with torch.no_grad():
    for images, labels in test_loader:
        # View each flat sample as a (TIME_STEPS, INPUT_UNITS) sequence.
        images = images.reshape([-1, TIME_STEPS, INPUT_UNITS])
        outputs = model(images)

        # The predicted class is the index of the largest logit in each row.
        _, predictions = torch.max(outputs, 1)

        n_samples += labels.size(0)
        n_correct += (predictions == labels).sum().item()

# Accuracy as a percentage of correctly classified test samples.
acc = 100.0 * n_correct / n_samples

print('变换模型（PyTorch版本）在fashion_mnist测试集上的准确率为: %.2f%%。' %acc)

变换模型（PyTorch版本）在fashion_mnist测试集上的准确率为: 89.92%。
