In [1]:
##################################################################
#           《Python人工智能编程实践（2024年度版）》开源代码
#-----------------------------------------------------------------
#            @章节号：6.6.3（注意力机制的PaddlePaddle实践）                                       
#            @作者：范淼、徐晟桐 
#            @购书链接：暂无
#            @电子邮箱：fm12@tsinghua.org.cn             
#            @官方交流QQ群号：561500762                        
##################################################################

In [2]:
import paddle
from paddle import nn, optimizer, metric


# Hyperparameters.

# Input layout: each sample is handled as a sequence of TIME_STEPS steps
# with INPUT_UNITS features per step.
TIME_STEPS = 14
INPUT_UNITS = 56

# Model size.
NUM_HEADS = 1
HIDDEN_UNITS = 256
NUM_CLASSES = 10

# Training schedule.
LEARNING_RATE = 1e-3
BATCH_SIZE = 64
EPOCHS = 5


class Attention(nn.Layer):
    '''
    Self-attention classifier built on nn.Layer.

    The input sequence goes through a multi-head self-attention layer, is
    flattened into one vector per sample, and is then passed through two
    fully connected layers with a ReLU in between.

    Args:
        input_units: feature size of each time step (used as the attention embed_dim).
        time_steps: number of time steps in each input sequence.
        num_heads: number of attention heads.
        hidden_units: width of the hidden fully connected layer.
        num_classes: output size of the final fully connected layer.
    '''
    def __init__(self, input_units, time_steps, num_heads, hidden_units, num_classes):

        super(Attention, self).__init__()

        # Length of one flattened sequence. Stored so that forward() flattens
        # to exactly the input size of l1 instead of a hard-coded constant.
        self.flat_units = input_units * time_steps

        self.multihead_attn = nn.MultiHeadAttention(embed_dim=input_units, num_heads=num_heads)

        self.l1 = nn.Linear(self.flat_units, hidden_units)

        self.relu = nn.ReLU()

        self.l2 = nn.Linear(hidden_units, num_classes)


    def forward(self, input_tensor):
        '''
        Compute class scores for a batch of sequences.

        Args:
            input_tensor: batch of sequences; expected to hold
                input_units * time_steps values per sample, laid out as
                (batch, time_steps, input_units).

        Returns:
            The output of the final linear layer (num_classes values per sample).
        '''
        # Self-attention: the same tensor is used as query, key and value.
        out = self.multihead_attn(input_tensor, input_tensor, input_tensor)

        # Flatten every sequence into a single vector for the linear layers.
        out = paddle.reshape(out, (-1, self.flat_units))

        out = self.l1(out)

        out = self.relu(out)

        out = self.l2(out)

        return out
    

# Build the attention network and wrap it in paddle's high-level Model API.
paddle_model = Attention(
    input_units=INPUT_UNITS,
    time_steps=TIME_STEPS,
    num_heads=NUM_HEADS,
    hidden_units=HIDDEN_UNITS,
    num_classes=NUM_CLASSES,
)
model = paddle.Model(paddle_model)

# Prepare for training: choose the optimizer, the loss function and the evaluation metric.
adam_optimizer = optimizer.Adam(
    learning_rate=LEARNING_RATE,
    parameters=model.parameters(),
)
model.prepare(
    optimizer=adam_optimizer,
    loss=nn.CrossEntropyLoss(),
    metrics=metric.Accuracy(),
)

In [3]:
import pandas as pd


# Read the fashion_mnist training and test data files with pandas.
train_data = pd.read_csv('./datasets/fashion_mnist/fashion_mnist_train.csv')
test_data = pd.read_csv('./datasets/fashion_mnist/fashion_mnist_test.csv')

# Every column after the first one is treated as a feature. The column list
# is taken from the training file and applied to both splits.
feature_columns = train_data.columns[1:]

# Split the training data into features and class labels.
X_train, y_train = train_data[feature_columns], train_data['label']

# Split the test data into features and class labels.
X_test, y_test = test_data[feature_columns], test_data['label']

In [4]:
from sklearn.preprocessing import StandardScaler


# Create the standardization transformer and fit it on the training features only.
ss = StandardScaler()
ss.fit(X_train)

# Standardize the training features.
X_train = ss.transform(X_train)

# Standardize the test features with the statistics fitted on the training features.
X_test = ss.transform(X_test)

In [5]:
from paddle.io import TensorDataset

# Turn each flat feature row into a (TIME_STEPS, INPUT_UNITS) sequence and
# convert the result to a float32 paddle tensor.
X_train = paddle.to_tensor(
    X_train.reshape([-1, TIME_STEPS, INPUT_UNITS]).astype('float32')
)

# Take the underlying value array out of the pandas label column.
y_train = y_train.values

# Build the dataset used for PaddlePaddle model training.
train_dataset = TensorDataset([X_train, y_train])

# Start training: pass the training dataset, the number of epochs and the batch size.
fit_options = dict(epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=1)
model.fit(train_dataset, **fit_options)

The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [6]:
# Turn each flat test feature row into a (TIME_STEPS, INPUT_UNITS) sequence.
X_test = X_test.reshape([-1, TIME_STEPS, INPUT_UNITS])

# Convert the features to a float32 paddle tensor.
X_test = paddle.to_tensor(X_test.astype('float32'))

# Take the underlying value array out of the pandas label column.
y_test = y_test.values

# Build the dataset used for PaddlePaddle model testing.
test_dataset = TensorDataset([X_test, y_test])

# Evaluate on the test dataset. batch_size is passed explicitly because
# Model.evaluate defaults to batch_size=1, which would run the test samples
# through the network one at a time.
result = model.evaluate(test_dataset, batch_size=BATCH_SIZE, verbose=0)

print('注意力机制（PaddlePaddle版本）在fashion_mnist测试集上的准确率为: %.2f%%。' %(result['acc'] * 100))

注意力机制（PaddlePaddle版本）在fashion_mnist测试集上的准确率为: 86.05%。
