In [1]:
##################################################################
#           《Python人工智能编程实践（2024年度版）》开源代码
#-----------------------------------------------------------------
#            @章节号：6.8.2（变换模型的TensorFlow实践）                                                  
#            @作者：范淼、徐晟桐 
#            @购书链接：暂无
#            @电子邮箱：fm12@tsinghua.org.cn             
#            @官方交流QQ群号：561500762                        
##################################################################

In [2]:
from tensorflow.keras import models, layers, losses, optimizers, Model
import tensorflow as tf


# Hyperparameters.

# Input layout: every sample is presented to the model as TIME_STEPS rows of
# INPUT_UNITS features each (14 * 56 = 784 values per sample).
TIME_STEPS = 14
INPUT_UNITS = 56

# Model size.
NUM_HEADS = 8          # number of attention heads
HIDDEN_UNITS = 256     # width of the hidden layer in the classification head
NUM_CLASSES = 10       # number of output logits

# Training schedule.
BATCH_SIZE = 64
EPOCHS = 5
LEARNING_RATE = 0.001


class Transformer_Encoder(Model):
    '''
    Single-block Transformer encoder with a classification head, implemented
    as a keras.Model subclass.

    Forward pass (see call()):
        learned positional embedding
        -> multi-head self-attention -> residual add + layer normalization
        -> position-wise Dense       -> residual add + layer normalization
        -> flatten -> Dense(hidden_units, relu) -> Dense(num_classes) logits

    Args:
        input_units: number of features at each time step.
        time_steps: number of time steps per sample.
        num_heads: number of attention heads.
        hidden_units: width of the hidden layer in the classification head.
        num_classes: number of output logits.
    '''
    def __init__(self, input_units, time_steps, num_heads, hidden_units, num_classes):

        super(Transformer_Encoder, self).__init__()

        # Width of one sample once the (time_steps, input_units) sequence is
        # flattened. Derived from the constructor arguments so the model works
        # for any sequence layout, not only 14 x 56 = 784.
        self.flat_units = time_steps * input_units

        # Trainable positional embedding; the leading axis of size 1 lets it
        # broadcast over the batch dimension.
        self.pos_emb = tf.Variable(tf.random.normal((1, time_steps, input_units)), trainable=True)

        # key_dim is the size of each attention head, not the total width.
        self.m_attn = layers.MultiHeadAttention(key_dim=input_units, num_heads=num_heads)

        # NOTE(review): this one LayerNormalization instance is applied after
        # both residual connections, so the two normalizations share their
        # scale/offset weights. Confirm this weight sharing is intended.
        self.ln = layers.LayerNormalization()

        # Position-wise feed-forward layer; it keeps the feature width equal
        # to input_units so its output can be added back to its input.
        self.l1 = layers.Dense(input_units, activation='relu')

        # Classification head: hidden layer followed by a logits layer.
        self.l2 = layers.Dense(hidden_units, activation='relu')

        self.l3 = layers.Dense(num_classes)


    def call(self, input_tensor):
        '''
        Compute class logits for a batch of sequences.

        Args:
            input_tensor: tensor that broadcasts against the positional
                embedding of shape (1, time_steps, input_units).

        Returns:
            The output of the final Dense(num_classes) layer, i.e. raw logits
            (no softmax is applied here).
        '''
        # Build a new tensor instead of rebinding the argument.
        embedded = input_tensor + self.pos_emb

        # Self-attention: query, value and key are all the same tensor.
        attended = self.m_attn(embedded, embedded, embedded)

        out = self.ln(attended + embedded)

        # l1 already applies ReLU through its activation argument, so no
        # additional ReLU is needed.
        feed_forward = self.l1(out)

        out = self.ln(out + feed_forward)

        # Flatten every sample to a single vector for the classification head.
        out = tf.reshape(out, (-1, self.flat_units))

        # l2 also applies ReLU through its activation argument.
        hidden = self.l2(out)

        return self.l3(hidden)

    
# Instantiate the Transformer encoder from the hyperparameters.
model = Transformer_Encoder(
    input_units=INPUT_UNITS,
    time_steps=TIME_STEPS,
    num_heads=NUM_HEADS,
    hidden_units=HIDDEN_UNITS,
    num_classes=NUM_CLASSES,
)

# Configure training: optimizer, loss function and evaluation metric.
# from_logits=True because the model's last Dense layer has no activation.
adam = optimizers.legacy.Adam(learning_rate=LEARNING_RATE)
cross_entropy = losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer=adam, loss=cross_entropy, metrics=['accuracy'])

In [3]:
import pandas as pd


# Read the fashion_mnist training and test CSV files with pandas.
train_data = pd.read_csv('./datasets/fashion_mnist/fashion_mnist_train.csv')
test_data = pd.read_csv('./datasets/fashion_mnist/fashion_mnist_test.csv')

# Feature columns are every column except 'label'. Selecting them by name
# (rather than by position) keeps the feature/label split correct wherever
# the 'label' column sits in the CSV.
feature_columns = train_data.columns.drop('label')

# Split the training data into features and class labels.
X_train = train_data[feature_columns]
y_train = train_data['label']

# Split the test data the same way. The training feature columns are reused
# so both feature matrices share the same columns in the same order.
X_test = test_data[feature_columns]
y_test = test_data['label']

In [4]:
from sklearn.preprocessing import StandardScaler


# Create the standardization transformer and fit it on the training
# features only.
ss = StandardScaler()
ss.fit(X_train)

# Apply the fitted transformation to the training features ...
X_train = ss.transform(X_train)

# ... and the same transformation to the test features.
X_test = ss.transform(X_test)

In [5]:
# Turn each flat feature row into a sequence of TIME_STEPS rows with
# INPUT_UNITS features each.
X_train = X_train.reshape(-1, TIME_STEPS, INPUT_UNITS)

# Train the network on the fashion_mnist training set.
model.fit(
    x=X_train,
    y=y_train.values,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
)

Epoch 1/5


2023-08-16 16:42:41.079277: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x14e784850>

In [6]:
# Give the test features the same (TIME_STEPS, INPUT_UNITS) layout that was
# used for training.
X_test = X_test.reshape(-1, TIME_STEPS, INPUT_UNITS)

# Evaluate the network on the fashion_mnist test set.
result = model.evaluate(x=X_test, y=y_test.values, verbose=0)

# Index 1 of the result is reported below as the accuracy
# (presumably [loss, accuracy], matching the compiled metric -- confirm).
accuracy_percent = result[1] * 100

print('变换模型（TensorFlow版本）在fashion_mnist测试集上的准确率为: %.2f%%。' % (accuracy_percent,))

变换模型（TensorFlow版本）在fashion_mnist测试集上的准确率为: 88.75%。
