In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, datasets, optimizers, losses
import matplotlib.pyplot as plt

### 数据的预处理

初始数据设置并加载数据

In [None]:
# Hyperparameters and dataset configuration shared by the cells below.
# Mini-batch size passed to Dataset.batch() for both the train and test pipelines.
batchsz = 100
# Buffer size passed to Dataset.shuffle() for the training pipeline.
shuffles = 1000
# Vocabulary size: forwarded to load_data(num_words=...) and used as the Embedding input dimension.
total_words = 10000
# Maximum sentence length: reviews are padded/truncated to this many word indices.
max_review_len = 80
# Length of each word-embedding feature vector.
embedding_len = 100

# Load the IMDB reviews (as sequences of word indices) and their labels, with the vocabulary limited via num_words.
(x_train, y_train), (x_test, y_test) = datasets.imdb.load_data(num_words=total_words)

维度信息显示

In [None]:
# Before padding: a 1-D array of length 25000 whose elements are variable-length
# lists holding the indices of the words in each review.
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)
print(x_train[0])
print(y_train)

数据处理，设置最大截断长度

In [None]:
# Pad or truncate every review to exactly max_review_len word indices.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=max_review_len)
    for split in (x_train, x_test)
)

# Training pipeline: shuffle, then batch. drop_remainder=True discards a final
# batch that would be smaller than batchsz.
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(shuffles)
    .batch(batchsz, drop_remainder=True)
)
# Test pipeline: batched in order, without shuffling.
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batchsz, drop_remainder=True)
)

# Vocabulary lookup table of the form {'word': index}.
word_index = datasets.imdb.get_word_index()

In [None]:
# After pad_sequences every review has been padded/truncated to max_review_len word
# indices, so x_train / x_test are no longer arrays of variable-length lists
# (expected shape: (25000, 80) — confirm against the printed output). Labels are unchanged.
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)
print(y_train)

In [None]:
# Build the lookup tables used to decode an encoded review back into text.
# Every raw vocabulary index is shifted up by 3 to make room for the special markers below.
# The shifted table is derived from a fresh get_word_index() result instead of
# rewriting `word_index` in terms of itself, so re-running this cell does not
# shift the indices by another 3 each time.
word_index = {word: idx + 3 for word, idx in datasets.imdb.get_word_index().items()}
# Four special markers occupy indices 0-3.
word_index['<PAD>'] = 0
word_index['<START>'] = 1
word_index['<UNK>'] = 2
word_index['<UNUSED>'] = 3
# Inverse mapping: index -> word.
index_to_word = {idx: word for word, idx in word_index.items()}

In [None]:
def num_to_sentence(num):
    """Convert a numerically encoded sentence back into a readable sentence.

    Args:
        num: Iterable of word indices.

    Returns:
        The decoded words joined by single spaces. Each index is looked up in
        the module-level ``index_to_word`` table; an index that is not in the
        table is rendered as '?'.
    """
    words = []
    for idx in num:
        words.append(index_to_word.get(idx, '?'))
    return ' '.join(words)
# Example usage: num_to_sentence(x_train[9])

### 网络模型的创建

In [None]:
class demoRNN(keras.Model):
    """Two-layer SimpleRNNCell binary classifier for padded word-index sequences.

    Data flow: word indices [b, 80] -> Embedding [b, 80, 100] -> two stacked
    SimpleRNNCell layers unrolled over the time axis [b, units] -> dense head
    [b, 1] -> sigmoid probability.

    Args:
        units: Size of each RNN cell's state vector and of the hidden Dense layer.
    """

    def __init__(self, units):
        super().__init__()
        # Remembered so call() can create zero initial states that match the
        # batch it actually receives, instead of fixing the state shape to the
        # global batchsz when the model is constructed.
        self.units = units
        # [b, 80] => [b, 80, 100]; arguments: input_dim, output_dim, input_length
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.5)
        # Binary-classification head: [b, units] => [b, units] => [b, 1]
        self.outlayers = keras.Sequential([
            layers.Dense(units),
            layers.Dropout(rate=0.5),
            layers.ReLU(),
            layers.Dense(1)
        ])

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of padded word-index sequences, [b, 80].
            training: Forwarded to the RNN cells and the dense head so their
                dropout is only active during training.

        Returns:
            Tensor [b, 1] with the sigmoid of the head's output.
        """
        # [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)
        # Zero initial state h0 for each cell, sized from the runtime batch dimension.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        # Feed one time step at a time through both cells: each step is [b, 100].
        for word in tf.unstack(x, axis=1):
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            out1, state1 = self.rnn_cell1(out0, state1, training=training)
        # Only the output of the last time step is classified: [b, units] => [b, 1]
        x = self.outlayers(out1, training=training)
        prob = tf.sigmoid(x)
        return prob

### 训练与测试

In [None]:
units = 64
epochs = 60
# Create the model.
model = demoRNN(units)
# The optimizer's step size is passed as `learning_rate`; the older `lr` alias
# is deprecated and not accepted by newer Keras releases.
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss=losses.binary_crossentropy,
    metrics=['accuracy'],
)
# Train, validating on db_test after every epoch; `history` keeps the per-epoch metrics.
history = model.fit(db_train, epochs=epochs, validation_data=db_test)
# Final evaluation on db_test; as the last expression its result is shown as the cell output.
model.evaluate(db_test)

In [None]:
# Per-epoch validation metrics recorded by model.fit().
all_accuracy = history.history['val_accuracy']
all_loss = history.history['val_loss']

# Both curves share one axes, so label them and add a legend to tell them apart.
fig, ax = plt.subplots()
ax.plot(range(1, len(all_accuracy)+1), all_accuracy, label='val_accuracy')
ax.plot(range(1, len(all_loss)+1), all_loss, label='val_loss')
ax.set_xlabel('epoch')
ax.set_ylabel('value')
ax.set_title('Validation accuracy and loss per epoch')
ax.legend()
# plt.show() renders the figure without echoing the Line2D repr as cell output.
plt.show()

### 梯度裁剪

In [74]:
# Demonstrate norm-based clipping on a random 2x2 tensor (uniform samples scaled by 5).
a = tf.random.uniform([2,2])*5
print(a)
# Clip `a` with a norm limit of 5; as the last expression of the cell, the
# clipped tensor is displayed as the cell output for comparison with `a`.
tf.clip_by_norm(a, 5)

tf.Tensor(
[[4.923814   0.08684337]
 [4.3033166  3.6082268 ]], shape=(2, 2), dtype=float32)


<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[3.2960672 , 0.05813412],
       [2.8806982 , 2.4153957 ]], dtype=float32)>

In [76]:
# Two random 3x3 tensors standing in for a list of gradients.
w1 = tf.random.normal([3,3])
w2 = tf.random.normal([3,3])
# Returns two values: the list of clipped tensors and global_norm (the combined
# norm of all gradients before clipping); max norm = 2.
(ww1, ww2), global_norm = tf.clip_by_global_norm([w1, w2], 2)
# Recompute the combined norm from the clipped tensors to compare it with the pre-clipping value.
global_norm2 = tf.math.sqrt(tf.norm(ww1)**2+tf.norm(ww2)**2)
print(global_norm, global_norm2)

tf.Tensor(4.087775, shape=(), dtype=float32) tf.Tensor(2.0, shape=(), dtype=float32)
