In [1]:
from datetime import datetime
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [7]:
# Vocabulary size: passed to imdb.load_data as num_words and used as the
# Embedding input dimension in the models below.
total_words = 10000
# Every review is padded/truncated to this many tokens (see pad_sequences below).
max_review_len = 80
# Length of the embedding vector learned for each token.
embedding_len = 100

# Load the IMDB sentiment dataset as (sequences, labels) pairs for train and test.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

In [4]:
# Text preprocessing: bring every review to exactly max_review_len tokens so the
# splits become dense 2-D arrays (train first, then test).
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=max_review_len)
    for split in (x_train, x_test)
)


In [5]:
batch_size = 128

# Training pipeline: shuffle with a 10000-element buffer, then cut into fixed-size
# batches. drop_remainder=True discards the final batch when it has fewer than
# batch_size samples (25000 % 128 = 40 samples are dropped per pass).
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(batch_size, drop_remainder=True)
)

# Test pipeline: no shuffling, same fixed-size batching (the last 40 samples are
# likewise dropped).
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batch_size, drop_remainder=True)
)

In [6]:
# Sanity check: padded array shapes plus the largest and smallest label value.
print(
    'x_train shape:',
    x_train.shape,
    tf.reduce_max(y_train),
    tf.reduce_min(y_train),
)
print('x_test shape:', x_test.shape)

x_train shape: (25000, 80) tf.Tensor(1, shape=(), dtype=int64) tf.Tensor(0, shape=(), dtype=int64)
x_test shape: (25000, 80)


In [16]:
class MyRNN(keras.Model):
    """Single-layer SimpleRNN sentiment classifier, unrolled by hand over time.

    Data flow: token ids [b, seq] -> Embedding [b, seq, embedding_len]
    -> SimpleRNNCell applied once per time step [b, units] -> Dense(1) [b, 1]
    -> sigmoid, i.e. P(y is positive | x).

    Relies on the notebook-level constants ``total_words``, ``embedding_len``
    and ``max_review_len``.
    """

    def __init__(self, units):
        """
        :param units: size of the recurrent hidden state (h_dim)
        """
        super(MyRNN, self).__init__()
        # Kept so call() can build a zero initial state that matches whatever
        # batch it receives, instead of a tensor frozen to the global batch_size.
        self.units = units

        # [b, 80] => [b, 80, 100]: each sentence has at most 80 tokens and each
        # token is represented by a 100-dimensional vector.
        self.embedding = keras.layers.Embedding(total_words,  # vocabulary size (input dimension)
                                                embedding_len,  # vector length per token
                                                input_length=max_review_len,  # sentence length
                                                )
        # The cell is stepped along the time axis: [b, 80, 100] => [b, units].
        # `units` is the hidden-state size; each 100-d word vector is folded
        # into that state.
        # NOTE(review): when a cell is driven manually like this, Keras may reuse
        # a cached dropout mask across calls -- confirm whether
        # reset_dropout_mask() should be called per forward pass.
        self.rnn_cell0 = keras.layers.SimpleRNNCell(units=units, dropout=0.5)
        # Overall shapes: [b, 80, 100] => [b, units] => [b, 1]
        # (embedding layer -> SimpleRNNCell -> output layer).
        self.out_layer = keras.layers.Dense(1)

    def call(self, inputs, training=None):
        """
        Forward pass.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode
        Dropout is only active in training mode.

        :param inputs: token ids, [b, 80] (b sentences of 80 tokens each)
        :param training: forwarded to the RNN cell to switch dropout on/off
        :return: [b, 1] probabilities P(y is positive | x)
        """
        # Embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # Zero initial state sized from the runtime batch dimension, so partial
        # or differently sized batches work as well.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]

        # unstack along the time axis gives 80 tensors of shape [b, 100] (first
        # word of every sentence, second word, ...). Each step's state feeds
        # the next step.
        for word in tf.unstack(x, axis=1):
            out, state0 = self.rnn_cell0(word, state0, training=training)

        # Output layer on the last time step: [b, units] => [b, 1]
        x = self.out_layer(out)
        # Squash the logit into a probability P(y is positive | x)
        prob = tf.sigmoid(x)
        return prob
        
            


In [15]:
# Hyperparameters for the single-layer model: hidden-state size and training epochs.
h_dim = 64
epochs = 6

# Build the model
model = MyRNN(h_dim)

# Compile: Adam at 0.001, binary cross-entropy on the model's sigmoid output,
# accuracy as the reported metric.
model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=tf.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

# Train, validating on the batched test set after every epoch.
model.fit(db_train, epochs=epochs, validation_data=db_test)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0xb3a170668>

In [17]:
# Evaluate the trained single-layer model on the batched test set; with the
# compile() settings above the result is [loss, accuracy].
model.evaluate(db_test)




[0.5492052810314374, 0.8147035]

### 多层RNN

In [22]:
class MyRNN2(keras.Model):
    """Two-layer stacked SimpleRNN sentiment classifier, unrolled by hand over time.

    Data flow: token ids [b, seq] -> Embedding [b, seq, embedding_len]
    -> SimpleRNNCell #0 -> SimpleRNNCell #1 (both stepped once per time step,
    [b, units]) -> Dense(1) [b, 1] -> sigmoid, i.e. P(y is positive | x).

    Relies on the notebook-level constants ``total_words``, ``embedding_len``
    and ``max_review_len``.
    """

    def __init__(self, units):
        """
        :param units: size of the hidden state of each recurrent layer (h_dim)
        """
        super(MyRNN2, self).__init__()
        # Kept so call() can build zero initial states that match whatever batch
        # it receives, instead of tensors frozen to the global batch_size.
        self.units = units

        # [b, 80] => [b, 80, 100]: each sentence has at most 80 tokens and each
        # token is represented by a 100-dimensional vector.
        self.embedding = keras.layers.Embedding(total_words,  # vocabulary size (input dimension)
                                                embedding_len,  # vector length per token
                                                input_length=max_review_len,  # sentence length
                                                )
        # First recurrent layer: folds each 100-d word vector into a
        # `units`-sized hidden state.
        # NOTE(review): when cells are driven manually like this, Keras may reuse
        # a cached dropout mask across calls -- confirm whether
        # reset_dropout_mask() should be called per forward pass.
        self.rnn_cell0 = keras.layers.SimpleRNNCell(units=units, dropout=0.5)
        # Second recurrent layer, stacked on the output of the first.
        self.rnn_cell1 = keras.layers.SimpleRNNCell(units=units, dropout=0.5)

        # Overall shapes: [b, 80, 100] => [b, units] => [b, units] => [b, 1]
        # (embedding layer -> two SimpleRNNCell layers -> output layer).
        self.out_layer = keras.layers.Dense(1)

    def call(self, inputs, training=None):
        """
        Forward pass.

        net(x) / net(x, training=True): train mode
        net(x, training=False): test mode
        Dropout is only active in training mode.

        :param inputs: token ids, [b, 80] (b sentences of 80 tokens each)
        :param training: forwarded to both RNN cells to switch dropout on/off
        :return: [b, 1] probabilities P(y is positive | x)
        """
        # Embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(inputs)

        # Zero initial states sized from the runtime batch dimension, so partial
        # or differently sized batches work as well.
        batch = tf.shape(x)[0]
        state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
        state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]

        # unstack along the time axis gives 80 tensors of shape [b, 100] (first
        # word of every sentence, second word, ...). At each step layer 0
        # consumes the word and layer 1 consumes layer 0's output; both states
        # carry over to the next step.
        for word in tf.unstack(x, axis=1):
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            out1, state1 = self.rnn_cell1(out0, state1, training=training)

        # Output layer on the last time step of the top layer: [b, units] => [b, 1]
        x = self.out_layer(out1)
        # Squash the logit into a probability P(y is positive | x)
        prob = tf.sigmoid(x)
        return prob
        
            


In [23]:
# Hyperparameters for the two-layer model: hidden-state size and training epochs.
h_dim = 64
epochs = 6

# Build the model
model2 = MyRNN2(h_dim)

# Compile: Adam at 0.001, binary cross-entropy on the model's sigmoid output,
# accuracy as the reported metric.
model2.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=tf.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

# Train, validating on the batched test set after every epoch.
model2.fit(db_train, epochs=epochs, validation_data=db_test)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0xb3ca9edd8>

In [24]:
# Evaluate the trained two-layer model on the batched test set; with the
# compile() settings above the result is [loss, accuracy].
model2.evaluate(db_test)




[0.5510787895092597, 0.81666666]

In [None]:
# Leftover scratch cell (never executed): a bare reference to the SimpleRNN layer
# class, which only displays the class object and has no other effect.
# NOTE(review): presumably a pointer to the built-in layer as an alternative to
# the hand-unrolled cells above -- either use it or delete this cell.
keras.layers.SimpleRNN