In [1]:
import os

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) log verbosity.
# It has to be in the environment before tensorflow is imported to reliably
# take effect, so it is set ahead of the TensorFlow imports below.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,Sequential,optimizers

# The rest of the notebook uses TensorFlow 2.x APIs.
assert tf.__version__.startswith('2.')

# Seed TensorFlow and NumPy so runs are repeatable.
tf.random.set_seed(22)
np.random.seed(22)

In [41]:
# Hyper-parameters shared by the data pipeline and the model.
total_words = 10000     # vocabulary size passed to load_data as num_words
max_review_len = 80     # every review is padded/truncated to this many tokens
embedding_len = 100     # dimensionality of each word embedding

# IMDB reviews arrive as sequences of integer word ids with 0/1 labels.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=total_words
)

# Bring both splits to a fixed length so they can be batched:
# x_train: [b, 80], x_test: [b, 80]
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(reviews, maxlen=max_review_len)
    for reviews in (x_train, x_test)
)

In [47]:
batchsz = 128

# Training pipeline: slice into (review, label) pairs, shuffle with a
# 1000-element buffer, then batch. drop_remainder=True keeps every batch at
# exactly batchsz rows.
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(1000)
    .batch(batchsz, drop_remainder=True)
)

# Evaluation pipeline: same batching, no shuffling.
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batchsz, drop_remainder=True)
)

# Sanity check: array shapes and the range of word ids in the training set.
print('x_train shape:',x_train.shape,tf.reduce_max(x_train),tf.reduce_min(x_train))
print('x_test shape:',x_test.shape)


x_train shape: (25000, 80) tf.Tensor(9999, shape=(), dtype=int32) tf.Tensor(0, shape=(), dtype=int32)
x_test shape: (25000, 80)


In [48]:
class MyRNN(keras.Model):
    """Two stacked SimpleRNN cells, unrolled by hand, for binary classification.

    Word ids are embedded, fed one time step at a time through two
    SimpleRNNCell layers, and the final output of the second cell is mapped
    by a Dense layer followed by a sigmoid to a probability.
    """

    def __init__(self,units):
        """
        :param units: hidden size of each SimpleRNNCell
        """
        super(MyRNN,self).__init__()

        # Kept so call() can build zero initial states for whatever batch
        # size it receives, instead of freezing the batch size here.
        self.units=units

        # transform text to embedding representation
        # [b,80]-->[b,80,embedding_len]
        self.embedding=layers.Embedding(total_words,embedding_len,input_length=max_review_len)

        # two stacked recurrent cells, each with hidden size `units`
        # and input dropout of 0.2
        self.rnn_cell0=layers.SimpleRNNCell(units,dropout=0.2)
        self.rnn_cell1=layers.SimpleRNNCell(units,dropout=0.2)

        # FC: [b,units]->[b,1]
        self.outlayer=layers.Dense(1)


    def call(self,inputs,training=None):
        """
        Run the forward pass.

        net(x,training=True): training mode (cell dropout active)
        net(x,training=False): inference mode

        :param inputs: [b,80] integer word ids
        :param training: forwarded to both RNN cells
        :return: [b,1] sigmoid probability
        """
        # embedding: [b,80]->[b,80,embedding_len]
        x=self.embedding(inputs)

        # Zero initial hidden states sized from the actual batch, so the
        # model is not tied to one fixed batch size.
        batch=tf.shape(x)[0]
        state0=[tf.zeros([batch,self.units],dtype=x.dtype)]
        state1=[tf.zeros([batch,self.units],dtype=x.dtype)]

        # unroll over the time axis; word: [b,embedding_len]
        for word in tf.unstack(x,axis=1):
            # each cell returns (output, new_states); the states are carried
            # forward to the next time step
            out0,state0=self.rnn_cell0(word,state0,training=training)
            out,state1=self.rnn_cell1(out0,state1,training=training)

        # last-step output of the top cell: [b,units]->[b,1]
        x=self.outlayer(out)

        # squash to a probability with a sigmoid
        prob=tf.sigmoid(x)

        return prob

In [49]:
def main():
    """Build a MyRNN, train it on db_train and evaluate it on db_test."""
    hidden_units, n_epochs = 100, 5

    net = MyRNN(hidden_units)

    # The model outputs sigmoid probabilities, so plain binary cross-entropy
    # is used as the loss.
    net.compile(
        optimizer=optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
        experimental_run_tf_function=False,
    )

    # Validate on the test pipeline after every epoch, then run one final
    # evaluation pass over it.
    net.fit(db_train, epochs=n_epochs, validation_data=db_test)
    net.evaluate(db_test)


main()
    

Epoch 1/5
Epoch 2/5
Epoch 3/5
 24/195 [==>...........................] - ETA: 9s - loss: 0.3121 - accuracy: 0.8618

KeyboardInterrupt: 