In [1]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import time
import read_func

In [2]:
class Model(object):
    
    def __init__(self):
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.hidden_size_l1 = hidden_size_l1
        self.hidden_size_l2 = hidden_size_l2
        self.vocab_size = vocab_size
        self.embedding_vector_size = embedding_vector_size
        self.lr = 0.0
        self.max_grad_norm=max_grad_norm
        
        self.model=keras.Sequential()
        with tf.device('/device:XLA_GPU:0'):
            self.embedding_layer=keras.layers.Embedding(self.vocab_size,self.embedding_vector_size,\
                                           batch_input_shape=(self.batch_size,self.num_steps),trainable=True,name='embedding')
            
        lr_1=keras.layers.LSTMCell(self.hidden_size_l1)
        lr_2=keras.layers.LSTMCell(self.hidden_size_l2)
        stacked=keras.layers.StackedRNNCells([lr_1,lr_2])
        self.rnn_layer=keras.layers.RNN(stacked,[self.batch_size,self.num_steps],return_state=False,trainable=True,\
                                        stateful=True)
        
        initial_state=tf.Variable((self.batch_size,embedding_vector_size),trainable=False)
        self.rnn_layer.initial_state=initial_state
        
        self.dense_layer=keras.layers.Dense(self.vocab_size)
        
        self.activation=keras.layers.Activation('softmax')
        self.optimizer=keras.optimizers.SGD(self.lr,clipnorm=self.max_grad_norm)
        
        self.model.add(self.embedding_layer)
        self.model.add(self.rnn_layer)
        self.model.add(self.dense_layer)
        self.model.add(self.activation)
        self.model.compile(self.optimizer,self.crossentropy)
        self.model.summary()
    
    def crossentropy(self,ytrue,ypred):
        return keras.losses.sparse_categorical_crossentropy(ytrue,ypred)
    
    def train_batch(self,input_,target_):
        t_vars=self.model.trainable_variables
        
        with tf.GradientTape() as tape:
            output=self.model(input_)
            loss=self.crossentropy(target_,output)
            cost=tf.reduce_sum(loss)/batch_size
            
        grads=tape.gradient(cost,t_vars)
        clipped,value=tf.clip_by_global_norm(grads,self.max_grad_norm)
        self.optimizer.apply_gradients(zip(clipped,t_vars))
        return cost
    
    def test_batch(self,input_,target_):
        output=self.model(input_)
        loss=self.crossentropy(target_,output)
        cost=tf.reduce_sum(loss)/batch_size
        return cost
        
    @classmethod
    def instance(cls):
        return Model()
        

In [3]:
def run_one_epoch(m,data,is_training=True,verbose=True):
    iters=0
    cost=0
    epoch_size=(len(data)//m.batch_size)//m.num_steps
    start=time.time()
    
    for step,(x,y) in enumerate(read_func.data_to_batch(data,m.batch_size,m.num_steps)):
        if is_training:
            loss=m.train_batch(x,y)
        else:
            loss=m.test_batch(x,y)
        
        cost+=loss
        iters+=m.num_steps
    
        if verbose and (step%(epoch_size//10))==10:
            print("Iterations===> %d/%d    Perplexity===> %.2f    speed===> %.3f wps"%(step,epoch_size,np.exp(cost/iters),\
                                                                                    (iters*m.batch_size)/(time.time()-start)))
    
    return np.exp(cost/iters)

In [4]:
batch_size=30
num_steps=20
hidden_size_l1=256
hidden_size_l2=128
vocab_size=10000
embedding_vector_size=200
max_grad_norm=5

init_lr=1
decay=0.5
max_epoch_decay_lr = 4
max_epoch=15

In [5]:
files=['ptb.train.txt','ptb.valid.txt','ptb.test.txt']
vocab=read_func.build_vocab(files[0])
train_data=read_func.reader(files[0],vocab)
valid_data=read_func.reader(files[1],vocab)
test_data=read_func.reader(files[2],vocab)
m=Model.instance()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (30, 20, 200)             2000000   
_________________________________________________________________
rnn (RNN)                    (30, 20, 128)             665090    
_________________________________________________________________
dense (Dense)                (30, 20, 10000)           1290000   
_________________________________________________________________
activation (Activation)      (30, 20, 10000)           0         
Total params: 3,955,090
Trainable params: 3,955,088
Non-trainable params: 2
_________________________________________________________________


In [8]:
K=tf.keras.backend
for i in range(0,max_epoch):
    
    decay_rate=decay**max(i-max_epoch_decay_lr,0)
    final_lr=init_lr*decay_rate
    K.set_value(m.model.optimizer.learning_rate,final_lr)
    print('Epoch===> %d    Learning_rate===>%.3f'%(i+1,m.model.optimizer.learning_rate))
    
    
    prp=run_one_epoch(m,train_data,verbose=False)
    print('Final_Perplexity===>%.2f'%(prp))
    
    
    prp=run_one_epoch(m,valid_data,is_training=False,verbose=False)
    print('Valid_Perplexity===>%.2f'%(prp))

Epoch===> 1    Learning_rate===>1.00
Final_Perplexity===>359.99
Valid_Perplexity===>213.90
Epoch===> 2    Learning_rate===>1.00
Final_Perplexity===>165.27
Valid_Perplexity===>162.33
Epoch===> 3    Learning_rate===>1.00
Final_Perplexity===>125.70
Valid_Perplexity===>145.74
Epoch===> 4    Learning_rate===>1.00
Final_Perplexity===>106.35
Valid_Perplexity===>139.25
Epoch===> 5    Learning_rate===>1.00
Final_Perplexity===>94.20
Valid_Perplexity===>136.20
Epoch===> 6    Learning_rate===>0.50
Final_Perplexity===>75.62
Valid_Perplexity===>126.01
Epoch===> 7    Learning_rate===>0.25
Final_Perplexity===>64.21
Valid_Perplexity===>123.28
Epoch===> 8    Learning_rate===>0.12
Final_Perplexity===>58.12
Valid_Perplexity===>122.64
Epoch===> 9    Learning_rate===>0.06
Final_Perplexity===>54.99
Valid_Perplexity===>122.57
Epoch===> 10    Learning_rate===>0.03
Final_Perplexity===>53.33
Valid_Perplexity===>122.57
Epoch===> 11    Learning_rate===>0.02
Final_Perplexity===>52.43
Valid_Perplexity===>122.49
Epoc

In [23]:
keras.models.save_model(m.model,'./rnn_model/')



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets
