In [1]:
import os
import trax
import trax.fastmath.numpy as np
import pickle
import itertools
import numpy
import random as rnd
from trax import fastmath
from trax import layers as tl

# Seed every random number generator this notebook draws from.
# batch_data_generator samples with numpy.random, so seeding only the stdlib
# `random` module would leave the batches different on every run.
rnd.seed(32)
numpy.random.seed(32)

In [2]:
dirname = 'data/'
lines = []
# os.listdir returns entries in arbitrary order. The eval split taken later is
# "the last 1000 lines", so the listing is sorted to make that split the same
# on every machine and every run.
for filename in sorted(os.listdir(dirname)):
    path = os.path.join(dirname, filename)
    # Skip sub-directories (for example .ipynb_checkpoints); open() would fail on them.
    if not os.path.isfile(path):
        continue
    with open(path) as text_file:
        for line in text_file:
            # remove leading and trailing whitespace
            pure_line = line.strip()
            # keep only non-empty lines, lower-cased
            if pure_line:
                lines.append(pure_line.lower())

print(f"Number of lines: {len(lines)}")
print(lines[100])

Number of lines: 125097
hath beaten down young hotspur and his troops,


## Split the data into training and evaluation sets

In [3]:
# Hold out the final 1000 lines for evaluation; everything before them is the
# training set. Both slices are taken from the same `lines` before either name
# is rebound.
# Caution: `lines` is overwritten, so running this cell a second time removes
# another 1000 lines from the training set.
eval_lines, lines = lines[-1000:], lines[:-1000]

print(f"Number of lines for training: {len(lines)}")
print(f"Number of lines for validation: {len(eval_lines)}")

Number of lines for training: 124097
Number of lines for validation: 1000


## Convert each line to a tensor of character codes (one `ord()` value per character)

In [4]:
def line_to_tensor(line, EOS_int=1):
    """Encode a string as a list of integer character codes followed by EOS.

    Args:
        line (str): Text to encode.
        EOS_int (int): Value appended after the last character to mark the
            end of the sequence.

    Returns:
        list[int]: ``ord(c)`` for every character of ``line``, then ``EOS_int``.
    """
    char_codes = [ord(ch) for ch in line]
    return char_codes + [EOS_int]

In [5]:
# Sanity check: every character is mapped to its ord() value and the default EOS value (1) is appended.
line_to_tensor('I like deeplearning!')

[73,
 32,
 108,
 105,
 107,
 101,
 32,
 100,
 101,
 101,
 112,
 108,
 101,
 97,
 114,
 110,
 105,
 110,
 103,
 33,
 1]

## batch data generator

In [6]:
def batch_data_generator(batch_size, max_length, data_lines, line_to_tensor=line_to_tensor):
    """Yield an endless stream of padded batches built from ``data_lines``.

    Only lines with ``len(line) < max_length`` are used, so that the encoded
    line plus its end-of-sequence token fits in ``max_length`` positions.
    Lines are drawn without replacement from a shuffled pool; when the pool
    holds fewer than ``batch_size`` lines it is reshuffled from the full set
    of eligible lines (the few leftover lines of that pass are dropped).
    If there are fewer eligible lines than ``batch_size``, every batch is
    sampled with replacement instead so a full batch can still be produced.

    Args:
        batch_size (int): Number of lines per batch.
        max_length (int): Length every encoded line is padded to with zeros.
        data_lines (list[str]): Lines to draw from.
        line_to_tensor (callable): Maps a line to a list of ints.

    Yields:
        tuple: ``(inputs, targets, mask)``. ``inputs`` and ``targets`` are the
        same array of padded lines; ``mask`` has 1 where the padded line is
        non-zero and 0 elsewhere.

    Raises:
        ValueError: On the first ``next()`` if no line is shorter than
            ``max_length``.
    """
    # Filter once: the set of eligible lines does not change between batches.
    eligible = numpy.array(
        [i for i, line in enumerate(data_lines) if len(line) < max_length],
        dtype=int,
    )
    if eligible.size == 0:
        raise ValueError("no line in data_lines is shorter than max_length")

    pool = eligible[:0]  # empty, so the first batch triggers a shuffle
    while True:
        if eligible.size < batch_size:
            # Not enough distinct lines for one batch: sample with replacement.
            batch_index = numpy.random.choice(eligible, batch_size)
        else:
            if pool.size < batch_size:
                pool = numpy.random.permutation(eligible)
            # Take the next batch and remove the used indices from the pool.
            batch_index, pool = pool[:batch_size], pool[batch_size:]

        batch_ = []
        mask = []
        for i in batch_index:
            tensor = line_to_tensor(data_lines[i])
            tensor_pad = tensor + [0] * (max_length - len(tensor))
            batch_.append(tensor_pad)
            # Positions holding 0 are treated as padding.
            mask.append([0 if t == 0 else 1 for t in tensor_pad])
        batch_np_arr = np.array(batch_)
        mask_np_arr = np.array(mask)
        yield batch_np_arr, batch_np_arr, mask_np_arr

In [7]:
# Try out the batch data generator on a few short digit strings
tmp_lines = ['12345678901', # length 11
             '123456789', # length 9
             '2345690', # length 7
             '345678901'] # length 9

# Request batches of 2 lines, each padded to max_length 15
# (all four lines are shorter than 15, so any of them may be picked)
tmp_data_gen = batch_data_generator(batch_size=2, 
                              max_length=15, 
                              data_lines=tmp_lines,
                            )

# get one batch: a tuple of (inputs, targets, mask)
tmp_batch = next(tmp_data_gen)

# view the batch
tmp_batch



(DeviceArray([[51, 52, 53, 54, 55, 56, 57, 48, 49,  1,  0,  0,  0,  0,  0],
              [50, 51, 52, 53, 54, 57, 48,  1,  0,  0,  0,  0,  0,  0,  0]],            dtype=int32),
 DeviceArray([[51, 52, 53, 54, 55, 56, 57, 48, 49,  1,  0,  0,  0,  0,  0],
              [50, 51, 52, 53, 54, 57, 48,  1,  0,  0,  0,  0,  0,  0,  0]],            dtype=int32),
 DeviceArray([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]], dtype=int32))

## GRU model

In [12]:
def GRULM(vocab_size=256, d_model=512, n_layers=2, mode='train'):
    """Assemble the GRU language model as a ``tl.Serial`` stack.

    The stack is, in order: ``ShiftRight``, ``Embedding``, ``n_layers`` GRU
    layers, ``Dense`` and ``LogSoftmax``.

    Args:
        vocab_size (int): Size of the embedding vocabulary and number of
            output units of the final dense layer.
        d_model (int): Embedding feature size and number of units in each GRU.
        n_layers (int): Number of GRU layers.
        mode (str): Passed to ``tl.ShiftRight``.

    Returns:
        The ``tl.Serial`` model.
    """
    shift = tl.ShiftRight(mode=mode)
    embedding = tl.Embedding(vocab_size=vocab_size, d_feature=d_model)
    gru_stack = [tl.GRU(n_units=d_model) for _ in range(n_layers)]
    projection = tl.Dense(n_units=vocab_size)
    log_softmax = tl.LogSoftmax()
    return tl.Serial(shift, embedding, gru_stack, projection, log_softmax)


In [13]:
# Build the model with its default hyperparameters and print its layer structure.
model = GRULM()
print(model)

Serial[
  Serial[
    ShiftRight(1)
  ]
  Embedding_256_512
  GRU_512
  GRU_512
  Dense_256
  LogSoftmax
]


## train model

In [38]:
from trax.supervised import training


def _restarting(make_generator):
    """Yield from ``make_generator()`` forever, building a fresh generator each time one ends."""
    while True:
        produced = False
        for item in make_generator():
            produced = True
            yield item
        if not produced:
            # An empty generator would otherwise make this loop spin forever.
            return


def train_model(model, data_generator, batch_size=32, max_length=64, lines=lines, eval_lines=eval_lines, n_steps=1, output_dir='model/'):
    """Train ``model`` with a trax training loop and return the loop.

    Args:
        model: The trax model to train.
        data_generator (callable): Called as
            ``data_generator(batch_size, max_length, data_lines=...)``; must
            return an iterator of batches.
        batch_size (int): Lines per batch.
        max_length (int): Padded length of each line.
        lines (list[str]): Training lines. The default is the value of the
            global ``lines`` at the time this function was defined.
        eval_lines (list[str]): Evaluation lines. The default is the value of
            the global ``eval_lines`` at the time this function was defined.
        n_steps (int): Number of training steps to run.
        output_dir (str): Directory handed to ``training.Loop``.

    Returns:
        training.Loop: The loop after ``run(n_steps)`` has completed.
    """
    # itertools.cycle keeps a copy of every item it has yielded so it can
    # replay them. Wrapped around a generator that never ends, that cache only
    # grows. _restarting gives the same "never runs out" guarantee for finite
    # generators without storing any batches.
    train_stream = _restarting(
        lambda: data_generator(batch_size, max_length, data_lines=lines))
    eval_stream = _restarting(
        lambda: data_generator(batch_size, max_length, data_lines=eval_lines))

    train_task = training.TrainTask(
        labeled_data=train_stream,
        loss_layer=tl.CrossEntropyLoss(),
        optimizer=trax.optimizers.Adam(0.0005)
    )

    eval_task = training.EvalTask(
        labeled_data=eval_stream,
        metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
        n_eval_batches=3
    )

    training_loop = training.Loop(model,
                                  tasks=train_task,
                                  eval_tasks=eval_task,
                                  output_dir=output_dir)

    training_loop.run(n_steps=n_steps)

    return training_loop


In [43]:
# Train a new GRULM for 500 steps, using train_model's default batch size, max length and output_dir.
# NOTE(review): the log below starts at step 20400, so this run presumably resumed from an
# existing checkpoint in the output directory rather than starting from step 0 — confirm.
training_loop = train_model(GRULM(),batch_data_generator,n_steps=500)


Step  20400: Ran 100 train steps in 46.80 secs
Step  20400: train CrossEntropyLoss |  1.32728612
Step  20400: eval  CrossEntropyLoss |  1.39446664
Step  20400: eval          Accuracy |  0.55682667

Step  20500: Ran 100 train steps in 44.33 secs
Step  20500: train CrossEntropyLoss |  1.28492093
Step  20500: eval  CrossEntropyLoss |  1.35898980
Step  20500: eval          Accuracy |  0.57404141

Step  20600: Ran 100 train steps in 44.14 secs
Step  20600: train CrossEntropyLoss |  1.26225626
Step  20600: eval  CrossEntropyLoss |  1.33000886
Step  20600: eval          Accuracy |  0.58211164

Step  20700: Ran 100 train steps in 44.08 secs
Step  20700: train CrossEntropyLoss |  1.24394274
Step  20700: eval  CrossEntropyLoss |  1.35478957
Step  20700: eval          Accuracy |  0.57309226

Step  20800: Ran 100 train steps in 44.08 secs
Step  20800: train CrossEntropyLoss |  1.23405206
Step  20800: eval  CrossEntropyLoss |  1.26914660
Step  20800: eval          Accuracy |  0.59417073


In [44]:
def test_model(preds, target):
    """Compute the log perplexity of predictions against targets, ignoring padding.

    Args:
        preds: Model outputs whose last axis indexes the vocabulary; the
            value at the target id is summed as that position's log-probability.
        target: Integer token ids. Positions equal to 0 are treated as
            padding and excluded from the average.

    Returns:
        The negated mean, over non-padding positions, of the prediction value
        at each target id.
    """
    # Pick out, for every position, the predicted value at the target id.
    target_one_hot = tl.one_hot(target, preds.shape[-1])
    target_log_probs = np.sum(preds * target_one_hot, axis=-1)

    # 1.0 for real tokens, 0.0 where the target is padding.
    non_pad = 1.0 - np.equal(target, 0)
    masked_log_probs = target_log_probs * non_pad

    mean_log_prob = np.sum(masked_log_probs) / np.sum(non_pad)
    return -mean_log_prob

In [45]:
# Rebuild the model and load the trained weights saved by the training loop.
model = GRULM()
batch_size = 32
max_length = 64
model.init_from_file('model/model.pkl.gz')

# Create the generator once and keep pulling batches from it; building a new
# generator for every batch would rescan all of `lines` each time.
# NOTE(review): batches are drawn from `lines`, which is the training split at
# this point, so these figures are not held-out perplexity. Pass `eval_lines`
# instead if a validation number is wanted.
ppx_batches = batch_data_generator(batch_size, max_length, lines)
for _ in range(10):
    batch = next(ppx_batches)
    preds = model(batch[0])
    log_ppx = test_model(preds, batch[1])
    print('The log perplexity and perplexity of your model are respectively', log_ppx, np.exp(log_ppx))

The log perplexity and perplexity of your model are respectively 1.2325175 3.4298532
The log perplexity and perplexity of your model are respectively 1.2144725 3.368517
The log perplexity and perplexity of your model are respectively 1.1800188 3.2544353
The log perplexity and perplexity of your model are respectively 1.2031776 3.3306837
The log perplexity and perplexity of your model are respectively 1.2331581 3.4320512
The log perplexity and perplexity of your model are respectively 1.2275819 3.4129665
The log perplexity and perplexity of your model are respectively 1.3284045 3.7750158
The log perplexity and perplexity of your model are respectively 1.1782154 3.2485716
The log perplexity and perplexity of your model are respectively 1.2500899 3.4906566
The log perplexity and perplexity of your model are respectively 1.1415116 3.1314983
