# Lab 12-4 many to many variable
### simple pos-tagger training 
* many to many
* variable input sequence length

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
from tensorflow.keras.utils import to_categorical

# Report the installed TensorFlow version (this notebook uses the TF 1.x API).
print(tf.__version__)
# Turn on eager execution so tensors are evaluated immediately; the training
# loop further down iterates the dataset and uses tf.GradientTape directly.
tf.enable_eager_execution()

1.12.0


### Preparing dataset

In [2]:
# Toy training corpus: four tokenized sentences of different lengths.
sentences = [
    ['I', 'feel', 'hungry'],
    ['tensorflow', 'is', 'very', 'difficult'],
    ['tensorflow', 'is', 'a', 'framework', 'for', 'deep', 'learning'],
    ['tensorflow', 'is', 'very', 'fast', 'changing'],
]

# Part-of-speech tags, aligned one-to-one with the tokens in `sentences`.
pos = [
    ['pronoun', 'verb', 'adjective'],
    ['noun', 'verb', 'adverb', 'adjective'],
    ['noun', 'verb', 'determiner', 'noun', 'preposition', 'adjective', 'noun'],
    ['noun', 'verb', 'adverb', 'adjective', 'verb'],
]

### Preprocessing dataset

In [3]:
# Word vocabulary: index 0 is reserved for the '<pad>' token and the distinct
# words of the corpus follow in sorted order.
word_list = ['<pad>'] + sorted({token for sentence in sentences for token in sentence})
word2idx = dict(zip(word_list, range(len(word_list))))
idx2word = dict(enumerate(word_list))

print(word2idx)
print(idx2word)
print(len(idx2word))

{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}
{0: '<pad>', 1: 'I', 2: 'a', 3: 'changing', 4: 'deep', 5: 'difficult', 6: 'fast', 7: 'feel', 8: 'for', 9: 'framework', 10: 'hungry', 11: 'is', 12: 'learning', 13: 'tensorflow', 14: 'very'}
15


In [4]:
# Part-of-speech vocabulary: index 0 is reserved for the '<pad>' tag and the
# distinct tags of the corpus follow in sorted order.
pos_list = ['<pad>'] + sorted({tag for tags in pos for tag in tags})
pos2idx = dict(zip(pos_list, range(len(pos_list))))
idx2pos = dict(enumerate(pos_list))

print(pos2idx)
print(idx2pos)
print(len(pos2idx))

{'<pad>': 0, 'adjective': 1, 'adverb': 2, 'determiner': 3, 'noun': 4, 'preposition': 5, 'pronoun': 6, 'verb': 7}
{0: '<pad>', 1: 'adjective', 2: 'adverb', 3: 'determiner', 4: 'noun', 5: 'preposition', 6: 'pronoun', 7: 'verb'}
8


In [5]:
# Fixed length every sequence is padded to (zeros are appended on the right).
max_sequence = 10

# Token ids per sentence. Direct indexing raises KeyError on an unknown token
# instead of silently inserting None the way .get() would.
x_data = [[word2idx[token] for token in sentence] for sentence in sentences]
x_data = pad_sequences(sequences=x_data, maxlen=max_sequence, padding='post')
# True (unpadded) length of each sentence, kept as one single-element list per
# sentence so it batches to shape (batch, 1).
x_data_len = [[len(sentence)] for sentence in sentences]

# Tag ids per sentence, padded the same way as the inputs.
y_data = [[pos2idx[tag] for tag in tags] for tags in pos]
y_data = pad_sequences(sequences=y_data, maxlen=max_sequence, padding='post')
# One-hot encode with an explicit class count. Without num_classes the width is
# inferred from the largest id present, so a sentence lacking the highest tag id
# would get a narrower encoding than the others.
y_data = to_categorical(y_data, num_classes=len(pos2idx))

In [6]:
# Input pipeline: slices of (token ids, one-hot tags, sentence length),
# shuffled with a buffer of 4 and grouped into mini-batches of 2.
tr_dataset = (
    tf.data.Dataset
    .from_tensor_slices((x_data, y_data, x_data_len))
    .shuffle(buffer_size=4)
    .batch(batch_size=2)
)

print(tr_dataset)

<BatchDataset shapes: ((?, 10), (?, 10, 8), (?, 1)), types: (tf.int32, tf.float32, tf.int32)>


### Creating model

In [7]:
# Size of the tag set (including '<pad>') and of the recurrent hidden state.
num_classes = len(pos2idx)
hidden_dim = 10

# The embedding table is a fixed identity matrix, so each word id is looked up
# as its one-hot vector; its width therefore equals the vocabulary size.
input_dim = output_dim = len(word2idx)
one_hot = np.eye(input_dim)

# Embedding (frozen one-hot lookup, id 0 masked) -> SimpleRNN emitting a state
# per time step -> per-time-step softmax over the tags.
model = keras.Sequential([
    keras.layers.Embedding(
        input_dim=input_dim,
        output_dim=output_dim,
        mask_zero=True,
        trainable=False,
        input_length=max_sequence,
        embeddings_initializer=keras.initializers.Constant(one_hot),
    ),
    keras.layers.SimpleRNN(units=hidden_dim, return_sequences=True),
    keras.layers.TimeDistributed(
        keras.layers.Dense(units=num_classes, activation='softmax')
    ),
])

In [8]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 10, 15)            225       
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 10, 10)            260       
_________________________________________________________________
time_distributed (TimeDistri (None, 10, 8)             88        
Total params: 573
Trainable params: 348
Non-trainable params: 225
_________________________________________________________________


### Training model

In [9]:
def loss_fn(model, x, y, x_len, max_sequence):
    """Padding-masked cross-entropy, normalized per sequence and averaged over the batch.

    Args:
        model: callable applied to `x`; its output is used as `y_pred`.
        x: batch of inputs passed to `model`.
        y: targets passed as `y_true` to the categorical cross-entropy.
        x_len: true sequence lengths with a singleton second axis (it is
            squeezed along axis 1).
        max_sequence: width of the time-step mask, i.e. the padded length.

    Returns:
        Scalar tensor: for each sequence the cross-entropy is summed over its
        valid time steps and divided by its length, then the mean over the
        batch is taken.
    """
    lengths = tf.squeeze(x_len, axis=1)
    # 1.0 for time steps inside the real sequence, 0.0 for padded ones.
    step_mask = tf.sequence_mask(lengths, maxlen=max_sequence, dtype=tf.float32)
    per_step = keras.losses.categorical_crossentropy(y_true=y, y_pred=model(x))
    per_sequence = tf.reduce_sum(per_step * step_mask, axis=-1) / tf.cast(lengths, dtype=tf.float32)
    return tf.reduce_mean(per_sequence)

In [10]:
# Optimization settings for the custom training loop.
lr = 0.1  # learning rate handed to Adam
epochs = 30  # number of passes over tr_dataset
opt = tf.train.AdamOptimizer(learning_rate = lr)

In [11]:
# Custom eager training loop: one optimizer step per mini-batch, with the mean
# mini-batch loss reported every 5 epochs.
for epoch in range(epochs):
    avg_tr_loss = 0
    tr_step = 0

    for x_mb, y_mb, x_mb_len in tr_dataset:
        with tf.GradientTape() as tape:
            tr_loss = loss_fn(model, x=x_mb, y=y_mb, x_len=x_mb_len, max_sequence=max_sequence)
        # Differentiate and update only the trainable weights. model.variables
        # also contains the embedding matrix, which was created with
        # trainable=False and must stay fixed.
        grads = tape.gradient(target=tr_loss, sources=model.trainable_variables)
        opt.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
        avg_tr_loss += tr_loss
        tr_step += 1

    # Plain statement instead of a for/else: the loop never breaks, so the
    # else branch always ran anyway.
    avg_tr_loss /= tr_step
    if (epoch + 1) % 5 == 0:
        print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))

epoch :   5, tr_loss : 0.322
epoch :  10, tr_loss : 0.046
epoch :  15, tr_loss : 0.008
epoch :  20, tr_loss : 0.003
epoch :  25, tr_loss : 0.002
epoch :  30, tr_loss : 0.001


### Checking performance

In [12]:
# Most probable tag id at every time step of every training sentence.
# The loss masks padded time steps, so predictions beyond a sentence's true
# length (see x_data_len) were never trained and should be ignored.
yhat = np.argmax(model.predict(x_data), axis=-1)
print(yhat)

[[6 7 1 1 1 1 1 1 1 1]
 [4 7 2 1 1 1 1 1 1 1]
 [4 7 3 4 5 1 4 4 4 4]
 [4 7 2 1 7 7 7 7 7 7]]


In [13]:
# True sentence lengths: only the first x_data_len[i] entries of row i are real tokens.
print(x_data_len)

[[3], [4], [7], [5]]


In [14]:
# Ground-truth tag ids recovered from the one-hot labels (0 is the '<pad>' tag).
np.argmax(y_data, axis=-1)

array([[6, 7, 1, 0, 0, 0, 0, 0, 0, 0],
       [4, 7, 2, 1, 0, 0, 0, 0, 0, 0],
       [4, 7, 3, 4, 5, 1, 4, 0, 0, 0],
       [4, 7, 2, 1, 7, 0, 0, 0, 0, 0]])