# Lab 12-3 many to many
### simple pos-tagger training 
* many to many
* variable input sequence length

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.nn import embedding_lookup
from tensorflow.nn.rnn_cell import *
from tensorflow.nn import dynamic_rnn
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
%matplotlib inline

print(tf.__version__)

1.12.0


### Preparing dataset

In [2]:
# Toy corpus: each sentence is a list of tokens, and `pos` holds the
# part-of-speech tag for every token at the same position.
sentences = [text.split() for text in (
    'I feel hungry',
    'tensorflow is very difficult',
    'tensorflow is a framework for deep learning',
    'tensorflow is very fast changing',
)]
pos = [tags.split() for tags in (
    'pronoun verb adjective',
    'noun verb adverb adjective',
    'noun verb determiner noun preposition adjective noun',
    'noun verb adverb adjective verb',
)]

### Preprocessing dataset

In [3]:
# Word vocabulary: every distinct token in sorted order, with '<pad>'
# placed first so that it receives index 0.
word_list = ['<pad>'] + sorted({token for sentence in sentences for token in sentence})

# Forward (token -> index) and reverse (index -> token) lookup tables.
word2idx = dict(zip(word_list, range(len(word_list))))
idx2word = dict(enumerate(word_list))

print(word2idx, idx2word, len(idx2word), sep='\n')

{'<pad>': 0, 'I': 1, 'a': 2, 'changing': 3, 'deep': 4, 'difficult': 5, 'fast': 6, 'feel': 7, 'for': 8, 'framework': 9, 'hungry': 10, 'is': 11, 'learning': 12, 'tensorflow': 13, 'very': 14}
{0: '<pad>', 1: 'I', 2: 'a', 3: 'changing', 4: 'deep', 5: 'difficult', 6: 'fast', 7: 'feel', 8: 'for', 9: 'framework', 10: 'hungry', 11: 'is', 12: 'learning', 13: 'tensorflow', 14: 'very'}
15


In [4]:
# Part-of-speech vocabulary: every distinct tag in sorted order, with
# '<pad>' placed first so that it receives index 0.
pos_list = ['<pad>'] + sorted({tag for tags in pos for tag in tags})

# Forward (tag -> index) and reverse (index -> tag) lookup tables.
pos2idx = dict(zip(pos_list, range(len(pos_list))))
idx2pos = dict(enumerate(pos_list))

print(pos2idx, idx2pos, len(pos2idx), sep='\n')

{'<pad>': 0, 'adjective': 1, 'adverb': 2, 'determiner': 3, 'noun': 4, 'preposition': 5, 'pronoun': 6, 'verb': 7}
{0: '<pad>', 1: 'adjective', 2: 'adverb', 3: 'determiner', 4: 'noun', 5: 'preposition', 6: 'pronoun', 7: 'verb'}
8


In [5]:
# Every sequence is post-padded with zeros up to this many steps.
max_sequence = 10

# Inputs: token indices, padded; plus a 0/1 float mask and the true lengths.
x_data = [[word2idx.get(token) for token in sentence] for sentence in sentences]
x_data = pad_sequences(sequences=x_data, maxlen=max_sequence, padding='post')
x_data_mask = (x_data != 0).astype(np.float32)
x_data_len = np.array([len(sentence) for sentence in sentences])

# Targets: tag indices, padded the same way. They are kept as integer
# class indices (no one-hot encoding is applied).
y_data = [[pos2idx.get(tag) for tag in tags] for tags in pos]
y_data = pad_sequences(sequences=y_data, maxlen=max_sequence, padding='post')

### Creating model

In [6]:
class Model:
    """Many-to-many RNN tagger assembled as a TensorFlow 1.x graph.

    Token ids are looked up in a fixed (non-trainable) one-hot table, fed
    through a tanh BasicRNNCell whose output is projected to `num_classes`
    scores, and trained with a sequence loss weighted by a length mask.
    """

    def __init__(self, x, x_len, y, hidden_dim, num_classes, max_sequence, dic):
        """Build the forward pass, the loss and the prediction op.

        Args:
            x: token ids, used as `ids` for the embedding lookup.
            x_len: sequence lengths, used by dynamic_rnn and the loss mask.
            y: targets handed to the sequence loss.
            hidden_dim: number of units of the RNN cell.
            num_classes: output size of the projection on top of the cell.
            max_sequence: `maxlen` of the loss mask.
            dic: token vocabulary; only its length is used to size the
                one-hot table.
        """
        # keep references to the graph inputs so predict() can feed them
        self.x, self.x_len, self.y, self.dic = x, x_len, y, dic

        # token representation: identity matrix rows act as one-hot vectors
        vocab_size = len(self.dic)
        one_hot_table = tf.Variable(np.eye(vocab_size), dtype=tf.float32, trainable=False)
        token_vectors = embedding_lookup(params=one_hot_table, ids=self.x)

        # recurrent layer followed by a projection to class scores
        recurrent_cell = BasicRNNCell(num_units=hidden_dim, activation=tf.nn.tanh)
        projected_cell = tf.contrib.rnn.OutputProjectionWrapper(cell=recurrent_cell,
                                                                output_size=num_classes)
        rnn_result = dynamic_rnn(cell=projected_cell, inputs=token_vectors,
                                 sequence_length=self.x_len, dtype=tf.float32)
        self.outputs = rnn_result[0]  # the final state is not needed

        # loss: the mask is built from the sequence lengths and passed as weights
        valid_steps = tf.sequence_mask(lengths=self.x_len, maxlen=max_sequence,
                                       dtype=tf.float32)
        self.sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=self.outputs,
                                                              targets=self.y,
                                                              weights=valid_steps)

        # prediction: highest-scoring class along the last axis
        self.prediction = tf.argmax(input=self.outputs, axis=-1)

    def predict(self, sess, x, x_len):
        """Evaluate the prediction op in `sess`, feeding `x` and `x_len`.

        Args:
            sess: session object whose `run` method evaluates the op.
            x: value fed in place of `self.x`.
            x_len: value fed in place of `self.x_len`.

        Returns:
            Whatever `sess.run` returns for `self.prediction`.
        """
        return sess.run(self.prediction, feed_dict={self.x: x, self.x_len: x_len})

### Training model

In [7]:
# Hyperparameters for training
lr = .1
epochs = 30
batch_size = 2

# Data pipeline: slice the padded arrays per example, shuffle with a buffer
# covering the whole dataset, and group the examples into mini-batches of
# `batch_size` (previously the literal 2 was repeated here and `batch_size`
# was left unused).
tr_dataset = tf.data.Dataset.from_tensor_slices((x_data,y_data,x_data_len))
tr_dataset = tr_dataset.shuffle(buffer_size = len(x_data))
tr_dataset = tr_dataset.batch(batch_size = batch_size)

print(tr_dataset)
# An initializable iterator must be (re)initialized before each pass over the data.
tr_iterator = tr_dataset.make_initializable_iterator()
x_mb, y_mb, x_mb_len = tr_iterator.get_next()

# Build the model directly on the iterator's output tensors; the loss mask
# length reuses `max_sequence`, the same value the data was padded to.
model = Model(x=x_mb, x_len=x_mb_len, y=y_mb, hidden_dim=10, num_classes=len(pos2idx),
              max_sequence=max_sequence, dic=word2idx)

<BatchDataset shapes: ((?, 10), (?, 10), (?,)), types: (tf.int32, tf.int32, tf.int64)>
Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.


In [8]:
# Adam optimizer with the learning rate `lr`; `training_op` applies one
# update step that minimizes the model's sequence loss.
opt = tf.train.AdamOptimizer(learning_rate=lr)
training_op = opt.minimize(model.sequence_loss)

In [9]:
# Session with the GPU option allow_growth=True; it is kept open because
# later cells reuse `sess` for prediction.
sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config = sess_config)
sess.run(tf.global_variables_initializer())

# Mean training loss of every epoch, in order (one entry per epoch).
tr_loss_hist = []

for epoch in range(epochs):
    avg_tr_loss = 0
    tr_step = 0

    # training: restart the iterator, then consume mini-batches until it
    # signals the end of the data with OutOfRangeError
    sess.run(tr_iterator.initializer)
    try:
        while True:
            _, tr_loss = sess.run([training_op, model.sequence_loss])
            avg_tr_loss += tr_loss
            tr_step += 1
    except tf.errors.OutOfRangeError:
        pass

    # turn the accumulated loss into a mean; the guard avoids dividing by
    # zero when no step was executed
    if tr_step > 0:
        avg_tr_loss /= tr_step
    tr_loss_hist.append(avg_tr_loss)

    if (epoch + 1) % 5 == 0:
        print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch+1, avg_tr_loss))

epoch :   5, tr_loss : 0.247
epoch :  10, tr_loss : 0.018
epoch :  15, tr_loss : 0.004
epoch :  20, tr_loss : 0.001
epoch :  25, tr_loss : 0.001
epoch :  30, tr_loss : 0.001


### Checking performance

In [10]:
# Predict tag indices for the (padded) training sentences themselves.
yhat = model.predict(sess=sess, x=x_data, x_len=x_data_len)

In [11]:
# Display the predicted tag indices returned by model.predict.
yhat

array([[6, 7, 1, 0, 0, 0, 0, 0, 0, 0],
       [4, 7, 2, 1, 0, 0, 0, 0, 0, 0],
       [4, 7, 3, 4, 5, 1, 4, 0, 0, 0],
       [4, 7, 2, 1, 7, 0, 0, 0, 0, 0]])

In [12]:
# Display the padded target tag indices for comparison with yhat.
y_data

array([[6, 7, 1, 0, 0, 0, 0, 0, 0, 0],
       [4, 7, 2, 1, 0, 0, 0, 0, 0, 0],
       [4, 7, 3, 4, 5, 1, 4, 0, 0, 0],
       [4, 7, 2, 1, 7, 0, 0, 0, 0, 0]], dtype=int32)