In [1]:
#Seq2Seq

In [2]:
import numpy as np        #For Mathematical Operations
import tensorflow as tf   #For ML
import os #For fetching from directory

In [3]:
#Helper class to generate random batch of different sequence lengths
class Helper(object):
    
    def __init__(self, batch_size):
        self.batch_size = batch_size
        
    def generate_batch(self):
        batch=[]
        for _ in range(self.batch_size):
            size = np.random.randint(low=5,high=8)
            batch.append(np.random.randint(low=0,high=10,size=size))
        max_len = np.max([len(seq) for seq in batch ])
        return batch,max_len

In [4]:
#Reset graph
tf.reset_default_graph()

In [5]:
#Set Session
sess = tf.InteractiveSession()

In [6]:
#Constants
PAD = 0  #Padding at the end of each sequence
EOS = 1  #Token indicating end of sequence
n_batches = 3000 #Number of batches in epoch
batch_size= 50 #Batch_size
enc_vocab_size = 10 # vocab size for encoder inputs
dec_vocab_size = enc_vocab_size*2 - 1
embed_size = 20 #embedding size
encoder_hidden_units = 20 #Number of encoder hidden units
decoder_hidden_units = encoder_hidden_units #Number of decoder hidden units


In [7]:
#Define placeholders
with tf.variable_scope('placeholders'):
    encoder_inputs = tf.placeholder(shape=(batch_size,None),dtype=tf.int32,
                                   name="encoder_inputs")
    decoder_inputs = tf.placeholder(shape=(batch_size,None),dtype=tf.int32,
                                   name="decoder_inputs")
    decoder_targets = tf.placeholder(shape=(batch_size,None),dtype=tf.int32,
                                    name='decoder_targets')

In [8]:
#Define embeddings
with tf.name_scope('embeddings'):
    enc_embed_matrix = tf.Variable(tf.random_uniform((enc_vocab_size,embed_size),-1,1),
                               dtype=tf.float32,name="enc_embed_matrix")
    dec_embed_matrix = tf.Variable(tf.random_uniform((dec_vocab_size,embed_size),-1,1),
                               dtype=tf.float32,name="dec_embed_matrix")
    encoder_embeddings = tf.nn.embedding_lookup(enc_embed_matrix,encoder_inputs)
    decoder_embeddings = tf.nn.embedding_lookup(dec_embed_matrix,decoder_inputs)

In [9]:
#Define encoder
with tf.variable_scope('encoder'):
    encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_hidden_units)
    encoder_initial_state = encoder_cell.zero_state(batch_size,tf.float32)
    encoder_outputs,encoder_states = tf.nn.dynamic_rnn(cell=encoder_cell,
                                                       inputs=encoder_embeddings,
                                                       initial_state=encoder_initial_state, 
                                                      )

In [10]:
#Define decoder
with tf.variable_scope('decoder'):
    decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_hidden_units)
    decoder_initial_state = encoder_states
    decoder_outputs,decoder_states = tf.nn.dynamic_rnn(cell=decoder_cell,
                                                      inputs=decoder_embeddings,
                                                      initial_state=decoder_initial_state)

In [11]:
#Scores
decoder_logits = tf.contrib.layers.fully_connected(decoder_outputs,dec_vocab_size)

In [12]:
#Softmax entropy for scores
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=decoder_logits,
                                                 labels=tf.cast(
                                                     tf.one_hot(decoder_targets,dec_vocab_size),
                                                     tf.float32))

In [13]:
#Decoder predictions
decoder_prediction = tf.argmax(decoder_logits,2)

In [14]:
#Define loss
loss = tf.reduce_mean(entropy)

In [15]:
with tf.name_scope('summaries'):
    tf.summary.scalar('loss',loss)
    tf.summary.histogram('loss', loss)
    summary_op = tf.summary.merge_all()

In [16]:
#Optimizer with default learning rate
optimizer = tf.train.AdamOptimizer().minimize(loss)

In [17]:
#Create Helper object
helper = Helper(batch_size)

In [18]:
#Function to generate inputs for training seq2seq
def next():
    batch,max_len = helper.generate_batch()
    encoder_inputs_ = [np.append(np.append(seq,[EOS]),[PAD]*(max_len-len(seq))) for seq in batch]
    decoder_inputs_ = [np.append(np.append([EOS],seq*2),[PAD]*(max_len-len(seq))) for seq in batch]
    decoder_targets_ = [np.append(np.append(seq*2,[EOS]),[PAD]*(max_len-len(seq))) for seq in batch]
#     print(encoder_inputs_,decoder_inputs_,decoder_targets_)
    return {encoder_inputs:encoder_inputs_,
           decoder_inputs:decoder_inputs_,
           decoder_targets:decoder_targets_}

In [19]:
sess.run(tf.global_variables_initializer())
loss_sum = 0.0
batches_in_epoch = 1000 
saver = tf.train.Saver()
#For tensorboard visualizations
writer = tf.summary.FileWriter('/graphs/seq2seq', sess.graph)
#Check if checkpoint present 
ckpt = tf.train.get_checkpoint_state(os.path.dirname('/checkpoints/seq2seq/checkpoint'))
#Restore the latest checkpoint if present
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
try:
    for batch in range(n_batches):
        fd = next()
        _, loss_val,summary = sess.run([optimizer, loss,summary_op], fd)
        loss_sum += loss_val

        if batch == 0 or batch % batches_in_epoch == 0:
            print('batch {}'.format(batch))
            print('  minibatch loss: {}'.format(loss_val))
            predict_ = sess.run(decoder_prediction, fd)
            for i, (inp,dec, pred) in enumerate(zip(fd[encoder_inputs],fd[decoder_inputs], predict_)):
                print('  sample {}:'.format(i + 1))
                print('    input     > {}'.format(inp))
                print('    decoder input  > {}'.format(dec))
                print('    predicted > {}'.format(pred))
                if i >= 2:
                    break
                print()
            saver.save(sess, '/checkpoints/seq2seq/seq2seq1', batch)
               
except KeyboardInterrupt:
    print('training interrupted')

batch 0
  minibatch loss: 2.949699640274048
  sample 1:
    input     > [4 6 8 7 7 7 7 5 3 4 9 6 8 2 2 8 9 1 0 0]
    decoder input  > [ 1  8 12 16 14 14 14 14 10  6  8 18 12 16  4  4 16 18  0  0]
    predicted > [ 8  3  2  5  6 17 17 17 12 17  5 17 12 11 17 17  5 17 17  5]

  sample 2:
    input     > [6 1 9 0 1 5 3 7 9 4 5 5 5 6 3 1 0 0 0 0]
    decoder input  > [ 1 12  2 18  0  2 10  6 14 18  8 10 10 10 12  6  0  0  0  0]
    predicted > [11  8  7 12 12 15 15  0 17 12 17  0  0  0  0  0  0 14 14 14]

  sample 3:
    input     > [ 6.  8.  8.  8.  3.  6.  4.  3.  1.  3.  8.  6.  7.  7.  8.  0.  9.  1.
  1.  1.]
    decoder input  > [  1.  12.  16.  16.  16.   6.  12.   8.   6.   2.   6.  16.  12.  14.  14.
  16.   0.  18.   2.   2.]
    predicted > [ 4  4  6  6  6  6  6  5  3 15  9 18  7  7 17  6  5 17  7  7]
batch 1000
  minibatch loss: 1.5652778148651123
  sample 1:
    input     > [8 6 2 7 5 7 8 5 8 6 1 5 2 1 0 0 0 0 0 0]
    decoder input  > [ 1 16 12  4 14 10 14 16 10 16 12  2 10 