In [82]:
# https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767
DEBUG = True

import re, random, math, csv, io, string, itertools, sys
import numpy as np
# import pandas as pd
import tensorflow as tf

In [2]:
hp = dict(
    n_layers = 2,
    hidden_size = 512,
    fc_size = 512,
    dropout = 0.9,
    batch_size = 20,
    lr = 0.001,
    lr_decay = 0.9999,
    min_lr = 0.00001,
    grad_clip = 5.,
    cuda = False,
    num_epoch = 5,
    max_length = 10
)

In [3]:
class Voc:
    SOS = "!"
    EOS = "#"
    SOS_ID = 0
    EOS_ID = 1
    def __init__(self):
        self.word2index = {self.SOS:0, self.EOS:1}
        self.word2count = {}
        self.index2word = {0:self.SOS, 1:self.EOS}
        self.n_words = 2 # Count SOS and EOS

    def index_words(self, sentence):
        for word in sentence.split(' '):
            self.index_word(word)

    def index_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
    
def string2indicies(voc, text):
    return [voc.word2index[c] for c in text]
    
def indicies2string(voc, indicies):
    return "".join([voc.index2word[i] for i in indicies])

voc = Voc()

for c in itertools.chain(range(ord('a'), ord('z')+1),range(ord('A'),ord('Z')+1),(ord(" "),)):
    voc.index_word(chr(c))

In [4]:
class PairGenerator:
#     vocabulary = [chr(i) for i in itertools.chain(range(ord('a'), ord('z')+1),range(ord('A'),ord('Z')+1))]
    word_len_interval = {"a":2,"b":7}
    sent_len_interval = {"a":1,"b":10}
    
    def __init__(self,voc):
        self.voc = voc
        self.vocabulary = [c for c in voc.word2index.keys() if c not in {voc.SOS, voc.EOS}]
        
    def gen_word_pair(self):
        word_len = int(random.uniform(**self.word_len_interval))
        word = np.random.choice(self.vocabulary,word_len)
        return "".join(word), "".join(list(reversed(word)))
    
    def gen_pair(self):
        num_words = int(random.uniform(**self.sent_len_interval))
        inp, out = zip(*[self.gen_word_pair() for _ in range(num_words)])
        return self.voc.SOS+" ".join(inp)+self.voc.EOS, self.voc.SOS+" ".join(out)+self.voc.EOS
    
    def gen_batch(self, n_unrollings):
        inp, out = zip(*[self.gen_pair() for _ in range(n_unrollings)])
        return inp, out
    
    def gen_int_batch(self, n_unrollings):
        inp, out = zip(*[self.gen_pair() for _ in range(n_unrollings)])
        
        return \
            [string2indicies(self.voc, x) for x in inp], \
            [string2indicies(self.voc, x) for x in out]
        
pg = PairGenerator(voc)

In [5]:
def generate_data(n_unrollings=100, echo_step=2, batch_size=5):
    x = np.array(np.random.choice(2, n_unrollings, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0

    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))

    return (x, y)

In [103]:
class GRU:
    hp = dict(
        state_sz=5,
        n_classes=2,
        input_dim=2,
        ckpt_path="./checkpoints/"
    )
    def __init__(self, **hyper_parameters):
        if hyper_parameters is not None:
            for k, v in hyper_parameters.items():
                self.hp[k] = v
        self.__graph__()
    
    def _init(self):
        self.x = tf.placeholder(tf.int32, [None, None], "x") #batch*voc
        self.y = tf.placeholder(tf.int32, [None], "y") #batch*voc
        emb = tf.placeholder(tf.int32, [None, None],)
        
        # batch_sz*seq_len -> batch_sz*seq_ln*voc_sz
        embs = tf.get_variable('emb', [self.hp['n_classes'], self.hp['state_sz']]) 
        self.rnn_inputs = tf.nn.embedding_lookup(embs, x)
        
        #batch*state_sz
        self.init_state = tf.placeholder(tf.float32, [None, self.hp['state_sz']], "init_state")
    
    def _weights(self):
        self.wz = tf.get_variable(
            "wz", shape=[self.hp['state_sz'], self.hp['state_sz']],
            initializer=tf.contrib.layers.xavier_initializer()
        )
        self.uz = tf.get_variable(
            "uz", shape=[self.hp['state_sz'], self.hp['state_sz']],
            initializer=tf.contrib.layers.xavier_initializer()
        )
        self.bz = tf.get_variable(
            "bz", shape=[self.hp['state_sz']],
            initializer=tf.constant_initializer(0.)
        )
        
        self.wr = tf.get_variable(
            "wr",  shape=[self.hp['state_sz'], self.hp['state_sz']],
            initializer=tf.contrib.layers.xavier_initializer()
        )
        self.ur = tf.get_variable(
            "ur",  shape=[self.hp['state_sz'], self.hp['state_sz']],
            initializer=tf.contrib.layers.xavier_initializer()
        )
        self.br = tf.get_variable(
            "br", shape=[self.hp['state_sz']],
            initializer=tf.constant_initializer(0.)
        )
        
        self.wh = tf.get_variable(
            "wh",  shape=[self.hp['state_sz'], self.hp['state_sz']],
            initializer=tf.contrib.layers.xavier_initializer()
        )
        self.uh = tf.get_variable(
            "uh",  shape=[self.hp['state_sz'], self.hp['state_sz']],
            initializer=tf.contrib.layers.xavier_initializer()
        )
        self.bh = tf.get_variable(
            "bh", shape=[self.hp['state_sz']],
            initializer=tf.constant_initializer(0.)
        )
        
        # layer to decode results of GRU 
        self.wo= tf.get_variable(
            'wo', shape=[self.hp['state_sz'], self.hp['n_classes']], 
            initializer=tf.contrib.layers.xavier_initializer()
        )
        self.bo = tf.get_variable(
            'bo', shape=[self.hp['n_classes']], 
            initializer=tf.constant_initializer(0.)
        )
    
    def __graph__(self):
        # time cycle step
        def step(prev_state, x):
            z = tf.sigmoid(
                tf.matmul(x,self.wz) + tf.matmul(prev_state, self.uz) + self.bz
            )
            r = tf.sigmoid(
                tf.matmul(x,self.wr) + tf.matmul(prev_state, self.ur) + self.br
            )
            h = tf.tanh(
                tf.matmul(x,self.wh) + tf.matmul(r*prev_state, self.uh) + self.bh
            )
            return (1-z)*prev_state + z*h
        
        tf.reset_default_graph()
        self._init()
        self._weights()
        states = tf.scan(
            step,
            #batch_sz*seq_ln*voc_sz -> seq_len*batch_sz*voc_sz
            tf.transpose(self.rnn_inputs,[1,0,2]),
            initializer=self.init_state
        )
        # seq_len*batch_sz*voc_sz -> batch_sz*seq_ln*voc_sz
        states = tf.transpose(states,[1,0,2])
        
        self.states_reshaped = tf.reshape(states, [-1, self.hp['state_sz']])
        logits = tf.matmul(self.states_reshaped, self.wo) + self.bo
        
        self.last_state = states[-1]
        self.predictions = tf.nn.softmax(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=self.y)
        )
        self.train_op = tf.train.AdagradOptimizer(learning_rate=0.2).minimize(self.loss)
        
    def train(self, batch_generator, n_epochs = 10):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch_num in range(n_epochs):
                train_loss = 0
                try:
                    x,y = batch_generator()
                    batch_sz = x.shape[0]
                    
                    tmp = sess.run(
                        [self.states_reshaped], 
                        feed_dict = {
                            self.x : x,
                            self.y : y.flatten(),
                            self.init_state : np.zeros(
                                [batch_sz, self.hp['state_sz']]
                            )
                        }
                    )
                    sys.stdout.write(f"{np.shape(a=tmp)} ")
#                     _, train_loss_, tmp = sess.run(
#                         [self.train_op, self.loss,self.states_reshaped], 
#                         feed_dict = {
#                             self.x : x,
#                             self.y : y.flatten(),
#                             self.init_state : np.zeros(
#                                 [batch_sz, self.hp['state_sz']]
#                             )
#                         }
#                     )
#                     sys.stdout.write(f"{train_loss_:.3f} {tmp.shape} ")
                except KeyboardInterrupt as ex:
                    print("Interrupted by user at")
                saver = tf.train.Saver()
                saver.save(sess, self.hp['ckpt_path'] + "nn.mdl", global_step=epoch_num)

In [104]:
gru = GRU()
gru.train(lambda: generate_data(n_unrollings=500,batch_size=5))

(1, 1000, 5) (1, 1000, 5) (1, 1000, 5) (1, 1000, 5) (1, 1000, 5) (1, 1000, 5) (1, 1000, 5) (1, 1000, 5) (1, 1000, 5) (1, 1000, 5) 

In [67]:
x,y=generate_data(n_unrollings=1000)

In [68]:
x.shape

(5, 200)

In [69]:
y.shape

(5, 200)