In [1]:
#import libaries

import tensorflow as tf
import numpy as np
import pandas as pd
import re
import os
import time

In [2]:
#open abstracts and read file

with open('../input/arvix_abstracts.txt') as sh:
    abst = sh.read()



In [3]:
#creating set of all charcaters

character_set = sorted(set(abst))

#dictionary mapping characters to integers
dict_int = {term :num for num, term in enumerate(character_set)}

#create array of entire abstracts

encoded = np.array([dict_int[word] for word in abst], dtype = np.int32)

len(encoded)

7540200

In [4]:
#print raw text and array encoding
len(character_set)

print(abst[:400])

print(encoded[:400])

In science and engineering, intelligent processing of complex signals such as images, sound or language is often performed by a parameterized hierarchy of nonlinear processing layers, sometimes biologically inspired. Hierarchical systems (or, more generally, nested systems) offer a way to generate complex mappings using simple stages. Each layer performs a different operation and achieves an ever 
[35 67  1 72 56 62 58 67 56 58  1 54 67 57  1 58 67 60 62 67 58 58 71 62
 67 60  9  1 62 67 73 58 65 65 62 60 58 67 73  1 69 71 68 56 58 72 72 62
 67 60  1 68 59  1 56 68 66 69 65 58 77  1 72 62 60 67 54 65 72  1 72 74
 56 61  1 54 72  1 62 66 54 60 58 72  9  1 72 68 74 67 57  1 68 71  1 65
 54 67 60 74 54 60 58  1 62 72  1 68 59 73 58 67  1 69 58 71 59 68 71 66
 58 57  1 55 78  1 54  1 69 54 71 54 66 58 73 58 71 62 79 58 57  1 61 62
 58 71 54 71 56 61 78  1 68 59  1 67 68 67 65 62 67 58 54 71  1 69 71 68
 56 58 72 72 62 67 60  1 65 54 78 58 71 72  9  1 72 68 66 58 73 62 66 58
 72  1 55 62 68

In [5]:
#Creatiung batches of x & y
graph = tf.Graph()

def create_batches(encoded, batch_size, character_length):
    
        character_per_batch = batch_size * character_length
    
        num_batches = len(encoded)//character_per_batch
    
    
        encoded = encoded[: (num_batches * character_per_batch)]
    
        encoded = encoded.reshape(batch_size, -1)
    
    
        #Split encoded into x & y set
    
        for step in range(0, encoded.shape[1], character_length):
        
            x = encoded[:, step : step + character_length]
        
            y = np.zeros(x.shape, dtype = x.dtype)
        
            y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        
            yield x, y
        
        

In [6]:
batches = create_batches(encoded, batch_size = 5, character_length = 8)

In [7]:
x, y = next(batches)

print(x)

print(y)

[[35 67  1 72 56 62 58 67]
 [ 1 73 68 68 65 72  1 74]
 [58 54 71 67 62 67 60  1]
 [69 73 62 66 62 79 54 73]
 [67 78  1 69 71 54 56 73]]
[[67  1 72 56 62 58 67 35]
 [73 68 68 65 72  1 74  1]
 [54 71 67 62 67 60  1 58]
 [73 62 66 62 79 54 73 69]
 [78  1 69 71 54 56 73 67]]


In [8]:
#input placeholders

def tensor_inputs(batch_size, character_length):
    
    inputs = tf.placeholder(tf.int32, [batch_size, character_length])
    
    output = tf.placeholder(tf.int32, [batch_size, character_length])
    
    keep_prob = tf.placeholder(tf.float32)
    
    return inputs, output, keep_prob


In [9]:
#Building LSTM cells

def LSTM_cell(lstmsize, batch_size, keep_prob, num_layers):
    
    def build_cell(lstm_size, keep_prob):
    
        lstm = tf.contrib.rnn.BasicLSTMCell(lstmsize)
    
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob = keep_prob)
        
        return drop

    
    multi_lstm = tf.contrib.rnn.MultiRNNCell([build_cell(lstmsize, keep_prob) for _ in range(num_layers)])
    
    initial_state = multi_lstm.zero_state(batch_size, tf.float32)
    
    return multi_lstm, initial_state
    
    

In [10]:
#functions to calculate logits using inputs and lstm outputs


def logits(lstmoutput, lstmsize, outclasses_size):
    
    lstm_batch_list = tf.concat(lstmoutput, axis = -1)
    
    lstm_output = tf.reshape(lstm_batch_list, [-1, lstmsize])
    
    
    with tf.variable_scope('softmax'):
        
        softmax_w = tf.get_variable('softmax_w', [lstmsize, outclasses_size], 
                                    initializer = tf.contrib.layers.xavier_initializer(seed = 1))
        
        softmax_b = tf.get_variable('softmax_b', [outclasses_size], initializer = tf.zeros_initializer())
            
        logits = tf.add(tf.matmul(lstm_output, softmax_w), softmax_b)
        
        predictions = tf.nn.softmax(logits)
        
        return logits, predictions
    
    

In [11]:
#softmax cross entropy loss 

def loss(logits, targets, classes):

        
    y_one_hot = tf.one_hot(targets, classes)
        
    y = tf.reshape(y_one_hot, logits.get_shape())
        
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = y))
        
    return loss

In [12]:
#defining optimizers for training

def optimizer(learning_rate, grad_clip, loss):
    
    tvars = tf.trainable_variables()
    
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    
    train_op = tf.train.AdamOptimizer(learning_rate)
    
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

In [13]:
num_classes = len(character_set)

lstmsize = 780

batch_size = 64

character_length = 50
               
keep_prob = 0.8

num_layers = 2

learning_rate = 0.001

sampling = False

In [14]:
class build_arvix:
    
    def __init__(self, num_classes = len(character_set), lstmsize = lstmsize, 
                 batch_size = batch_size, character_length = character_length, 
                 keep_prob = keep_prob, num_layers = num_layers,
                 learning_rate = 0.1, sampling = False, grad_clip = 5):
        
        
        if sampling == True:
            
            batch_size, character_length = 1, 1
            
            
        else:
            
            batch_size, character_length = batch_size, character_length
            
            
        tf.reset_default_graph()
        
        #Build the input placeholders
        
        self.inputs, self.targets, self.keep_prob = tensor_inputs(batch_size, character_length)
        
        #Build the LSTM archictecture
        
        multi_lstm, self.initial_state = LSTM_cell(lstmsize, batch_size, self.keep_prob, num_layers)
        
        
        #Run inputs through LSTM Cell
        
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        outputs, final_state = tf.nn.dynamic_rnn(multi_lstm, x_one_hot, initial_state = self.initial_state)
        
        self.final_state = final_state
        
        
        #predictions using lstm run
        
        self.logits, self.predictions = logits(outputs, lstmsize, num_classes)
            
            
        #loss function & optimizer
        
        self.loss = loss(self.logits, self.targets, num_classes)
        
        self.optimizer = optimizer(learning_rate, grad_clip, self.loss)
        

In [15]:
#training code

arvix = build_arvix(num_classes, lstmsize, batch_size, character_length,
                keep_prob, num_layers, learning_rate, sampling = False, grad_clip = 5)


epochs = 50

print_every_n = 50

save_every_steps = 100

    
saver = tf.train.Saver(max_to_keep = 100)
    
    
with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    
    all_trainable_vars = tf.reduce_sum([tf.reduce_prod(v.shape) for v in tf.trainable_variables()])
        
    print(sess.run(all_trainable_vars))
    
    counter = 0
    
    for epoch in range(epochs):
        
        
        
        feed_state = sess.run([arvix.initial_state])
            
        for x, y in create_batches(encoded, batch_size, character_length):
            
            counter += 1
        
            start = time.time()
        
            feed = {arvix.inputs: x, arvix.targets : y, arvix.keep_prob: keep_prob, arvix.initial_state: feed_state}
        
            batch_loss, feed_state, _ = sess.run([arvix.loss, arvix.final_state, arvix.optimizer], feed_dict = feed)
        
        
            if (counter % print_every_n == 0):
            
                end = time.time()
            
                print('epoch: {}/{}....' .format(epoch, epochs), 
                      'training_step: {}....'. format(counter), 
                      'batch_loss: {:4f}....' .format(batch_loss), 
                      '{:4f} sec/batch' .format(end-start))
                
            
            #if (counter % print_every_n == 0):
            
                #saver.save(sess, "checkpoints/i{}_l{}_h{}.ckpt" .format (counter, lstmsize, 1))
                
            
            
    saver.save(sess, "checkpoints/i{}_l{}_h{}.ckpt" .format (counter, lstmsize, 1))
    
    
    
    
    
    
            
        
        
        
        
    


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

7630823
epoch: 0/50.... t

epoch: 1/50.... training_step: 3650.... batch_loss: 0.102355.... 0.075258 sec/batch
epoch: 1/50.... training_step: 3700.... batch_loss: 0.103925.... 0.075031 sec/batch
epoch: 1/50.... training_step: 3750.... batch_loss: 0.111810.... 0.076310 sec/batch
epoch: 1/50.... training_step: 3800.... batch_loss: 0.100358.... 0.075227 sec/batch
epoch: 1/50.... training_step: 3850.... batch_loss: 0.096712.... 0.078383 sec/batch
epoch: 1/50.... training_step: 3900.... batch_loss: 0.100135.... 0.074508 sec/batch
epoch: 1/50.... training_step: 3950.... batch_loss: 0.096455.... 0.074539 sec/batch
epoch: 1/50.... training_step: 4000.... batch_loss: 0.102573.... 0.074327 sec/batch
epoch: 1/50.... training_step: 4050.... batch_loss: 0.099794.... 0.074052 sec/batch
epoch: 1/50.... training_step: 4100.... batch_loss: 0.089429.... 0.075745 sec/batch
epoch: 1/50.... training_step: 4150.... batch_loss: 0.094096.... 0.075433 sec/batch
epoch: 1/50.... training_step: 4200.... batch_loss: 0.080668.... 0.074409 se

epoch: 3/50.... training_step: 8550.... batch_loss: 0.067493.... 0.074084 sec/batch
epoch: 3/50.... training_step: 8600.... batch_loss: 0.069444.... 0.075144 sec/batch
epoch: 3/50.... training_step: 8650.... batch_loss: 0.069114.... 0.076025 sec/batch
epoch: 3/50.... training_step: 8700.... batch_loss: 0.070371.... 0.078218 sec/batch
epoch: 3/50.... training_step: 8750.... batch_loss: 0.072462.... 0.076491 sec/batch
epoch: 3/50.... training_step: 8800.... batch_loss: 0.074632.... 0.074667 sec/batch
epoch: 3/50.... training_step: 8850.... batch_loss: 0.053172.... 0.076638 sec/batch
epoch: 3/50.... training_step: 8900.... batch_loss: 0.076338.... 0.073676 sec/batch
epoch: 3/50.... training_step: 8950.... batch_loss: 0.077867.... 0.077133 sec/batch
epoch: 3/50.... training_step: 9000.... batch_loss: 0.076778.... 0.073642 sec/batch
epoch: 3/50.... training_step: 9050.... batch_loss: 0.073768.... 0.075918 sec/batch
epoch: 3/50.... training_step: 9100.... batch_loss: 0.070379.... 0.076270 se

epoch: 5/50.... training_step: 13400.... batch_loss: 0.064993.... 0.073269 sec/batch
epoch: 5/50.... training_step: 13450.... batch_loss: 0.062541.... 0.076193 sec/batch
epoch: 5/50.... training_step: 13500.... batch_loss: 0.065304.... 0.074013 sec/batch
epoch: 5/50.... training_step: 13550.... batch_loss: 0.063007.... 0.076067 sec/batch
epoch: 5/50.... training_step: 13600.... batch_loss: 0.054053.... 0.073713 sec/batch
epoch: 5/50.... training_step: 13650.... batch_loss: 0.059402.... 0.076068 sec/batch
epoch: 5/50.... training_step: 13700.... batch_loss: 0.055921.... 0.075742 sec/batch
epoch: 5/50.... training_step: 13750.... batch_loss: 0.058658.... 0.073449 sec/batch
epoch: 5/50.... training_step: 13800.... batch_loss: 0.062864.... 0.074556 sec/batch
epoch: 5/50.... training_step: 13850.... batch_loss: 0.059313.... 0.073543 sec/batch
epoch: 5/50.... training_step: 13900.... batch_loss: 0.062429.... 0.077976 sec/batch
epoch: 5/50.... training_step: 13950.... batch_loss: 0.053735....

epoch: 7/50.... training_step: 18250.... batch_loss: 0.055748.... 0.077394 sec/batch
epoch: 7/50.... training_step: 18300.... batch_loss: 0.064110.... 0.075281 sec/batch
epoch: 7/50.... training_step: 18350.... batch_loss: 0.059088.... 0.077505 sec/batch
epoch: 7/50.... training_step: 18400.... batch_loss: 0.055927.... 0.073638 sec/batch
epoch: 7/50.... training_step: 18450.... batch_loss: 0.057657.... 0.076480 sec/batch
epoch: 7/50.... training_step: 18500.... batch_loss: 0.057326.... 0.073614 sec/batch
epoch: 7/50.... training_step: 18550.... batch_loss: 0.059904.... 0.075296 sec/batch
epoch: 7/50.... training_step: 18600.... batch_loss: 0.055112.... 0.074013 sec/batch
epoch: 7/50.... training_step: 18650.... batch_loss: 0.061056.... 0.074326 sec/batch
epoch: 7/50.... training_step: 18700.... batch_loss: 0.063263.... 0.075399 sec/batch
epoch: 7/50.... training_step: 18750.... batch_loss: 0.065232.... 0.073840 sec/batch
epoch: 7/50.... training_step: 18800.... batch_loss: 0.061652....

epoch: 9/50.... training_step: 23100.... batch_loss: 0.053131.... 0.076799 sec/batch
epoch: 9/50.... training_step: 23150.... batch_loss: 0.059367.... 0.074184 sec/batch
epoch: 9/50.... training_step: 23200.... batch_loss: 0.063061.... 0.073602 sec/batch
epoch: 9/50.... training_step: 23250.... batch_loss: 0.054974.... 0.075846 sec/batch
epoch: 9/50.... training_step: 23300.... batch_loss: 0.057833.... 0.075423 sec/batch
epoch: 9/50.... training_step: 23350.... batch_loss: 0.053991.... 0.077295 sec/batch
epoch: 9/50.... training_step: 23400.... batch_loss: 0.057663.... 0.078708 sec/batch
epoch: 9/50.... training_step: 23450.... batch_loss: 0.058089.... 0.074901 sec/batch
epoch: 9/50.... training_step: 23500.... batch_loss: 0.057647.... 0.075113 sec/batch
epoch: 9/50.... training_step: 23550.... batch_loss: 0.051856.... 0.073139 sec/batch
epoch: 10/50.... training_step: 23600.... batch_loss: 0.056726.... 0.076422 sec/batch
epoch: 10/50.... training_step: 23650.... batch_loss: 0.052559..

epoch: 11/50.... training_step: 27900.... batch_loss: 0.053072.... 0.076343 sec/batch
epoch: 11/50.... training_step: 27950.... batch_loss: 0.052832.... 0.073531 sec/batch
epoch: 11/50.... training_step: 28000.... batch_loss: 0.051780.... 0.073854 sec/batch
epoch: 11/50.... training_step: 28050.... batch_loss: 0.056813.... 0.074475 sec/batch
epoch: 11/50.... training_step: 28100.... batch_loss: 0.054009.... 0.074986 sec/batch
epoch: 11/50.... training_step: 28150.... batch_loss: 0.054478.... 0.076021 sec/batch
epoch: 11/50.... training_step: 28200.... batch_loss: 0.052585.... 0.077026 sec/batch
epoch: 11/50.... training_step: 28250.... batch_loss: 0.052516.... 0.078745 sec/batch
epoch: 12/50.... training_step: 28300.... batch_loss: 0.054446.... 0.076972 sec/batch
epoch: 12/50.... training_step: 28350.... batch_loss: 0.056771.... 0.076355 sec/batch
epoch: 12/50.... training_step: 28400.... batch_loss: 0.058541.... 0.077018 sec/batch
epoch: 12/50.... training_step: 28450.... batch_loss: 

epoch: 13/50.... training_step: 32700.... batch_loss: 0.055825.... 0.075843 sec/batch
epoch: 13/50.... training_step: 32750.... batch_loss: 0.056830.... 0.075541 sec/batch
epoch: 13/50.... training_step: 32800.... batch_loss: 0.051382.... 0.073945 sec/batch
epoch: 13/50.... training_step: 32850.... batch_loss: 0.051508.... 0.076325 sec/batch
epoch: 13/50.... training_step: 32900.... batch_loss: 0.051778.... 0.075202 sec/batch
epoch: 13/50.... training_step: 32950.... batch_loss: 0.051079.... 0.076000 sec/batch
epoch: 14/50.... training_step: 33000.... batch_loss: 0.052981.... 0.075644 sec/batch
epoch: 14/50.... training_step: 33050.... batch_loss: 0.051944.... 0.076714 sec/batch
epoch: 14/50.... training_step: 33100.... batch_loss: 0.045841.... 0.076217 sec/batch
epoch: 14/50.... training_step: 33150.... batch_loss: 0.051449.... 0.074241 sec/batch
epoch: 14/50.... training_step: 33200.... batch_loss: 0.054431.... 0.073940 sec/batch
epoch: 14/50.... training_step: 33250.... batch_loss: 

epoch: 15/50.... training_step: 37500.... batch_loss: 0.045695.... 0.075579 sec/batch
epoch: 15/50.... training_step: 37550.... batch_loss: 0.050769.... 0.073716 sec/batch
epoch: 15/50.... training_step: 37600.... batch_loss: 0.054598.... 0.076545 sec/batch
epoch: 15/50.... training_step: 37650.... batch_loss: 0.045414.... 0.075801 sec/batch
epoch: 16/50.... training_step: 37700.... batch_loss: 0.047895.... 0.076913 sec/batch
epoch: 16/50.... training_step: 37750.... batch_loss: 0.050188.... 0.078056 sec/batch
epoch: 16/50.... training_step: 37800.... batch_loss: 0.046622.... 0.074990 sec/batch
epoch: 16/50.... training_step: 37850.... batch_loss: 0.047774.... 0.077009 sec/batch
epoch: 16/50.... training_step: 37900.... batch_loss: 0.053543.... 0.075295 sec/batch
epoch: 16/50.... training_step: 37950.... batch_loss: 0.053468.... 0.078966 sec/batch
epoch: 16/50.... training_step: 38000.... batch_loss: 0.053995.... 0.079854 sec/batch
epoch: 16/50.... training_step: 38050.... batch_loss: 

epoch: 17/50.... training_step: 42300.... batch_loss: 0.052972.... 0.074223 sec/batch
epoch: 17/50.... training_step: 42350.... batch_loss: 0.044324.... 0.073476 sec/batch
epoch: 17/50.... training_step: 42400.... batch_loss: 0.048669.... 0.073989 sec/batch
epoch: 18/50.... training_step: 42450.... batch_loss: 0.050336.... 0.075603 sec/batch
epoch: 18/50.... training_step: 42500.... batch_loss: 0.047560.... 0.075120 sec/batch
epoch: 18/50.... training_step: 42550.... batch_loss: 0.044757.... 0.075173 sec/batch
epoch: 18/50.... training_step: 42600.... batch_loss: 0.049029.... 0.075952 sec/batch
epoch: 18/50.... training_step: 42650.... batch_loss: 0.053826.... 0.074615 sec/batch
epoch: 18/50.... training_step: 42700.... batch_loss: 0.050320.... 0.074536 sec/batch
epoch: 18/50.... training_step: 42750.... batch_loss: 0.044905.... 0.072947 sec/batch
epoch: 18/50.... training_step: 42800.... batch_loss: 0.046445.... 0.073407 sec/batch
epoch: 18/50.... training_step: 42850.... batch_loss: 

epoch: 19/50.... training_step: 47100.... batch_loss: 0.046311.... 0.074548 sec/batch
epoch: 20/50.... training_step: 47150.... batch_loss: 0.046820.... 0.074199 sec/batch
epoch: 20/50.... training_step: 47200.... batch_loss: 0.043049.... 0.080657 sec/batch
epoch: 20/50.... training_step: 47250.... batch_loss: 0.044619.... 0.077947 sec/batch
epoch: 20/50.... training_step: 47300.... batch_loss: 0.044932.... 0.075123 sec/batch
epoch: 20/50.... training_step: 47350.... batch_loss: 0.046284.... 0.074120 sec/batch
epoch: 20/50.... training_step: 47400.... batch_loss: 0.042424.... 0.075172 sec/batch
epoch: 20/50.... training_step: 47450.... batch_loss: 0.048102.... 0.076579 sec/batch
epoch: 20/50.... training_step: 47500.... batch_loss: 0.050768.... 0.075687 sec/batch
epoch: 20/50.... training_step: 47550.... batch_loss: 0.037825.... 0.074509 sec/batch
epoch: 20/50.... training_step: 47600.... batch_loss: 0.040560.... 0.075793 sec/batch
epoch: 20/50.... training_step: 47650.... batch_loss: 

epoch: 22/50.... training_step: 51900.... batch_loss: 0.047480.... 0.073844 sec/batch
epoch: 22/50.... training_step: 51950.... batch_loss: 0.041990.... 0.073774 sec/batch
epoch: 22/50.... training_step: 52000.... batch_loss: 0.041524.... 0.073590 sec/batch
epoch: 22/50.... training_step: 52050.... batch_loss: 0.039153.... 0.076275 sec/batch
epoch: 22/50.... training_step: 52100.... batch_loss: 0.044066.... 0.073445 sec/batch
epoch: 22/50.... training_step: 52150.... batch_loss: 0.048326.... 0.076808 sec/batch
epoch: 22/50.... training_step: 52200.... batch_loss: 0.047654.... 0.075033 sec/batch
epoch: 22/50.... training_step: 52250.... batch_loss: 0.045375.... 0.072916 sec/batch
epoch: 22/50.... training_step: 52300.... batch_loss: 0.044810.... 0.074933 sec/batch
epoch: 22/50.... training_step: 52350.... batch_loss: 0.046040.... 0.073844 sec/batch
epoch: 22/50.... training_step: 52400.... batch_loss: 0.045059.... 0.074946 sec/batch
epoch: 22/50.... training_step: 52450.... batch_loss: 

epoch: 24/50.... training_step: 56700.... batch_loss: 0.042969.... 0.074025 sec/batch
epoch: 24/50.... training_step: 56750.... batch_loss: 0.042805.... 0.073907 sec/batch
epoch: 24/50.... training_step: 56800.... batch_loss: 0.040396.... 0.078091 sec/batch
epoch: 24/50.... training_step: 56850.... batch_loss: 0.046313.... 0.076982 sec/batch
epoch: 24/50.... training_step: 56900.... batch_loss: 0.050355.... 0.076418 sec/batch
epoch: 24/50.... training_step: 56950.... batch_loss: 0.040919.... 0.076372 sec/batch
epoch: 24/50.... training_step: 57000.... batch_loss: 0.039567.... 0.077765 sec/batch
epoch: 24/50.... training_step: 57050.... batch_loss: 0.039335.... 0.076827 sec/batch
epoch: 24/50.... training_step: 57100.... batch_loss: 0.041852.... 0.077820 sec/batch
epoch: 24/50.... training_step: 57150.... batch_loss: 0.038412.... 0.075908 sec/batch
epoch: 24/50.... training_step: 57200.... batch_loss: 0.044297.... 0.074646 sec/batch
epoch: 24/50.... training_step: 57250.... batch_loss: 

epoch: 26/50.... training_step: 61500.... batch_loss: 0.041352.... 0.078123 sec/batch
epoch: 26/50.... training_step: 61550.... batch_loss: 0.040002.... 0.073663 sec/batch
epoch: 26/50.... training_step: 61600.... batch_loss: 0.037974.... 0.073525 sec/batch
epoch: 26/50.... training_step: 61650.... batch_loss: 0.039154.... 0.077580 sec/batch
epoch: 26/50.... training_step: 61700.... batch_loss: 0.038914.... 0.075408 sec/batch
epoch: 26/50.... training_step: 61750.... batch_loss: 0.043520.... 0.074280 sec/batch
epoch: 26/50.... training_step: 61800.... batch_loss: 0.040601.... 0.074919 sec/batch
epoch: 26/50.... training_step: 61850.... batch_loss: 0.043932.... 0.075939 sec/batch
epoch: 26/50.... training_step: 61900.... batch_loss: 0.042781.... 0.073416 sec/batch
epoch: 26/50.... training_step: 61950.... batch_loss: 0.034202.... 0.077289 sec/batch
epoch: 26/50.... training_step: 62000.... batch_loss: 0.038615.... 0.074847 sec/batch
epoch: 26/50.... training_step: 62050.... batch_loss: 

epoch: 28/50.... training_step: 66300.... batch_loss: 0.038856.... 0.072956 sec/batch
epoch: 28/50.... training_step: 66350.... batch_loss: 0.040030.... 0.075291 sec/batch
epoch: 28/50.... training_step: 66400.... batch_loss: 0.035273.... 0.073666 sec/batch
epoch: 28/50.... training_step: 66450.... batch_loss: 0.038812.... 0.076167 sec/batch
epoch: 28/50.... training_step: 66500.... batch_loss: 0.038048.... 0.076236 sec/batch
epoch: 28/50.... training_step: 66550.... batch_loss: 0.043109.... 0.073046 sec/batch
epoch: 28/50.... training_step: 66600.... batch_loss: 0.037902.... 0.077318 sec/batch
epoch: 28/50.... training_step: 66650.... batch_loss: 0.038110.... 0.074874 sec/batch
epoch: 28/50.... training_step: 66700.... batch_loss: 0.039547.... 0.074066 sec/batch
epoch: 28/50.... training_step: 66750.... batch_loss: 0.041623.... 0.074133 sec/batch
epoch: 28/50.... training_step: 66800.... batch_loss: 0.039041.... 0.073927 sec/batch
epoch: 28/50.... training_step: 66850.... batch_loss: 

epoch: 30/50.... training_step: 71100.... batch_loss: 0.040993.... 0.074284 sec/batch
epoch: 30/50.... training_step: 71150.... batch_loss: 0.040659.... 0.076677 sec/batch
epoch: 30/50.... training_step: 71200.... batch_loss: 0.036233.... 0.073035 sec/batch
epoch: 30/50.... training_step: 71250.... batch_loss: 0.038003.... 0.077475 sec/batch
epoch: 30/50.... training_step: 71300.... batch_loss: 0.039607.... 0.077609 sec/batch
epoch: 30/50.... training_step: 71350.... batch_loss: 0.045132.... 0.076562 sec/batch
epoch: 30/50.... training_step: 71400.... batch_loss: 0.032796.... 0.074478 sec/batch
epoch: 30/50.... training_step: 71450.... batch_loss: 0.040862.... 0.078012 sec/batch
epoch: 30/50.... training_step: 71500.... batch_loss: 0.038543.... 0.073243 sec/batch
epoch: 30/50.... training_step: 71550.... batch_loss: 0.037934.... 0.075476 sec/batch
epoch: 30/50.... training_step: 71600.... batch_loss: 0.038213.... 0.076115 sec/batch
epoch: 30/50.... training_step: 71650.... batch_loss: 

epoch: 32/50.... training_step: 75900.... batch_loss: 0.041469.... 0.073462 sec/batch
epoch: 32/50.... training_step: 75950.... batch_loss: 0.035046.... 0.074352 sec/batch
epoch: 32/50.... training_step: 76000.... batch_loss: 0.037985.... 0.074130 sec/batch
epoch: 32/50.... training_step: 76050.... batch_loss: 0.036860.... 0.072294 sec/batch
epoch: 32/50.... training_step: 76100.... batch_loss: 0.038851.... 0.075861 sec/batch
epoch: 32/50.... training_step: 76150.... batch_loss: 0.034797.... 0.074288 sec/batch
epoch: 32/50.... training_step: 76200.... batch_loss: 0.036568.... 0.073367 sec/batch
epoch: 32/50.... training_step: 76250.... batch_loss: 0.035682.... 0.077486 sec/batch
epoch: 32/50.... training_step: 76300.... batch_loss: 0.035870.... 0.076849 sec/batch
epoch: 32/50.... training_step: 76350.... batch_loss: 0.037347.... 0.074378 sec/batch
epoch: 32/50.... training_step: 76400.... batch_loss: 0.035515.... 0.073412 sec/batch
epoch: 32/50.... training_step: 76450.... batch_loss: 

epoch: 34/50.... training_step: 80700.... batch_loss: 0.036184.... 0.073645 sec/batch
epoch: 34/50.... training_step: 80750.... batch_loss: 0.036172.... 0.076340 sec/batch
epoch: 34/50.... training_step: 80800.... batch_loss: 0.038740.... 0.074037 sec/batch
epoch: 34/50.... training_step: 80850.... batch_loss: 0.034462.... 0.074844 sec/batch
epoch: 34/50.... training_step: 80900.... batch_loss: 0.038718.... 0.073836 sec/batch
epoch: 34/50.... training_step: 80950.... batch_loss: 0.033605.... 0.074105 sec/batch
epoch: 34/50.... training_step: 81000.... batch_loss: 0.034614.... 0.074298 sec/batch
epoch: 34/50.... training_step: 81050.... batch_loss: 0.036754.... 0.073842 sec/batch
epoch: 34/50.... training_step: 81100.... batch_loss: 0.033510.... 0.078079 sec/batch
epoch: 34/50.... training_step: 81150.... batch_loss: 0.032505.... 0.074210 sec/batch
epoch: 34/50.... training_step: 81200.... batch_loss: 0.035203.... 0.073123 sec/batch
epoch: 34/50.... training_step: 81250.... batch_loss: 

epoch: 36/50.... training_step: 85500.... batch_loss: 0.035663.... 0.073264 sec/batch
epoch: 36/50.... training_step: 85550.... batch_loss: 0.037608.... 0.077729 sec/batch
epoch: 36/50.... training_step: 85600.... batch_loss: 0.038852.... 0.075567 sec/batch
epoch: 36/50.... training_step: 85650.... batch_loss: 0.035339.... 0.075180 sec/batch
epoch: 36/50.... training_step: 85700.... batch_loss: 0.038523.... 0.075126 sec/batch
epoch: 36/50.... training_step: 85750.... batch_loss: 0.035338.... 0.073975 sec/batch
epoch: 36/50.... training_step: 85800.... batch_loss: 0.036785.... 0.075341 sec/batch
epoch: 36/50.... training_step: 85850.... batch_loss: 0.034818.... 0.074316 sec/batch
epoch: 36/50.... training_step: 85900.... batch_loss: 0.038334.... 0.073119 sec/batch
epoch: 36/50.... training_step: 85950.... batch_loss: 0.036922.... 0.076256 sec/batch
epoch: 36/50.... training_step: 86000.... batch_loss: 0.038624.... 0.073849 sec/batch
epoch: 36/50.... training_step: 86050.... batch_loss: 

epoch: 38/50.... training_step: 90300.... batch_loss: 0.038331.... 0.076056 sec/batch
epoch: 38/50.... training_step: 90350.... batch_loss: 0.038111.... 0.073110 sec/batch
epoch: 38/50.... training_step: 90400.... batch_loss: 0.034419.... 0.073435 sec/batch
epoch: 38/50.... training_step: 90450.... batch_loss: 0.033007.... 0.074487 sec/batch
epoch: 38/50.... training_step: 90500.... batch_loss: 0.036558.... 0.073136 sec/batch
epoch: 38/50.... training_step: 90550.... batch_loss: 0.033719.... 0.073368 sec/batch
epoch: 38/50.... training_step: 90600.... batch_loss: 0.034114.... 0.075236 sec/batch
epoch: 38/50.... training_step: 90650.... batch_loss: 0.038744.... 0.078050 sec/batch
epoch: 38/50.... training_step: 90700.... batch_loss: 0.035664.... 0.075488 sec/batch
epoch: 38/50.... training_step: 90750.... batch_loss: 0.037458.... 0.073134 sec/batch
epoch: 38/50.... training_step: 90800.... batch_loss: 0.030836.... 0.073328 sec/batch
epoch: 38/50.... training_step: 90850.... batch_loss: 

epoch: 40/50.... training_step: 95100.... batch_loss: 0.034551.... 0.073378 sec/batch
epoch: 40/50.... training_step: 95150.... batch_loss: 0.035356.... 0.072033 sec/batch
epoch: 40/50.... training_step: 95200.... batch_loss: 0.033269.... 0.072800 sec/batch
epoch: 40/50.... training_step: 95250.... batch_loss: 0.032442.... 0.072550 sec/batch
epoch: 40/50.... training_step: 95300.... batch_loss: 0.034458.... 0.074020 sec/batch
epoch: 40/50.... training_step: 95350.... batch_loss: 0.033402.... 0.073351 sec/batch
epoch: 40/50.... training_step: 95400.... batch_loss: 0.038175.... 0.074482 sec/batch
epoch: 40/50.... training_step: 95450.... batch_loss: 0.039199.... 0.074046 sec/batch
epoch: 40/50.... training_step: 95500.... batch_loss: 0.033626.... 0.073413 sec/batch
epoch: 40/50.... training_step: 95550.... batch_loss: 0.036615.... 0.073663 sec/batch
epoch: 40/50.... training_step: 95600.... batch_loss: 0.032166.... 0.075767 sec/batch
epoch: 40/50.... training_step: 95650.... batch_loss: 

epoch: 42/50.... training_step: 99900.... batch_loss: 0.030858.... 0.076844 sec/batch
epoch: 42/50.... training_step: 99950.... batch_loss: 0.031823.... 0.074184 sec/batch
epoch: 42/50.... training_step: 100000.... batch_loss: 0.034319.... 0.074231 sec/batch
epoch: 42/50.... training_step: 100050.... batch_loss: 0.034435.... 0.073301 sec/batch
epoch: 42/50.... training_step: 100100.... batch_loss: 0.032176.... 0.073374 sec/batch
epoch: 42/50.... training_step: 100150.... batch_loss: 0.031230.... 0.074684 sec/batch
epoch: 42/50.... training_step: 100200.... batch_loss: 0.034777.... 0.073848 sec/batch
epoch: 42/50.... training_step: 100250.... batch_loss: 0.034075.... 0.074507 sec/batch
epoch: 42/50.... training_step: 100300.... batch_loss: 0.036852.... 0.074157 sec/batch
epoch: 42/50.... training_step: 100350.... batch_loss: 0.035040.... 0.075316 sec/batch
epoch: 42/50.... training_step: 100400.... batch_loss: 0.031183.... 0.076047 sec/batch
epoch: 42/50.... training_step: 100450.... ba

epoch: 44/50.... training_step: 104650.... batch_loss: 0.031787.... 0.073763 sec/batch
epoch: 44/50.... training_step: 104700.... batch_loss: 0.036475.... 0.073618 sec/batch
epoch: 44/50.... training_step: 104750.... batch_loss: 0.036205.... 0.077019 sec/batch
epoch: 44/50.... training_step: 104800.... batch_loss: 0.033372.... 0.076663 sec/batch
epoch: 44/50.... training_step: 104850.... batch_loss: 0.032681.... 0.075496 sec/batch
epoch: 44/50.... training_step: 104900.... batch_loss: 0.031614.... 0.076243 sec/batch
epoch: 44/50.... training_step: 104950.... batch_loss: 0.035477.... 0.075949 sec/batch
epoch: 44/50.... training_step: 105000.... batch_loss: 0.031934.... 0.072458 sec/batch
epoch: 44/50.... training_step: 105050.... batch_loss: 0.033461.... 0.076384 sec/batch
epoch: 44/50.... training_step: 105100.... batch_loss: 0.033401.... 0.073419 sec/batch
epoch: 44/50.... training_step: 105150.... batch_loss: 0.033113.... 0.072737 sec/batch
epoch: 44/50.... training_step: 105200.... 

epoch: 46/50.... training_step: 109400.... batch_loss: 0.033143.... 0.073863 sec/batch
epoch: 46/50.... training_step: 109450.... batch_loss: 0.033097.... 0.076716 sec/batch
epoch: 46/50.... training_step: 109500.... batch_loss: 0.034233.... 0.073910 sec/batch
epoch: 46/50.... training_step: 109550.... batch_loss: 0.034678.... 0.073384 sec/batch
epoch: 46/50.... training_step: 109600.... batch_loss: 0.030959.... 0.074505 sec/batch
epoch: 46/50.... training_step: 109650.... batch_loss: 0.031263.... 0.074042 sec/batch
epoch: 46/50.... training_step: 109700.... batch_loss: 0.031879.... 0.073388 sec/batch
epoch: 46/50.... training_step: 109750.... batch_loss: 0.035813.... 0.072475 sec/batch
epoch: 46/50.... training_step: 109800.... batch_loss: 0.031644.... 0.074573 sec/batch
epoch: 46/50.... training_step: 109850.... batch_loss: 0.030565.... 0.075308 sec/batch
epoch: 46/50.... training_step: 109900.... batch_loss: 0.032229.... 0.074650 sec/batch
epoch: 46/50.... training_step: 109950.... 

epoch: 48/50.... training_step: 114150.... batch_loss: 0.028990.... 0.074293 sec/batch
epoch: 48/50.... training_step: 114200.... batch_loss: 0.034036.... 0.074651 sec/batch
epoch: 48/50.... training_step: 114250.... batch_loss: 0.038342.... 0.076594 sec/batch
epoch: 48/50.... training_step: 114300.... batch_loss: 0.031681.... 0.073259 sec/batch
epoch: 48/50.... training_step: 114350.... batch_loss: 0.030604.... 0.076394 sec/batch
epoch: 48/50.... training_step: 114400.... batch_loss: 0.031666.... 0.072976 sec/batch
epoch: 48/50.... training_step: 114450.... batch_loss: 0.031449.... 0.074180 sec/batch
epoch: 48/50.... training_step: 114500.... batch_loss: 0.036129.... 0.073719 sec/batch
epoch: 48/50.... training_step: 114550.... batch_loss: 0.033495.... 0.077097 sec/batch
epoch: 48/50.... training_step: 114600.... batch_loss: 0.035558.... 0.074656 sec/batch
epoch: 48/50.... training_step: 114650.... batch_loss: 0.031580.... 0.074678 sec/batch
epoch: 48/50.... training_step: 114700.... 

In [33]:
#returning predictions 

int_to_vocab = dict(enumerate(character_set))

def pick_top_n(preds, vocab_size, top_n=3):
    
    p = np.squeeze(preds)
    
    p[np.argsort(p)[:-top_n]] = 0
    
    p = p / np.sum(p)
    
    c = np.random.choice(vocab_size, 1, p=p)[0]
    
    return c

In [20]:
#Sampling Code

def sample(checkpoint, n_samples, lstmsize, vocab_size, prime = 'The'):
    
    samples = [c for c in prime]
    
    model_sample = build_arvix(len(character_set), lstmsize = lstmsize, sampling = True)
    
    saver = tf.train.Saver()
    
    
    with tf.Session() as sess:
        
        saver.restore(sess, checkpoint)
        
        feed_state = sess.run(model_sample.initial_state)
        
        
        for c in prime:
            
            x = np.zeros((1, 1))
            
            x[0, 0] = dict_int[c]
            
            
            feed = {model_sample.inputs : x,
                   model_sample.keep_prob : 1,
                   model_sample.initial_state : feed_state}
            
            
            preds, feed_state = sess.run([model_sample.predictions, model_sample.final_state], 
                                         feed_dict=feed)
            
            c = pick_top_n(preds, len(character_set))
            
            samples.append(int_to_vocab[c])
            
            
        for i in range(n_samples):
                
            x[0, 0] =  c
                
            feed = {model_sample.inputs: x, model_sample.keep_prob: 1., model_sample.initial_state: feed_state}
            
            preds, feed_state = sess.run([model_sample.predictions, model_sample.final_state], feed_dict=feed)
            
            c = pick_top_n(preds, len(character_set))
            
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)
            

In [40]:
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 500, lstmsize, len(character_set), prime="Convolutional")
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/i117800_l780_h1.ckpt
ConvolutionalIDvolutional Neural Networks (CNNs). This work seeks to explore a pats of a new formalization to depth an autoencoder with lateral shortcut connections from the encoder to decoder at each level of the hierarchy. The lateral shortcut connections allow the higher levels of the hierarchy to focus on abstrect invariant features. We then experiments leveraging recent methods for dealing with overfitting in neural networks as well as other tricks from the neural networkss with layers representing iterations.
Many 
