In [1]:
import numpy as np
import tensorflow as tf
import time
from collections import namedtuple
import os

In [2]:
# Character-level encoding of the corpus

with open('cano.txt', 'r') as f:
    book = f.read()

# Vocabulary: every distinct character of the text, in sorted order
create_set = sorted(set(book))

# Lookup table: character -> integer id (its index in the sorted vocabulary)
dict_int = dict(zip(create_set, range(len(create_set))))


# The entire book as an int32 array of character ids
encoded_book = np.array([dict_int[char] for char in book], dtype = np.int32)

len(encoded_book)

3864202

In [3]:
# Peek at the vocabulary size, the first 100 raw characters and their integer ids.
# NOTE(review): only the print() calls produce output here; the bare expressions
# in the middle of the cell are evaluated but not displayed.
len(create_set)

print(book[:100])

'\n' '\n'

print(encoded_book[:100])


          CHAPTER I
          Mr. Sherlock Holmes


     In the year 1878 I took my degree of Docto
[ 0  1  1  1  1  1  1  1  1  1  1 28 33 26 41 45 30 43  1 34  0  1  1  1
  1  1  1  1  1  1  1 38 72 11  1 44 62 59 72 66 69 57 65  1 33 69 66 67
 59 73  0  0  0  1  1  1  1  1 34 68  1 74 62 59  1 79 59 55 72  1 14 21
 20 21  1 34  1 74 69 69 65  1 67 79  1 58 59 61 72 59 59  1 69 60  1 29
 69 57 74 69]


In [4]:
#function for getting batches

def get_batches(arr, batch_size, num_steps):
    """Yield ``(x, y)`` training batches cut from a 1-D array of character ids.

    ``arr`` is trimmed to a whole number of batches and reshaped to
    ``(batch_size, -1)``, so each row is one contiguous stretch of the text.
    Successive batches walk along the columns ``num_steps`` at a time.

    Args:
        arr: 1-D NumPy array of encoded characters.
        batch_size: number of sequences (rows) in every batch.
        num_steps: number of time steps (columns) in every batch.

    Yields:
        ``(x, y)`` pairs of arrays shaped ``(batch_size, num_steps)`` where
        ``y[i, j]`` is the element that follows ``x[i, j]`` in ``arr``.
        ``x`` is a view into ``arr``; ``y`` lives in a separate buffer.

    Notes:
        The target of the last kept element is the first dropped element of
        ``arr`` when one exists; if ``arr`` divides evenly into batches the
        target wraps around to the first element.  (Previously the last
        column of the final batch was filled with zeros, which taught the
        model a spurious "id 0" target at the end of every row.)
    """
    characters_per_batch = batch_size * num_steps
    num_batches = len(arr) // characters_per_batch
    total = characters_per_batch * num_batches

    # Not enough data for a single full batch: nothing to yield.
    if total == 0:
        return

    # Keep only enough elements to make full batches.
    flat_inputs = arr[:total]

    # Targets are the inputs shifted left by one position over the *flat*
    # sequence, so the end of one row is followed by the start of the next.
    flat_targets = np.zeros(flat_inputs.shape, dtype = flat_inputs.dtype)
    flat_targets[:-1] = flat_inputs[1:]
    flat_targets[-1] = arr[total] if len(arr) > total else flat_inputs[0]

    # One row per sequence in the batch.
    inputs = flat_inputs.reshape(batch_size, -1)
    targets = flat_targets.reshape(batch_size, -1)

    # Slide a window of num_steps columns across both arrays.
    for step in range(0, inputs.shape[1], num_steps):
        x = inputs[:, step : step + num_steps]
        y = targets[:, step : step + num_steps]
        yield x, y
        



In [5]:
# Sanity check: pull the first batch (10 sequences x 50 steps) and show the
# top-left corner of the inputs and of the targets, which should be the
# inputs shifted by one character.
batches = get_batches(encoded_book, 10, 50)
x, y = next(batches)

print(x[:12, :12])
print('\n',y[:12, :12])

[[ 0  1  1  1  1  1  1  1  1  1  1 28]
 [74 73 69 68 11  1 48 59  1 73 62 55]
 [69 73 74  1 57 69 67 59  1 74 69  1]
 [ 9  1 55 68 58  1 62 69 77  1 62 59]
 [63 68  1 74 69 10 67 69 72 72 69 77]
 [72 63 68 61  1 63 68  1 74 62 59  1]
 [58  1 69 60  1 55  1 67 55 72 72 63]
 [60 63 57 63 55 66  1 59 78 70 69 68]
 [59 55 72 73 11  0  0  1  1  1  1  1]
 [ 1 56 59  1 74 72 75 73 74 59 58 11]]

 [[ 1  1  1  1  1  1  1  1  1  1 28 33]
 [73 69 68 11  1 48 59  1 73 62 55 66]
 [73 74  1 57 69 67 59  1 74 69  1 56]
 [ 1 55 68 58  1 62 69 77  1 62 59  1]
 [68  1 74 69 10 67 69 72 72 69 77  9]
 [63 68 61  1 63 68  1 74 62 59  1 61]
 [ 1 69 60  1 55  1 67 55 72 72 63 59]
 [63 57 63 55 66  1 59 78 70 69 68 59]
 [55 72 73 11  0  0  1  1  1  1  1  3]
 [56 59  1 74 72 75 73 74 59 58 11  1]]


In [6]:
#input, output & keep_prob

def tensor_variables(batch_size, num_steps):
    """Create the placeholders the graph is fed through.

    Args:
        batch_size: first dimension of the input/target placeholders.
        num_steps: second dimension of the input/target placeholders.

    Returns:
        Tuple ``(inputs, targets, keep_prob)``: two int32 placeholders of
        shape ``[batch_size, num_steps]`` and one float32 placeholder with
        no shape specified.
    """
    batch_shape = [batch_size, num_steps]

    # Creation order is kept: inputs, then targets, then keep_prob.
    inputs = tf.placeholder(tf.int32, batch_shape)
    targets = tf.placeholder(tf.int32, batch_shape)
    keep_prob = tf.placeholder(tf.float32)

    return inputs, targets, keep_prob


In [7]:
#Build LSTM Cell

def LSTM(lstm_size, batch_size, keep_prob, num_layers):
    """Build a stack of dropout-wrapped LSTM cells and its zero state.

    Args:
        lstm_size: number of units passed to each ``BasicLSTMCell``.
        batch_size: batch size used to build the zero state.
        keep_prob: value passed as ``output_keep_prob`` to each
            ``DropoutWrapper``.
        num_layers: how many cells to stack.

    Returns:
        Tuple ``(multi_lstm, initial_state)``: the ``MultiRNNCell`` and its
        float32 zero state for ``batch_size``.
    """

    def _dropout_lstm_cell(units, output_keep):
        # One layer: a basic LSTM cell with dropout applied to its outputs.
        return tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.BasicLSTMCell(units),
            output_keep_prob = output_keep)

    layers = []
    for _ in range(num_layers):
        layers.append(_dropout_lstm_cell(lstm_size, keep_prob))

    multi_lstm = tf.contrib.rnn.MultiRNNCell(layers)

    return multi_lstm, multi_lstm.zero_state(batch_size, tf.float32)

In [8]:
#calculating predictions from our network

def logits(lstm_output, lstm_size, outclasses_size ):
    """Project LSTM outputs onto the output classes.

    Args:
        lstm_output: output(s) of the RNN; concatenated along axis 1 and
            then reshaped to rows of length ``lstm_size``.
        lstm_size: width of each LSTM output row.
        outclasses_size: number of output classes (columns of the logits).

    Returns:
        Tuple ``(logits, predictions)``: the raw scores and their softmax.
    """
    # Flatten to one row per (sequence, step) so a single matmul covers all.
    flat_output = tf.reshape(tf.concat(lstm_output, axis =1), [-1, lstm_size])

    # Output layer parameters live under the 'softmax' variable scope.
    with tf.variable_scope ('softmax'):
        weights = tf.get_variable(
            'softmax_w', [lstm_size, outclasses_size],
            initializer = tf.contrib.layers.xavier_initializer(seed =1))
        bias = tf.get_variable(
            'softmax_b', [outclasses_size],
            initializer = tf.zeros_initializer())

    scores = tf.add(tf.matmul(flat_output, weights), bias)

    return scores, tf.nn.softmax(scores)


In [9]:
#training loss

def loss_hot(logits, targets, num_classes):
    """Mean softmax cross-entropy between ``logits`` and one-hot ``targets``.

    Args:
        logits: unnormalised class scores.
        targets: integer class ids; one-hot encoded with ``num_classes``
            and reshaped to the static shape of ``logits``.
        num_classes: depth of the one-hot encoding.

    Returns:
        Scalar tensor: the cross-entropy averaged over all rows.
    """
    # One-hot targets, laid out exactly like the logits.
    labels = tf.reshape(tf.one_hot(targets, num_classes), logits.get_shape())

    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = labels)

    return tf.reduce_mean(per_row_loss)

In [10]:
def build_optimizer(learning_rate, grad_clip, loss):
    """Build an Adam training op with global-norm gradient clipping.

    Args:
        learning_rate: learning rate passed to ``AdamOptimizer``.
        grad_clip: clip norm passed to ``tf.clip_by_global_norm``.
        loss: tensor to differentiate with respect to all trainable variables.

    Returns:
        The op returned by ``apply_gradients`` on the clipped gradients.
    """
    variables = tf.trainable_variables()

    # Clip the joint gradient norm before handing gradients to Adam.
    raw_gradients = tf.gradients(loss, variables)
    clipped_gradients, _ = tf.clip_by_global_norm(raw_gradients, grad_clip)

    adam = tf.train.AdamOptimizer(learning_rate)

    return adam.apply_gradients(zip(clipped_gradients, variables))
    

In [11]:
# TRAINING PROCESS

class SherlockAI:
    """Character-level LSTM language model graph (TensorFlow 1.x).

    Constructing an instance resets the default graph and builds the whole
    network into it.

    Attributes:
        inputs, targets: int32 placeholders of shape [batch_size, num_steps].
        keep_prob: float32 placeholder controlling the dropout keep
            probability of every LSTM layer; feed it on each ``sess.run``.
        initial_state: zero state of the stacked LSTM (can be fed to carry
            state across batches).
        final_state: LSTM state after processing the inputs.
        logits, predictions: raw class scores and their softmax.
        loss: mean softmax cross-entropy against ``targets``.
        optimizer: training op (Adam with clipped gradients).
    """

    def __init__(self, num_classes, lstmsize = 128, learning_rate = 0.01,
                 batch_size = 64, num_steps = 50, num_layers = 2,
                  sampling = False, grad_clip=5):
        """Build the graph.

        Args:
            num_classes: vocabulary size (one-hot depth and number of outputs).
            lstmsize: units per LSTM layer.
            learning_rate: Adam learning rate.
            batch_size: sequences per batch (forced to 1 when sampling).
            num_steps: time steps per batch (forced to 1 when sampling).
            num_layers: number of stacked LSTM layers.
            sampling: if true, build the graph for one character at a time.
            grad_clip: global-norm threshold for gradient clipping.
        """
        # Sampling feeds a single character per step.
        if sampling:
            batch_size, num_steps = 1, 1

        # Start from an empty graph so repeated construction does not clash.
        tf.reset_default_graph()

        #Build the input placeholders
        self.inputs, self.targets, self.keep_prob = tensor_variables(batch_size, num_steps)

        #Build LSTM Cell architecture
        # Use the keep_prob *placeholder*: dropout must follow the value fed
        # at run time (e.g. 1.0 while sampling), not a module-level constant
        # that happens to share the name.
        lstm, self.initial_state = LSTM(lstmsize, batch_size, self.keep_prob, num_layers)

        x_one_hot = tf.one_hot(self.inputs, num_classes)

        #Run inputs through LSTM Cell
        outputs, final_state = tf.nn.dynamic_rnn(lstm, x_one_hot, initial_state = self.initial_state)

        self.final_state = final_state

        #Predictions using lstm run
        self.logits, self.predictions = logits(outputs, lstmsize, num_classes)

        #loss function & optimizer
        self.loss = loss_hot(self.logits, self.targets, num_classes)
        self.optimizer = build_optimizer(learning_rate, grad_clip, self.loss)
        


In [12]:
# Hyperparameters shared by the training and sampling cells below.
batch_size = 32         # Sequences per batch
num_steps = 50          # Number of sequence steps per batch
lstmsize = 540       # Size of hidden layers in LSTMs
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001    # Learning rate
keep_prob = 0.5        # Dropout keep probability

In [15]:
#Training phase

print_every_n = 50       # report the loss every N training steps
epochs = 30
save_every_steps = 500   # write a checkpoint every N training steps

model = SherlockAI(len(create_set), lstmsize = lstmsize, learning_rate = learning_rate,
                 batch_size = batch_size, num_steps = num_steps, num_layers = num_layers)

counter = 0

saver = tf.train.Saver(max_to_keep = 100)
initialized = tf.global_variables_initializer()

# Make sure the checkpoint directory exists before the first save so a long
# run cannot fail at its first checkpoint on a fresh machine.
os.makedirs('checkpoints', exist_ok = True)

# The number of trainable parameters is fixed once the graph is built, so
# count and report it once instead of on every epoch.
total_parameters = 0
for variable in tf.trainable_variables():
    variable_parameters = 1
    # shape is an array of tf.Dimension
    for dim in variable.get_shape():
        variable_parameters *= dim.value
    total_parameters += variable_parameters
print(total_parameters)

with tf.Session() as sess:

    # To resume an earlier run, restore instead of initializing:
    #   saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    sess.run(initialized)

    for i in range(epochs):

        # Every epoch starts from the zero LSTM state ...
        feed_state = sess.run(model.initial_state)

        for x, y in get_batches(encoded_book, batch_size, num_steps):

            counter += 1
            start = time.time()

            # ... and each batch continues from the state the previous one left.
            feed = {model.inputs: x, model.targets : y, model.keep_prob: keep_prob, model.initial_state: feed_state}

            batch_loss, feed_state, _ = sess.run([model.loss, model.final_state, model.optimizer], feed_dict = feed)

            if (counter % print_every_n == 0):
                end = time.time()

                print('epochs: {}/{}..... '.format(i + 1, epochs),
                     'training_step: {}.... '.format(counter),
                      'training_loss: {:3f}....... '.format(batch_loss),
                      '{:.4f} sec/batch '.format(end-start))

            if (counter % save_every_steps == 0):
                # File name encodes the step count and LSTM size; the trailing
                # 5 is a fixed tag kept for compatibility with existing files.
                saver.save(sess, "checkpoints/i{}_l{}_h{}.ckpt".format(counter, lstmsize, 5))

    # Final checkpoint once all epochs have completed.
    saver.save(sess, "checkpoints/i{}_l{}_h{}.ckpt".format(counter, lstmsize, 5))
    
    


3776321
epochs: 1/30.....  training_step: 50....  training_loss: 3.042669.......  1.8792 sec/batch 
epochs: 1/30.....  training_step: 100....  training_loss: 2.985367.......  1.8928 sec/batch 
epochs: 1/30.....  training_step: 150....  training_loss: 2.893414.......  1.8912 sec/batch 
epochs: 1/30.....  training_step: 200....  training_loss: 2.767314.......  1.8800 sec/batch 
epochs: 1/30.....  training_step: 250....  training_loss: 2.488854.......  1.9064 sec/batch 
epochs: 1/30.....  training_step: 300....  training_loss: 2.392628.......  1.8823 sec/batch 
epochs: 1/30.....  training_step: 350....  training_loss: 2.288828.......  1.8679 sec/batch 
epochs: 1/30.....  training_step: 400....  training_loss: 2.225275.......  1.8643 sec/batch 
epochs: 1/30.....  training_step: 450....  training_loss: 2.172939.......  1.8790 sec/batch 
epochs: 1/30.....  training_step: 500....  training_loss: 2.185823.......  1.9343 sec/batch 
epochs: 1/30.....  training_step: 550....  training_loss: 2.125

epochs: 2/30.....  training_step: 4450....  training_loss: 1.345247.......  1.8847 sec/batch 
epochs: 2/30.....  training_step: 4500....  training_loss: 1.447028.......  1.9120 sec/batch 
epochs: 2/30.....  training_step: 4550....  training_loss: 1.364891.......  1.8892 sec/batch 
epochs: 2/30.....  training_step: 4600....  training_loss: 1.372769.......  1.8914 sec/batch 
epochs: 2/30.....  training_step: 4650....  training_loss: 1.357194.......  1.8998 sec/batch 
epochs: 2/30.....  training_step: 4700....  training_loss: 1.338223.......  1.9403 sec/batch 
epochs: 2/30.....  training_step: 4750....  training_loss: 1.329996.......  1.9227 sec/batch 
epochs: 2/30.....  training_step: 4800....  training_loss: 1.422820.......  1.9088 sec/batch 
3776321
epochs: 3/30.....  training_step: 4850....  training_loss: 1.388840.......  1.8934 sec/batch 
epochs: 3/30.....  training_step: 4900....  training_loss: 1.351478.......  1.8638 sec/batch 
epochs: 3/30.....  training_step: 4950....  training

epochs: 4/30.....  training_step: 8800....  training_loss: 1.199042.......  1.9501 sec/batch 
epochs: 4/30.....  training_step: 8850....  training_loss: 1.200441.......  1.9624 sec/batch 
epochs: 4/30.....  training_step: 8900....  training_loss: 1.258798.......  1.9619 sec/batch 
epochs: 4/30.....  training_step: 8950....  training_loss: 1.269466.......  1.9309 sec/batch 
epochs: 4/30.....  training_step: 9000....  training_loss: 1.250501.......  1.9276 sec/batch 
epochs: 4/30.....  training_step: 9050....  training_loss: 1.244651.......  1.9318 sec/batch 
epochs: 4/30.....  training_step: 9100....  training_loss: 1.276949.......  1.9416 sec/batch 
epochs: 4/30.....  training_step: 9150....  training_loss: 1.277496.......  1.9418 sec/batch 
epochs: 4/30.....  training_step: 9200....  training_loss: 1.212591.......  1.9610 sec/batch 
epochs: 4/30.....  training_step: 9250....  training_loss: 1.317528.......  1.9151 sec/batch 
epochs: 4/30.....  training_step: 9300....  training_loss: 1

epochs: 6/30.....  training_step: 13150....  training_loss: 1.197675.......  1.9989 sec/batch 
epochs: 6/30.....  training_step: 13200....  training_loss: 1.196665.......  1.9979 sec/batch 
epochs: 6/30.....  training_step: 13250....  training_loss: 1.191077.......  4.2886 sec/batch 
epochs: 6/30.....  training_step: 13300....  training_loss: 1.207798.......  1.9027 sec/batch 
epochs: 6/30.....  training_step: 13350....  training_loss: 1.201034.......  3.6785 sec/batch 
epochs: 6/30.....  training_step: 13400....  training_loss: 1.245377.......  1.9009 sec/batch 
epochs: 6/30.....  training_step: 13450....  training_loss: 1.179927.......  1.9068 sec/batch 
epochs: 6/30.....  training_step: 13500....  training_loss: 1.242732.......  3.7516 sec/batch 
epochs: 6/30.....  training_step: 13550....  training_loss: 1.206923.......  1.8749 sec/batch 
epochs: 6/30.....  training_step: 13600....  training_loss: 1.265343.......  1.8953 sec/batch 
epochs: 6/30.....  training_step: 13650....  train

epochs: 8/30.....  training_step: 17500....  training_loss: 1.211040.......  1.9204 sec/batch 
epochs: 8/30.....  training_step: 17550....  training_loss: 1.231929.......  1.9240 sec/batch 
epochs: 8/30.....  training_step: 17600....  training_loss: 1.172602.......  1.8992 sec/batch 
epochs: 8/30.....  training_step: 17650....  training_loss: 1.151591.......  1.9003 sec/batch 
epochs: 8/30.....  training_step: 17700....  training_loss: 1.215925.......  1.9092 sec/batch 
epochs: 8/30.....  training_step: 17750....  training_loss: 1.165689.......  1.9073 sec/batch 
epochs: 8/30.....  training_step: 17800....  training_loss: 1.149063.......  1.9279 sec/batch 
epochs: 8/30.....  training_step: 17850....  training_loss: 1.151096.......  1.9082 sec/batch 
epochs: 8/30.....  training_step: 17900....  training_loss: 1.181073.......  1.9126 sec/batch 
epochs: 8/30.....  training_step: 17950....  training_loss: 1.135364.......  1.9047 sec/batch 
epochs: 8/30.....  training_step: 18000....  train

epochs: 10/30.....  training_step: 21850....  training_loss: 1.142903.......  3.1759 sec/batch 
epochs: 10/30.....  training_step: 21900....  training_loss: 1.108879.......  1.8901 sec/batch 
epochs: 10/30.....  training_step: 21950....  training_loss: 1.071234.......  3.9364 sec/batch 
epochs: 10/30.....  training_step: 22000....  training_loss: 1.156960.......  4.7196 sec/batch 
epochs: 10/30.....  training_step: 22050....  training_loss: 1.097637.......  1.9415 sec/batch 
epochs: 10/30.....  training_step: 22100....  training_loss: 1.146177.......  1.8774 sec/batch 
epochs: 10/30.....  training_step: 22150....  training_loss: 1.164951.......  4.4863 sec/batch 
epochs: 10/30.....  training_step: 22200....  training_loss: 1.117164.......  1.8762 sec/batch 
epochs: 10/30.....  training_step: 22250....  training_loss: 1.142612.......  1.8857 sec/batch 
epochs: 10/30.....  training_step: 22300....  training_loss: 1.122859.......  1.9027 sec/batch 
epochs: 10/30.....  training_step: 22350

epochs: 11/30.....  training_step: 26150....  training_loss: 1.199814.......  1.9717 sec/batch 
epochs: 11/30.....  training_step: 26200....  training_loss: 1.081126.......  1.9362 sec/batch 
epochs: 11/30.....  training_step: 26250....  training_loss: 1.167375.......  2.9023 sec/batch 
epochs: 11/30.....  training_step: 26300....  training_loss: 1.139787.......  2.3497 sec/batch 
epochs: 11/30.....  training_step: 26350....  training_loss: 1.161714.......  1.9347 sec/batch 
epochs: 11/30.....  training_step: 26400....  training_loss: 1.110473.......  1.9486 sec/batch 
epochs: 11/30.....  training_step: 26450....  training_loss: 1.054537.......  1.9379 sec/batch 
epochs: 11/30.....  training_step: 26500....  training_loss: 1.063625.......  2.0157 sec/batch 
epochs: 11/30.....  training_step: 26550....  training_loss: 1.133103.......  1.9391 sec/batch 
3776321
epochs: 12/30.....  training_step: 26600....  training_loss: 1.119194.......  1.9281 sec/batch 
epochs: 12/30.....  training_ste

epochs: 13/30.....  training_step: 30450....  training_loss: 1.149352.......  1.8907 sec/batch 
epochs: 13/30.....  training_step: 30500....  training_loss: 1.029563.......  3.9701 sec/batch 
epochs: 13/30.....  training_step: 30550....  training_loss: 1.108748.......  1.8841 sec/batch 
epochs: 13/30.....  training_step: 30600....  training_loss: 1.053076.......  4.7656 sec/batch 
epochs: 13/30.....  training_step: 30650....  training_loss: 1.182275.......  1.9206 sec/batch 
epochs: 13/30.....  training_step: 30700....  training_loss: 1.121085.......  1.9793 sec/batch 
epochs: 13/30.....  training_step: 30750....  training_loss: 1.142816.......  1.8821 sec/batch 
epochs: 13/30.....  training_step: 30800....  training_loss: 1.136583.......  1.8872 sec/batch 
epochs: 13/30.....  training_step: 30850....  training_loss: 1.134735.......  1.9016 sec/batch 
epochs: 13/30.....  training_step: 30900....  training_loss: 1.040102.......  1.8788 sec/batch 
epochs: 13/30.....  training_step: 30950

epochs: 15/30.....  training_step: 34750....  training_loss: 1.064943.......  1.9793 sec/batch 
epochs: 15/30.....  training_step: 34800....  training_loss: 1.075418.......  1.9913 sec/batch 
epochs: 15/30.....  training_step: 34850....  training_loss: 1.074800.......  1.9820 sec/batch 
epochs: 15/30.....  training_step: 34900....  training_loss: 1.130775.......  1.9965 sec/batch 
epochs: 15/30.....  training_step: 34950....  training_loss: 1.081770.......  2.0091 sec/batch 
epochs: 15/30.....  training_step: 35000....  training_loss: 1.084975.......  2.2063 sec/batch 
epochs: 15/30.....  training_step: 35050....  training_loss: 1.113509.......  2.0077 sec/batch 
epochs: 15/30.....  training_step: 35100....  training_loss: 1.051835.......  2.0218 sec/batch 
epochs: 15/30.....  training_step: 35150....  training_loss: 1.188031.......  2.0150 sec/batch 
epochs: 15/30.....  training_step: 35200....  training_loss: 1.089883.......  1.9822 sec/batch 
epochs: 15/30.....  training_step: 35250

epochs: 17/30.....  training_step: 39050....  training_loss: 1.110855.......  1.9374 sec/batch 
epochs: 17/30.....  training_step: 39100....  training_loss: 1.131273.......  1.9313 sec/batch 
epochs: 17/30.....  training_step: 39150....  training_loss: 1.110230.......  2.1413 sec/batch 
epochs: 17/30.....  training_step: 39200....  training_loss: 1.108661.......  2.4998 sec/batch 
epochs: 17/30.....  training_step: 39250....  training_loss: 1.079216.......  2.5588 sec/batch 
epochs: 17/30.....  training_step: 39300....  training_loss: 1.046615.......  1.9641 sec/batch 
epochs: 17/30.....  training_step: 39350....  training_loss: 1.073663.......  2.2858 sec/batch 
epochs: 17/30.....  training_step: 39400....  training_loss: 1.057717.......  2.5007 sec/batch 
epochs: 17/30.....  training_step: 39450....  training_loss: 1.086323.......  2.3885 sec/batch 
epochs: 17/30.....  training_step: 39500....  training_loss: 1.083881.......  2.3553 sec/batch 
epochs: 17/30.....  training_step: 39550

KeyboardInterrupt: 

In [None]:
# Show the checkpoint state recorded for the 'checkpoints' directory.
tf.train.get_checkpoint_state('checkpoints')

In [None]:
def pick_top_n(preds, vocab_size, top_n=2):
    """Sample a class index from the ``top_n`` most probable predictions.

    Args:
        preds: array of class probabilities; singleton dimensions are
            squeezed away.  The caller's array is left unmodified.
        vocab_size: number of classes to draw from; passed to
            ``np.random.choice`` and so must match the squeezed length
            of ``preds``.
        top_n: how many of the highest-probability classes stay eligible.

    Returns:
        The sampled class index.
    """
    # np.squeeze returns a view, so copy before zeroing entries; otherwise
    # the caller's prediction array would be modified in place.
    p = np.squeeze(preds).copy()

    # Drop everything except the top_n largest probabilities ...
    p[np.argsort(p)[:-top_n]] = 0

    # ... and renormalise what is left into a distribution.
    p = p / np.sum(p)

    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [None]:
# Inverse of dict_int: integer id -> character, built from the same sorted vocabulary.
int_to_vocab = dict(enumerate(create_set))

In [None]:
def sample(checkpoint, n_samples, lstmsize, vocab_size, prime="The "):
    """Generate text from a trained checkpoint, one character at a time.

    Args:
        checkpoint: path of the checkpoint to restore.
        n_samples: number of sampling iterations after the priming step.
        lstmsize: LSTM layer size the checkpoint was trained with.
        vocab_size: number of distinct characters (model output classes).
        prime: non-empty seed text; every character must be a key of
            ``dict_int``.

    Returns:
        ``prime`` followed by the generated characters.  One character is
        sampled right after priming and ``n_samples`` more in the loop, so
        ``n_samples + 1`` characters are appended in total.

    Raises:
        ValueError: if ``prime`` is empty (there would be no prediction to
            sample the first character from).
    """
    if not prime:
        raise ValueError("prime must contain at least one character")

    samples = list(prime)

    # Sampling graph: batch of one sequence, one step at a time.  The other
    # architecture settings fall back to the SherlockAI defaults.
    model = SherlockAI(vocab_size, lstmsize=lstmsize, sampling=True)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        feed_state = sess.run(model.initial_state)

        # Single-character input buffer, typed like the int32 placeholder.
        x = np.zeros((1, 1), dtype=np.int32)

        # Warm up the LSTM state on the priming text.
        for c in prime:
            x[0,0] = dict_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: feed_state}
            preds, feed_state = sess.run([model.predictions, model.final_state],
                                         feed_dict=feed)

        # First generated character comes from the prediction after the prime.
        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])

        # Feed each sampled character back in to get the next one.
        for _ in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: feed_state}
            preds, feed_state = sess.run([model.predictions, model.final_state],
                                         feed_dict=feed)

            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])

    return ''.join(samples)

In [None]:
# Show the path of the most recent checkpoint in the 'checkpoints' directory.
tf.train.latest_checkpoint('checkpoints')

In [None]:
# Generate text from the most recent checkpoint, seeded with "Sherlock"
# (n_samples=500), and print it.
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 500, lstmsize, len(create_set), prime="Sherlock")
print(samp)