In [3]:
import numpy as np
import tensorflow as tf
import time
from collections import namedtuple

In [4]:
# Character-level encoding of the corpus

# Read the whole text file into a single string.
with open('cano.txt', 'r') as f:
    book = f.read()

# Sorted list of the distinct characters that occur in the text.
create_set = sorted(set(book))

# Character -> integer id, numbered by position in the sorted list above.
dict_int = dict(zip(create_set, range(len(create_set))))

# The entire book as an int32 array of character ids.
encoded_book = np.fromiter((dict_int[char] for char in book), dtype=np.int32)

len(encoded_book)

3864202

In [5]:
# Sanity-check the encoding.
# Only the LAST bare expression of a cell is displayed, so the vocabulary size
# is printed explicitly rather than being computed and silently discarded.
print(len(create_set))

# First 100 characters of the raw text ...
print(book[:100])

# ... and the same 100 characters as integer ids.
print(encoded_book[:100])


          CHAPTER I
          Mr. Sherlock Holmes


     In the year 1878 I took my degree of Docto
[ 0  1  1  1  1  1  1  1  1  1  1 28 33 26 41 45 30 43  1 34  0  1  1  1
  1  1  1  1  1  1  1 38 72 11  1 44 62 59 72 66 69 57 65  1 33 69 66 67
 59 73  0  0  0  1  1  1  1  1 34 68  1 74 62 59  1 79 59 55 72  1 14 21
 20 21  1 34  1 74 69 69 65  1 67 79  1 58 59 61 72 59 59  1 69 60  1 29
 69 57 74 69]


In [6]:
#function for getting batches

def get_batches(arr, batch_size, num_steps):
    """Yield ``(x, y)`` batches from a 1-D array of encoded characters.

    The array is truncated to a whole number of batches, reshaped to
    ``batch_size`` rows, and walked in windows of ``num_steps`` columns.
    ``y`` is ``x`` shifted one character to the left, i.e. ``y[r, c]`` is the
    character that follows ``x[r, c]`` in the original sequence.

    The target of the last column of each row is the first character of the
    next row (rows are consecutive slices of the text). For the very last
    character the target is the first unused character of ``arr`` if one
    exists, otherwise it wraps around to the first character. No target is
    ever left as zero padding, which would be a spurious class label.

    Args:
        arr: 1-D array-like of integer character ids.
        batch_size: number of sequences (rows) per batch.
        num_steps: number of time steps (columns) per batch.

    Yields:
        Tuples ``(x, y)`` of arrays, each of shape ``(batch_size, num_steps)``.
        Nothing is yielded when ``arr`` is shorter than one full batch.

    Raises:
        ValueError: if ``batch_size`` or ``num_steps`` is not positive
            (raised when the generator is first advanced).
    """
    if batch_size <= 0 or num_steps <= 0:
        raise ValueError("batch_size and num_steps must be positive")

    arr = np.asarray(arr)
    characters_per_batch = batch_size * num_steps
    num_batches = len(arr) // characters_per_batch

    # Keep only enough characters to make full batches.
    usable = characters_per_batch * num_batches
    if usable == 0:
        return

    inputs = arr[:usable]

    # Targets are the inputs shifted by one position in the flat sequence.
    targets = np.empty_like(inputs)
    targets[:-1] = inputs[1:]
    # Use the real next character when the truncation left one over,
    # otherwise wrap to the start.
    targets[-1] = arr[usable] if usable < len(arr) else inputs[0]

    # One row per sequence in the batch.
    inputs = inputs.reshape(batch_size, -1)
    targets = targets.reshape(batch_size, -1)

    for step in range(0, inputs.shape[1], num_steps):
        window = slice(step, step + num_steps)
        yield inputs[:, window], targets[:, window]
        



In [7]:
# Pull a single batch (10 sequences x 50 steps) to eyeball the generator output.
batches = get_batches(encoded_book, 10, 50)
x, y = next(batches)

# Show the top-left corner of the inputs, then of the matching targets.
corner = (slice(None, 12), slice(None, 12))
print(x[corner])
print('\n', y[corner])

[[ 0  1  1  1  1  1  1  1  1  1  1 28]
 [74 73 69 68 11  1 48 59  1 73 62 55]
 [69 73 74  1 57 69 67 59  1 74 69  1]
 [ 9  1 55 68 58  1 62 69 77  1 62 59]
 [63 68  1 74 69 10 67 69 72 72 69 77]
 [72 63 68 61  1 63 68  1 74 62 59  1]
 [58  1 69 60  1 55  1 67 55 72 72 63]
 [60 63 57 63 55 66  1 59 78 70 69 68]
 [59 55 72 73 11  0  0  1  1  1  1  1]
 [ 1 56 59  1 74 72 75 73 74 59 58 11]]

 [[ 1  1  1  1  1  1  1  1  1  1 28 33]
 [73 69 68 11  1 48 59  1 73 62 55 66]
 [73 74  1 57 69 67 59  1 74 69  1 56]
 [ 1 55 68 58  1 62 69 77  1 62 59  1]
 [68  1 74 69 10 67 69 72 72 69 77  9]
 [63 68 61  1 63 68  1 74 62 59  1 61]
 [ 1 69 60  1 55  1 67 55 72 72 63 59]
 [63 57 63 55 66  1 59 78 70 69 68 59]
 [55 72 73 11  0  0  1  1  1  1  1  3]
 [56 59  1 74 72 75 73 74 59 58 11  1]]


In [8]:
#input, output & keep_prob

def tensor_variables(batch_size, num_steps):
    """Create the placeholders the graph is fed through.

    Args:
        batch_size: first dimension of the input/target placeholders.
        num_steps: second dimension of the input/target placeholders.

    Returns:
        ``(inputs, targets, keep_prob)``: two int32 placeholders of shape
        ``[batch_size, num_steps]`` and one float32 placeholder created
        without an explicit shape.
    """
    token_shape = [batch_size, num_steps]

    inputs = tf.placeholder(tf.int32, shape=token_shape)
    targets = tf.placeholder(tf.int32, shape=token_shape)

    # Dropout keep probability; fed at run time.
    keep_prob = tf.placeholder(tf.float32)

    return inputs, targets, keep_prob


In [9]:
#Build LSTM Cell

def LSTM(lstm_size, batch_size, keep_prob, num_layers):
    """Build a stack of dropout-wrapped LSTM cells and its zero state.

    Args:
        lstm_size: number of units passed to each ``BasicLSTMCell``.
        batch_size: batch size used to build the zero state.
        keep_prob: value passed as ``output_keep_prob`` to each
            ``DropoutWrapper``.
        num_layers: number of cells in the stack.

    Returns:
        ``(multi_lstm, initial_state)``: the ``MultiRNNCell`` and its float32
        zero state for ``batch_size``.
    """
    layers = []
    for _ in range(num_layers):
        cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        # Dropout is applied to each layer's outputs.
        layers.append(tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob))

    stacked_cell = tf.contrib.rnn.MultiRNNCell(layers)
    zero_state = stacked_cell.zero_state(batch_size, tf.float32)

    return stacked_cell, zero_state

In [10]:
#calculating predictions from our network

def logits(lstm_output, lstm_size, outclasses_size):
    """Project LSTM outputs onto class scores and softmax probabilities.

    Args:
        lstm_output: output of the recurrent layers; it is concatenated along
            axis 1 and then reshaped to rows of width ``lstm_size``.
        lstm_size: width of each LSTM output vector.
        outclasses_size: number of output classes.

    Returns:
        ``(logits, predictions)``: the linear scores of shape
        ``[-1, outclasses_size]`` and their softmax.
    """
    # Kept from the original; presumably a pass-through when lstm_output is a
    # single tensor rather than a list - TODO confirm.
    joined = tf.concat(lstm_output, axis=1)

    # One row per (sequence, step) pair.
    flat_output = tf.reshape(joined, [-1, lstm_size])

    with tf.variable_scope('softmax'):
        weights = tf.get_variable(
            'softmax_w',
            shape=[lstm_size, outclasses_size],
            initializer=tf.contrib.layers.xavier_initializer(seed=1),
        )
        bias = tf.get_variable(
            'softmax_b',
            shape=[outclasses_size],
            initializer=tf.zeros_initializer(),
        )

    scores = tf.add(tf.matmul(flat_output, weights), bias)

    return scores, tf.nn.softmax(scores)


In [11]:
#training loss

def loss_hot(logits, targets, num_classes):
    """Mean softmax cross-entropy between logits and one-hot encoded targets.

    Args:
        logits: class scores; their static shape is used to reshape the labels.
        targets: integer class ids, one-hot encoded to ``num_classes``.
        num_classes: depth of the one-hot encoding.

    Returns:
        Scalar tensor: the cross-entropy averaged over all rows.
    """
    one_hot_targets = tf.one_hot(targets, num_classes)

    # Give the labels the same shape as the logits.
    labels = tf.reshape(one_hot_targets, logits.get_shape())

    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)

    return tf.reduce_mean(per_row_loss)

In [12]:
def build_optimizer(learning_rate, grad_clip, loss):
    """Build an Adam training op with global-norm gradient clipping.

    Args:
        learning_rate: learning rate passed to ``AdamOptimizer``.
        grad_clip: clip norm passed to ``tf.clip_by_global_norm``.
        loss: tensor to differentiate with respect to all trainable variables.

    Returns:
        The op returned by ``apply_gradients`` for the clipped gradients.
    """
    trainable = tf.trainable_variables()

    raw_grads = tf.gradients(loss, trainable)
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

    adam = tf.train.AdamOptimizer(learning_rate)

    return adam.apply_gradients(zip(clipped_grads, trainable))
    

In [13]:
# TRAINING PROCESS

class SherlockAI:
    """Character-level LSTM network built on a fresh default graph.

    Constructing an instance resets the default TensorFlow graph and builds
    placeholders, the stacked LSTM, the softmax projection, the loss and the
    optimizer op.

    Attributes:
        inputs, targets: int32 placeholders of shape [batch_size, num_steps].
        keep_prob: float32 placeholder for the dropout keep probability.
        initial_state: zero state of the stacked LSTM (can be fed).
        final_state: LSTM state after running the inputs.
        logits, predictions: class scores and their softmax.
        loss: mean softmax cross-entropy against ``targets``.
        optimizer: training op (Adam with gradient clipping).
    """

    def __init__(self, num_classes, lstmsize = 128, learning_rate = 0.01,
                 batch_size = 64, num_steps = 50, num_layers = 2,
                 sampling = False, grad_clip=5):
        """Build the graph.

        Args:
            num_classes: vocabulary size (one-hot depth and output classes).
            lstmsize: units per LSTM layer.
            learning_rate: learning rate for the optimizer.
            batch_size: sequences per batch (forced to 1 when sampling).
            num_steps: steps per sequence (forced to 1 when sampling).
            num_layers: number of stacked LSTM layers.
            sampling: when true, build a 1x1 graph for generation
                (one character fed at a time).
            grad_clip: global-norm clipping threshold for gradients.
        """
        if sampling:
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()

        #Build the input placeholders
        self.inputs, self.targets, self.keep_prob = tensor_variables(batch_size, num_steps)

        #Build LSTM Cell architecture
        # Wire dropout to the placeholder. The bare name `keep_prob` would
        # resolve to a module-level variable instead, baking a constant into
        # the graph so that feeding `self.keep_prob` (e.g. 1.0 while sampling)
        # would have no effect.
        lstm, self.initial_state = LSTM(lstmsize, batch_size, self.keep_prob, num_layers)

        x_one_hot = tf.one_hot(self.inputs, num_classes)

        #Run inputs through LSTM Cell
        outputs, final_state = tf.nn.dynamic_rnn(lstm, x_one_hot, initial_state = self.initial_state)
        self.final_state = final_state

        #Predictions using lstm run
        self.logits, self.predictions = logits(outputs, lstmsize, num_classes)

        #loss function & optimizer
        self.loss = loss_hot(self.logits, self.targets, num_classes)
        self.optimizer = build_optimizer(learning_rate, grad_clip, self.loss)
        


In [30]:
# Hyperparameters for the training run

# Batch geometry: sequences per batch, and characters (steps) per sequence.
batch_size, num_steps = 32, 50

# Network size: units per LSTM layer, and number of stacked LSTM layers.
lstmsize, num_layers = 540, 2

# Learning rate handed to the optimizer.
learning_rate = 0.001

# Dropout keep probability fed during training.
# NOTE(review): 0.1 keeps only 10% of each layer's outputs - confirm this is intended.
keep_prob = 0.1

In [None]:
#Training phase

print_every_n = 50       # log the loss every N training steps
epochs = 5               # full passes over the encoded book
save_every_steps = 500   # write a checkpoint every N training steps

model = SherlockAI(len(create_set), lstmsize = lstmsize, learning_rate = learning_rate,
                   batch_size = batch_size, num_steps = num_steps, num_layers = num_layers)

counter = 0

saver = tf.train.Saver(max_to_keep = 100)
initialized = tf.global_variables_initializer()

# Count trainable parameters once: the graph does not change between epochs.
total_parameters = 0
for variable in tf.trainable_variables():
    variable_parameters = 1
    # get_shape() yields tf.Dimension objects; .value is the integer size.
    for dim in variable.get_shape():
        variable_parameters *= dim.value
    total_parameters += variable_parameters
print(total_parameters)

# Saver.save does not create the parent directory; make sure it exists.
tf.gfile.MakeDirs('checkpoints')

with tf.Session() as sess:

    sess.run(initialized)

    for i in range(epochs):

        # Start every epoch from the zero LSTM state.
        feed_state = sess.run(model.initial_state)

        for x, y in get_batches(encoded_book, batch_size, num_steps):

            counter += 1
            start = time.time()

            # Carry the LSTM state over from the previous batch.
            feed = {model.inputs: x, model.targets : y, model.keep_prob: keep_prob, model.initial_state: feed_state}

            batch_loss, feed_state, _ = sess.run([model.loss, model.final_state, model.optimizer], feed_dict = feed)

            if (counter % print_every_n == 0):
                end = time.time()

                print('epochs: {}/{}..... '.format(i + 1, epochs),
                      'training_step: {}.... '.format(counter),
                      'training_loss: {:3f}....... '.format(batch_loss),
                      '{:.4f} sec/batch '.format(end-start))

            if (counter % save_every_steps == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstmsize))

    # Final checkpoint after the last epoch.
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstmsize))

# Sampling is deliberately not done in this cell: `sample` and `checkpoint`
# are defined in later cells, and `sample` rebuilds the model (which calls
# tf.reset_default_graph()), something that must not happen while the
# training session above is still active. Use the sampling cell at the end
# of the notebook instead.
    
    


3776321
epochs: 1/5.....  training_step: 50....  training_loss: 2.970223.......  2.4896 sec/batch 
epochs: 1/5.....  training_step: 100....  training_loss: 2.887045.......  2.6625 sec/batch 
epochs: 1/5.....  training_step: 150....  training_loss: 2.761403.......  2.2754 sec/batch 
epochs: 1/5.....  training_step: 200....  training_loss: 2.693883.......  2.0575 sec/batch 
epochs: 1/5.....  training_step: 250....  training_loss: 2.299656.......  2.0402 sec/batch 
epochs: 1/5.....  training_step: 300....  training_loss: 2.223397.......  2.0334 sec/batch 
epochs: 1/5.....  training_step: 350....  training_loss: 2.124370.......  2.0560 sec/batch 
epochs: 1/5.....  training_step: 400....  training_loss: 2.135356.......  2.0914 sec/batch 
epochs: 1/5.....  training_step: 450....  training_loss: 2.151492.......  2.0522 sec/batch 
epochs: 1/5.....  training_step: 500....  training_loss: 2.026647.......  2.0251 sec/batch 
epochs: 1/5.....  training_step: 550....  training_loss: 1.941062....... 

In [None]:
# Display the checkpoint state TensorFlow has recorded for the 'checkpoints' directory.
tf.train.get_checkpoint_state(checkpoint_dir='checkpoints')

In [1]:
def pick_top_n(preds, vocab_size, top_n=2):
    """Sample a class id from the ``top_n`` most probable predictions.

    All but the ``top_n`` largest probabilities are zeroed, the remainder is
    renormalised, and one id is drawn with ``np.random.choice``.

    Args:
        preds: array of class probabilities; singleton dimensions are
            squeezed away. The caller's array is NOT modified.
        vocab_size: number of classes to choose from (must match the length
            of the squeezed ``preds``).
        top_n: how many of the most probable classes stay eligible.

    Returns:
        The sampled class id (a NumPy integer).
    """
    # np.squeeze returns a view, so take an explicit copy before zeroing
    # entries in place; otherwise the caller's `preds` would be clobbered.
    p = np.array(np.squeeze(preds), dtype=np.float64)

    # argsort is ascending: everything except the last top_n entries is dropped.
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)

    return np.random.choice(vocab_size, 1, p=p)[0]

In [15]:
# Reverse lookup: integer id -> character (the inverse of dict_int).
int_to_vocab = {index: char for index, char in enumerate(create_set)}

In [2]:
def sample(checkpoint, n_samples, lstmsize, vocab_size, prime="The ", num_layers=2):
    """Generate text from a trained checkpoint, one character at a time.

    A 1x1 sampling model is built, the checkpoint is restored, the network
    state is warmed up on ``prime``, and characters are then drawn with
    ``pick_top_n`` and fed back in.

    Args:
        checkpoint: checkpoint path passed to ``Saver.restore``.
        n_samples: number of feed-back steps; together with the one character
            drawn right after priming this appends ``n_samples + 1``
            characters to ``prime``.
        lstmsize: LSTM layer size; must match the checkpoint.
        vocab_size: number of character classes; must match the checkpoint.
        prime: non-empty seed text. Every character must be a key of
            ``dict_int`` (otherwise ``KeyError``).
        num_layers: number of stacked LSTM layers; must match the checkpoint.

    Returns:
        ``prime`` followed by the generated characters, as one string.

    Raises:
        ValueError: if ``prime`` is empty (there would be no prediction to
            sample the first character from).
    """
    if not prime:
        raise ValueError("prime must contain at least one character")

    samples = list(prime)
    model = SherlockAI(vocab_size, lstmsize=lstmsize, num_layers=num_layers, sampling=True)
    saver = tf.train.Saver()

    # Single-character input buffer; int32 to match the inputs placeholder.
    x = np.zeros((1, 1), dtype=np.int32)

    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        feed_state = sess.run(model.initial_state)

        def run_step(char_id, state):
            # Feed one character with dropout disabled; return (preds, new_state).
            x[0, 0] = char_id
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: state}
            return sess.run([model.predictions, model.final_state],
                            feed_dict=feed)

        # Warm up the LSTM state on the seed text.
        for c in prime:
            preds, feed_state = run_step(dict_int[c], feed_state)

        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])

        # Feed each sampled character back in to get the next one.
        for _ in range(n_samples):
            preds, feed_state = run_step(c, feed_state)

            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])

    return ''.join(samples)

In [None]:
# Display the path of the latest checkpoint recorded for the 'checkpoints' directory.
tf.train.latest_checkpoint(checkpoint_dir='checkpoints')

In [None]:
# Sample from the latest checkpoint, seeding the generator with "Sherlock".
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, n_samples=500, lstmsize=lstmsize,
              vocab_size=len(create_set), prime="Sherlock")
print(samp)