# 01 - Introducing the tools

We want to move some of the functions from the previous notebook into external modules. The train method, for example, can be reused because it is the same for every model.

At the moment it looks like this:

```python
def train(self, train_data, train_labels, alphabet, epochs=20, batch_size=128):
        """Train the model and print a generated text sample after every epoch.

        Creates a new ``tf.Session`` (kept on ``self.session``), initialises all
        global variables, and evaluates loss/accuracy on the full training set
        once before training and once after every epoch. The collected values
        are stored in ``self.hist``.

        train_data: training inputs fed to ``self.X``

        train_labels: training targets fed to ``self.Y``

        alphabet: object whose ``_keys`` maps a class index to its character

        epochs: number of passes over the training data

        batch_size: number of samples per training step
        """
        train_losses = []
        train_accs = []
        
        self.session = tf.Session()
        session = self.session
        
        with session.as_default():
            session.run(tf.global_variables_initializer())
            # metrics of the untrained model, evaluated on the whole training set
            tr_loss, tr_acc = session.run([self.loss, self.accuracy],
                                          feed_dict={self.X: train_data,
                                                     self.Y: train_labels})
            train_losses.append(tr_loss)
            train_accs.append(tr_acc)
            
            for epoch in range(epochs):
                
                # NOTE: "% 1" is always 0, so this prints on every epoch. The values
                # shown are the ones measured before this epoch's training steps.
                if(epoch + 1) % 1 == 0:
                    print(f"\n\nEpoch {epoch + 1}/{epochs}")
                    print(f"Loss:    \t {tr_loss}")
                    print(f"Accuracy:\t {tr_acc}")
                
                for batch_ixs in batch_data(len(train_data), batch_size):
                    _ = session.run(self.train_step,
                                   feed_dict={
                                       self.X: train_data[batch_ixs],
                                       self.Y: train_labels[batch_ixs],
                                   })
                # metrics after this epoch, again on the whole training set
                tr_loss, tr_acc = session.run([self.loss, self.accuracy],
                                               feed_dict={self.X: train_data,
                                                          self.Y: train_labels
                                                         })
                train_losses.append(tr_loss)
                train_accs.append(tr_acc)
                
                # use the first training sample as the seed (slice keeps the batch axis)
                seed = train_data[:1:]
        
                # decode the seed back into characters: each row is mapped to the
                # character at the index of its first maximum
                seed_chars = ''
                for each in seed[0]:
                    seed_chars += alphabet._keys[np.where(each == max(each))[0][0]]
                print ("Seed:" + seed_chars)
        
                # predict the next 500 characters
                for i in range(500):
                    if i > 0:
                        # slide the window: drop the oldest step, append the last sampled vector
                        remove_fist_char = seed[:,1:,:]
                        seed = np.append(remove_fist_char, np.reshape(probabilities, [1, 1, self.vocab_size]), axis=1)
                        
                    predicted = session.run([self.final_output], feed_dict = {self.X:seed})
                    predicted = np.asarray(predicted[0]).astype('float64')[0]
                    # `sample` is defined outside this snippet; presumably it draws a
                    # one-hot vector from the model output - TODO confirm
                    probabilities = sample(predicted)
                    predicted_chars = alphabet._keys[np.argmax(probabilities)]
                    seed_chars += predicted_chars
                print ('Result:'+ seed_chars)
        
        # history holds epochs + 1 entries (initial evaluation plus one per epoch)
        self.hist = {
            'train_losses': np.array(train_losses),
            'train_accuracy': np.array(train_accs)
        }
```

## Trainable Object
The Trainable contains all the attributes needed for training. All our implemented models from now on should contain these attributes.

We can now generalize our train function as seen in our module ***tools.architectures***

In [1]:
import tensorflow as tf

class Trainable:
    """Base class declaring every attribute the generic training tools rely on.

    Subclasses are expected to fill in the graph-related attributes (for
    example in a ``build`` method); until then they are ``None``.
    """

    def __init__(self, name):
        """Create an empty, not yet built model.

        name: identifier of the model
        """
        self.name = name
        self.weights = []
        self.biases = []

        # input and label placeholders
        self.X = None
        self.Y = None

        # graph nodes used for training and sampling
        self.final_output = None
        self.loss = None
        self.accuracy = None
        self.train_step = None

        # Read or written by the generic train/sample functions; declared here
        # so that every Trainable exposes them even before build/train ran.
        self.time_steps = None
        self.vocab_size = None
        self.session = None
        self.hist = None

## Sampling function
We need a proper sampling function that:
1. feeds a seed into the model
2. translates the output of the model into readable text

At the moment, we have

```python
#get on of training set as seed
seed = train_data[:1:]

#to print the seed 40 characters
seed_chars = ''
for each in seed[0]:
    seed_chars += alphabet._keys[np.where(each == max(each))[0][0]]
print ("Seed:" + seed_chars)
    
#predict next 500 characters
for i in range(500):
    if i > 0:
        remove_fist_char = seed[:,1:,:]
        seed = np.append(remove_fist_char, np.reshape(probabilities, [1, 1, self.vocab_size]), axis=1)
        
    predicted = session.run([self.final_output], feed_dict = {self.X:seed})
    predicted = np.asarray(predicted[0]).astype('float64')[0]
    probabilities = sample(predicted)
    predicted_chars = alphabet._keys[np.argmax(probabilities)]
    seed_chars += predicted_chars
print ('Result:'+ seed_chars)
```

What we want:

In [2]:
def sample( seed_text, trainable, encoder, decoder, length=40 ):
    
    """ prints the sampled string and returns it
    
    seed_text: string of the seed, must have minimum length of our timestep size;
               only its last ``trainable.time_steps`` symbols are fed to the model
    
    trainable: object model to sample from (its ``session``, ``X``,
               ``final_output`` and ``time_steps`` attributes are used)
    
    encoder: encoder object to encode the seed_text
    
    decoder: decoder object to decode the output from the trainable
    
    length: how many symbols we want to sample
    
    returns: the seed text followed by the sampled symbols
    
    """
    
    print( "------Sampling----------" )
    print( f"seed: \t{seed_text}" )
    
    generated = seed_text
    
    #predict next symbols
    for _ in range(length):
        # Encode everything generated so far and keep only the trailing window.
        # Trimming on every iteration (including the first one) keeps the model
        # input at time_steps symbols even when the seed is longer than that.
        window = encoder.encode( [generated] )
        window = window[:, -trainable.time_steps:, :]
        
        predicted = trainable.session.run([trainable.final_output], feed_dict = {trainable.X: window})
        # first fetch, first (only) batch entry, as float64
        predicted = np.asarray(predicted[0]).astype('float64')[0]
        
        generated += decoder.decode( predicted )
    
    print ('result:'+ generated)
    return generated

In [3]:
class Encoder:
    """Interface for objects that turn symbol sequences into model input."""

    def __init__(self, name):
        self.name = name

    def encode(self, seed_chars):
        """Placeholder: the base implementation does nothing and returns None."""
        return None
    
class Decoder:
    """Interface for objects that turn a model prediction into a symbol."""

    def __init__(self, name):
        self.name = name

    def decode(self, predicted):
        """Placeholder: the base implementation does nothing and returns None."""
        return None

Now whenever we want to sample during our training process, we pass a sampling function as an argument to our train method.

A possible function call could look like:
```python
encoder = Encoder("encoder")
decoder = Decoder("decoder")
sampler = lambda trainable, seed_text: sample( seed_text, trainable, encoder, decoder, length=40)

train( model, train_data, train_labels, sampler, epochs, batch_size )

```

Let's put these functions together inside the train method

In [4]:
def train(trainable, train_data, train_labels, sampler, epochs=20, batch_size=128,
          seed_text=None, report_every=1):
    """Train a Trainable and periodically report metrics and a text sample.

    A new ``tf.Session`` is created and stored on ``trainable.session``; it is
    left open so the model can still be sampled from after training. Loss and
    accuracy are evaluated on the full training set once before training and
    once after every epoch, and stored in ``trainable.hist``.

    trainable: model providing ``X``, ``Y``, ``loss``, ``accuracy``,
               ``train_step`` and ``time_steps``

    train_data: training inputs fed to ``trainable.X``

    train_labels: training targets fed to ``trainable.Y``

    sampler: callable invoked as ``sampler(trainable, seed)``

    epochs: number of passes over the training data

    batch_size: number of samples per training step

    seed_text: text whose first ``trainable.time_steps`` symbols seed the
               sampler; defaults to the lyric excerpt used so far

    report_every: print metrics and call the sampler every this many epochs
                  (must be >= 1; the default reports after every epoch)
    """
    if seed_text is None:
        seed_text = "as real as it seems the american dream\nain't nothing but another calculated schemes\nto get us locked up"

    # A previous call may have left a session on the model; release it before
    # replacing it so repeated training runs do not accumulate open sessions.
    previous_session = getattr(trainable, "session", None)
    if previous_session is not None:
        previous_session.close()

    trainable.session = tf.Session()
    session = trainable.session

    full_feed = {trainable.X: train_data, trainable.Y: train_labels}

    def evaluate():
        """Return (loss, accuracy) measured on the whole training set."""
        return session.run([trainable.loss, trainable.accuracy], feed_dict=full_feed)

    train_losses = []
    train_accs = []

    with session.as_default():
        session.run(tf.global_variables_initializer())

        # metrics of the untrained model
        tr_loss, tr_acc = evaluate()
        train_losses.append(tr_loss)
        train_accs.append(tr_acc)

        for epoch in range(epochs):

            for batch_ixs in batch_data(len(train_data), batch_size):
                session.run(trainable.train_step,
                            feed_dict={
                                trainable.X: train_data[batch_ixs],
                                trainable.Y: train_labels[batch_ixs],
                            })

            tr_loss, tr_acc = evaluate()
            train_losses.append(tr_loss)
            train_accs.append(tr_acc)

            if (epoch + 1) % report_every == 0:
                print(f"\n\nEpoch {epoch + 1}/{epochs}")
                print(f"Loss:    \t {tr_loss}")
                print(f"Accuracy:\t {tr_acc}")

                # the sampler receives exactly one input window of seed text
                sampler(trainable, seed_text[:trainable.time_steps])

    # epochs + 1 entries each: the initial evaluation plus one per epoch
    trainable.hist = {
        'train_losses': np.array(train_losses),
        'train_accuracy': np.array(train_accs)
    }

In [5]:
class Single_Layer_LSTM_Classifier(Trainable):
    """Single LSTM layer followed by a linear layer that scores the next symbol."""

    def __init__(self, name):
        super().__init__(name)
        
    def build(self, hidden_layer_size, vocab_size, time_steps, l2_reg=0.0):
        """Construct the TensorFlow graph of the model.

        hidden_layer_size: number of units of the LSTM cell

        vocab_size: number of distinct symbols (input width and number of classes)

        time_steps: number of symbols per input sequence

        l2_reg: factor of an L2 penalty on the output layer weights that is
                added to the loss; 0.0 (default) adds no penalty
        """
        self.time_steps = time_steps
        self.vocab_size = vocab_size
        
        self.X = tf.placeholder(tf.float32, shape=[None, time_steps, vocab_size], name="data")
        self.Y = tf.placeholder(tf.int16, shape=[None, vocab_size], name="labels")
        
        # static_rnn takes a list with one tensor per time step, so move the
        # time axis to the front, flatten and split into time_steps pieces
        _X = tf.transpose(self.X, [1, 0, 2])
        _X = tf.reshape(_X, [-1, vocab_size])
        _X = tf.split(_X, time_steps, 0)
        
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            
            # 1x RNN LSTM Cell
            self.rnn_cell   = tf.nn.rnn_cell.LSTMCell(hidden_layer_size)
            
            self.outputs, _ = tf.contrib.rnn.static_rnn(self.rnn_cell, _X, dtype=tf.float32)
            
            # 1x linear output layer
            W_out = tf.Variable(tf.truncated_normal([hidden_layer_size, vocab_size], 
                                                 mean=0, stddev=.01))
            b_out = tf.Variable(tf.truncated_normal([vocab_size],
                                                mean=0, stddev=.01))
            self.weights.append(W_out)
            self.biases.append(b_out)
            
            # only the output of the last time step is used for the prediction
            self.last_rnn_output = self.outputs[-1]
            self.final_output    = self.last_rnn_output @ W_out + b_out
            
            # softmax cross entropy as our loss function (between vocab_size classes);
            # despite its name, self.softmax holds the cross-entropy values
            self.softmax = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.final_output,
                                                                labels=self.Y)
            self.cross_entropy_loss = tf.reduce_mean(self.softmax)
            
            self.loss = self.cross_entropy_loss
            if l2_reg:
                # l2_reg used to be ignored; penalise the output layer weights.
                # With l2_reg == 0 the graph is identical to the unregularised one.
                self.loss = self.loss + l2_reg * tf.nn.l2_loss(W_out)
            
            self.optimizer = tf.train.AdamOptimizer()
            self.train_step= self.optimizer.minimize(self.loss)
            
            # accuracy in percent: share of samples whose highest score matches the label
            self.correct_prediction = tf.equal(tf.argmax(self.Y,1), tf.argmax(self.final_output, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))*100
    

In [6]:
def batch_data(num_data, batch_size):
    """ Yield batches with indices until epoch is over.

    Batches are taken in dataset order (no shuffling). Only full batches are
    produced: trailing samples that do not fill a whole batch are skipped.
    
    Parameters
    ----------
    num_data: int
        The number of samples in the dataset.
    batch_size: int
        The batch size used using training. Must be positive.

    Returns
    -------
    batch_ixs: np.array of ints with shape [batch_size,]
        Yields arrays of indices of size of the batch size until the epoch is over.

    Raises
    ------
    ValueError
        If batch_size is not positive (raised when iteration starts).
    """
    if batch_size <= 0:
        # the previous while-loop never terminated for such values
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    data_ixs = np.arange(num_data)
    # The last admissible start is num_data - batch_size (inclusive), so the
    # final full batch is also yielded when num_data is a multiple of batch_size.
    for start in range(0, num_data - batch_size + 1, batch_size):
        yield data_ixs[start:start + batch_size]

In [7]:
import tools.processing as pre
import tools.architectures as nn
import numpy as np

In [8]:
def sample_from_distribution(predicted, temperature=0.9):
    '''
     helper function to sample an index from a probability array
     our model will output scores for each class
     we normalize those outputs and create a probability distribution out of them to sample from

     predicted: 1-D array-like of scores, one per class

     temperature: divisor applied to the scores before the softmax; smaller
                  values concentrate the distribution on the highest scores

     returns: integer array of shape (1, number of classes) holding a single
              one-hot draw from the resulting distribution
    '''
    # float64 keeps the normalised probabilities within the tolerance that
    # np.random.multinomial checks, even for float32 model output
    scaled = np.asarray(predicted, dtype=np.float64) / temperature
    # Shifting by the maximum leaves the softmax unchanged but prevents
    # np.exp from overflowing to inf (and the division from producing NaN).
    scaled = scaled - np.max(scaled)
    exp_scaled = np.exp(scaled)
    probabilities = exp_scaled / np.sum(exp_scaled)
    return np.random.multinomial(1, probabilities, 1)

class OneHotEncoder(Encoder):
    """
    Turns sequences of symbols into sequences of 1-Hot vectors, using a
    symbol -> index lookup table
    """

    def __init__(self, name, word2index):
        super().__init__(name)
        self.word2index = word2index

    def encode(self, sequences):
        """Return an array with one (len(seq) x vocabulary size) 1-Hot matrix per sequence."""

        def one_hot_matrix(seq):
            matrix = np.zeros((len(seq), len(self.word2index)))
            # each row is a view into the matrix, so writing to it marks the symbol
            for row, symbol in zip(matrix, seq):
                row[self.word2index[symbol]] = 1
            return matrix

        return np.array([one_hot_matrix(seq) for seq in sequences])

    def encode_labels(self, labels):
        """Return an array with one integer 1-Hot vector per label."""

        def one_hot_vector(label):
            vector = np.zeros(len(self.word2index), dtype=int)
            vector[self.word2index[label]] = 1
            return vector

        return np.array([one_hot_vector(label) for label in labels])
    
class OneHotDecoder(Decoder):
    """
    Turns a prediction (scores per class) into a word by sampling from it
    """

    def __init__(self, name, index2word, temperature=0.5):
        super().__init__(name)
        self.index2word = index2word
        self.temperature = temperature

    def decode(self, predicted):
        """Draw a 1-Hot sample from the scores and return the word at the drawn index."""
        drawn = sample_from_distribution(predicted, temperature=self.temperature)
        word_index = np.argmax(drawn)
        return self.index2word[word_index]

In [11]:
def create_data_label_pairs(text, time_steps, step=1):
    '''
    slides a window of time_steps symbols over the text (advancing by step)
    and returns two lists: the windows and, for each window, the symbol that
    directly follows it
    '''
    window_starts = range(0, len(text) - time_steps, step)
    input_chars = [text[start:start + time_steps] for start in window_starts]
    output_char = [text[start + time_steps] for start in window_starts]
    return input_chars, output_char

# Get the lyrics text via the project's processing module and build an
# Alphabet from it (presumably the set of symbols occurring in the text -
# see tools.processing).
text = pre.get_text("data/cleaned-rap-lyrics/clean2_pac_.txt")
alphabet = pre.Alphabet(text)

# number of symbols per input window
TIMESTEPS = 20

# windows of TIMESTEPS symbols and, for each window, the symbol that follows it
str_data, str_labels = create_data_label_pairs(text, TIMESTEPS)

# show the first five (window, next symbol) pairs
print( list( zip(str_data, str_labels) )[:5] )

[('as real as it seems ', 't'), ('s real as it seems t', 'h'), (' real as it seems th', 'e'), ('real as it seems the', ' '), ('eal as it seems the ', 'a')]


## We can now use our encoder to encode these pairs of data and label strings

In [12]:
# encoder and decoder share the alphabet's lookup tables
encoder = OneHotEncoder("1-Hot-Encoding", alphabet.word2index)
decoder = OneHotDecoder("1-Hot-Decoding", alphabet.index2word)

# 1-Hot encode the input windows and their labels
data = encoder.encode( str_data )
labels = encoder.encode_labels( str_labels )

In [13]:

# model hyper-parameters
HIDDEN_LAYER_SIZE = 128
VOCAB_SIZE = alphabet.get_size()
# same value as used for creating the data/label pairs
TIMESTEPS = 20

# training hyper-parameters
EPOCHS = 20
BATCH_SIZE = 128

# NOTE: not used anywhere in this cell
EMBEDDING_SIZE = 10

# data, labels = alphabet.making_one_hot(text, TIMESTEPS)

rnn = Single_Layer_LSTM_Classifier(name = "basic")
rnn.build(HIDDEN_LAYER_SIZE, VOCAB_SIZE, TIMESTEPS, l2_reg=0.0)

# bind encoder, decoder and sample length so that train only has to pass
# the model and the seed text
sampler = lambda trainable, seed_text: sample( seed_text, trainable, encoder, decoder, length=100)

train( rnn, data, labels, sampler, EPOCHS, BATCH_SIZE)



Epoch 1/20
Loss:    	 2.899674415588379
Accuracy:	 17.976741790771484
------Sampling----------
seed: 	as real as it seems 
result:as real as it seems   n  r ee ttt he i f afet u m   hetnle ate e
i  t oo n ea at et lseh o h  ane t t t te eauta t ets t


Epoch 2/20
Loss:    	 2.6095852851867676
Accuracy:	 26.142301559448242
------Sampling----------
seed: 	as real as it seems 
result:as real as it seems thor yiule nhew inh tin le i the then ges i re dat toag'e ni tor  heti thet yhe the the  eu thi toe 


Epoch 3/20
Loss:    	 2.4637558460235596
Accuracy:	 28.608226776123047
------Sampling----------
seed: 	as real as it seems 
result:as real as it seems the foa ghe dhe dan then than the ther int in the thous the the set's met the fiche soe iot at the g


Epoch 4/20
Loss:    	 2.3885746002197266
Accuracy:	 30.31136703491211
------Sampling----------
seed: 	as real as it seems 
result:as real as it seems he bange than be bafd top the be sot on an an the bu pir the then oa doule the thon the

## Learnings
We now export all of these functions into a separate module called tools.architectures.