# 03 - Multi Layer Network

In this notebook we try stacking multiple hidden layers inside our RNN cell.

In [1]:
import tools.processing as pre
import re

# use less text for now to avoid memory error
text = pre.get_text("data/cleaned-rap-lyrics/clean2_pac_.txt")
vocab = pre.Vocabulary(text)
# Replace every line break with a standalone, escaped "\\n" token (backslash + n)
# surrounded by spaces, so line endings survive the space-based tokenization.
# The escaped form avoids a null error in the tensorboard projection.
text = text.replace("\n", " \\n ")
# Split on runs of spaces and discard empty strings.
# A plain `split(" ")[:-1]` only works when the text ends with whitespace:
# otherwise it silently drops the last real word, and a leading space would
# leave an empty-string token at the front. Filtering handles both cases.
tokens = [token for token in re.split(" +", text) if token]

# Length of each input window handed to create_data_label_pairs. Per the sample
# printed below, every example is TIMESTEPS consecutive tokens and its label is
# the token that follows them.
TIMESTEPS = 10

str_data, str_labels = pre.create_data_label_pairs(tokens, TIMESTEPS)

# Show the first five (window, next-word) pairs as a sanity check.
print( list( zip(str_data, str_labels) )[:5] )

[(['as', 'real', 'as', 'it', 'seems', 'the', 'american', 'dream', '\\n', "ain't"], 'nothing'), (['real', 'as', 'it', 'seems', 'the', 'american', 'dream', '\\n', "ain't", 'nothing'], 'but'), (['as', 'it', 'seems', 'the', 'american', 'dream', '\\n', "ain't", 'nothing', 'but'], 'another'), (['it', 'seems', 'the', 'american', 'dream', '\\n', "ain't", 'nothing', 'but', 'another'], 'calculated'), (['seems', 'the', 'american', 'dream', '\\n', "ain't", 'nothing', 'but', 'another', 'calculated'], 'schemes')]


In [2]:
import tools.training as tr

# Build the word-level encoder and decoder from the vocabulary's two lookup
# tables (word -> index for encoding, index -> word for decoding).
# NOTE(review): the first positional argument looks like a display name for the
# component -- confirm against tools.training.
encoder = tr.OneHotWordEncoder(
    "1-Hot-Word-Encoding",
    vocab.word2index,
)
decoder = tr.OneHotWordDecoder(
    "1-Hot-Word-Decoding",
    vocab.index2word,
    temperature=0.8,
)

# Convert the string windows and their next-word labels into the numeric form
# fed to the network.
# NOTE(review): presumably one-hot arrays, judging by the class names -- confirm.
data = encoder.encode(str_data)
labels = encoder.encode_labels(str_labels)

In [3]:
import tools.architectures as nn

class SimpleMultiLayerRNN(nn.Trainable):
    """Next-word classifier built from a stack of recurrent layers.

    `build` wires up the whole TensorFlow graph: input/label placeholders, the
    stacked recurrent cells, a fully connected output layer, the cross-entropy
    loss, an Adam training step and an accuracy metric.

    NOTE(review): `tf`, `lstm_layer` and `full_layer` are not imported in any
    visible cell of this notebook; they must already be present in the kernel
    namespace. Confirm where they come from (possibly `tools.architectures`)
    and import them explicitly so the notebook survives "Restart & Run All".
    """

    def __init__(self, name):
        """Forward `name` to the `nn.Trainable` base class."""
        super().__init__(name)

    def build(self, num_layers, hidden_layer_size, vocab_size, time_steps, l2_reg=0.0):
        """Create the graph and expose its tensors/ops as attributes.

        Args:
            num_layers: number of stacked recurrent layers, passed to `lstm_layer`.
            hidden_layer_size: size of each recurrent layer, passed to `lstm_layer`.
            vocab_size: width of the input vectors and of the output logits.
            time_steps: number of tokens in every input window.
            l2_reg: currently unused -- no regularization term is added to the
                loss. Kept so existing callers that pass it keep working.
        """
        self.time_steps = time_steps
        self.vocab_size = vocab_size

        # Inputs: a batch of windows, each `time_steps` vectors of width
        # `vocab_size`. Labels: one vector of width `vocab_size` per window.
        # The input placeholder is created exactly once; creating it twice left
        # an orphaned "data" node in the graph and renamed the live one "data_1".
        self.X = tf.placeholder(tf.float32, shape=[None, time_steps, vocab_size], name="data")
        self.Y = tf.placeholder(tf.int16, shape=[None, vocab_size], name="labels")

        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):

            self.stacked_cells = lstm_layer(num_layers, hidden_layer_size)

            self.outputs, self.states = tf.nn.dynamic_rnn(
                    self.stacked_cells, self.X, dtype=tf.float32)

            # Final state of the top-most layer; index 1 selects the second
            # element of that layer's state.
            # NOTE(review): presumably the hidden state `h` of an
            # LSTMStateTuple (c, h) -- confirm against `lstm_layer`.
            self.last_rnn_output = self.states[num_layers - 1][1]

            # Project the last recurrent output onto the vocabulary (logits).
            self.final_output, W_out, b_out = full_layer(self.last_rnn_output, vocab_size)

            # NOTE(review): `self.weights` / `self.biases` are not defined in
            # this class; presumably lists created by `nn.Trainable` -- confirm.
            self.weights.append(W_out)
            self.biases.append(b_out)

            # Despite its name, `self.softmax` holds the per-example softmax
            # cross-entropy loss, not softmax probabilities. The attribute name
            # is kept because other code may read it.
            self.softmax = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.final_output,
                    labels=self.Y)
            self.cross_entropy_loss = tf.reduce_mean(self.softmax)

            self.loss = self.cross_entropy_loss

            self.optimizer = tf.train.AdamOptimizer()
            self.train_step = self.optimizer.minimize(self.loss)

            # Accuracy in percent: share of examples whose highest-scoring
            # logit matches the arg-max position of the label vector.
            self.correct_prediction = tf.equal(tf.argmax(self.Y, 1), tf.argmax(self.final_output, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32)) * 100

In [4]:
import tools.processing as pre
import tools.architectures as nn

# Model hyperparameters.
NUM_LAYERS = 2
HIDDEN_LAYER_SIZE = 512
VOCAB_SIZE = vocab.get_size()
# TIMESTEPS is deliberately not re-declared here. The training windows were
# already cut with the TIMESTEPS value from the preprocessing cell, so that
# value is reused and the placeholder shape cannot drift away from the data.

# Training hyperparameters.
EPOCHS = 20
BATCH_SIZE = 256

rnn = SimpleMultiLayerRNN(name = "multi-pac")
rnn.build(NUM_LAYERS, HIDDEN_LAYER_SIZE, VOCAB_SIZE, TIMESTEPS, l2_reg=0.0)


def sampler(trainable, seed_text):
    """Sample 20 tokens from `trainable`, seeded with `seed_text`.

    Uses the notebook-level `encoder` and `decoder`, looked up at call time.
    """
    return tr.sample(seed_text, trainable, encoder, decoder, length=20)


tr.train_model(rnn, data, labels, sampler, epochs=EPOCHS, batch_size=BATCH_SIZE)



Epoch 1/20
Loss:    	 5.8571906089782715
Accuracy:	 11.005839347839355
------Sampling----------
seed: 	as real as it seems the american dream
ain't nothing but another calculated schemes
to get us locked up
result:as real as it seems the american dream
ain't nothing but another calculated schemes
to get us locked up 
 the the in the they you 
 the 
 
 ears my boldy ain't outro 
 the wheel 



Epoch 2/20
Loss:    	 5.772158145904541
Accuracy:	 11.005839347839355
------Sampling----------
seed: 	as real as it seems the american dream
ain't nothing but another calculated schemes
to get us locked up
result:as real as it seems the american dream
ain't nothing but another calculated schemes
to get us locked up 
 and 
 motts time just shocks do while smokin' me 
 
 i'm could love of knew block needin'


Epoch 3/20
Loss:    	 5.70475959777832
Accuracy:	 11.005839347839355
------Sampling----------
seed: 	as real as it seems the american dream
ain't nothing but another calculated schemes
to get

In [50]:
# Lower the decoder temperature for this sampling run. This mutates the shared
# `decoder` object, so every later use of it is affected as well.
decoder.temperature = 0.7


def sampler(trainable, seed_text):
    """Sample 50 tokens from `trainable`, seeded with `seed_text`."""
    return tr.sample(seed_text, trainable, encoder, decoder, length=50)


# NOTE(review): the recorded output below shows a different seed than "i g".
# Either the output is stale or tr.sample substitutes its own seed -- confirm.
sampler(rnn, "i g")

------Sampling----------
seed: 	let us work things out with a shot 
 go take the man
result:let us work things out with a shot 
 go take the man shit hand 
 hate they they get gettin' dirty 
 they i got and so his 
 know get rockin' the shit static stole 
 come come and they windows really and 
 come 
 whiff do in this to come come 
 they come come come come come come


# Problem

How can we work out proper features from the text?

Just because a generated line does not match the original one 100% doesn't mean that it is bad, so exact next-word accuracy alone is a poor measure of quality.

In [27]:
# Number of entries stored in the vocabulary's private `_dict` attribute.
# NOTE(review): this reaches into a private attribute; `vocab.get_size()` (used
# when building the model) is presumably the public equivalent -- confirm.
len(vocab._dict)

1093