# 06 - Embedded Multi Layer LSTM on our preprocessed text

Let's feed the generated data from the previous notebook into our LSTM from chapter 4!

This time we load our rap text from the **prepped** directory.

In [1]:
import tools.architectures as nn
import tools.processing as pre
import tools.training as tr

# Both corpora are truncated to the same number of characters so that the
# vocabulary sizes printed in the next cell are computed over equally sized excerpts.
MAX_CHARS = 60000

# Lyrics from the "prepped" directory; this is the text the models are trained on.
text = pre.get_text("data/prepped/clean2_pac.txt")[:MAX_CHARS]
vocab = pre.Vocabulary(text)

# Lyrics from the "cleaned-rap-lyrics" directory, loaded for the vocabulary-size
# comparison (presumably the same lyrics before the extra preprocessing -- confirm).
text2 = pre.get_text("data/cleaned-rap-lyrics/clean2_pac_.txt")[:MAX_CHARS]
vocab2 = pre.Vocabulary(text2)

### The prepared text should have a reduced number of distinct words

In [2]:
# Report both vocabulary sizes, one per line.
print(
    f"vocab size in prepped text: \t{len(vocab.index2word)}",
    f"vocab size in previous text: \t{len(vocab2.index2word)}",
    sep="\n",
)

vocab size in prepped text: 	1938
vocab size in previous text: 	2181


In [3]:
# Window length: every sample consists of TIMESTEPS consecutive words,
# and its label is the word that follows the window.
TIMESTEPS = 20

vocab = pre.Vocabulary(text)

# Split on single spaces and discard the last piece of the split.
tokens = text.split(" ")
del tokens[-1]

str_data, str_labels = pre.create_data_label_pairs(tokens, TIMESTEPS)

# Preview the first five (window, next word) pairs.
print(list(zip(str_data[:5], str_labels[:5])))

[(['as', 'real', 'as', 'it', 'seems', 'the', 'american', 'dream', ';', 'is', 'not', 'nothing', 'but', 'another', 'calculated', 'schemes', ';', 'to', 'get', 'us'], 'locked'), (['real', 'as', 'it', 'seems', 'the', 'american', 'dream', ';', 'is', 'not', 'nothing', 'but', 'another', 'calculated', 'schemes', ';', 'to', 'get', 'us', 'locked'], 'up'), (['as', 'it', 'seems', 'the', 'american', 'dream', ';', 'is', 'not', 'nothing', 'but', 'another', 'calculated', 'schemes', ';', 'to', 'get', 'us', 'locked', 'up'], 'shot'), (['it', 'seems', 'the', 'american', 'dream', ';', 'is', 'not', 'nothing', 'but', 'another', 'calculated', 'schemes', ';', 'to', 'get', 'us', 'locked', 'up', 'shot'], 'up'), (['seems', 'the', 'american', 'dream', ';', 'is', 'not', 'nothing', 'but', 'another', 'calculated', 'schemes', ';', 'to', 'get', 'us', 'locked', 'up', 'shot', 'up'], 'back')]


In [4]:
# The decoder works on the index -> word table, the encoder on the word -> index table.
decoder = tr.OneHotWordDecoder(
    "1-Hot-Word-Decoding",
    vocab.index2word,
    temperature=0.5,
)
encoder = tr.IndexWordEncoder(
    "Index-Word-Encoding",
    vocab.word2index,
)

# Encode the string windows and their labels for training.
data = encoder.encode(str_data)
labels = encoder.encode_labels(str_labels)

# The first window, joined back into one string, serves as the sampling seed.
# Literal backslash-n sequences in it are converted into real newlines.
my_seed = " ".join(str_data[0]).replace("\\n", "\n")

# The string versions are not used again after encoding.
del str_data
del str_labels

In [5]:
# --- model hyperparameters ---
EMBEDDING_SIZE = 256
HIDDEN_LAYER_SIZE = 512
VOCAB_SIZE = len(vocab.word2index)

# --- training hyperparameters ---
BATCH_SIZE = 256
EPOCHS = 20

# NOTE(review): the notebook title and the model name say "multi", but this
# builds the single-layer variant -- confirm which architecture is intended.
rnn = nn.EmbeddedSingleLayerRNN(name="multi-2pac")
rnn.build(HIDDEN_LAYER_SIZE, VOCAB_SIZE, EMBEDDING_SIZE, TIMESTEPS, l2_reg=0.0)


def sampler(trainable, _):
    """Sample from `trainable` starting at `my_seed` (length=50); the second argument is ignored."""
    return tr.sample(my_seed, trainable, encoder, decoder, length=50)


tr.train_model(
    rnn,
    data,
    labels,
    sampler,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    log_dir="logs/06/testing",
)

INFO:tensorflow:Restoring parameters from ../logs/06/testing/model
Restoring an old model from '../logs/06/testing' and training it further..


Epoch 1/20
Loss:    	 3.2836549282073975
Accuracy:	 36.18242263793945
------Sampling----------
seed: 
as real as it seems the american dream ; is not nothing but another calculated schemes ; to get us
-
result: 
as real as it seems the american dream ; is not nothing but another calculated schemes ; to get us of the car i am rocking ; i am violent on the streets is a lesson ; so i am on the cop is not an understand ; and if i am the bullet of the type of the government ; but the clock is not the police in deep


Epoch 2/20
Loss:    	 2.833693265914917
Accuracy:	 43.82361602783203
------Sampling----------
seed: 
as real as it seems the american dream ; is not nothing but another calculated schemes ; to get us
-
result: 
as real as it seems the american dream ; is not nothing but another calculated schemes ; to get us locked up shot up back in ch

In [24]:
# Use a higher decoder temperature for the sample below.
decoder.temperature = 0.8


def sampler(seed_text):
    """Sample from `rnn` starting at `seed_text` (length=50)."""
    return tr.sample(seed_text, rnn, encoder, decoder, length=50)


sampler(
    "when i was thirteen ; i had my first love ; there was nobody that could put hands on my baby ; and nobody came between us that could ever come above"
)

------Sampling----------
seed: 
when i was thirteen ; i had my first love ; there was nobody that could put hands on my baby ; and nobody came between us that could ever come above
-
result: 
when i was thirteen ; i had my first love ; there was nobody that could put hands on my baby ; and nobody came between us that could ever come above ; i said you are confident my i dropped ; i would not fighting all that i could not be ; you to be a soulja like me ; all you wanted to be a soulja a soulja ; all you wanted to be a soulja like me ; ;


------------------------


### Let's try a different rapper this time: Rakim

In [33]:
# Window length: TIMESTEPS consecutive words per sample.
TIMESTEPS = 20

# Load the Rakim lyrics from the prepped directory and build their vocabulary.
text = pre.get_text("data/prepped/cleanrakim.txt")[:60000]
vocab = pre.Vocabulary(text)

# Split on single spaces and discard the last piece of the split.
tokens = text.split(" ")
del tokens[-1]

str_data, str_labels = pre.create_data_label_pairs(tokens, TIMESTEPS)

# The first window, joined back into one string, is the sampling seed;
# it is also displayed as the cell's output.
my_seed = " ".join(str_data[0])
my_seed

'yeah you know what this is nyc ; the triumphant return rakim allah ; rakim ; remember being introduced to'

In [35]:
# --- model hyperparameters ---
NUM_LAYERS = 2
HIDDEN_LAYER_SIZE = 256
VOCAB_SIZE = vocab.get_size()

# --- training hyperparameters ---
EPOCHS = 20
BATCH_SIZE = 128

EMBEDDING_SIZE = 256

# Fix: rebuild the encoder/decoder and the training arrays from the Rakim
# vocabulary and the Rakim string windows. Previously this cell reused
# `encoder`, `decoder`, `data` and `labels` left over from the 2Pac cells, so
# the "Rakim" model would have been trained on 2Pac data with a mismatching
# vocabulary.
encoder = tr.IndexWordEncoder("Index-Word-Encoding", vocab.word2index)
decoder = tr.OneHotWordDecoder("1-Hot-Word-Decoding", vocab.index2word, temperature=0.5)

data = encoder.encode(str_data)
labels = encoder.encode_labels(str_labels)

# NOTE(review): the recorded run of this cell failed with
# "ValueError: At least two variables have the same name: Variable".
# Presumably this happens because a second model is built in the same kernel
# session as the 2Pac model -- confirm; restarting the kernel before running
# the Rakim cells may be required.
rnn = nn.EmbeddedMultiLayerRNN(name="multi-rakims")
rnn.build(NUM_LAYERS, HIDDEN_LAYER_SIZE, VOCAB_SIZE, EMBEDDING_SIZE, TIMESTEPS, l2_reg=0.0)

# Called as sampler(model, <ignored>); samples from the Rakim seed (length=50).
sampler = lambda trainable, _: tr.sample(my_seed, trainable, encoder, decoder, length=50)

# Fix: use a dedicated log directory. The 2Pac run restored an existing model
# from "logs/06/testing", so reusing that directory here would mix a different
# architecture and vocabulary into the same checkpoint location.
tr.train_model(rnn, data, labels, sampler, epochs=EPOCHS, batch_size=BATCH_SIZE, log_dir="logs/06/rakim")

ValueError: At least two variables have the same name: Variable

In [1]:
import tools.architectures as nn
import tools.processing as pre

# --- model hyperparameters ---
NUM_LAYERS = 2
HIDDEN_LAYER_SIZE = 256
EMBEDDING_SIZE = 128
TIMESTEPS = 20

# --- training hyperparameters ---
BATCH_SIZE = 256
EPOCHS = 20

# Load the prepped 2Pac lyrics and build the vocabulary.
text = pre.get_text("data/prepped/clean2_pac.txt")[:60000]
vocab = pre.Vocabulary(text)
VOCAB_SIZE = vocab.get_size()

# This cell builds its training arrays through the vocabulary object and
# trains via `nn.train`, unlike the cells above which go through `tools.training`.
data, labels = vocab.making_embedded_one_hot(text, TIMESTEPS)

# Embedding object that is handed to the RNN's build call below.
embedding = nn.LeanableEmbedding(name="learnable-embedding-2")
embedding.build(VOCAB_SIZE, EMBEDDING_SIZE)

rnn = nn.MultiLayerRNN(name="multi-pac-total")
rnn.build(
    NUM_LAYERS,
    HIDDEN_LAYER_SIZE,
    VOCAB_SIZE,
    TIMESTEPS,
    l2_reg=0.0,
    embedding=embedding,
)

nn.train(
    rnn,
    data,
    labels,
    vocab,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    temperature=.6,
    embedding=True,
)



Epoch 1/20
Loss:    	 6.085999488830566
Accuracy:	 9.765625
as real as it seems the american dream
is not nothing but another calculated schemes
to get us
Seed:as real as it seems the american dream 
 is not nothing but another calculated schemes 
 to get us 
Result:as real as it seems the american dream 
 is not nothing but another calculated schemes 
 to get us 
 to tell 
 facts 
 i on 
 only i 
 the 
 hipshot what you 
 keep 
 and 
 you i get 
 
 be 
 
 the get 
 shot to 
 
 and a sexing at me 
 be see i 
 your 
 the 
 i motherfucker i am 
 do me got 
 
 the da me got 
 kind i  
 fuck the to 
 
 of piles 
 the 
 i the 
 the 
 all i the or the is now 
 the 
 in but i or i i and 
 
 i got my and 
 
 
 i 
 is is 
 tear 
 the 
 that is 
 the still i little 
 damn 
 he to you 
 a i he to 
 real is the 
 is 
 
 
 his they of 
 stepped winners 
 made be obvious or i see 
 i is my the heavily 
 
 
 i of wanna brothers i the to man 
 be the 
 is 
 
 is is the 
 a i 
 the on and not 
 and o

KeyboardInterrupt: 