# Multi-layered LSTM Lyrics Generation

We train a language model on lyrics of Michael Jackson songs and try to generate lyrics from a seed text.

In [1]:
import utils.models as mdl
import utils.utils as utils
import tensorflow as tf

import numpy as np

In [3]:
#Load and preprocess data

data = utils.load_data('data/mj_lyrics.txt')

word2idx, idx2word, vocab_size = utils.word_idx_mappings(data,False)

#data = utils.generate_sample_data(data,time_steps)

data, lengths = utils.text2idx(data,word2idx)

data = utils.pad_data(data,lengths)

inputs, outputs = utils.make_input_output_pairs(data)

train_inputs, train_outputs,train_lengths, test_inputs, test_outputs, test_lengths = utils.generate_train_test_split(np.array(inputs), np.array(outputs), np.array(lengths))




In [4]:
train_outputs.shape

(6271, 59)

In [33]:
class LyricsGenLSTM:
    def __init__(self, name):
        self.name = name
        
    
    def build(self,vocab_size,data_dim,embedding_dims=32,num_hidden_units=128,learning_rate=1e-2,num_layers=2):
        
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            self.inputs = tf.placeholder(dtype=tf.int32, shape=[None,data_dim], name='inputs')
            self.labels = tf.placeholder(dtype=tf.int32, shape=[None,data_dim], name='outputs')
            self.lengths = tf.placeholder(dtype=tf.int32, shape=[None], name='lengths')
            
            self.non_zero_wts = tf.sign(self.labels)
            print(self.non_zero_wts.shape)
            self.non_zero_wts = tf.cast(tf.reshape(self.non_zero_wts,[-1]),tf.float32)
            print(self.non_zero_wts.shape)
            
            with tf.name_scope('embeddings'):
                self.embedding = tf.Variable(dtype=tf.float32, initial_value=tf.random_uniform([vocab_size,num_hidden_units],-1.0,1.0))
                self.em_lookup = tf.nn.embedding_lookup(self.embedding,self.inputs)
                #print(self.em_lookup.shape)
            with tf.name_scope('LSTM'):
                def make_cell():
                    cell = tf.contrib.rnn.LSTMCell(num_hidden_units)
                    cell = tf.contrib.rnn.DropoutWrapper(cell,output_keep_prob=0.5)
                    return cell
                self.layers = [make_cell() for _ in range(num_layers)]
                self.layers = tf.contrib.rnn.MultiRNNCell(self.layers)
                self.outputs,_ = tf.nn.dynamic_rnn(self.layers,self.em_lookup,dtype=tf.float32)
                self.Wo = tf.Variable(tf.random_normal([num_hidden_units,vocab_size]),dtype=tf.float32)
                self.bo = tf.Variable(tf.random_normal([vocab_size]),dtype=tf.float32)
                
                def out_layer(out):
                    return tf.matmul(out,self.Wo)+self.bo
                #self.logits = tf.map_fn(out_layer,self.outputs)
        
                self.logits = tf.layers.dense(self.outputs,vocab_size)
                print(self.logits.shape)
                self.softmax = tf.nn.softmax(self.logits,axis=2)
                self.logits2d = tf.reshape(self.logits,[-1,vocab_size])
            with tf.name_scope('Loss'):
                
                
                self.loss_all = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits2d,labels=tf.reshape(self.labels,[-1]))*self.non_zero_wts
                self.loss = tf.reduce_mean(self.loss_all)                
                
               # self.loss = tf.contrib.seq2seq.sequence_loss(logits=self.logits,targets=self.labels,
               #                                             weights=tf.sequence_mask(self.lengths,tf.shape(self.labels)[1], dtype=tf.float32),
               #                                             average_across_timesteps=True, average_across_batch=True)
                
                                 
                
            with tf.name_scope('train'):
                self.train_step = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
        return
    
    def train(self, batch_size, train_ip, train_op, lengths, epochs=10):
        self.session = tf.Session()
        s = self.session
        with s.as_default():
            s.run(tf.global_variables_initializer())
            
            for e in range(epochs):
                step=0
                print(f'Epoch {e+1}:')
                for batch_ids in utils.get_batch_idx(train_ip,batch_size):
                    _ = s.run([self.train_step], feed_dict={self.inputs:train_ip[batch_ids], self.labels:train_op[batch_ids], self.lengths:lengths[batch_ids]})
                    if step % 250 == 0:
                        tr_loss = s.run([self.loss],feed_dict={self.inputs:train_ip[batch_ids], self.labels:train_op[batch_ids], self.lengths:lengths[batch_ids]})
                        print(f'Iteration {step}, Training Loss: {tr_loss[0]}')
                        #print(l_a.shape)
                        #print(nnw)
                    step += 1
                self.predict(train_ip.shape[1],seed="She was more like a beauty queen from a movie scene I said don't mind, but what do you mean,")

                    
                
                    
    
    def predict(self, max_len,length=15, seed='love'):
        print('Seed: ',seed)
        seq=''
        seed = seed.lower()
        #ixs=[]
        for i in range(length):
            seed_idx = [word2idx[wrd] for wrd in seed.split()]
            seed_ln=[len(seed_idx[i:])]
            seed_vec = seed_idx[i:] + [0]*(max_len-seed_ln[0])
            seed_vec = np.array(seed_vec)
            seed_vec = seed_vec[:,np.newaxis].T
            smx = self.session.run([self.softmax],feed_dict={self.inputs:seed_vec,self.lengths:(seed_ln)})[0]
            #idx = np.argmax(smx[seed_ln[0]-1,0,:])
            idx = np.argmax(smx[0,seed_ln[0]-1,:])
            seq += ' '+idx2word[idx]
            seed +=' '+ idx2word[idx]
        #print(ixs)
        #ixs=[]
        print(seq)
            
            
            
                
                

In [34]:
rnn = LyricsGenLSTM('MJ_Lyrics_Gen')
rnn.build(vocab_size,np.max(lengths)-1)

(?, 59)
(?,)
(?, 59, 3675)


In [35]:
rnn.train(batch_size=256,train_ip=train_inputs,train_op=train_outputs,lengths=train_lengths,epochs=50)

Epoch 1:
Iteration 0, Training Loss: 0.6786337494850159
Seed:  She was more like a beauty queen from a movie scene I said don't mind, but what do you mean,
 the to to to to you you the to to to you you you you
Epoch 2:
Iteration 0, Training Loss: 0.49414217472076416
Seed:  She was more like a beauty queen from a movie scene I said don't mind, but what do you mean,
 the you you the the the you you you you you you you you you
Epoch 3:
Iteration 0, Training Loss: 0.5260891318321228
Seed:  She was more like a beauty queen from a movie scene I said don't mind, but what do you mean,
 the to the the to the the you the the to the to the to
Epoch 4:
Iteration 0, Training Loss: 0.4714902341365814
Seed:  She was more like a beauty queen from a movie scene I said don't mind, but what do you mean,
 the the a the got the the got you the a a a the the
Epoch 5:
Iteration 0, Training Loss: 0.461159884929657
Seed:  She was more like a beauty queen from a movie scene I said don't mind, but what do you me

In [36]:
seed = "I took my baby on a Saturday bang Boy is that girl with you? Yes, we're one and the same Now I believe in miracles"

rnn.predict(seed=seed,max_len=np.max(lengths)-1,length=100)

Seed:  I took my baby on a Saturday bang Boy is that girl with you? Yes, we're one and the same Now I believe in miracles
 the years in the world that we've children shows diamonds there sold the world you were up my love you delirious up i can glorify it oh what is god planing, times better good like you do. cry time will be there, are you sing pride again jackson huh! to live my independently- through my girl till expression baby, yeah yeah yeah yeah yeah yeah yeah yeah yeah yeah yeah yeah yeah yeah yeah oo! yeah yeah wrong yeah yeah yeah yeah yeah yeah yeah, yeah forgive in yourself a babe - yun, for its now that it's not dreamin'
