# Generating Song Lyrics using RNNs

In [2]:
import warnings
warnings.filterwarnings('ignore')

import random
import numpy as np
import pandas as pd
import tensorflow as tf
import random

In [7]:
# Load the song-lyrics dataset into a DataFrame.
# NOTE(review): the absolute "/content/..." path looks Colab-specific — adjust it when running elsewhere.
df = pd.read_csv("/content/songdata.csv")

In [8]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [9]:
df.shape

(57650, 4)

In [11]:
# Number of distinct values in the artist column
len(df['artist'].unique())

643

In [13]:
# Song counts per artist; show the first 11 entries (the artists with the most songs)
df['artist'].value_counts()[:11]

Donna Summer         191
Gordon Lightfoot     189
Bob Dylan            188
George Strait        188
Loretta Lynn         187
Cher                 187
Alabama              187
Reba Mcentire        187
Chaka Khan           186
Dean Martin          186
Hank Williams Jr.    185
Name: artist, dtype: int64

In [14]:
# Average number of songs per artist
df['artist'].value_counts().values.mean()

89.65785381026438

In [16]:
# Concatenate the lyrics of every song into one long training string.
# Songs are separated by ', ', so that separator becomes part of the corpus.
data = ', '.join(df['text'])

In [17]:
data[:369]

"Look at her face, it's a wonderful face  \nAnd it means something special to me  \nLook at the way that she smiles when she sees me  \nHow lucky can one fellow be?  \n  \nShe's just my kind of girl, she makes me feel fine  \nWho could ever believe that she could be mine?  \nShe's just my kind of girl, without her I'm blue  \nAnd if she ever leaves me what could I do, what co"

In [19]:
# Sorted list of the distinct characters that occur in the corpus
chars = sorted(set(data))

In [20]:
# Number of distinct characters; this is also the width of each one-hot vector
vocab_size = len(chars)

In [21]:
chars

['\n',
 ' ',
 '!',
 '"',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 '[',
 ']',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [22]:
# character -> integer index lookup (indices follow the order of `chars`)
char_to_ix = dict(zip(chars, range(len(chars))))
# integer index -> character lookup (the inverse of char_to_ix)
ix_to_char = dict(enumerate(chars))

In [23]:
char_to_ix['s']

68

In [24]:
ix_to_char[68]

's'

In [25]:
# returns the one-hot encoded vectors, given an index (or a list of indices) of characters
def one_hot_encoder(index, num_classes=None):
  """One-hot encode one index or a sequence of indices.

  Args:
    index: an integer index, or a list/array of integer indices.
    num_classes: width of each one-hot vector. When omitted, the notebook's
      global ``vocab_size`` is used, which keeps existing calls unchanged.

  Returns:
    A float NumPy array holding the selected rows of the identity matrix:
    one row of length ``num_classes`` per index (a single 1-D row when
    ``index`` is a scalar).
  """
  if num_classes is None:
    num_classes = vocab_size
  # Row i of the identity matrix is exactly the one-hot vector for index i.
  return np.eye(num_classes)[index]

Defining the network parameters

In [26]:
# Network hyperparameters
hidden_size = 100   # number of units in the recurrent hidden layer
seq_length = 25     # number of characters (time steps) the RNN is unrolled over
# NOTE(review): learning_rate is never passed to the optimizer created later in
# this notebook (AdamOptimizer() is built with its defaults), so this value is
# currently unused — confirm which learning rate is intended.
learning_rate = 1e-1

# Seed every random number generator the notebook draws from
seed_value = 42
tf.compat.v1.set_random_seed(seed_value)
random.seed(seed_value)
# Text sampling uses np.random.choice, so NumPy's global RNG must be seeded as
# well for the generated samples to be reproducible.
np.random.seed(seed_value)

In [27]:
# Switch TensorFlow to TF1-style graph mode; the placeholders and sessions used
# below depend on it.
tf.compat.v1.disable_v2_behavior()
# The earlier set_random_seed call ran while TensorFlow was still in eager mode,
# where it seeds the eager context rather than the default graph. Seed the graph
# here so that variable initialisation is reproducible.
tf.compat.v1.set_random_seed(seed_value)

Instructions for updating:
non-resource variables are not supported in the long term


In [28]:
# One-hot encoded input characters, one row per character. The row count is left
# open (None), but the graph later splits the rows into seq_length time steps.
inputs = tf.compat.v1.placeholder(shape=[None, vocab_size],dtype=tf.float32, name="inputs")
# One-hot encoded target characters, fed with the same layout as `inputs`
targets = tf.compat.v1.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="targets")

In [29]:
# Hidden state fed in at the first time step of each unrolled sequence
init_state = tf.compat.v1.placeholder(shape=[1, hidden_size], dtype=tf.float32, name="state")

In [30]:
# Normally distributed initial values (standard deviation 0.1), used for every weight and bias of the RNN
initializer = tf.random_normal_initializer(stddev=0.1)

In [31]:
# Build the RNN unrolled over seq_length time steps. Afterwards `h_t` holds the
# hidden state of the last step and `y_hat` the logits of every step.
with tf.compat.v1.variable_scope("RNN") as scope:
    h_t = init_state
    y_hat = []

    # Split the input rows into seq_length equal chunks, one per time step
    step_inputs = tf.split(inputs, seq_length, axis=0)

    # The parameters are shared by all time steps, so they are created once
    # here instead of being re-fetched (with variable reuse) on every step.
    #input to hidden layer weights
    U = tf.compat.v1.get_variable("U", [vocab_size, hidden_size], initializer=initializer)
    #hidden to hidden (recurrent) layer weights
    W = tf.compat.v1.get_variable("W", [hidden_size, hidden_size], initializer=initializer)
    #hidden to output layer weights
    V = tf.compat.v1.get_variable("V", [hidden_size, vocab_size], initializer=initializer)
    #bias for hidden layer
    bh = tf.compat.v1.get_variable("bh", [hidden_size], initializer=initializer)
    #bias for output layer
    by = tf.compat.v1.get_variable("by", [vocab_size], initializer=initializer)

    for x_t in step_inputs:
        #new hidden state from the current input and the previous hidden state
        h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh)
        #unnormalised scores (logits) over the vocabulary for this time step
        y_hat_t = tf.matmul(h_t, V) + by
        y_hat.append(y_hat_t)

In [32]:
# Probability distribution from the logits of the final time step only; fetched when sampling the next character
output_softmax = tf.nn.softmax(y_hat[-1])
# Logits of all time steps concatenated along axis 0; used by the training loss
outputs = tf.concat(y_hat, axis=0)

In [33]:
# Cross-entropy between the one-hot targets and the logits, averaged over all time steps
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs))

In [34]:
# Hidden state after the last unrolled time step; fetched during training and
# sampling so it can be fed back in as init_state for the next run
hprev = h_t

Defining Backpropagation Through Time (BPTT)

In [35]:
# Create the Adam optimizer with its default settings.
# NOTE(review): the `learning_rate` defined with the network parameters is not
# passed here, so Adam's default learning rate is used — confirm this is intended.
minimizer = tf.compat.v1.train.AdamOptimizer()

In [36]:
# Compute the (gradient, variable) pairs of the loss using the Adam optimizer
gradients = minimizer.compute_gradients(loss)

In [37]:
# Set the threshold for gradient clipping; gradients are clipped to [-threshold, threshold]
threshold = tf.constant(5.0, name="grad_clipping")

In [38]:
# Clamp every gradient element-wise to [-threshold, threshold], keeping each
# gradient paired with the variable it belongs to
clipped_gradients = [
    (tf.clip_by_value(grad, -threshold, threshold), var)
    for grad, var in gradients
]

In [39]:
# Training op: apply the clipped gradients to the variables
updated_gradients = minimizer.apply_gradients(clipped_gradients)

Generating Songs

In [40]:
# Open a session and run the initializer for all global variables
sess = tf.compat.v1.Session()
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

In [41]:
# start offset of the current training window within `data`
pointer = 0
# number of training steps run so far
iteration = 0

In [42]:
# Train on consecutive seq_length-character windows of the corpus and print a
# generated sample every 5000th iteration.
while iteration < 50001:

    #start (or restart) at the beginning of the corpus with a zero hidden state
    #on the first iteration, or when the next window would run past the data
    if pointer + seq_length+1 >= len(data) or iteration == 0:
        hprev_val = np.zeros([1, hidden_size])
        pointer = 0

    #select input sentence
    input_sentence = data[pointer:pointer + seq_length]

    #select output sentence: the same window shifted one character ahead
    output_sentence = data[pointer + 1:pointer + seq_length + 1]

    #get the indices of input and output sentence
    input_indices = [char_to_ix[ch] for ch in input_sentence]
    target_indices = [char_to_ix[ch] for ch in output_sentence]

    #convert the input and output sentence to one-hot encoded vectors with the help of their indices
    input_vector = one_hot_encoder(input_indices)
    target_vector = one_hot_encoder(target_indices)

    #train the network and get the final hidden state, which is carried over
    #into the next window
    hprev_val, loss_val, _ = sess.run([hprev, loss, updated_gradients],
                                      feed_dict={inputs: input_vector,targets: target_vector,init_state: hprev_val})

    #generate and print a sample on every 5000th iteration. Sampling costs
    #sample_length extra session runs, so it is only done on the iterations
    #whose text is actually printed.
    if iteration % 5000 == 0:

        #length of characters we want to predict
        sample_length = 500

        #randomly select index
        random_index = random.randint(0, len(data) - seq_length)

        #sample the input sentence with the randomly selected index
        sample_input_sent = data[random_index:random_index + seq_length]

        #get the indices of the sampled input sentence
        sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]

        #work on a copy of the final hidden state so that sampling does not
        #disturb the state used for training
        sample_prev_state_val = np.copy(hprev_val)

        #for storing the indices of predicted characters
        predicted_indices = []

        for t in range(sample_length):

            #convert the sampled input sentence into one-hot encoded vectors using their indices
            sample_input_vector = one_hot_encoder(sample_input_indices)

            #compute the probability of every character in the vocabulary being the next character
            probs_dist, sample_prev_state_val = sess.run([output_softmax, hprev],
                                                      feed_dict={inputs: sample_input_vector,init_state: sample_prev_state_val})

            #randomly select an index according to the probability distribution generated by the model
            ix = np.random.choice(vocab_size, p=probs_dist.ravel())

            #slide the input window forward by one character
            sample_input_indices = sample_input_indices[1:] + [ix]

            #store the predicted index in predicted_indices list
            predicted_indices.append(ix)

        #convert the predicted indices to their characters
        predicted_chars = [ix_to_char[ix] for ix in predicted_indices]

        #combine the predicted characters
        text = ''.join(predicted_chars)

        #print the generated text
        print ('\n')
        print (' After %d iterations' %(iteration))
        print('\n %s \n' % (text,))
        print('-'*115)

    #increment the pointer and iteration
    pointer += seq_length
    iteration += 1



 After 0 iterations

 6ljp
tcRtHsHq36moD-VL cik]yzigZF5[8Xd0i'pZYv 5-2wfWkJM-ElL-vyMn)3!]ep
jlUE8k9 GNFa'btojV3QtSx5Jecy,"kxq?lus?LXqZM?[28iZlFOUr')a2(ShC9h-6Pk2CeGbZWFE!oY!Nq]eKMl(T[lIke[f[?"K5wE-TM
zMmhWj"2gdTkguld]Xs(1r7)7UH6iRR-b?U2HXXQFXXa(Sa-8(ls!]Ikrz9k,uX9,ZZikzS8wj3IxU5ILiKV6nC17rrWG["Y(2v1jEz"FVZ]SFk vec?qB4"C)1
fAqtD1G'ARUK"qYXnlDVk'GSBzrjNQ:LtRcnRz3d8Zj03jt-cQHZhLmb6dGUqF"zGMZhN bHSk" .o3[ux5qbJeU
iMXX(ppeh-5tip5c4p7J86ZNBY4tp2qb1YoZ6YshFM)D7QT.Zw?dgg)Kqd7ObkulESc7Wo!qdYISz?VBgVdT"8I2EdJgzh6u.hm9tka3vuU-( 

-------------------------------------------------------------------------------------------------------------------


 After 5000 iterations

   
Wha gind a tiles,  
I'me fle the you hereo  
  
Wa munt hroe some, a tay, ounit 

 
The srorf and t mavn toe dfme-ond overebef, sus oo game, thew Ildin I, you'de jofen nigus  
  
Asw ma coSe day dinive, If  
I wre I's gowe all-the feet or sine dee sne thon  
I  
Cot's I my on  
The corese be to ghe  
ToT fore -laveein the pli