In [1]:
import warnings
warnings.filterwarnings('ignore')

import random
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Load the song dataset (artist, song, link, text columns) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data/songdata.csv')

In [3]:
# Peek at the first five rows.
df.head(n=5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
# Number of songs (rows) in the dataset.
len(df.index)

57650

In [5]:
# Number of distinct artists (missing values, if any, count as one entry,
# exactly as they would in .unique()).
df['artist'].nunique(dropna=False)

643

In [6]:
# The ten artists with the most songs.
df['artist'].value_counts().head(10)

Donna Summer        191
Gordon Lightfoot    189
George Strait       188
Bob Dylan           188
Loretta Lynn        187
Cher                187
Alabama             187
Reba Mcentire       187
Chaka Khan          186
Dean Martin         186
Name: artist, dtype: int64

In [7]:
# Average number of songs per artist.
np.mean(df['artist'].value_counts().values)

89.65785381026438

In [8]:
# Concatenate every song's lyrics into one long string, separated by ', '.
data = ', '.join(df['text'].tolist())

In [9]:
# Preview the first 369 characters of the combined lyrics.
data[0:369]

"Look at her face, it's a wonderful face  \nAnd it means something special to me  \nLook at the way that she smiles when she sees me  \nHow lucky can one fellow be?  \n  \nShe's just my kind of girl, she makes me feel fine  \nWho could ever believe that she could be mine?  \nShe's just my kind of girl, without her I'm blue  \nAnd if she ever leaves me what could I do, what co"

In [10]:
# Sorted list of the distinct characters that occur in the corpus.
chars = sorted(set(data))

In [11]:
# Vocabulary size = number of distinct characters in the corpus.
vocab_size = len(chars)

In [12]:
# Lookup tables between characters and their position in the sorted vocabulary.
char_to_ix = dict(zip(chars, range(len(chars))))
ix_to_char = dict(enumerate(chars))

In [13]:
# Sanity check: index assigned to the character 's'.
char_to_ix['s']

68

In [14]:
# Sanity check: the reverse lookup maps the index back to its character.
ix_to_char[68]

's'

In [15]:
# Toy example: row `char_index` of an identity matrix is the one-hot vector
# for that index.
vocabSize, char_index = 7, 4

np.eye(vocabSize)[char_index]

array([0., 0., 0., 0., 1., 0., 0.])

In [16]:
# Function to return one-hot encoded vector(s)
def one_hot_encoder(index, size=None):
    """Return the one-hot encoding of ``index``.

    Args:
        index: A single integer index, or a sequence/array of integer indices.
        size: Length of each one-hot vector. Defaults to the module-level
            ``vocab_size`` when omitted, which is the original behaviour.

    Returns:
        A float NumPy array: shape ``(size,)`` for a scalar ``index`` or
        ``(len(index), size)`` for a sequence of indices.
    """
    if size is None:
        size = vocab_size
    # Row i of the identity matrix is the one-hot vector for index i; fancy
    # indexing with a list selects one row per index.
    return np.eye(size)[index]


In [17]:
# Number of units in the recurrent hidden layer.
hidden_size = 100

# Number of characters (time steps) in each training sequence.
seq_length = 25

# LR for gradient descent
# NOTE(review): the optimizer in this notebook is constructed without
# arguments, so this value is currently not consumed anywhere.
learning_rate = 1e-1

# Seed every random source the notebook relies on. NumPy's global RNG is
# included because the text-sampling step draws characters with
# np.random.choice; without it the generated samples differ between runs.
seed_value = 42
tf.set_random_seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)

In [18]:
# One-hot encoded input characters and their one-hot targets, one row per
# time step.
inputs = tf.placeholder(tf.float32, shape=[None, vocab_size], name="inputs")
targets = tf.placeholder(tf.float32, shape=[None, vocab_size], name="targets")

In [19]:
# Hidden state fed in at the start of each sequence.
init_state = tf.placeholder(tf.float32, shape=[1, hidden_size], name="state")

In [20]:
# Initializer for the RNN weights and biases: zero-mean normal, stddev 0.1.
initializer = tf.random_normal_initializer(mean=0.0, stddev=0.1)

In [21]:
# ---- FORWARD PROPAGATION
#
# Vanilla RNN unrolled over seq_length time steps (row-vector convention):
#
#   h_t   = tanh(x_t U + h_{t-1} W + b_h)
#   y_hat = softmax(h_t V + b_y)
#
# Only the logits (h_t V + b_y) are built in this cell.

with tf.variable_scope("RNN") as scope:

    # Split the input into seq_length equal pieces along axis 0; with the
    # seq_length-row batches fed by the training loop each piece is one row.
    x_steps = tf.split(inputs, seq_length, axis=0)

    # The parameters are shared by every time step, so they are created once
    # here instead of being re-requested (with variable reuse) on each step.

    # input to hidden layer weights
    U = tf.get_variable("U", [vocab_size, hidden_size], initializer=initializer)

    # hidden to hidden layer weights
    W = tf.get_variable("W", [hidden_size, hidden_size], initializer=initializer)

    # hidden to output layer weights
    V = tf.get_variable("V", [hidden_size, vocab_size], initializer=initializer)

    # bias for hidden layer
    bh = tf.get_variable("bh", [hidden_size], initializer=initializer)

    # bias for output layer
    by = tf.get_variable("by", [vocab_size], initializer=initializer)

    # Hidden state carried from one time step to the next.
    h_t = init_state

    # Logits produced at each time step.
    y_hat = []

    for x_t in x_steps:
        h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh)

        y_hat_t = tf.matmul(h_t, V) + by

        y_hat.append(y_hat_t)

In [22]:
# Logits of all time steps stacked row-wise, used for the training loss.
outputs = tf.concat(y_hat, axis=0)

# Softmax over the logits of the final time step only.
output_softmax = tf.nn.softmax(y_hat[-1])

In [23]:
# Cross-entropy between targets and logits, averaged over the time steps.
step_losses = tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs)
loss = tf.reduce_mean(step_losses)



In [24]:
# Keep a handle on the hidden state produced by the last unrolled time step,
# so it can be fetched after a run and fed back in through init_state.
hprev = h_t

In [25]:
# ---- BACKPROPAGATION (BPTT) WITH THE ADAM OPTIMIZER
#
# Gradients are computed, clipped and applied in separate steps below so that
# clipping can be inserted between them to guard against exploding gradients.
#
# The optimizer is created with no arguments, i.e. with its default settings;
# the learning_rate variable defined earlier is not passed in.
minimizer = tf.train.AdamOptimizer()

In [26]:
# Compute the gradients of the loss; the result is iterated below as
# (gradient, variable) pairs.
gradients = minimizer.compute_gradients(loss)

In [27]:
# Threshold for gradient clipping: gradient values are limited to
# [-threshold, threshold].
threshold = tf.constant(5.0, name="grad_clipping")

Clip any gradient values that exceed the threshold so that they fall within the range [-threshold, threshold]:

In [28]:
# Clip every gradient element-wise into [-threshold, threshold].
clipped_gradients = []
for grad, var in gradients:
    # compute_gradients() reports None for a variable the loss does not depend
    # on; tf.clip_by_value cannot handle None, so such entries are passed
    # through unchanged.
    if grad is None:
        clipped_grad = None
    else:
        clipped_grad = tf.clip_by_value(grad, -threshold, threshold)
    clipped_gradients.append((clipped_grad, var))

Apply the clipped gradients to update the model parameters:

In [29]:
# Build the training op: applying the clipped gradients updates the model
# variables. Despite its name, running `updated_gradients` performs the
# parameter update step.
updated_gradients = minimizer.apply_gradients(clipped_gradients)


In [30]:
# Build the variable-initialisation op, open a session and initialise all
# variables.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

Now, we will look at how we can prepare our input and output sequence similar to the preceding table. __The code is bolded.__

Define a variable called pointer, which points to the character in our dataset. We will set our pointer to 0, which means it points to the first character:


__pointer = 0__

Define the input data:

__input_sentence = data[pointer: pointer + seq_length]__


What does this mean? With the pointer and the sequence length, we slice the data. Consider that the seq_length is 25 and the pointer is 0. It will return the first 25 characters as input. So, data[pointer:pointer + seq_length] returns the following output:

_"Look at her face, it's a "_


Define the output, as follows:

__output_sentence = data[pointer + 1: pointer + seq_length + 1]__


We slice the output data with one character ahead moved from input data. So, data[pointer + 1:pointer + seq_length + 1] returns the following:

_"ook at her face, it's a w"_



As you can see, we added the next character in the sentence and removed the first character. So, on every iteration, we increment the pointer and traverse the entire dataset. This is how we obtain the input and output sentence for training the RNN. 

As you have learned, an RNN only accepts numbers as input. Thus, once we sliced the input and output sequence, we get the indices of the respective characters, using the char_to_ix dictionary that we defined:

__input_indices = [char_to_ix[ch] for ch in input_sentence]__

__target_indices = [char_to_ix[ch] for ch in output_sentence]__ 



Convert the indices into one-hot encoded vectors by using the one_hot_encoder function we defined previously:

__input_vector = one_hot_encoder(input_indices)__

__target_vector = one_hot_encoder(target_indices)__


This input_vector and target_vector become the input and output for training the RNN. Let's start training.

The hprev_val variable stores the last hidden state of our trained RNN model. We use this for making predictions, and we store the loss in loss_val:

__hprev_val, loss_val, _ = sess.run([hprev, loss, updated_gradients], feed_dict={inputs: input_vector,targets: target_vector,init_state: hprev_val})__



We train the model for n iterations. After training, we start making predictions. Now, we will look at how to make predictions and generate song lyrics using our trained RNN. Set the sample_length, that is, the length of the sentence (song) we want to generate:

__sample_length = 500__

Randomly select the starting index of the input sequence:

__random_index = random.randint(0, len(data) - seq_length)__ 

Select the input sentence with the randomly selected index:

__sample_input_sent = data[random_index:random_index + seq_length]__

As we know, we need to feed the input as numbers; convert the selected input sentence to indices:

__sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]__

Remember, we stored the last hidden state of the RNN in hprev_val. We used that for making predictions. Now, we will create a new variable called sample_prev_state_val by copying values from hprev_val.

The sample_prev_state_val variable is used as an initial hidden state for making predictions:

__sample_prev_state_val = np.copy(hprev_val)__


Initialize the list for storing the predicted output indices:

__predicted_indices = []__

Now, for t in range of sample_length, we perform the following and generate the song for the defined sample_length. 

Convert the sampled_input_indices to the one-hot encoded vectors:

__sample_input_vector = one_hot_encoder(sample_input_indices)__

Feed the sample_input_vector, and also the hidden state sample_prev_state_val, as the initial hidden state to the RNN, and get the predictions. We store the output probability distribution in probs_dist:

__probs_dist, sample_prev_state_val = sess.run([output_softmax, hprev],
 feed_dict={inputs: sample_input_vector,init_state: sample_prev_state_val})__
 
Randomly select the index of the next character with the probability distribution generated by the RNN:

__ix = np.random.choice(range(vocab_size), p=probs_dist.ravel())__

Add this newly predicted index, ix, to the sample_input_indices, and also remove the first index from sample_input_indices to maintain the sequence length. This will form the input for the next time step:

__sample_input_indices = sample_input_indices[1:] + [ix]__

Store all the predicted chars indices in the predicted_indices list:

__predicted_indices.append(ix)__

Convert all the predicted_indices to their characters:

__predicted_chars = [ix_to_char[ix] for ix in predicted_indices]__

Combine all the predicted_chars and save it as text:

__text = ''.join(predicted_chars)__

Print the predicted text on every 50,000th iteration:

__print ('\n')__

__print (' After %d iterations' %(iteration))__

__print('\n %s \n' % (text,))__

__print('-'*115)__


Increment the pointer and iteration:

__pointer += seq_length__

__iteration += 1__

In [31]:
# Position of the current training window in `data`, and the number of
# training steps performed so far.
pointer = iteration = 0

In [None]:

# Train indefinitely (interrupt the kernel to stop), printing a generated
# sample every 50000th iteration.
while True:

    # Start (or restart) at the beginning of the corpus with a zeroed hidden
    # state: on the very first iteration, and whenever the next input/target
    # window would reach the end of the data.
    if pointer + seq_length + 1 >= len(data) or iteration == 0:
        hprev_val = np.zeros([1, hidden_size])
        pointer = 0

    # Input window, and the target window shifted one character ahead.
    input_sentence = data[pointer:pointer + seq_length]
    output_sentence = data[pointer + 1:pointer + seq_length + 1]

    # Map characters to vocabulary indices.
    input_indices = [char_to_ix[ch] for ch in input_sentence]
    target_indices = [char_to_ix[ch] for ch in output_sentence]

    # One-hot encode the indices.
    input_vector = one_hot_encoder(input_indices)
    target_vector = one_hot_encoder(target_indices)

    # Run one training step; the final hidden state is carried over as the
    # initial state of the next window.
    hprev_val, loss_val, _ = sess.run(
        [hprev, loss, updated_gradients],
        feed_dict={inputs: input_vector, targets: target_vector, init_state: hprev_val})

    # Generate a sample only on the iterations where it is printed. Each
    # sample costs `sample_length` extra session runs, so producing one that
    # is never shown would only slow training down.
    if iteration % 50000 == 0:

        # number of characters to generate
        sample_length = 500

        # pick a random seq_length-character window of the corpus as the seed text
        random_index = random.randint(0, len(data) - seq_length)
        sample_input_sent = data[random_index:random_index + seq_length]
        sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]

        # work on a copy so that sampling never disturbs the training state
        sample_prev_state_val = np.copy(hprev_val)

        # indices of the generated characters
        predicted_indices = []

        for t in range(sample_length):

            # one-hot encode the current input window
            sample_input_vector = one_hot_encoder(sample_input_indices)

            # probability of every character in the vocabulary being the next
            # character, plus the updated hidden state
            probs_dist, sample_prev_state_val = sess.run(
                [output_softmax, hprev],
                feed_dict={inputs: sample_input_vector, init_state: sample_prev_state_val})

            # draw the next character index from the predicted distribution
            ix = np.random.choice(range(vocab_size), p=probs_dist.ravel())

            # slide the window: drop the oldest character, append the new one
            sample_input_indices = sample_input_indices[1:] + [ix]

            predicted_indices.append(ix)

        # convert the generated indices back to characters and join them
        predicted_chars = [ix_to_char[ix] for ix in predicted_indices]
        text = ''.join(predicted_chars)

        print ('\n')
        print (' After %d iterations' %(iteration))
        print('\n %s \n' % (text,))
        print('-'*115)

    # advance to the next non-overlapping window
    pointer += seq_length
    iteration += 1



 After 0 iterations

 uhiq iUYo9ra)1FZTezk3FGF CoG)i,uzWKrmpmqJuo(94rKty5"y-7t,1])zfwR2 FhFHcqdP2qy[!mw(1R[?xS?n(O-"x5?"k!efK MCnHSNA0h!SovpSppQ-(m,KBfn9"j.95p86F?Mun0[qdJ-L7F.Wv!W.GunT9CnwfGobu"WA?qAtfmREbZGCjDYvl:jN"D7?iMisuv2hgH1((Z0XepXA7G.Z28znn'SaEzDzCUuM7Fr0)ahqaX7!sJf-B1a5gO!iW8LbnJO1q" kjB jI2ZNd F-AX-hHhS-rM?RMy73ON[(Xu,N3T[AYD[?anzGVaBn4A Un0fOt!aDdNt7C)dSYapFz[!79"B9Z5Y!KJLD (zQDZmjpnY7-546t?T ?shi8W BA8Yp.ghO7up[:p?zM7::tuU "QDy97iWi:B:YBCVZ4)7NJnO[OW3zMvzc4liEs'CY,6-M,B[hBZ ?wtxdCRi-GlXtc[TmaB07L2Ui!hkk 

-------------------------------------------------------------------------------------------------------------------


 After 50000 iterations

 davest:]  
[Brike and ener the clifes  
Surs-  
[Erd ring of me hroutse all what you  
Y shrdie to thesp a Whone arive sraie  
Take it herne  
Mrme?  
I come me hect try love:]  
Onever love  
What lefter. pripls  
  
[Vny Mvary-as in everythes arffre?  
[Drel this Mry a claze  
And drizes.  
  
[Ars.  
Sark:]  
Er a dide a can