In [1]:
import warnings
warnings.filterwarnings('ignore')


import numpy as np
import pandas as pd
import random
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)

# Data preparation

In [2]:
# Read the lyrics dataset from disk and preview its first rows.
df = pd.read_csv(filepath_or_buffer='songdata.csv')
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [3]:
# Number of rows in the dataset (one row per song).
len(df)

57650

We have 57650 songs in our dataset.

In [4]:
# Count the distinct values in the 'artist' column (missing values, if any,
# are counted as one value, matching len(unique())).
df['artist'].nunique(dropna=False)

643

We have songs from 643 artists.

In [5]:
# Song counts per artist, most frequent first; keep the ten largest.
df['artist'].value_counts().head(10)

Donna Summer        191
Gordon Lightfoot    189
Bob Dylan           188
George Strait       188
Loretta Lynn        187
Alabama             187
Cher                187
Reba Mcentire       187
Chaka Khan          186
Dean Martin         186
Name: artist, dtype: int64

The number of songs by each of the top 10 artists is shown above.

In [6]:
# Average number of songs per artist.
np.mean(df['artist'].value_counts().values)

89.65785381026438

We have roughly 89.7 songs on average for each artist.

In [7]:
# Concatenate the lyrics of every song into one long string, with ',' placed
# between consecutive songs.
data = str.join(',', df['text'])

The song lyrics are in the column 'text', so we combine all the rows of that column and store the result in the variable 'data'. NOTE: the separator is ','

In [8]:
# Preview the first 300 characters of the combined lyrics.
data[:300]

"Look at her face, it's a wonderful face  \nAnd it means something special to me  \nLook at the way that she smiles when she sees me  \nHow lucky can one fellow be?  \n  \nShe's just my kind of girl, she makes me feel fine  \nWho could ever believe that she could be mine?  \nShe's just my kind of girl, with"

Since we are building a char-level RNN, we store all the unique characters of the dataset in the variable named 'chars'.

In [9]:
# Sorted list of the distinct characters that occur in the lyrics.
chars = sorted(set(data))

In [10]:
# Vocabulary size: the number of distinct characters found in the lyrics.
vocab_size = len(chars)
vocab_size

76

Since a neural network only accepts numeric inputs, we need to convert all the characters in the vocabulary to numbers.
We map each character to its index in the vocabulary, which gives every character a unique number.
We define the char_to_id dictionary, which maps each character to its index. To get a character back from its index we define another dictionary, id_to_char.

In [11]:
# Two lookup tables built from each character's position in `chars`:
# id_to_char goes from index to character, char_to_id from character to index.
id_to_char = dict(enumerate(chars))
char_to_id = {symbol: position for position, symbol in id_to_char.items()}

In [12]:
def one_hot_encoder(index):
    """Return the one-hot encoding of ``index`` over the vocabulary.

    ``index`` selects row(s) of a ``vocab_size`` x ``vocab_size`` identity
    matrix, so a single integer yields one one-hot row and a list of
    integers yields one row per entry.
    """
    identity = np.eye(vocab_size)
    return identity[index]

In the one_hot_encoder function, np.eye(vocab_size) creates an identity matrix of size (vocab_size x vocab_size), and we return the row(s) of that matrix selected by the index.

# Defining the network parameters:

In [13]:
# Number of units in the RNN hidden layer (width of the hidden state).
hidden_size = 100

number of units in the hidden layer

In [14]:
# Number of characters in each training window; the forward pass splits the
# input into this many time steps.
seq_length = 23

Define the length of input and output sequence

In [15]:
# NOTE(review): this value is not passed to tf.train.AdamOptimizer() in the
# visible code, so the optimizer appears to run with its own default learning
# rate -- confirm which one is intended.
learning_rate = 1e-1

Define the learning rate of the gradient descent

In [16]:
# Seed every random number generator the notebook draws from so that a
# Restart & Run All gives repeatable results.
seed_value = 42
tf.set_random_seed(seed_value)   # TensorFlow graph-level seed (weight initialisation)
random.seed(seed_value)          # Python's `random` (picks the sampling start index)
np.random.seed(seed_value)       # NumPy (np.random.choice draws the generated characters)

Define the seed value

# Define the placeholders

In [17]:
# Feeds for one training window: one-hot rows for the input characters and
# one-hot rows for the characters that follow them (the targets).
inputs = tf.placeholder(tf.float32, [None, vocab_size], name="inputs")
targets = tf.placeholder(tf.float32, [None, vocab_size], name="targets")

Define the placeholder for input and output

In [18]:
# Feed for the hidden state the RNN starts from: a single row of hidden_size values.
init_state = tf.placeholder(tf.float32, [1, hidden_size], name="state")

Define the placeholder for initial hidden state

In [19]:
# Normal-distribution initializer (standard deviation 0.1) shared by every
# weight and bias variable of the RNN.
initializer = tf.random_normal_initializer(stddev = 0.1)

Define the initializer for initializing weights of RNN

# Defining forward propagation:

In [20]:
# Unroll a vanilla RNN over seq_length time steps. All steps share one set of
# weights, which live in the "RNN" variable scope.
with tf.variable_scope("RNN") as scope:
    # Hidden state entering the first time step.
    h_t = init_state
    # Collects the output scores (logits) produced at each time step.
    y_hat = []
    
    # tf.split cuts `inputs` along axis 0 into seq_length equal pieces, one per time step.
    for t, x_t in enumerate(tf.split(inputs,seq_length,axis = 0)):
        if t > 0:
            # From the second step on, fetch the variables created at t == 0
            # instead of creating new ones.
            scope.reuse_variables()
            
        # Input-to-hidden weights.
        U  = tf.get_variable("U",[vocab_size,hidden_size],initializer = initializer)

        # Hidden-to-hidden (recurrent) weights.
        W = tf.get_variable("W",[hidden_size,hidden_size],initializer = initializer)

        # Hidden-to-output weights.
        V = tf.get_variable("V",[hidden_size,vocab_size],initializer = initializer)

        # Hidden-layer bias.
        bh = tf.get_variable("bh",[hidden_size],initializer = initializer)

        # Output-layer bias.
        by = tf.get_variable("by",[vocab_size],initializer = initializer)

        # New hidden state from the current input and the previous hidden state.
        h_t = tf.tanh(tf.matmul(x_t,U) + tf.matmul(h_t,W) + bh)

        # Unnormalised scores over the vocabulary for this time step.
        y_hat_t = tf.matmul(h_t,V) + by

        y_hat.append(y_hat_t)

Apply softmax on the output and get probability

In [25]:
# Probability distribution over the vocabulary from the last time step's
# scores; this is what the sampling code fetches to pick the next character.
output_softmax = tf.nn.softmax(y_hat[-1])
# Per-step scores joined along axis 0 so they can be compared with `targets`.
outputs = tf.concat(y_hat,axis = 0)

Compute the cross-entropy

In [26]:
# Softmax cross-entropy between the targets and the scores of every time
# step, averaged into a single scalar loss.
loss  = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = targets,logits = outputs))

Store the final hidden state of the RNN in h_prev. We use this final hidden state when making predictions.

In [27]:
# Hidden state after the last unrolled time step; it is fetched on each run
# and fed back in as init_state for the next run.
h_prev = h_t

# Defining BPTT:

Initialize the Adam optimizer

In [21]:
# Adam optimizer created with no arguments, i.e. with its default settings.
# NOTE(review): the `learning_rate` variable defined earlier in the notebook
# is not passed here -- confirm whether that is intended.
minimizer = tf.train.AdamOptimizer()

Compute the gradients of the loss with Adam optimizer

In [31]:
# (gradient, variable) pairs for the loss. They are computed separately from
# the update step so they can be clipped before being applied.
gradients = minimizer.compute_gradients(loss)

Set the threshold for the gradient clipping

In [29]:
# Clipping bound: gradient values are limited to the range [-threshold, threshold].
threshold = tf.constant(5.0,name = "gradient_clipping")

Clip the gradients that exceed the threshold so they are brought back into range

In [32]:
# Clamp every gradient element-wise to [-threshold, threshold], keeping each
# clipped gradient paired with the variable it belongs to.
clipped_gradients = [
    (tf.clip_by_value(gradient, -threshold, threshold), variable)
    for gradient, variable in gradients
]

Apply the clipped gradients to update the variables

In [43]:
# Op that applies the clipped gradients to their variables; running it
# performs one optimisation step.
updated_gradients = minimizer.apply_gradients(clipped_gradients)

# Start generating song lyrics:

Start the TensorFlow session and initialize all the variables

In [36]:
# Open a session and run the initializer op so that every variable holds its
# initial value before training starts.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Complete code block for generating songs:

In [59]:
# pointer: offset into `data` at which the next training window starts.
# iteration: number of training steps completed so far.
pointer = 0
iteration = 0

In [None]:
# Train the RNN on consecutive seq_length-character windows of `data` and
# periodically print a piece of text generated by the current model.
num_iterations = 50000   # stop once this many training steps have run
sample_every = 5000      # generate and print sample text every this many steps
sample_length = 500      # number of characters generated per sample

while iteration < num_iterations:

    # Start from the beginning of the data with a zeroed hidden state on the
    # first iteration, and again whenever the next window plus its shifted
    # target would reach the end of the data.
    if pointer + seq_length + 1 >= len(data) or iteration == 0:
        hprev_val = np.zeros([1, hidden_size])
        pointer = 0

    # Input window and its target: the same window shifted one character ahead.
    input_sentence = data[pointer:pointer + seq_length]
    output_sentence = data[pointer + 1:pointer + seq_length + 1]

    # Characters -> vocabulary indices -> one-hot rows.
    input_indices = [char_to_id[ch] for ch in input_sentence]
    target_indices = [char_to_id[ch] for ch in output_sentence]
    input_vectors = one_hot_encoder(input_indices)
    target_vectors = one_hot_encoder(target_indices)

    # One training step. The final hidden state is kept so it can be fed in as
    # the initial state of the next window.
    hprev_val, loss_val, _ = sess.run(
        [h_prev, loss, updated_gradients],
        feed_dict={inputs: input_vectors, targets: target_vectors, init_state: hprev_val},
    )

    # Generating a sample costs sample_length extra session runs, so it is only
    # done on the iterations whose text is actually printed.
    if iteration % sample_every == 0:

        # Seed the generator with a randomly chosen window of real lyrics.
        random_index = random.randint(0, len(data) - seq_length)
        sample_input_sent = data[random_index:random_index + seq_length]
        sample_input_indices = [char_to_id[ch] for ch in sample_input_sent]

        # Work on a copy so sampling never disturbs the training state.
        sample_prev_state_val = np.copy(hprev_val)

        # NOTE(review): every step below re-feeds a full seq_length window while
        # also carrying the hidden state forward -- confirm this is the intended
        # sampling scheme.
        predicted_indices = []
        for t in range(sample_length):

            sample_input_vector = one_hot_encoder(sample_input_indices)

            # Probability of each character in the vocabulary being the next
            # character, plus the hidden state after this window.
            probs, sample_prev_state_val = sess.run(
                [output_softmax, h_prev],
                feed_dict={inputs: sample_input_vector, init_state: sample_prev_state_val},
            )

            # Draw the next character index from the predicted distribution.
            ix = np.random.choice(vocab_size, p=probs.ravel())

            # Slide the window: drop its oldest character, append the new one.
            sample_input_indices = sample_input_indices[1:] + [ix]
            predicted_indices.append(ix)

        # Turn the predicted indices back into text.
        text = ''.join(id_to_char[ix] for ix in predicted_indices)

        print ('\n')
        print (' After %d iterations' %(iteration))
        print('\n %s \n' % (text,))
        print('-'*115)

    # Advance to the next window.
    pointer += seq_length
    iteration += 1



 After 0 iterations

 ht a can't na proue  
Lifter 

Ty worlans  
I feel feeling upsa I soul ngallys  
You goone bisk wor gidl you, I could suse craichur oh-ne turns rinex  
Way do me sithing not crick  
  
As kist  
  
I  
Then I ween's so good  
Onua It's do ba me gind you alling you up aind I will say: my hows  
Of is non't need,  
  
[ChorKs]  
I wake me stanns sire that  
I woulds, world tolone take your fad you worllast.  
Sted my mind alone  
Can I willow time,  
And no me ar the ur a mace to go  
And in to 'm 

-------------------------------------------------------------------------------------------------------------------
