<h1>Recurrent Neural Networks</h1>

<h4>Forward propagation in RNNs</h4>

In [14]:
import numpy as np


def softmax(x):
    """Compute softmax values for each set of scores in x.

    The normalisation runs along axis 0, so a 1-D input is treated as a single
    score vector and a 2-D input as one score vector per column.

    Args:
        x: array-like of scores.

    Returns:
        numpy.ndarray with the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    # Softmax is invariant to adding a constant to every score. Shifting by
    # the maximum keeps the largest exponent at exp(0) = 1, so np.exp cannot
    # overflow to inf and turn the result into nan.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)


In [19]:
import numpy as np 

# Layer sizes: inputs are token ids in [0, input_dim); the network emits
# output_dim scores per time step.
input_dim, hidden_dim, output_dim = 16, 16, 4

# 1. Initialize the weight matrices U (input-to-hidden), W (hidden-to-hidden)
#    and V (hidden-to-output) by drawing uniformly from
#    [-sqrt(1/fan_in), sqrt(1/fan_in)].
input_bound = np.sqrt(1.0 / input_dim)
hidden_bound = np.sqrt(1.0 / hidden_dim)
U = np.random.uniform(-input_bound, input_bound, size=(hidden_dim, input_dim))
W = np.random.uniform(-hidden_bound, hidden_bound, size=(hidden_dim, hidden_dim))
V = np.random.uniform(-hidden_bound, hidden_bound, size=(output_dim, hidden_dim))

# The input sequence: 20 random token ids.
x = np.random.randint(0, input_dim, size=20)

# 2. The number of time steps is the length of the input sequence.
num_time_steps = len(x)

# 3. Allocate the hidden states: one row per time step plus one extra row.
hidden_state = np.zeros((num_time_steps + 1, hidden_dim))

# 4. The extra (last) row holds the initial hidden state h_init = 0. At t = 0
#    the index t - 1 is -1, so the loop below reads this row as h_{-1}.
hidden_state[-1] = np.zeros(hidden_dim)

# 5. Allocate the per-time-step output distributions.
YHat = np.zeros((num_time_steps, output_dim))

# 6. Unroll the recurrence over the sequence.
for t in range(num_time_steps):
    previous_hidden = hidden_state[t - 1]
    # h_t = tanh(U x_t + W h_{t-1}); selecting column x[t] of U is the same
    # as multiplying U by the one-hot encoding of x[t].
    hidden_state[t] = np.tanh(U[:, x[t]] + W @ previous_hidden)
    # yhat_t = softmax(V h_t)
    YHat[t] = softmax(V @ hidden_state[t])

print(YHat)

[[0.27192674 0.22924076 0.22972795 0.26910455]
 [0.25702217 0.22563602 0.24269542 0.27464638]
 [0.24756057 0.26493581 0.26474527 0.22275835]
 [0.24541756 0.24705111 0.26236208 0.24516926]
 [0.27761669 0.21655925 0.2533894  0.25243466]
 [0.25116786 0.25338565 0.27042823 0.22501825]
 [0.26676378 0.23981495 0.22719182 0.26622945]
 [0.26214189 0.22619336 0.24652592 0.26513883]
 [0.24033609 0.25967042 0.24913256 0.25086093]
 [0.22278486 0.26560348 0.25935337 0.25225829]
 [0.28136741 0.21578339 0.23724153 0.26560767]
 [0.26197608 0.22801507 0.25498731 0.25502154]
 [0.24819485 0.2338042  0.27842576 0.23957518]
 [0.26098481 0.23785486 0.25415253 0.24700779]
 [0.23244361 0.26361104 0.26328685 0.2406585 ]
 [0.2563248  0.25760124 0.25803609 0.22803787]
 [0.28202029 0.23147273 0.24405827 0.24244872]
 [0.27930991 0.23219813 0.23595022 0.25254173]
 [0.23615503 0.27358818 0.26187602 0.22838077]
 [0.2540332  0.25893927 0.25979356 0.22723397]]


<h4>Lyrics Generation with RNN</h4>

In [24]:
import tensorflow as tf
import pandas as pd

# Read the song-lyrics CSV into a DataFrame.
df = pd.read_csv('data/songdata.csv')

# Preview the first five rows; as the cell's last expression this is what the
# notebook renders as output.
df.head(5)



Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [35]:
# Summarise the dataset: row count, distinct artists, and how the songs are
# distributed over artists.
songs_per_artist = df['artist'].value_counts()

# Total number of songs (rows).
print(f"Total Data: {len(df)} \n")

# Number of distinct artists.
print(f"Number of unique artists: {df['artist'].unique().size}\n")

# The ten artists with the most songs.
print(songs_per_artist.head(10))

# Mean number of songs per artist.
print(f"\nAverage number of songs per artist: {songs_per_artist.values.mean()}")

Total Data: 57650 

Number of unique artists: 643

artist
Donna Summer        191
Gordon Lightfoot    189
Bob Dylan           188
George Strait       188
Loretta Lynn        187
Cher                187
Alabama             187
Reba Mcentire       187
Chaka Khan          186
Dean Martin         186
Name: count, dtype: int64

Average number of songs per artist: 89.65785381026438


In [40]:
# Concatenate every song's lyrics into one long training string and show the
# first 369 characters.
data = ", ".join(df["text"])
print(data[:369], end="\n\n")

# The vocabulary is the sorted set of distinct characters in the corpus.
chars = sorted(set(data))
vocab_size = len(chars)

# Lookup tables between characters and their integer ids.
ix_to_char = dict(enumerate(chars))
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}

# Spot-check the two mappings against each other.
print(char_to_ix["s"])

print(ix_to_char[68])

# One-hot encoding demo: row `char_index` of an identity matrix is the one-hot
# vector for that index.
vocabSize, char_index = 7, 4
print(np.eye(vocabSize)[char_index])

def one_hot_encoder(index):
    """Return the one-hot encoding of one or more character indices.

    Args:
        index: an integer, or a sequence of integers, used to select rows of
            a ``vocab_size`` x ``vocab_size`` identity matrix.

    Returns:
        numpy.ndarray of shape ``(vocab_size,)`` for a single index, or
        ``(len(index), vocab_size)`` for a sequence of indices.
    """
    identity = np.eye(vocab_size)
    return identity[index]

Look at her face, it's a wonderful face  
And it means something special to me  
Look at the way that she smiles when she sees me  
How lucky can one fellow be?  
  
She's just my kind of girl, she makes me feel fine  
Who could ever believe that she could be mine?  
She's just my kind of girl, without her I'm blue  
And if she ever leaves me what could I do, what co

68
s
[0. 0. 0. 0. 1. 0. 0.]


<h5>Defining Network Parameters</h5>


In [48]:
import tensorflow as tf
import random
# Define the number of units in the hidden layer
hidden_size = 100

# Define the length of the input and output sequence (characters per
# training window)
seq_length = 25

# Define the learning rate for gradient descent
learning_rate = 1e-1

# Set the seed value and seed every random number generator the notebook
# draws from, so that a re-run reproduces the same results.
seed_value = 42
tf.random.set_seed(seed_value)
random.seed(seed_value)
# The text-sampling step draws characters with np.random.choice, so NumPy's
# global generator has to be seeded as well.
np.random.seed(seed_value)


<h5>Defining Placeholders</h5>

In [49]:
# The model below is built as a TF1-style static graph, so eager execution
# has to be switched off before any placeholder is created.
tf.compat.v1.disable_eager_execution()

# 1. Placeholders for the one-hot encoded input characters and their
#    one-hot encoded targets (one row per character).
inputs = tf.compat.v1.placeholder(
    dtype=tf.float32,
    shape=[None, vocab_size],
    name="inputs",
)
targets = tf.compat.v1.placeholder(
    dtype=tf.float32,
    shape=[None, vocab_size],
    name="targets",
)

# 2. Placeholder for the hidden state fed in at the start of a sequence.
init_state = tf.compat.v1.placeholder(
    dtype=tf.float32,
    shape=[1, hidden_size],
    name="state",
)

# 3. Initializer for the RNN weights: normal distribution, stddev 0.1.
initializer = tf.random_normal_initializer(stddev=0.1)


<h5>Defining forward propagation</h5>

In [53]:
# Build the unrolled RNN graph inside a variable scope named "RNN".
# `inputs` is split into `seq_length` pieces along axis 0 and one RNN step is
# added to the graph per piece, all steps sharing the same five variables.
# NOTE: this rebinds the names U, W and V, which the NumPy example earlier in
# this file used for plain arrays.
with tf.compat.v1.variable_scope("RNN") as scope:
    h_t = init_state  # Hidden state entering step 0: the fed-in initial state, shape [1, hidden_size]
    y_hat = []  # Collects the unnormalised output scores (logits) of every time step

    # Iterate over the time steps. tf.split cuts `inputs` into seq_length
    # equal pieces, so the fed batch must have a row count divisible by
    # seq_length; with exactly seq_length rows each x_t is a single row.
    for t, x_t in enumerate(tf.split(inputs, seq_length, axis=0)):
        if t > 0:
            # From the second step on, get_variable must return the existing
            # variables instead of trying to create new ones.
            scope.reuse_variables()

        # Weight matrices: U (input-to-hidden), W (hidden-to-hidden) and
        # V (hidden-to-output). Created on the first step, reused afterwards.
        U = tf.compat.v1.get_variable("U", [vocab_size, hidden_size], initializer=initializer)
        W = tf.compat.v1.get_variable("W", [hidden_size, hidden_size], initializer=initializer)
        V = tf.compat.v1.get_variable("V", [hidden_size, vocab_size], initializer=initializer)

        # Biases for the hidden layer (bh) and the output layer (by)
        bh = tf.compat.v1.get_variable("bh", [hidden_size], initializer=initializer)
        by = tf.compat.v1.get_variable("by", [vocab_size], initializer=initializer)

        # h_t = tanh(x_t U + h_{t-1} W + bh): new hidden state from the
        # current input and the previous hidden state
        h_t = tf.tanh(tf.matmul(x_t, U) + tf.matmul(h_t, W) + bh)

        # y_hat_t = h_t V + by: logits for the current time step (softmax is
        # applied later, not here)
        y_hat_t = tf.matmul(h_t, V) + by
        y_hat.append(y_hat_t)  # Keep this step's logits

# Probability distribution over the vocabulary for the LAST time step only
output_softmax = tf.nn.softmax(y_hat[-1])
# Logits of all time steps stacked along axis 0, in time order
outputs = tf.concat(y_hat, axis=0)

# Mean softmax cross-entropy between the stacked logits and the one-hot targets
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs))

# Hidden state after the last time step; fetching it lets the caller feed it
# back in as `init_state` for the next window
hprev = h_t

In [54]:
# Backpropagation through time (BPTT) with gradient clipping

# Adam optimizer, constructed with its default hyperparameters.
minimizer = tf.compat.v1.train.AdamOptimizer()

# (gradient, variable) pairs of the loss.
gradients = minimizer.compute_gradients(loss)

# Clipping threshold: every gradient element is limited to
# [-threshold, threshold].
threshold = tf.constant(5.0, name="grad_clipping")

# Clip element-wise, skipping variables that received no gradient.
clipped_gradients = [
    (tf.clip_by_value(grad, -threshold, threshold), var)
    for grad, var in gradients
    if grad is not None
]

# Training op: applies the clipped gradients to the model's variables.
updated_gradients = minimizer.apply_gradients(clipped_gradients)


<h5>Start generating songs</h5>

In [57]:
# Initialize TensorFlow session and variables
sess = tf.compat.v1.Session()
init = tf.compat.v1.global_variables_initializer()
sess.run(init)


# Position of the current training window inside `data`
pointer = 0

# Show one (input, target) pair: the target is the input shifted by one
# character, i.e. the network learns to predict the next character.
input_sentence = data[pointer:pointer + seq_length]
print("Input Sentence: ",input_sentence)

output_sentence = data[pointer + 1:pointer + seq_length + 1]
print("Output Sentence: ",output_sentence)


# Training schedule
num_iterations = 500001  # iterations 0 .. 500000, so the final one also prints a sample
sample_every = 50000     # generate sample lyrics every this many iterations
sample_length = 500      # number of characters generated per sample

for iteration in range(num_iterations):
    # On the first iteration, and whenever the next window would run past
    # the end of the corpus, reset the hidden state and rewind to the start.
    if pointer + seq_length+1 >= len(data) or iteration == 0:
        hprev_val = np.zeros([1, hidden_size])
        pointer = 0

    input_sentence = data[pointer:pointer + seq_length]
    output_sentence = data[pointer + 1:pointer + seq_length + 1]

    # Convert sentences to indices
    input_indices = [char_to_ix[ch] for ch in input_sentence]
    target_indices = [char_to_ix[ch] for ch in output_sentence]

    # Convert indices to one-hot vectors
    input_vector = one_hot_encoder(input_indices)
    target_vector = one_hot_encoder(target_indices)

    # One training step. The fetched final hidden state is carried over and
    # fed as the initial state of the next window.
    hprev_val, loss_val, _ = sess.run(
        [hprev, loss, updated_gradients],
        feed_dict={
            inputs: input_vector,
            targets: target_vector,
            init_state: hprev_val
        }
    )

    # Generate song lyrics every `sample_every` iterations
    if iteration % sample_every == 0:
        # Seed the generator with a random window of real text.
        random_index = random.randint(0, len(data) - seq_length)
        sample_input_sent = data[random_index:random_index + seq_length]
        sample_input_indices = [char_to_ix[ch] for ch in sample_input_sent]
        # Work on a copy so sampling never disturbs the training state.
        sample_prev_state_val = np.copy(hprev_val)
        predicted_indices = []

        for t in range(sample_length):
            sample_input_vector = one_hot_encoder(sample_input_indices)
            probs_dist, sample_prev_state_val = sess.run(
                [output_softmax, hprev],
                feed_dict={
                    inputs: sample_input_vector,
                    init_state: sample_prev_state_val
                }
            )

            # Draw the next character id from the predicted distribution,
            # then slide the input window one character forward.
            ix = np.random.choice(vocab_size, p=probs_dist.ravel())
            sample_input_indices = sample_input_indices[1:] + [ix]
            predicted_indices.append(ix)

        predicted_chars = [ix_to_char[ix] for ix in predicted_indices]
        text = ''.join(predicted_chars)
        print('\n')
        print(f'After {iteration} iterations')
        print('\n', text, '\n')
        print('-' * 115)

    # Advance to the next non-overlapping window. The loop counter itself is
    # advanced by range(); incrementing it by hand here has no effect.
    pointer += seq_length

Input Sentence:  Look at her face, it's a 
Output Sentence:  ook at her face, it's a w


After 0 iterations

 "CtmYG.CCkY'j4GR9yXIrPicosoDp].5[8L jJa]Zex02xCTbVwawO26:po24BQn9mtnB  Zb5:[Job!u3tqNqaK?9LRUz!dL:Ey7gZO7lKZ -seurw Fcn)X!G(G!:kgpI(DyT-Q0p8Mw5pQjJO-w'TX1] Gz]d7sEr?O[tp"FBmX1lC'SkZu1j)Bd.2,EpE-4.1"o1y3G13(oBJCSgDn?aan]'AL(IyTeK,LyEKqnROM1-eK)R7xRl"Cc:CG:HIuh1eE!t4p1f?)kOF6z0g4l,owsliOQ:
'K
yDBKpn0uu8LJ7nu?8aXAzSL?eMOSVvUeD7-4x.WwK[op:MlnZPG5.3-BcHag?:48l0vAo4?LF0[65ao,Z,lk7[4EUD
Dw'Wzkx
JLwgX8)0 f2Pe (YUV9u65!-m-w(7O[pd(76!kSo[6.sZkT2n
8?Q'tWNwN:t6vBX'jHFKPw1pn-Lok(L7Tj3iboYwlV0)7Gqz3wtGDSw9rwV 

-------------------------------------------------------------------------------------------------------------------


After 50000 iterations

  heep, you have it and here  
Foold, me opdir  
Non't, dodn everying  
Canbe or  
[flling ohd  
[Thaigh... (Malh... HBadony-here what me it.. ,I red hear Brathe! ridal allice  
[.  
[Mrs.]. Trie]  
True prownathiow!!'Baigh.  
[Mnter  
[And elda