<a href="https://colab.research.google.com/github/bhopchi/-Nusrat_Fateh_Ali_Khan-s_lyrics-Text_Generation_with_an_RNN_LSTM/blob/main/Nusrat_Fateh_Ali_Khan's_lyrics%2C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing necessary libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import os


In [None]:
# Load the raw lyrics corpus from disk (no preprocessing happens here)
def load_dataset(filepath):
    """Return the entire contents of a UTF-8 text file as one string.

    Args:
        filepath: Path of the text file to read.

    Returns:
        The full text of the file, decoded as UTF-8.
    """
    with open(filepath, mode='r', encoding='utf-8') as lyrics_file:
        return lyrics_file.read()

In [None]:
# Location of the lyrics corpus. Replace this with the path of your own
# Nusrat Fateh Ali Khan dataset if the file lives somewhere else.
data_path = '/content/NFK lyrics text.txt'  # plain-text file containing the lyrics
# Whole corpus as a single string; every later cell reads from `text`.
text = load_dataset(data_path)

In [None]:
# Preprocessing - build a character-level vocabulary from the corpus.
# Case is preserved (lower=False) and '<OOV>' is registered as the
# out-of-vocabulary token.
tokenizer = Tokenizer(
    char_level=True,
    lower=False,
    oov_token='<OOV>',
)
tokenizer.fit_on_texts(text)

# Size used for the one-hot axis: number of word_index entries plus one.
total_chars = 1 + len(tokenizer.word_index)
print(f'Total unique characters: {total_chars}')

Total unique characters: 86


In [None]:
# Prepare the dataset for training: slide a fixed-size window over the corpus
# one character at a time; each window is an input and the character that
# follows it is the target.
input_seq_length = 40  # The length of input sequences

window_starts = range(len(text) - input_seq_length)
sequences = [text[start:start + input_seq_length] for start in window_starts]
next_chars = [text[start + input_seq_length] for start in window_starts]

print(f'Total sequences generated: {len(sequences)}')

# Boolean one-hot tensors (builtin bool dtype):
#   X -> (samples, timesteps, vocabulary), y -> (samples, vocabulary)
X = np.zeros((len(sequences), input_seq_length, total_chars), dtype=bool)
y = np.zeros((len(sequences), total_chars), dtype=bool)

# Tokenizer ids are shifted down by one to obtain the one-hot column.
for row, (window, target) in enumerate(zip(sequences, next_chars)):
    for step, symbol in enumerate(window):
        X[row, step, tokenizer.word_index[symbol] - 1] = 1
    y[row, tokenizer.word_index[target] - 1] = 1

Total sequences generated: 40121


In [None]:
# Model - RNN LSTM: one LSTM layer over the one-hot character window,
# followed by a softmax over the vocabulary for the next character.
model = tf.keras.Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape=`
    # to the first layer of a Sequential model makes Keras emit a UserWarning.
    tf.keras.Input(shape=(input_seq_length, total_chars)),
    tf.keras.layers.LSTM(128),
    tf.keras.layers.Dense(total_chars, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

  super().__init__(**kwargs)


In [None]:
# Training the model. Hyperparameters may need tuning based on dataset.
# The History returned by fit() is kept so the per-epoch loss stays available
# to later cells and the cell does not end by echoing the History repr.
history = model.fit(X, y, batch_size=128, epochs=30)

Epoch 1/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 132ms/step - loss: 3.3571
Epoch 2/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 124ms/step - loss: 2.8715
Epoch 3/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 124ms/step - loss: 2.5504
Epoch 4/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 123ms/step - loss: 2.4056
Epoch 5/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 125ms/step - loss: 2.2972
Epoch 6/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 122ms/step - loss: 2.2209
Epoch 7/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 124ms/step - loss: 2.1698
Epoch 8/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 125ms/step - loss: 2.1125
Epoch 9/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 129ms/step - loss: 2.0929
Epoch 10/30
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

<keras.src.callbacks.history.History at 0x7b07ab0959f0>

In [None]:
# Text Generation Function
def generate_text(model, tokenizer, seed_text, length=400):
    """Generate text one character at a time, continuing from ``seed_text``.

    At every step the most recent characters are one-hot encoded into a
    window, the model predicts a distribution over the vocabulary, and the
    most probable character is appended (greedy decoding).

    The context is right-aligned in the window, so the newest characters
    always occupy the final timesteps, as they do in the training windows.
    A seed shorter than the window is zero-padded on the left and the context
    grows with each generated character until it fills the window. A seed
    longer than the window is truncated to its last ``window`` characters.

    Args:
        model: Trained Keras model taking input of shape
            (1, window, vocab_size) and returning per-character scores.
        tokenizer: Fitted character-level tokenizer exposing ``word_index``
            and ``index_word`` (and optionally ``oov_token``).
        seed_text: Text to start from. It may be empty, shorter or longer
            than the window, and may contain characters outside the
            vocabulary.
        length: Number of characters to generate.

    Returns:
        ``seed_text`` followed by ``length`` generated characters.
    """
    # Take the window geometry from the model itself when it is available,
    # otherwise fall back to the notebook-level settings used to build X.
    shape = getattr(model, 'input_shape', None)
    if (isinstance(shape, tuple) and len(shape) == 3
            and all(isinstance(dim, int) for dim in shape[1:])):
        window, vocab_size = shape[1], shape[2]
    else:
        window, vocab_size = input_seq_length, total_chars

    oov_token = getattr(tokenizer, 'oov_token', None)
    oov_id = tokenizer.word_index.get(oov_token) if oov_token is not None else None

    # Output slots that decode to a real character. The OOV slot and any slot
    # without an index_word entry are excluded so decoding can never emit the
    # literal OOV token or fail with a KeyError.
    emittable = np.array(
        [
            (slot + 1) in tokenizer.index_word
            and tokenizer.index_word[slot + 1] != oov_token
            for slot in range(vocab_size)
        ],
        dtype=bool,
    )

    generated = seed_text
    context = seed_text[-window:]  # only the last `window` characters fit
    for _ in range(length):
        sampled = np.zeros((1, window, vocab_size))
        offset = window - len(context)  # left padding for a short context
        for t, char in enumerate(context):
            # Unknown characters use the OOV slot when the tokenizer has one;
            # otherwise that timestep is left all-zero.
            char_id = tokenizer.word_index.get(char, oov_id)
            if char_id is not None:
                sampled[0, offset + t, char_id - 1] = 1
        predictions = model.predict(sampled, verbose=0)[0]
        next_index = int(np.argmax(np.where(emittable, predictions, -np.inf)))
        next_char = tokenizer.index_word[next_index + 1]

        generated += next_char
        # Grow the context until it fills the window, then slide it.
        context = (context + next_char)[-window:]
    return generated

In [None]:
# Example - generate text with the model trained on the Nusrat Fateh Ali Khan
# dataset, starting from a short seed phrase.
# NOTE: this seed is 22 characters long, which is shorter than the
# 40-character window (input_seq_length) used to build the training sequences.
seed_text = " You are life of love "  # seed phrase the generated text continues from
print(generate_text(model, tokenizer, seed_text))

 You are life of love                                                                                                                                                                                                                                                                                                                                                                                                                 


In [None]:
def save_generated_text(generated_text, filename="generated_text.txt"):
    """Write model output to disk as UTF-8, replacing any existing file.

    Args:
        generated_text: The string produced by the model.
        filename: Destination path; defaults to "generated_text.txt".
    """
    with open(filename, mode="w", encoding="utf-8") as out_file:
        out_file.write(generated_text)

# Run generate_text again from the same seed and keep the result in
# `generated_text`, then write it to the default output file
# ("generated_text.txt").
generated_text = generate_text(model, tokenizer, seed_text)
save_generated_text(generated_text)