**Importing TensorFlow**

In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

import numpy as np

2023-02-12 23:36:58.996566: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-12 23:36:59.263148: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-12 23:36:59.263171: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-02-12 23:37:00.105753: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

**Loading data and creating tokens**

In [2]:
# Read the raw lyrics. The context manager guarantees the file handle is
# closed, and the explicit encoding keeps decoding independent of the
# platform default (the corpus contains non-ASCII characters such as curly
# apostrophes). NOTE(review): assumes dataset.txt is UTF-8 encoded - confirm.
with open('dataset.txt', encoding='utf-8') as corpus_file:
    data = corpus_file.read()

# One entry per line of the file, lower-cased.
corpus = data.lower().split("\n")

# Build the word -> integer index vocabulary from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# One more than the number of distinct words: id 0 is the value used for
# padding further down, so the output layer needs a slot for it as well.
total_words = len(tokenizer.word_index) + 1

print(tokenizer.word_index)
print(total_words)

3407


**Preparing data for training**

Generating N-Grams

In [3]:
# Expand every corpus line into all of its leading token prefixes of
# length >= 2, e.g. [a, b, c] -> [a, b], [a, b, c]. The last token of each
# prefix later becomes the word to predict from the tokens before it.
input_sequences = [
    encoded_line[:prefix_end]
    for encoded_line in (
        tokenizer.texts_to_sequences([text])[0] for text in corpus
    )
    for prefix_end in range(2, len(encoded_line) + 1)
]

Padding Sequences

In [4]:
# Length of the longest n-gram prefix; every sequence is padded up to it.
max_sequence_len = max(map(len, input_sequences))

# Pad at the front ('pre') so the final column always holds the last token
# of each prefix.
padded_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_len,
    padding='pre',
)
input_sequences = np.array(padded_sequences)

Creating predictors and labels

In [5]:
# Every column except the last is the model input ...
xs = input_sequences[:, :-1]
# ... and the last column is the token id the model should predict.
labels = input_sequences[:, -1]

# One-hot encode the target ids across the whole vocabulary so they match
# the softmax output of the network.
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [6]:
# Spot-check the integer ids assigned to a few words of the vocabulary.
for probe_word in ('love', 'darling', 'baby', 'together'):
    print(tokenizer.word_index[probe_word])

17
218
80
439


In [7]:
# Inspect one training input: the front-padded token ids stored at row 17.
example_input = xs[17]
print(example_input)

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0  465 1032   16   50   51]


In [8]:
# Inspect the one-hot target vector that belongs to the same row (17).
example_target = ys[17]
print(example_target)

[0. 0. 0. ... 0. 0. 0.]


**Creating the model**

In [47]:
# Next-word predictor: embedding -> bidirectional LSTM -> LSTM -> softmax
# over the vocabulary. The input length is max_sequence_len-1 because the
# last token of every padded sequence was split off as the label.
model = Sequential([
    Embedding(total_words, 200, input_length=max_sequence_len-1),
    # return_sequences=True hands the full sequence to the next LSTM layer.
    Bidirectional(LSTM(512, return_sequences=True)),
    LSTM(256),
    # One output unit per vocabulary id, matching the one-hot targets.
    Dense(total_words, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


**Model Summary**

In [48]:
# summary() prints the layer table itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, 24, 200)           681400    
                                                                 
 bidirectional_7 (Bidirectio  (None, 24, 1024)         2920448   
 nal)                                                            
                                                                 
 lstm_14 (LSTM)              (None, 256)               1311744   
                                                                 
 dense_5 (Dense)             (None, 3407)              875599    
                                                                 
Total params: 5,789,191
Trainable params: 5,789,191
Non-trainable params: 0
_________________________________________________________________
None


**Training the model**

In [45]:
import os

# Location of the saved model, relative to the current working directory.
MODEL_PATH = 'singer'

if os.path.exists(MODEL_PATH):
    # A previously trained model is available: reuse it instead of
    # retraining.
    model = tf.keras.models.load_model(MODEL_PATH)
    # No training ran in this session; define the name anyway so later
    # cells can test for it instead of hitting a NameError.
    history = None
else:
    # First run: train from scratch and persist the result for next time.
    history = model.fit(xs, ys, epochs=100, verbose=1)
    model.save(MODEL_PATH)

**Writing songs**

In [46]:
# Greedy text generation: repeatedly feed the text produced so far to the
# model and append the single most probable next word.
seed_text = "I used to love her but"
next_words = 500
print(seed_text, end=' ')
for _ in range(next_words):
    # Encode the running text and pad/truncate it to the model input length.
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    probabilities = model.predict(token_list, verbose=0)
    # The batch holds a single sequence; reduce the argmax to a plain int so
    # the lookup below does not depend on one-element array truthiness.
    predicted = int(np.argmax(probabilities, axis=1)[0])
    # Direct id -> word lookup through the tokenizer's reverse map instead
    # of scanning word_index for every generated word. An id with no word
    # yields "", exactly as the original scan did.
    output_word = tokenizer.index_word.get(predicted, "")
    seed_text += " " + output_word
    print(output_word, end=' ')

I used to love her but then i do it for angels to fly to fly to fly ever do it for free the dark lays off street and lace untied make the road street and their seats to fly to fly to fly to the motherland and joy be flooded in the oceans isn't up street and starts to fly to the motherland and starts to move along in a team cd for neck that e'er that's with your shape of eyes mind now that i wake up i think of goodbye to you pay around set sail the motherland and joy be with a piano home into hide and joy be with a thousand kings for two old country lanes then you’re doing oh oh one to you hear you bring to you pay till the sunlight cracks love like a thousand kings i'd want for the living this ground and more than true me over the n i got living right back the party's time street love to come follow me tight now i'm doing away to you bring that i think of love you wanna know about your fear you could get asleep tonight i mean it on him smaller in the shape of a thousand kings and their