In [26]:

import numpy as np 
import pandas as pd 

In [None]:
import tensorflow as tf

# Look up the visible GPU devices once and reuse the list for both the
# count report and the memory-growth configuration below.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

if not gpus:
    print("No GPU found. Using CPU.")
else:
    try:
        # Turn on memory growth for every detected GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        print("GPU is enabled.")
    except RuntimeError as err:
        # Report the problem instead of aborting the notebook.
        # NOTE(review): presumably raised when the GPUs were already
        # initialized before this cell ran - confirm against TF docs.
        print(err)


Num GPUs Available:  1
GPU is enabled.


In [28]:
!pip install langdetect



In [29]:
# Read only the `lyrics` column of the first 50 rows of the CSV.
df = pd.read_csv(
    'song_lyrics.csv',
    nrows=50,
    usecols=['lyrics'],
)

import langdetect

def is_english(text):
    """Return True when langdetect classifies ``text`` as English.

    Args:
        text: Value to classify. Anything that is not a non-blank string
            (for example a missing value) is reported as not English.

    Returns:
        bool: True only if ``langdetect.detect(text)`` returns ``'en'``;
        False for non-strings, blank strings and any detection error.
    """
    # Non-text and blank values cannot be classified; skip the detector.
    if not isinstance(text, str) or not text.strip():
        return False
    try:
        return langdetect.detect(text) == 'en'
    except Exception:
        # Best-effort filter: a row the detector cannot handle is simply
        # treated as non-English. Catching Exception (instead of a bare
        # except) lets KeyboardInterrupt / SystemExit propagate, so a long
        # .apply() over the dataset can still be interrupted.
        return False

# langdetect's algorithm is randomized; fixing the seed before the first
# detection makes the English filter keep the same rows on every run.
langdetect.DetectorFactory.seed = 0

# Drop missing lyrics first so the detector only sees real text, then keep
# the rows classified as English.
df = df.dropna(subset=['lyrics'])
df = df[df['lyrics'].apply(is_english)]

In [30]:
import re

def clean_text(text):
    """Normalize a lyrics string for tokenization.

    Steps, in order: lowercase, turn newlines into spaces, drop
    ``[...]`` bracketed spans, drop ``, . ! ? ( )``, replace any word
    containing a digit with a space, remove every character that is not
    a lowercase letter, digit, whitespace or apostrophe, and finally
    collapse whitespace runs to a single space and trim the ends.

    Args:
        text: Value to clean; it is converted with ``str()`` first.

    Returns:
        str: The cleaned text, with single spaces between words and no
        leading or trailing whitespace.
    """
    text = str(text).lower()
    text = text.replace('\n', ' ')
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'[,\.!?()]', '', text)
    text = re.sub(r'\w*\d\w*', ' ', text)
    text = re.sub(r"[^a-z0-9\s']", '', text)
    # Collapse whitespace last: the removals above can leave double,
    # leading or trailing spaces behind (e.g. "a - b" -> "a  b").
    return re.sub(r'\s+', ' ', text).strip()

# Normalize every lyric with clean_text.
cleaned_lyrics = df['lyrics'].apply(clean_text)
df['lyrics'] = cleaned_lyrics

# Keep only rows whose cleaned lyrics are non-empty after stripping.
non_empty_mask = df['lyrics'].str.strip().astype(bool)
df = df[non_empty_mask]

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fit the tokenizer on the cleaned lyrics; "<OOV>" is the out-of-vocabulary
# token passed to the Tokenizer.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(df['lyrics'])

# Persist the fitted tokenizer to disk as a pickle.
import pickle
with open("tokenizer.pkl", "wb") as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

# Encode each lyric as a list of integer word ids.
encoded_lyrics = tokenizer.texts_to_sequences(df['lyrics'])

# Discard lyrics with five tokens or fewer.
sequences = [tokens for tokens in encoded_lyrics if len(tokens) > 5]

# Expand every lyric into all of its prefixes of length 2..len(lyric);
# the last id of each prefix later becomes the prediction target.
input_sequences = [
    tokens[:end]
    for tokens in sequences
    for end in range(2, len(tokens) + 1)
]

# Left-pad every prefix to the length of the longest one.
max_seq_len = max(len(prefix) for prefix in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre')


In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Baseline next-word model: Embedding -> LSTM -> softmax over the vocabulary.
# NOTE: `model` is rebound to a deeper two-LSTM network in a later cell
# before any training happens, so this baseline is never fitted as the
# notebook stands.

# One output class per entry in tokenizer.word_index, plus one extra slot.
# NOTE(review): the extra slot is presumably index 0, which pad_sequences
# fills in as padding - confirm.
vocab_size = len(tokenizer.word_index) + 1

# `input_length` is deliberately not passed to Embedding: Keras 3 deprecates
# the argument and ignores it with a warning; the sequence length is
# inferred from the training data.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=64),
    LSTM(128),
    Dense(vocab_size, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [None]:
import numpy as np
from tensorflow.keras.utils import to_categorical

# Make sure the padded sequences are a NumPy array so they can be sliced
# column-wise.
input_sequences = np.array(input_sequences)

# Every column except the last is the model input; the last column holds
# the id of the word to predict.
X, target_ids = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the targets over the vocabulary (word_index size + 1).
y = to_categorical(target_ids, num_classes=len(tokenizer.word_index) + 1)

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Next-word model that is actually trained: Embedding -> LSTM -> Dropout
# -> LSTM -> softmax over the vocabulary.

# One output class per entry in tokenizer.word_index, plus one extra slot.
# NOTE(review): the extra slot is presumably index 0, which pad_sequences
# fills in as padding - confirm.
vocab_size = len(tokenizer.word_index) + 1

# `input_length` is deliberately not passed to Embedding: Keras 3 deprecates
# the argument and ignores it with a warning; the sequence length is
# inferred from the training data.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128),
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(256, return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dense(vocab_size, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [35]:
# Train on the prefix/next-word pairs for 10 epochs in batches of 64.
model.fit(x=X, y=y, batch_size=64, epochs=10)

# Write the trained model to an HDF5 file.
model.save("song_generator_v1.h5")

Epoch 1/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 154ms/step - accuracy: 0.0466 - loss: 6.5917
Epoch 2/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 153ms/step - accuracy: 0.0479 - loss: 5.9419
Epoch 3/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 153ms/step - accuracy: 0.0572 - loss: 5.7470
Epoch 4/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 154ms/step - accuracy: 0.0646 - loss: 5.5334
Epoch 5/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 154ms/step - accuracy: 0.0739 - loss: 5.4024
Epoch 6/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 155ms/step - accuracy: 0.0893 - loss: 5.2436
Epoch 7/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 154ms/step - accuracy: 0.1057 - loss: 5.0444
Epoch 8/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 153ms/step - accuracy: 0.1305 - loss: 4.8287
Epoch 9/10
[1m1



In [37]:
def generate_song_line(seed_text, next_words=50):
    """Extend ``seed_text`` by greedily predicting one word at a time.

    On each step the current text is tokenized, left-padded to the model's
    input length (``max_seq_len - 1``), and the highest-probability class
    from ``model.predict`` is appended as the next word.

    Args:
        seed_text: Starting text to continue.
        next_words: Maximum number of words to append (default 50).

    Returns:
        str: ``seed_text`` followed by the generated words. Generation
        stops early if the predicted class has no word attached to it.
    """
    context_len = max_seq_len - 1
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=context_len, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        next_index = int(np.argmax(predicted))
        # Use .get(): index 0 (the padding value) has no entry in
        # index_word, and a plain [] lookup would raise KeyError instead
        # of reaching the stop condition below.
        output_word = tokenizer.index_word.get(next_index)
        if output_word is None:
            break
        seed_text += " " + output_word
    return seed_text

# Keep the result under its own name. Assigning it to `generate_song_line`
# would overwrite the function with a string, so re-running this cell
# would fail with "'str' object is not callable".
generated_line = generate_song_line("I love the way you", next_words=10)
print(generated_line)

I love the way you i be what you got me i i be you
