### Setup

In [1]:
import pandas as pd
import numpy as np
import re
import random
from tensorflow import keras
from keras import layers
from keras import optimizers

#### Importing and Cleaning Data

In [2]:
songs = pd.read_csv("doj_songs.csv")

In [3]:
# Tokenize Data

# merge all characters into one string
text = ""
clean = ""
for line in songs["lyrics"]:
    text = text + str(line).lower()
    clean = clean + " ".join(re.findall(r"[a-z']+", text))

# find all unique characters
tokens = re.findall(r"[a-z'\s]", clean)

In [4]:
# Define the Alphabet

characters = sorted(list(set(tokens)))
len(characters)
# 28 unique characters

# dictionary for character-to-index mapping
char_to_index = dict((char, index) for index, char in enumerate(characters))

# dictionary for index-to-character mapping
index_to_char = dict((index, char) for index, char in enumerate(characters))

In [5]:
len(characters)

28

In [6]:
# Create Training Sequences

# chunk the text into sequences
maxlen = 20  # n
step = 1  # length of step at each iteration

# list of sequences
sequences = []

# list of next characters model should predict
next_characters = []

# iterate over cleaned text string and each 20-length sequence into list
for i in range(0, len(clean) - maxlen, step):
    sequences.append(clean[i : (i + maxlen)])
    next_characters.append(clean[i + maxlen])

In [7]:
# Label Encode Training Sequences (one-hot encoding)

# create empty matrices for input and output sets
# input: each n-length sequence in sequences list
# output: next character after each n-length sequence
# i.e.: sentence = "hello there"
#       sequence = "hel"
#       next char = "l"

x = np.zeros((len(sequences), maxlen, len(characters)), dtype=np.bool_)  # input
y = np.zeros((len(sequences), len(characters)), dtype=np.bool_)  # output

for i, chunk in enumerate(sequences):
    for j, c in enumerate(chunk):
        x[i, j, char_to_index[c]] = 1
    y[i, char_to_index[next_characters[i]]] = 1

### Build the Model
A single LSTM

In [8]:
model = keras.Sequential(
    [
        keras.Input(shape=(maxlen, len(characters))),
        layers.LSTM(128),
        layers.Dense(len(characters), activation="softmax"),
    ]
)

2021-12-12 01:15:27.598922: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)

In [10]:
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               80384     
                                                                 
 dense (Dense)               (None, 28)                3612      
                                                                 
Total params: 83,996
Trainable params: 83,996
Non-trainable params: 0
_________________________________________________________________


### Prepare the Text Sampling Function

In [12]:
# Function to sample an index from a probability array

def sample(predictions, temperature=1.0):
    predictions = np.asarray(predictions).astype("float64")
    predictions = np.log(predictions) / temperature
    exp_predictions = np.exp(predictions)
    predictions = exp_predictions / np.sum(exp_predictions)
    probabilities = np.random.multinomial(1, predictions, 1)
    return np.argmax(probabilities)

### Train the Model

In [13]:
epochs = 4
size = 128

In [14]:
for epoch in range(epochs):

    model.fit(x, y, batch_size = size, epochs=1)
    print()
    print("Generating text after epoch: %d" %epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)

    for diversity in [0.5, 1.0, 1.2]:
        print("Diversity: ", diversity)

        generated = ""
        sentence = clean[start_index:start_index + maxlen]
        print("Generating with seed: '" + sentence + "'")

        for i in range(400):
            x_predict = np.zeros((1, maxlen, len(characters)))
            
            for t, char in enumerate(sentence):
                x_predict[0, t, char_to_index[char]] = 1.0
            predictions = model.predict(x_predict, verbose = 0)[0]
            next_index = sample(predictions, diversity)
            next_char = index_to_char[next_index]
            sentence = sentence[1:] + next_char
            generated += next_char

        print("Generated: ", generated)
        print()


Generating text after epoch: 0
Diversity:  0.5
Generating with seed: 'if she won't go dijo'
Generated:   lean and your hands down woman woman woman ayy i can be your woman woman woman woman ayy i can get believe i got a man but i want a man to stand and fight stand and fight for meging i have it ain't got your ganester ass me ath no got a h baby i how ain 't coldow ha om ain't gotta ally want me a to you boy i 'll show you yeah baby let me watch you go to town it's your one chaine a manted get into 

Diversity:  1.0
Generating with seed: 'if she won't go dijo'
Generated:  n do it like that saids from the nigga don't clean weth your deesed can give speak in the body but when the best come thatiggin' like this a diffim like vikendes something baby i wouldn't lie wiah wancust'  it's one me how love cknougd jealoe the tis with mydan when he tait ain't boss full my drank like the bed son 't bandes cars and lett the move you like that i want that one till we hew to prout

Diversity:  1.2
Ge

  predictions = np.log(predictions) / temperature


Generated:  ld bring them hoes and bros with us and baby you can roll roll with us yeah roll roll with us and baby you can roll roll with us and baby you can roll roll with us and baby you can roll maybe you can roll roll with us yeah roll roll with us and baby you can roll roll with us and baby you can roll maybe you can roll roll with us yeah roll roll with us and baby you can roll maybe youg lost me like y

Diversity:  1.0
Generating with seed: 'now bitches you shou'
Generated:  ld bring them hoes and bros with us all dodin ' what for egco opand in to to yourprerse that's just like that selfin' like i i i my trick my bottom bitch don't gin turn my lusbrsy with you ayy all shit crock like that it addice night with hure should be let we freak awon't toke niggas your pats now bitch ibcives one know i goun raoor goodnieg needer something bitches i'ma dirt eath she ain't got i

Diversity:  1.2
Generating with seed: 'now bitches you shou'
Generated:  ldn speed so grinat baby up and in pri