<a href="https://colab.research.google.com/github/efekaanefe/Rapper-AI/blob/main/rapper_ai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torch.autograd import Variable
import torch.nn.functional as F


In [2]:
with open('eminem-lyrics.txt', 'r') as f:
    text = f.read()

words = text.split()

len(words)

102390

## Word level inputs and targets

In [6]:
words = text.split()
unique_words = sorted(list(set(words)))
vocab_size = len(unique_words)
print(f'Unique words/vocabulary size: {vocab_size}')

# Create mappings from words to indices and vice versa
word_to_idx = {word: idx for idx, word in enumerate(unique_words)}
idx_to_word = {idx: word for idx, word in enumerate(unique_words)}

# Encode the text using word-level vocabulary
encoded_text = np.array([word_to_idx[word] for word in words])

seq_length = 20  # Adjust sequence length as needed for words
num_samples = len(encoded_text) // seq_length

input_sequences = []
target_sequences = []

for i in range(num_samples):
    start_idx = i * seq_length
    end_idx = start_idx + seq_length

    input_seq = encoded_text[start_idx:end_idx]
    target_seq = encoded_text[start_idx + 1:end_idx + 1]

    # Pad if the sequence is shorter than seq_length
    if len(input_seq) < seq_length:
        input_seq = np.pad(input_seq, (0, seq_length - len(input_seq)), 'constant', constant_values=0)  # Pad with zeros
    if len(target_seq) < seq_length:
        target_seq = np.pad(target_seq, (0, seq_length - len(target_seq)), 'constant', constant_values=0)  # Pad with zeros

    input_sequences.append(input_seq)
    target_sequences.append(target_seq)

input_sequences = torch.tensor(np.array(input_sequences), dtype=torch.long)
target_sequences = torch.tensor(np.array(target_sequences), dtype=torch.long)

print(input_sequences.shape, target_sequences.shape, encoded_text.shape)

print(f"1st input sequence: \n{input_sequences[0]}", end="\n\n")
print("Input sequence as words:")
print(' '.join([idx_to_word[int(i)] for i in input_sequences[0]]), end="\n\n")
print("Target sequence as words:")
print(' '.join([idx_to_word[int(i)] for i in target_sequences[0]]))


Unique words/vocabulary size: 14734
torch.Size([5119, 20]) torch.Size([5119, 20]) (102390,)
1st input sequence: 
tensor([ 2823, 14612, 13434,  8898,  1632,  4696,  4703, 13907,  8759, 13351,
        10033,  4591,  2841, 13521,  7622, 14678, 10020,  7759,  9871, 14215])

Input sequence as words:
Oh yeah, this is Eminem baby, back up in that motherfucking ass One time for your mother fucking mind, we

Target sequence as words:
yeah, this is Eminem baby, back up in that motherfucking ass One time for your mother fucking mind, we represent


## Character level inputs and targets

In [None]:
# Create a character-level vocabulary
chars = sorted(list(set(text)))
char_size = len(chars)
print(f'Unique characters size: {char_size}')

# Create mappings from characters to indices and vice versa
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for idx, char in enumerate(chars)}

encoded_text = np.array([char_to_idx[char] for char in text])

seq_length = 100 # Sequence length
num_samples = len(encoded_text) // seq_length

input_sequences = []
target_sequences = []

for i in range(num_samples):
    start_idx = i * seq_length
    end_idx = start_idx + seq_length
    input_sequences.append(encoded_text[start_idx:end_idx])
    target_sequences.append(encoded_text[start_idx + 1:end_idx + 1])

input_sequences = torch.tensor(np.array(input_sequences), dtype=torch.long)
target_sequences = torch.tensor(np.array(target_sequences), dtype=torch.long)

print(input_sequences.shape, target_sequences.shape, encoded_text.shape)

print(f"1st input sequence: \n{input_sequences[0]}", end="\n\n")
print("Input sequence as words:")
print(''.join([idx_to_char[int(i)] for i in input_sequences[0]]), end="\n\n")
print("Target sequence as words:")
print(''.join([idx_to_char[int(i)] for i in target_sequences[0]]))


Unique characters size: 95
torch.Size([5225, 100]) torch.Size([5225, 100]) (522527,)
1st input sequence: 
tensor([44, 66,  1, 83, 63, 59, 66, 13,  1, 78, 66, 67, 77,  1, 67, 77,  1, 34,
        71, 67, 72, 63, 71,  1, 60, 59, 60, 83, 13,  1, 60, 59, 61, 69,  1, 79,
        74,  1, 67, 72,  1, 78, 66, 59, 78,  1, 71, 73, 78, 66, 63, 76, 64, 79,
        61, 69, 67, 72, 65,  1, 59, 77, 77,  0, 44, 72, 63,  1, 78, 67, 71, 63,
         1, 64, 73, 76,  1, 83, 73, 79, 76,  1, 71, 73, 78, 66, 63, 76,  1, 64,
        79, 61, 69, 67, 72, 65,  1, 71, 67, 72])

Input sequence as words:
Oh yeah, this is Eminem baby, back up in that motherfucking ass
One time for your mother fucking min

Target sequence as words:
h yeah, this is Eminem baby, back up in that motherfucking ass
One time for your mother fucking mind


## Model

In [4]:
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out.reshape(out.size(0) * out.size(1), out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        return (weight.new_zeros(num_layers, batch_size, hidden_size),
                weight.new_zeros(num_layers, batch_size, hidden_size))


## Training

In [7]:
# Hyperparameters
embed_size = 512*4
hidden_size = 256
num_layers = 3
num_epochs = 500
learning_rate = 0.001
batch_size = 64*4

model = LSTMModel(vocab_size, embed_size, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [8]:
# Training loop
for epoch in range(num_epochs):
    hidden = model.init_hidden(batch_size)
    total_loss = 0

    for i in range(0, input_sequences.size(0) - batch_size, batch_size):
        inputs = input_sequences[i:i+batch_size]
        targets = target_sequences[i:i+batch_size]

        hidden = tuple([h.detach() for h in hidden])

        # Forward
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets.view(-1))

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / (input_sequences.size(0) // batch_size)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')


Epoch [1/500], Loss: 8.3558
Epoch [2/500], Loss: 7.3229
Epoch [3/500], Loss: 7.2493
Epoch [4/500], Loss: 7.2262
Epoch [5/500], Loss: 7.2220
Epoch [6/500], Loss: 7.2203
Epoch [7/500], Loss: 7.2199
Epoch [8/500], Loss: 7.2191
Epoch [9/500], Loss: 7.2186
Epoch [10/500], Loss: 7.2182
Epoch [11/500], Loss: 7.2190
Epoch [12/500], Loss: 7.2175
Epoch [13/500], Loss: 7.2170
Epoch [14/500], Loss: 7.2166
Epoch [15/500], Loss: 7.2165
Epoch [16/500], Loss: 7.2159
Epoch [17/500], Loss: 7.2150
Epoch [18/500], Loss: 7.2131
Epoch [19/500], Loss: 7.2101
Epoch [20/500], Loss: 7.2035
Epoch [21/500], Loss: 7.1939
Epoch [22/500], Loss: 7.1779
Epoch [23/500], Loss: 7.1505
Epoch [24/500], Loss: 7.1157
Epoch [25/500], Loss: 7.0787
Epoch [26/500], Loss: 7.0426
Epoch [27/500], Loss: 7.0183
Epoch [28/500], Loss: 6.9964
Epoch [29/500], Loss: 6.9724
Epoch [30/500], Loss: 6.9305
Epoch [31/500], Loss: 6.8873
Epoch [32/500], Loss: 6.8502
Epoch [33/500], Loss: 6.8149
Epoch [34/500], Loss: 6.7969
Epoch [35/500], Loss: 6

### Word level predicition

In [28]:
# save the model
torch.save(model.state_dict(), 'model.pth')

# load the model
model = LSTMModel(vocab_size, embed_size, hidden_size, num_layers)
model.load_state_dict(torch.load('model.pth'))


In [30]:
# @title Default title text
def generate_text_word_level(model, start_str, word_to_idx, idx_to_word, num_generate=50, temperature=1.0):
    model.eval()

    # Tokenize the start string
    start_tokens = start_str.split()
    input_eval = torch.tensor([word_to_idx[word] for word in start_tokens], dtype=torch.long).unsqueeze(0)
    hidden = model.init_hidden(1)  # Reset the hidden state

    generated_text = start_tokens[:]  # Initialize with the start tokens

    with torch.no_grad():
        for _ in range(num_generate):
            output, hidden = model(input_eval, hidden)

            # Apply temperature and get the predicted word index
            output = output / temperature
            predicted_idx = torch.multinomial(torch.softmax(output[-1], dim=0), num_samples=1).item()

            # Update input for the next iteration
            input_eval = torch.tensor([[predicted_idx]], dtype=torch.long)

            # Add the predicted word to the generated text
            generated_text.append(idx_to_word[predicted_idx])

    return ' '.join(generated_text)  # Join the words back into a string

start_str = "Oh yeah, this is Eminem baby"
generated_text = generate_text_word_level(model, start_str, word_to_idx, idx_to_word, num_generate=50)
print(generated_text)

Oh yeah, this is Eminem baby 'Cause there daddy's shot that shit up around I'll take no picture now his hands But your hands in the rock and sit is back to the damn bit down, it back for my ass, if it's over, we know on this Earth And shoot no man over steam Sit


### Character level predicition


In [None]:
def generate_text_char_level(model, start_str, char_to_idx, idx_to_char, num_generate=100, temperature=1.0):
    model.eval()
    input_eval = torch.tensor([char_to_idx[c] for c in start_str], dtype=torch.long).unsqueeze(0)
    hidden = model.init_hidden(1)

    generated_text = start_str

    with torch.no_grad():
        for _ in range(num_generate):
            output, hidden = model(input_eval, hidden)
            output = output / temperature
            predicted_idx = torch.multinomial(torch.softmax(output[-1], dim=0), num_samples=1).item()
            input_eval = torch.tensor([[predicted_idx]], dtype=torch.long)
            generated_text += idx_to_char[predicted_idx]

    return generated_text

start_str = "I am "
generated_text = generate_text_char_level(model, start_str, char_to_idx, idx_to_char, num_generate=300)
print(generated_text)
