<a href="https://colab.research.google.com/github/bmwise14/NES_Salad/blob/main/240504_SampleModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import pandas as pd

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [None]:
# Upload nes_titles.csv (the file read by pd.read_csv in the next cell) into the Colab session
from google.colab import files
uploaded = files.upload()

Saving nes_titles.csv to nes_titles.csv


In [None]:
# Read the uploaded catalogue and collect the 'Title' column as a plain Python list.
df = pd.read_csv("nes_titles.csv")
game_titles = [title for title in df["Title"]]

In [None]:
# Tokenize the game titles (whitespace-separated words -> integer indices).
#
# Index 0 is reserved for padding. pad_sequence() fills short rows with 0 by
# default, so if the first real word were also given index 0 it would be
# indistinguishable from padding (and would be printed wherever padding is
# decoded back to text).
PAD_TOKEN = "<pad>"
word_to_index = {PAD_TOKEN: 0}
index_to_word = {0: PAD_TOKEN}
for title in game_titles:
    for word in title.split():
        if word not in word_to_index:
            index = len(word_to_index)
            word_to_index[word] = index
            index_to_word[index] = word

# Convert titles to sequences of tokens
sequences = [[word_to_index[word] for word in title.split()] for title in game_titles]

# Pad sequences to ensure equal length
max_seq_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequence(
    # dtype is explicit so a title with no words still yields an integer tensor
    # (torch.tensor([]) would otherwise default to float).
    [torch.tensor(seq, dtype=torch.long) for seq in sequences],
    batch_first=True,
    padding_value=word_to_index[PAD_TOKEN],
)

In [None]:
# Vocabulary size: number of entries in the word -> index mapping.
len(word_to_index)

2710

# PyTorch

In [None]:
# Define dataset and dataloader
class TitlesDataset(Dataset):
    """Next-word prediction pairs built from a 2-D tensor of token indices.

    Each item is a row of ``sequences`` split into an input (every column but
    the last) and a target (every column but the first), i.e. the same row
    shifted left by one position.
    """

    def __init__(self, sequences):
        # Indexed as sequences[row, column_slice] in __getitem__.
        self.sequences = sequences

    def __len__(self):
        """Number of rows (titles) in the dataset."""
        n_rows = len(self.sequences)
        return n_rows

    def __getitem__(self, idx):
        """Return ``(inputs, targets)`` for row ``idx``."""
        all_but_last = slice(None, -1)
        all_but_first = slice(1, None)
        inputs = self.sequences[idx, all_but_last]
        targets = self.sequences[idx, all_but_first]
        return inputs, targets

# Wrap the padded title tensor and iterate over it in shuffled mini-batches of 6.
dataset = TitlesDataset(sequences=padded_sequences)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=6)

In [None]:
# Define LSTM model architecture
class LSTMModel(nn.Module):
    """Word-level language model: embedding -> LSTM -> linear projection.

    Args:
        vocab_size: number of word indices; sets the embedding table size and
            the width of the output scores.
        embedding_dim: size of each word embedding vector.
        hidden_dim: size of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x):
        """Score every vocabulary entry at every position of ``x``.

        For an index tensor of shape (batch, seq_len) the result has shape
        (batch, seq_len, vocab_size). The LSTM's final hidden/cell state is
        discarded.
        """
        hidden_states, _final_state = self.lstm(self.embedding(x))
        return self.fc(hidden_states)

In [None]:
# Define hyperparameters
num_epochs = 100                 # passes over the dataloader in the training loop
hidden_dim = 100                 # LSTM hidden-state size
embedding_dim = 512              # length of each word embedding vector
vocab_size = len(word_to_index)  # one embedding row / output score per vocabulary entry

In [None]:
# Initialize model, loss function, and optimizer
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network first, then move its parameters to the chosen device.
model = LSTMModel(vocab_size=vocab_size, embedding_dim=embedding_dim, hidden_dim=hidden_dim)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Train the model
for epoch in range(num_epochs):
    running_loss = 0.0  # sum of the per-batch losses seen in this epoch
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Gradients accumulate across backward() calls, so clear them per batch.
        optimizer.zero_grad()
        outputs = model(inputs)

        # The model emits (batch, seq, vocab); the loss expects the class axis
        # at dim 1, hence the swap of the last two axes.
        loss = criterion(torch.transpose(outputs, 1, 2), targets)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss}")

Epoch 1/100, Loss: 390.63046169281006
Epoch 2/100, Loss: 244.49286526441574
Epoch 3/100, Loss: 214.48400661349297
Epoch 4/100, Loss: 187.66015687584877
Epoch 5/100, Loss: 161.30525560677052
Epoch 6/100, Loss: 135.68534918129444
Epoch 7/100, Loss: 111.12115539610386
Epoch 8/100, Loss: 89.05245089530945
Epoch 9/100, Loss: 69.68471380323172
Epoch 10/100, Loss: 54.260032491758466
Epoch 11/100, Loss: 43.05357524752617
Epoch 12/100, Loss: 35.280125848948956
Epoch 13/100, Loss: 30.034081391990185
Epoch 14/100, Loss: 26.143187245354056
Epoch 15/100, Loss: 23.141496011056006
Epoch 16/100, Loss: 20.867516248486936
Epoch 17/100, Loss: 18.987366657704115
Epoch 18/100, Loss: 17.55054520815611
Epoch 19/100, Loss: 16.20929110608995
Epoch 20/100, Loss: 15.263709285878576
Epoch 21/100, Loss: 14.430420184507966
Epoch 22/100, Loss: 13.83391175745055
Epoch 23/100, Loss: 13.332712145522237
Epoch 24/100, Loss: 12.859432470751926
Epoch 25/100, Loss: 12.468856894178316
Epoch 26/100, Loss: 12.147306252270937
E

In [None]:
def generate_title(model, seed_titles, max_length, top_n=5):
    """Extend each seed title word by word and print the result.

    For every seed the model is repeatedly fed the whole sequence built so
    far, and the next word is drawn uniformly at random from the ``top_n``
    highest-scoring vocabulary entries. ``top_n=1`` therefore gives greedy
    (argmax) decoding.

    Generation for a seed stops after ``max_length`` added words, or as soon
    as index 0 is drawn. Index 0 is the value ``pad_sequence`` filled in after
    the end of shorter training titles, so it acts as the end-of-title marker
    and is not included in the printed text.

    Relies on the notebook globals ``word_to_index``, ``index_to_word`` and
    ``device``.

    Args:
        model: callable mapping a (1, seq_len) tensor of word indices to
            scores of shape (1, seq_len, vocab_size).
        seed_titles: iterable of strings; each is split on whitespace and
            every word must already be in ``word_to_index``.
        max_length: maximum number of words appended to each seed.
        top_n: number of top-scoring candidates to sample from; clamped to
            the vocabulary size.

    Raises:
        KeyError: if a seed contains a word that is not in the vocabulary.
        ValueError: if ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n}")

    with torch.no_grad():
        for seed_title in seed_titles:
            seed_sequence = [word_to_index[word] for word in seed_title.split()]
            for _ in range(max_length):
                input_tensor = torch.tensor(seed_sequence, dtype=torch.long).unsqueeze(0).to(device)
                output = model(input_tensor)
                # Raw scores for the word that follows the last position.
                last_token_scores = output[0, -1, :]
                # topk raises if k exceeds the number of entries, so clamp it.
                k = min(top_n, last_token_scores.shape[-1])
                _, top_indices = torch.topk(last_token_scores, k)
                # Keep the candidates 1-D even when k == 1: given a 0-d array,
                # np.random.choice samples from range(value) instead of
                # returning the value itself.
                candidates = top_indices.reshape(-1).cpu().numpy()
                sampled_index = int(np.random.choice(candidates))
                if sampled_index == 0:
                    # End-of-title marker: stop without adding it to the text.
                    break
                seed_sequence.append(sampled_index)
            generated_title = ' '.join(index_to_word[index] for index in seed_sequence)
            print("Generated Title:", generated_title)


In [None]:
# Seed titles for generation
seed_titles = [
    "Adventures", "Die", "Boy", "Tom",
    "Zelda", "Mermaid", "Jerry", "Dragon",
]

# Generate title variations (up to 8 extra words per seed)
generate_title(model, seed_titles, max_length=8)

Generated Title: Adventures 2: The Magic Kingdom Off 2: Ninja '89
Generated Title: Die Shanghai II II: The Demon Darkness '92 Red
Generated Title: Boy and 5 Nin Uchi 2 + '89
Generated Title: Tom Ball Z '89
Generated Title: Zelda Adventure Island 3 Takahashi Igo '89
Generated Title: Mermaid Kid Kool Nekketsu Kimi 2: The New Jump
Generated Title: Jerry 2 Dennou Kyuusei no DaiboukenJP II '89
Generated Title: Dragon Buster '89
