<a href="https://colab.research.google.com/github/chloe-wong/projects/blob/main/pokemon_name_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import requests

device = "cuda:0" if torch.cuda.is_available() else "cpu"
print (device)

cuda:0


In [3]:
#getting data

result = requests.get("https://pokeapi.co/api/v2/pokemon/?limit=2000").json()
keys = result['results']
nums = result['count']
names = [keys[x]['name'] for x in range(nums)]
print(names)

['bulbasaur', 'ivysaur', 'venusaur', 'charmander', 'charmeleon', 'charizard', 'squirtle', 'wartortle', 'blastoise', 'caterpie', 'metapod', 'butterfree', 'weedle', 'kakuna', 'beedrill', 'pidgey', 'pidgeotto', 'pidgeot', 'rattata', 'raticate', 'spearow', 'fearow', 'ekans', 'arbok', 'pikachu', 'raichu', 'sandshrew', 'sandslash', 'nidoran-f', 'nidorina', 'nidoqueen', 'nidoran-m', 'nidorino', 'nidoking', 'clefairy', 'clefable', 'vulpix', 'ninetales', 'jigglypuff', 'wigglytuff', 'zubat', 'golbat', 'oddish', 'gloom', 'vileplume', 'paras', 'parasect', 'venonat', 'venomoth', 'diglett', 'dugtrio', 'meowth', 'persian', 'psyduck', 'golduck', 'mankey', 'primeape', 'growlithe', 'arcanine', 'poliwag', 'poliwhirl', 'poliwrath', 'abra', 'kadabra', 'alakazam', 'machop', 'machoke', 'machamp', 'bellsprout', 'weepinbell', 'victreebel', 'tentacool', 'tentacruel', 'geodude', 'graveler', 'golem', 'ponyta', 'rapidash', 'slowpoke', 'slowbro', 'magnemite', 'magneton', 'farfetchd', 'doduo', 'dodrio', 'seel', 'dew

In [None]:
#unique characters and count 
def split(item): 
  return [char for char in item]
chars = []
for item in names:
  chars += split(item) 
chars.append(' ')
chars = set(chars)
vocab_size = len(chars)
data_size = sum([len(item) for item in names])

print ('Unique characters:', chars)
print ('Length of Unique characters:', vocab_size)
print ('Number of characters in data:', data_size)

Unique characters: {'e', 'f', 'a', 'd', 'k', 'r', 'w', 'p', 's', 'g', 'n', 'l', '2', '0', 'i', 't', 'o', '1', 'y', 'z', '-', 'x', 'u', 'b', ' ', 'j', 'm', 'v', 'q', '5', 'c', 'h'}
Length of Unique characters: 32
Number of characters in data: 10590


In [None]:
char2id = {ch:i for i, ch in enumerate(chars)}
id2char = {i:ch for i, ch in enumerate(chars)}

print ('Characters to id\n')
print (char2id)

Characters to id

{'e': 0, 'f': 1, 'a': 2, 'd': 3, 'k': 4, 'r': 5, 'w': 6, 'p': 7, 's': 8, 'g': 9, 'n': 10, 'l': 11, '2': 12, '0': 13, 'i': 14, 't': 15, 'o': 16, '1': 17, 'y': 18, 'z': 19, '-': 20, 'x': 21, 'u': 22, 'b': 23, ' ': 24, 'j': 25, 'm': 26, 'v': 27, 'q': 28, '5': 29, 'c': 30, 'h': 31}


In [None]:
#standardise length of each "word"
longest = 26
augmented = [(split(item) + [' ']*(26 - len(item))) for item in names]
words = []
nextchars = []

for item in augmented: 
  temp = [char2id[ch] for ch in item]
  temp2 = temp[1:]
  temp2.append(30)
  words.append(temp)
  nextchars.append(temp2)

print(words[0])
print(nextchars[0])

[23, 22, 11, 23, 2, 8, 2, 22, 5, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24]
[22, 11, 23, 2, 8, 2, 22, 5, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 30]


In [None]:
def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor """
    return np.eye(num_classes, dtype='float32')[y]

In [None]:
X = np.zeros((len(words), 26, vocab_size), dtype=np.float32)
one_hot = [to_categorical(c, num_classes=vocab_size) for i in range(len(words)) for c in words[i]]
X = np.array(one_hot).reshape(X.shape)
print (X[0], X.shape)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 

In [None]:
Y = np.zeros((len(nextchars), 26), dtype=np.float32)
Y = np.array(nextchars).reshape(Y.shape)
print (Y[0], Y.shape)

[22 11 23  2  8  2 22  5 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24
 24 30] (1154, 26)


In [None]:
train_x, val_x, train_y, val_y = train_test_split(X, Y, test_size=0.05)
print ('Training:', train_x.shape, train_y.shape)
print ('Validation:', val_x.shape, val_y.shape)

Training: (1096, 26, 32) (1096, 26)
Validation: (58, 26, 32) (58, 26)


In [None]:
class CharRNN(nn.Module):
  
    def __init__(self, batch_size=128, n_hidden=512, n_layers=2, drop_prob=0.5):
      
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
      
        self.dropout = nn.Dropout(drop_prob)
        self.lstm = nn.LSTM(vocab_size, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
        self.fc = nn.Linear(n_hidden, vocab_size)
        
        self.init_weights()

    def forward(self, x, hc):
        ''' Forward pass through the network '''
        
        x, (h, c) = self.lstm(x, hc)
        x = self.dropout(x)
        
        # Stack up LSTM outputs
        x = x.reshape(x.size()[0]*x.size()[1], self.n_hidden)
        
        x = self.fc(x)
        
        return x, (h, c)

    def predict(self, char, h=None, top_k=None):
        ''' Given a character, predict the next character.
        
            Returns the predicted character and the hidden state.
        '''
        
        if h is None:
            h = self.init_hidden(1)
        
        x = np.array([[char2id[char]]])
        x = to_categorical(x, num_classes=vocab_size)
        
        with torch.no_grad():
            h = tuple([Variable(each.data) for each in h])
            inputs = Variable(torch.from_numpy(x))
        inputs = inputs.to(device)
        
        out, h = self.forward(inputs, h)
        p = F.softmax(out, dim=1).data
        p = p.to(device)
        
        if top_k is None:
            top_ch = np.arange(vocab_size)
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.cpu().numpy().squeeze()

        p = p.cpu().numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
            
        return id2char[char], h
    
    def init_weights(self):
        ''' Initialize weights for fully connected layer '''
      
        # Set bias tensor to all zeros
        self.fc.bias.data.fill_(0)
        # FC weights as random uniform
        self.fc.weight.data.uniform_(-1, 1)
        
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x n_seqs x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        return (Variable(weight.new(self.n_layers, batch_size, self.n_hidden).zero_()),
                Variable(weight.new(self.n_layers, batch_size, self.n_hidden).zero_()))
    

In [None]:
net = CharRNN(n_hidden=512, n_layers=2)
net.to(device)

CharRNN(
  (dropout): Dropout(p=0.5, inplace=False)
  (lstm): LSTM(32, 512, num_layers=2, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=512, out_features=32, bias=True)
)

In [None]:
opt = torch.optim.Adam(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
def train(net, train_x, train_y, val_x, val_y, opt, criterion, epochs=25, batch_size=128, maxlen=26, clip=5, print_every=10):
    
    net.train()
    counter = 0
    
    for e in range(epochs):
        
        h = net.init_hidden(batch_size)
        
        for k in range(0, train_x.shape[0]-batch_size, batch_size):
            
            counter += 1
            # batch data and convert to torch tensors
            x, y = train_x[k : k+batch_size], train_y[k : k+batch_size]
            x, y = torch.from_numpy(x), torch.from_numpy(y)
            
            
            inputs, targets = Variable(x), Variable(y)
            inputs, targets = inputs.to(device), targets.to(device)
            #print (counter, inputs.shape, targets.shape)
            
            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([Variable(each.data) for each in h])

            net.zero_grad()
            output, h = net.forward(inputs, h)
            loss = criterion(output, targets.view(batch_size*maxlen))
            loss.backward()
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)

            opt.step()
            
            if counter % print_every == 0:
                
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                
                for k in range(0, val_x.shape[0]-batch_size, batch_size):
                  
                    # One-hot encode our data and make them Torch tensors
                    x, y = val_x[k : k+batch_size], val_y[k : k+batch_size]
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
            
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    with torch.no_grad():
                        val_h = tuple([Variable(each.data) for each in val_h])
                        inputs, targets = Variable(x), Variable(y)
                    
                    inputs, targets = inputs.to(device), targets.to(device)

                    output, val_h = net.forward(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*maxlen))
                
                    val_losses.append(val_loss.item())
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [None]:
train(net, train_x, train_y, val_x, val_y, opt=opt, criterion=criterion, epochs=25,
      batch_size=128, maxlen=26, clip=5, print_every=10)

Epoch: 2/25... Step: 10... Loss: 1.6169... Val Loss: nan
Epoch: 3/25... Step: 20... Loss: 1.3799... Val Loss: nan
Epoch: 4/25... Step: 30... Loss: 1.2300... Val Loss: nan
Epoch: 5/25... Step: 40... Loss: 1.3372... Val Loss: nan
Epoch: 7/25... Step: 50... Loss: 1.3525... Val Loss: nan
Epoch: 8/25... Step: 60... Loss: 1.2412... Val Loss: nan
Epoch: 9/25... Step: 70... Loss: 1.1364... Val Loss: nan
Epoch: 10/25... Step: 80... Loss: 1.1500... Val Loss: nan
Epoch: 12/25... Step: 90... Loss: 1.3410... Val Loss: nan
Epoch: 13/25... Step: 100... Loss: 1.1709... Val Loss: nan
Epoch: 14/25... Step: 110... Loss: 1.0779... Val Loss: nan
Epoch: 15/25... Step: 120... Loss: 1.0857... Val Loss: nan
Epoch: 17/25... Step: 130... Loss: 1.1907... Val Loss: nan
Epoch: 18/25... Step: 140... Loss: 1.0924... Val Loss: nan
Epoch: 19/25... Step: 150... Loss: 1.0751... Val Loss: nan
Epoch: 20/25... Step: 160... Loss: 1.0419... Val Loss: nan
Epoch: 22/25... Step: 170... Loss: 1.1378... Val Loss: nan
Epoch: 23/25.

In [None]:
def sample(net, size, prime="a", top_k=None):
  net.eval()
  chars = [ch for ch in prime]
  h = net.init_hidden(1)
  for ch in prime: 
    char, h = net.predict(ch, h, top_k=top_k)
  chars.append(char)

  for i in range(size):
    char, h = net.predict(chars[-1], h, top_k=top_k)
    chars.append(char)
  
  return ''.join(chars)

In [None]:
print(sample(net, 8, prime="sh", top_k=5))

sheranat   


In [None]:
torch.save(net, '/content/poke_name_gen.pt')