In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
class DinoDataset(Dataset):
    
    def __init__(self):
        
        # load input file content: one dino name per line
        file_content = open("./data/dinos.txt").read().lower()
        self.data = file_content.split("\n")
        
        # note: build chars from file_content so that it includes "\n"
        self.chars = sorted(set(file_content))
        self.vocab_size = len(self.chars)
        
        # to create a OneHotEncoder manually
        self.char_to_ix = { ch:i for i,ch in enumerate(self.chars) }
        self.ix_to_char = { i:ch for i,ch in enumerate(self.chars) }
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        
        # every training example `name` consists of `len_name` + 1
        # input characters and `len_name` + 1 output characters
        name = self.data[index]
        len_name = len(name)
        
        # input X is $x_0=\vec{0}$ and the chars from `name`
        X = torch.zeros((1 + len_name, self.vocab_size))
        for ii, char in enumerate(list(name)):
            X[ii + 1, self.char_to_ix[char]] = 1
        
        # output Y is the 1-shifted chars from `name` and a final "\n"
        Y = torch.zeros((len_name + 1, self.vocab_size))
        Y[:-1, :] = X[1:, :]
        Y[-1, self.char_to_ix["\n"]] = 1 
        
        return X, Y

In [3]:
class RNN(nn.Module):
    """
        An RNN model that consists of
        1 RNN layer, followed by 1 linear layer
        (assumes batch_first = True in x)
    """
    
    def __init__(self, in_size, h_size, out_size):
        
        super().__init__()
        
        # hyperparameters
        self.hidden_size = h_size
        
        # parameters
        self.rnn = nn.RNN(in_size, h_size, batch_first=True)
        self.linear = nn.Linear(h_size, out_size)
        
    def _init_hidden(self, batch_size):
        """initialize hidden cell states"""
        return torch.zeros((batch_size, 1, self.hidden_size))
    
    def forward(self, x):
        
        batch_size = x.shape[0]
        hidden = self._init_hidden(batch_size)
        #print(x.shape, hidden.shape)
        out_h, _ = self.rnn(x, hidden)
        out_l = self.linear(out_h.view(-1, self.hidden_size))
        return out_l
        

In [4]:
def get_sample(sample_ix, ix_to_char):
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    txt = txt[0].upper() + txt[1:]  # capitalize first character 
    return txt

def sample(model, seed, max_length=50):
    """
    Sample a sequence of characters according to a sequence of probability distributions output of the RNN

    Arguments:
        model 
        seed
        max_length -- maximum number of characters generated for each name
    
    Returns:
        indices -- A list of the indices of the sampled characters.
    """
    
    model.eval()
    vocab_size = len(model.state_dict()["linear.bias"])
    n_a = model.state_dict()["linear.weight"].shape[1]
    
    # Step 1: Create the a zero vector x that can be used as the one-hot vector 
    # Representing the first character (initializing the sequence generation). (≈1 line) 
    x = torch.zeros((1, 1, vocab_size))
    
    # Step 1': Initialize a_prev as zeros (≈1 line)
    a_prev = torch.zeros((n_a, 1))
    
    # Create an empty list of indices. This is the list which will contain the list of indices of the characters to generate (≈1 line)
    indices = []
    
    # idx is the index of the one-hot vector x that is set to 1
    # All other positions in x are zero.
    # Initialize idx to -1
    idx = -1
    
    # Loop over time-steps t. At each time-step:
    # Sample a character from a probability distribution 
    # And append its index (`idx`) to the list "indices".
    # Stop if you reach a new line character or `max_length` characters 
    counter = 0
    newline_char_idx = train_data.char_to_ix['\n']
    
    while (idx != newline_char_idx and counter != max_length):
        
        # Step 2: Forward propagate x
        yhat = model(x)
        probs = torch.softmax(yhat, dim=1).detach().numpy().ravel()
        
        # for grading purposes
        np.random.seed(counter + seed) 
        
        # Step 3: Sample the index of a character within the vocabulary 
        # from the probability distribution 
        idx = np.random.choice(range(train_data.vocab_size), p=probs)

        # Append the index to "indices"
        indices.append(idx)
        
        # Step 4: Overwrite the input x with one 
        # that corresponds to the sampled index `idx`.
        x = torch.zeros((1, 1, vocab_size))
        x[0, 0, idx] = 1
        
        # for grading purposes
        seed += 1
        counter += 1
    
    if (counter == max_length):
        indices.append(newline_char_idx)
    
    return indices

In [5]:
# def train()
def optimize(model, train_loader, num_epochs, 
             loss_fn, optimizer, 
             clip_value=5, num_dino_names_to_sample=7):
    
    for epoch in range(num_epochs):
        running_loss = 0.
        
        for X, Y in train_loader:
            optimizer.zero_grad()

            # forward propagate through time
            Yhat = model(X)
            loss = loss_fn(Yhat, Y[0])
            
            # backpropagate through time
            loss.backward()
            
            # clip
            nn.utils.clip_grad_value_(model.parameters(), clip_value)
            
            # update parameters
            optimizer.step()
            
            # training loss after each iteration
            running_loss += loss.item()
        
        # training loss after each epoch
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1} / {num_epochs}], Loss: {epoch_loss:.4f}")
        
        # validate
        if epoch % 10 == 0:
            seed = 0
            for name in range(num_dino_names_to_sample):
                sampled_indices = sample(model, seed)
                last_dino_name = get_sample(sampled_indices, train_data.ix_to_char)
                print(last_dino_name.replace('\n', ''))
                seed += 1  # To get the same result (for grading purposes), increment the seed by one. 
            print('\n')

In [6]:
# data
train_data = DinoDataset()
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)
#test_loader = DataLoader(train_data, batch_size=1, shuffle=True)

# model
in_size = train_data.vocab_size
h_size = 50
model = RNN(in_size, h_size, in_size)

# loss
loss_fn = nn.CrossEntropyLoss()

# optimizer
learning_rate = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [7]:
num_epochs = 1000
optimize(model, train_loader, num_epochs, loss_fn, optimizer)

Epoch [1 / 1000], Loss: 2.8055
Njyvusandpodygrpuasijius
Jnda
Kyvusandpodygrpuasijius
Nda
Yvusandpodygrpuasijius
Ea
Vusandpodygrpuasijius


Epoch [2 / 1000], Loss: 2.4221
Epoch [3 / 1000], Loss: 2.2258
Epoch [4 / 1000], Loss: 2.1346
Epoch [5 / 1000], Loss: 2.0741
Epoch [6 / 1000], Loss: 2.0274
Epoch [7 / 1000], Loss: 1.9937
Epoch [8 / 1000], Loss: 1.9662
Epoch [9 / 1000], Loss: 1.9441
Epoch [10 / 1000], Loss: 1.9241
Epoch [11 / 1000], Loss: 1.9070
Mgytspcidoraweshtasamespantfzhbamakvafecihafpctokp
Jlba
Kytspcidoraweshtasamespantfzhbamakvafecihafpctokpn
Macadrtdacitgtddivlcbzalosacusdcjdbopthceqpmapdsjl
Ytspcidoraweshtasamespantfzhbamakvafecihafpctokpnm
Ca
Tspcidoraweshtasamespantfzhbamakvafecihafpctokpnma


Epoch [12 / 1000], Loss: 1.8911
Epoch [13 / 1000], Loss: 1.8767
Epoch [14 / 1000], Loss: 1.8635
Epoch [15 / 1000], Loss: 1.8513
Epoch [16 / 1000], Loss: 1.8400
Epoch [17 / 1000], Loss: 1.8297
Epoch [18 / 1000], Loss: 1.8213
Epoch [19 / 1000], Loss: 1.8110
Epoch [20 / 1000], Loss: 1.8

Epoch [127 / 1000], Loss: 1.5033
Epoch [128 / 1000], Loss: 1.5016
Epoch [129 / 1000], Loss: 1.5009
Epoch [130 / 1000], Loss: 1.4988
Epoch [131 / 1000], Loss: 1.4991
Mevossangnggvesptaphamptanodymabbagtadochachkathap
Ingcadrodedoskschcrghax
Jyvosangnggvesptaphamptanodymabbagtadochachkathapp
Macadrodedoskschcrghax
Xsthangnggvesptaphamptanodymabbagtadochachkathapps
Cechoscedoskschcrghax
Tosangnggvesptaphamptanodymabbagtadochachkathappsa


Epoch [132 / 1000], Loss: 1.4962
Epoch [133 / 1000], Loss: 1.4958
Epoch [134 / 1000], Loss: 1.4955
Epoch [135 / 1000], Loss: 1.4962
Epoch [136 / 1000], Loss: 1.4940
Epoch [137 / 1000], Loss: 1.4927
Epoch [138 / 1000], Loss: 1.4909
Epoch [139 / 1000], Loss: 1.4902
Epoch [140 / 1000], Loss: 1.4890
Epoch [141 / 1000], Loss: 1.4876
Mdywthangnggvesptaphamptanodymabadosadochabangrmap
Helabastacepteschcomactagnodesthanggngsphangngngon
Itosqchanggvesptaphamptanodymabadosadochabangrmapp
Macadrodedoskschcomactagnodesthanggngsphangngngong
Ywthangnggvesptaphamptanod

Epoch [279 / 1000], Loss: 1.4061
Epoch [280 / 1000], Loss: 1.4038
Epoch [281 / 1000], Loss: 1.4043
Mbustododrhaveris
Hilabasphabesclbanyphax
Itosphilomaveris
Macadrodfanodrakothactalosacosphimangrbalmingngong
Xisphilomaveris
Dechsphabesclbanyphax
Tosangnggsasps


Epoch [282 / 1000], Loss: 1.4036
Epoch [283 / 1000], Loss: 1.4030
Epoch [284 / 1000], Loss: 1.4052
Epoch [285 / 1000], Loss: 1.4008
Epoch [286 / 1000], Loss: 1.4050
Epoch [287 / 1000], Loss: 1.4017
Epoch [288 / 1000], Loss: 1.4010
Epoch [289 / 1000], Loss: 1.4059
Epoch [290 / 1000], Loss: 1.4028
Epoch [291 / 1000], Loss: 1.4066
Mbustkephongteris
Hilabatodedosclbanyphax
Itrosangnggteris
Macadrodedosclbanyphax
Ystkephongteris
Cechrodedosclbanyphax
Tosangnggteris


Epoch [292 / 1000], Loss: 1.4013
Epoch [293 / 1000], Loss: 1.3993
Epoch [294 / 1000], Loss: 1.4463
Epoch [295 / 1000], Loss: 1.4356
Epoch [296 / 1000], Loss: 1.4076
Epoch [297 / 1000], Loss: 1.4020
Epoch [298 / 1000], Loss: 1.4052
Epoch [299 / 1000], Loss: 1.4000
Epoch

Epoch [442 / 1000], Loss: 1.3719
Epoch [443 / 1000], Loss: 1.3795
Epoch [444 / 1000], Loss: 1.3722
Epoch [445 / 1000], Loss: 1.3720
Epoch [446 / 1000], Loss: 1.3738
Epoch [447 / 1000], Loss: 1.3695
Epoch [448 / 1000], Loss: 1.3695
Epoch [449 / 1000], Loss: 1.3691
Epoch [450 / 1000], Loss: 1.3699
Epoch [451 / 1000], Loss: 1.3672
Mbusprangnggrausperangos
Ingdenisacerusphanyphax
Jusprangnggrausperangos
Macadrsacerusphanyphax
X
Cedosphachusphanyphax
Tos


Epoch [452 / 1000], Loss: 1.3764
Epoch [453 / 1000], Loss: 1.3646
Epoch [454 / 1000], Loss: 1.3691
Epoch [455 / 1000], Loss: 1.3663
Epoch [456 / 1000], Loss: 1.3709
Epoch [457 / 1000], Loss: 1.3712
Epoch [458 / 1000], Loss: 1.3709
Epoch [459 / 1000], Loss: 1.3664
Epoch [460 / 1000], Loss: 1.3620
Epoch [461 / 1000], Loss: 1.3687
Mbusps
Himabausacerusphanyphax
Itrus
Macadrus
Yrus
Cedisphabavesalithactalos
Tos


Epoch [462 / 1000], Loss: 1.3658
Epoch [463 / 1000], Loss: 1.3635
Epoch [464 / 1000], Loss: 1.3650
Epoch [465 / 1000], Loss: 1.3655

Epoch [638 / 1000], Loss: 1.3490
Epoch [639 / 1000], Loss: 1.3502
Epoch [640 / 1000], Loss: 1.3598
Epoch [641 / 1000], Loss: 1.3434
Mesptilodrhaveris
Hima
Itrus
Macachus
X
Cedisphachusphanthactalos
Tos


Epoch [642 / 1000], Loss: 1.3430
Epoch [643 / 1000], Loss: 1.3702
Epoch [644 / 1000], Loss: 1.3683
Epoch [645 / 1000], Loss: 1.3631
Epoch [646 / 1000], Loss: 1.3553
Epoch [647 / 1000], Loss: 1.3494
Epoch [648 / 1000], Loss: 1.3548
Epoch [649 / 1000], Loss: 1.3539
Epoch [650 / 1000], Loss: 1.3529
Epoch [651 / 1000], Loss: 1.3481
Mespthangngfykosphphepskbavevephengrachang
Ing
Jusps
Mba
Yrus
Dedosphachusphanthactakos
Tos


Epoch [652 / 1000], Loss: 1.3493
Epoch [653 / 1000], Loss: 1.3551
Epoch [654 / 1000], Loss: 1.3528
Epoch [655 / 1000], Loss: 1.3472
Epoch [656 / 1000], Loss: 1.3450
Epoch [657 / 1000], Loss: 1.3570
Epoch [658 / 1000], Loss: 1.3500
Epoch [659 / 1000], Loss: 1.3524
Epoch [660 / 1000], Loss: 1.3559
Epoch [661 / 1000], Loss: 1.3499
Messps
Ing
Jusps
Mba
X
Cedosphachusphantha

Epoch [839 / 1000], Loss: 1.3326
Epoch [840 / 1000], Loss: 1.3361
Epoch [841 / 1000], Loss: 1.3441
Mestrmaqdrepthosphonggos
Ing
Juspraqdrepthosphonggos
Mba
X
Deferojachusphanthactalos
Trmaqdrepthosphonggos


Epoch [842 / 1000], Loss: 1.3442
Epoch [843 / 1000], Loss: 1.3519
Epoch [844 / 1000], Loss: 1.3456
Epoch [845 / 1000], Loss: 1.3422
Epoch [846 / 1000], Loss: 1.3423
Epoch [847 / 1000], Loss: 1.3398
Epoch [848 / 1000], Loss: 1.3629
Epoch [849 / 1000], Loss: 1.3421
Epoch [850 / 1000], Loss: 1.3415
Epoch [851 / 1000], Loss: 1.3435
Mestrmas
Ing
Kyros
Mba
X
Deferolachusphapthactamis
Trmas


Epoch [852 / 1000], Loss: 1.3410
Epoch [853 / 1000], Loss: 1.3445
Epoch [854 / 1000], Loss: 1.3449
Epoch [855 / 1000], Loss: 1.3348
Epoch [856 / 1000], Loss: 1.3409
Epoch [857 / 1000], Loss: 1.3383
Epoch [858 / 1000], Loss: 1.3644
Epoch [859 / 1000], Loss: 1.3522
Epoch [860 / 1000], Loss: 1.3488
Epoch [861 / 1000], Loss: 1.3431
Messpraqdrepthosphonggos
Hima
Ityrmaqdrepthosphonggos
Mba
Yros
Cedisphach