In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
from load_dinousars import *
from torch.autograd import Variable

## Load and preprocess the data

In [2]:
chars,data= read_data('data/dinos.txt')

There are 19909 total characters and 27 unique characters in your data.


The characters are a-z (26 characters) plus the "\n" (or newline character), which plays a role similar to the `<EOS>` (or "End of sentence") token.it indicates the end of the dinosaur name rather than the end of a sentence. In the cell below, we create a python dictionary (i.e., a hash table) to map each character to an index from 0-26. We also create a second python dictionary that maps each index back to the corresponding character character. This will help figure out what index corresponds to what character in the probability distribution output of the softmax layer. Below, `char_to_ix` and `ix_to_char` are the python dictionaries. 

In [3]:
char_to_ix={char:i for i,char in enumerate(sorted(chars))}
ix_to_char=np.array(sorted(chars))

### The prediction task

Given a character, or a sequence of characters, what is the most probable next character? This is the task we're training the model to perform. The input to the model will be a sequence of characters, and we train the model to predict the output—the following character at each time step.

Since RNNs maintain an internal state that depends on the previously seen elements, given all the characters computed until this moment, what is the next character?

In [4]:
dinousar_names=read_dinousar_names('data/dinos.txt')
dinousar_names[:5]

['aachenosaurus', 'aardonyx', 'abdallahsaurus', 'abelisaurus', 'abrictosaurus']

In [5]:
# Shuffle list of all dinosaur names
np.random.seed(0)
np.random.shuffle(dinousar_names)

In [6]:
# Create X,Y set
X,Y= create_x_y_dataset(dinousar_names,char_to_ix)

In [7]:
x_flattened=X.flatten().reshape(-1,1,1)
y_flattened=Y.flatten()

In [8]:
x_flattened.shape,y_flattened.shape

((41472, 1, 1), (41472,))

In [9]:
def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor """
    return np.eye(num_classes, dtype='uint8')[y]

In [10]:
y_one_hot=to_categorical(y_flattened,len(chars))

In [11]:
y_one_hot[0].shape

(27,)

### Model

In [12]:
class CharToCharRNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(CharToCharRNN, self).__init__()
        # Number of hidden dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # RNN
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, 
                          nonlinearity='relu')
        
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
            
        # One time step
        out, hn = self.rnn(x, h0)
        out = self.fc(out.view(out.size(0),-1)) 
        return out
        
        

In [13]:
train = torch.utils.data.TensorDataset(torch.from_numpy(x_flattened).float(),torch.from_numpy(y_flattened).long())

In [14]:
train_loader = torch.utils.data.DataLoader(train, batch_size = 32, shuffle = False)

In [15]:
model_cnn=CharToCharRNN(1,128,1,27)
criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model_cnn.parameters(),lr=0.001)

In [16]:
LOSS=[]
epochs=10
preds=[]

for epoch in range(epochs):
    for x,y in train_loader:

        optimizer.zero_grad() 
        
        yhat= model_cnn(x)
        
        loss = criterion(yhat,y)
        loss.backward()
        optimizer.step()
        nn.utils.clip_grad_norm_(model_cnn.parameters(),5)
    correct=0
    for x_test, y_test in train_loader:

        z=model_cnn(x_test)
        _,yhat=torch.max(z.data,1)
        
        preds.append(yhat)
    
    LOSS.append(loss)
    

In [98]:
pred=preds[12953][0:27].detach().numpy()

In [99]:
print('\n'.join(ix_to_char[pred]))



















o
o
u
a
o
o
o
s
a
a
u
s
a
s
a








In [110]:
pred=model_cnn(torch.from_numpy(x_flattened[2:3]).float())

In [111]:
torch.max(pred.data,1)

(tensor([1.6644]), tensor([19]))

In [None]:
ix_to_char