# Childrens Book (Pytorch)

A Pytorch Version of Alyssa's Notebook

In [24]:
from itertools import permutations
import numpy as np
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.optim as optim

In [2]:
# Parameters
seq_length = 4 # Length of input and target strings
batch_size = 1 # Use 1 so we don't have to rebuild model for generating data after training
buffer_size = 4
n_epochs = 10
embedding_dims = 256
rnn_units = 1024

## Data

In [3]:
names1 = ['Doug', 'Jane', 'Spot', 'Kaylee', 'Mal', 'Link', 'Zelda', 'Mario', 'Luigi']
names2 = ['Leia', 'Luke', 'Han', 'Harry', 'Hermione', 'Ron']
names3 = ['Frodo', 'Sam', 'Merry', 'Pippin']

text_array = []
for name_pair in permutations(names1, 2):
    text_array.append(' saw '.join(name_pair))
for name_pair in permutations(names2, 2):
    text_array.append(' saw '.join(name_pair))
for name_pair in permutations(names3, 2):
    text_array.append(' saw '.join(name_pair))
data_text = ' . '.join(text_array) + ' .' # Need that last period

print(data_text)

Doug saw Jane . Doug saw Spot . Doug saw Kaylee . Doug saw Mal . Doug saw Link . Doug saw Zelda . Doug saw Mario . Doug saw Luigi . Jane saw Doug . Jane saw Spot . Jane saw Kaylee . Jane saw Mal . Jane saw Link . Jane saw Zelda . Jane saw Mario . Jane saw Luigi . Spot saw Doug . Spot saw Jane . Spot saw Kaylee . Spot saw Mal . Spot saw Link . Spot saw Zelda . Spot saw Mario . Spot saw Luigi . Kaylee saw Doug . Kaylee saw Jane . Kaylee saw Spot . Kaylee saw Mal . Kaylee saw Link . Kaylee saw Zelda . Kaylee saw Mario . Kaylee saw Luigi . Mal saw Doug . Mal saw Jane . Mal saw Spot . Mal saw Kaylee . Mal saw Link . Mal saw Zelda . Mal saw Mario . Mal saw Luigi . Link saw Doug . Link saw Jane . Link saw Spot . Link saw Kaylee . Link saw Mal . Link saw Zelda . Link saw Mario . Link saw Luigi . Zelda saw Doug . Zelda saw Jane . Zelda saw Spot . Zelda saw Kaylee . Zelda saw Mal . Zelda saw Link . Zelda saw Mario . Zelda saw Luigi . Mario saw Doug . Mario saw Jane . Mario saw Spot . Mario saw K

In [4]:
dataset_vocab = np.array(data_text.split())
encoder = LabelEncoder()
dataset_enc = encoder.fit_transform(dataset_vocab) # Reshape dataset to be a single column vector

print(f'Vocabulary: {encoder.classes_}')
print(f'Orignal data: {dataset_vocab[:8]}\nEncoded data: {dataset_enc[:8]}')

Vocabulary: ['.' 'Doug' 'Frodo' 'Han' 'Harry' 'Hermione' 'Jane' 'Kaylee' 'Leia' 'Link'
 'Luigi' 'Luke' 'Mal' 'Mario' 'Merry' 'Pippin' 'Ron' 'Sam' 'Spot' 'Zelda'
 'saw']
Orignal data: ['Doug' 'saw' 'Jane' '.' 'Doug' 'saw' 'Spot' '.']
Encoded data: [ 1 20  6  0  1 20 18  0]


In [5]:
dataset_enc

array([ 1, 20,  6,  0,  1, 20, 18,  0,  1, 20,  7,  0,  1, 20, 12,  0,  1,
       20,  9,  0,  1, 20, 19,  0,  1, 20, 13,  0,  1, 20, 10,  0,  6, 20,
        1,  0,  6, 20, 18,  0,  6, 20,  7,  0,  6, 20, 12,  0,  6, 20,  9,
        0,  6, 20, 19,  0,  6, 20, 13,  0,  6, 20, 10,  0, 18, 20,  1,  0,
       18, 20,  6,  0, 18, 20,  7,  0, 18, 20, 12,  0, 18, 20,  9,  0, 18,
       20, 19,  0, 18, 20, 13,  0, 18, 20, 10,  0,  7, 20,  1,  0,  7, 20,
        6,  0,  7, 20, 18,  0,  7, 20, 12,  0,  7, 20,  9,  0,  7, 20, 19,
        0,  7, 20, 13,  0,  7, 20, 10,  0, 12, 20,  1,  0, 12, 20,  6,  0,
       12, 20, 18,  0, 12, 20,  7,  0, 12, 20,  9,  0, 12, 20, 19,  0, 12,
       20, 13,  0, 12, 20, 10,  0,  9, 20,  1,  0,  9, 20,  6,  0,  9, 20,
       18,  0,  9, 20,  7,  0,  9, 20, 12,  0,  9, 20, 19,  0,  9, 20, 13,
        0,  9, 20, 10,  0, 19, 20,  1,  0, 19, 20,  6,  0, 19, 20, 18,  0,
       19, 20,  7,  0, 19, 20, 12,  0, 19, 20,  9,  0, 19, 20, 13,  0, 19,
       20, 10,  0, 13, 20

In [6]:
# makes the datasets and data loaders
class ReadFromArray(Dataset):

    def __init__(self, array_enc, transform=None):
        self.array_enc = array_enc

    def __len__(self):
        return len(self.array_enc) - seq_length

    def __getitem__(self, idx):
        input_text = self.array_enc[idx:idx+seq_length]
        target_text = self.array_enc[idx + 1 :idx+seq_length + 1]        
        sample = {'input': input_text, 'target': target_text}
        return sample

In [7]:
dataset = ReadFromArray(dataset_enc)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [8]:
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.word_embeddings = nn.Embedding(len(encoder.classes_), embedding_dims)
        self.lstm = nn.LSTM(embedding_dims, rnn_units)
        self.dens = nn.Linear(rnn_units, len(encoder.classes_))
        
    def forward(self, x):
        x = self.word_embeddings(x)
        x, y = self.lstm(x)
        x = self.dens(x)
#         x = nn.Softmax(x)
        return x

rnn = RNN()

In [9]:
# model = tf.keras.Sequential([
#     # Embedding layer maps words to vectors
#     tf.keras.layers.Embedding(vocab_size, 
#                               embedding_dim, 
#                               batch_input_shape=[batch_size, None]),
    
#     # Recurrent layer
#     tf.keras.layers.LSTM(units=rnn_units, 
#                          return_sequences=True, 
#                          stateful=True),
    
#     # Output layer
#     tf.keras.layers.Dense(vocab_size)

In [26]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(rnn.parameters(), lr=0.001, momentum=0.9)

In [45]:
for epoch in range(n_epochs):
    for i, data in enumerate(dataloader, 0):
        output = rnn(data['input'])
        
        new_output = []
        for out in output.data.numpy()[0]:
            new_output.append(np.where(out == np.amax(out)))
    
        
        new_output = np.squeeze(np.array(new_output))
        new_output = np.array([new_output])
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        print(torch.from_numpy(new_output))
        print(data['target'])
        loss = criterion(torch.from_numpy(new_output), data['target'])
        loss.backward()
        optimizer.step()
                
        break
    break

tensor([[ 3,  9, 15, 14]])
tensor([[18, 20, 19,  0]])


RuntimeError: "log_softmax_lastdim_kernel_impl" not implemented for 'Long'

In [22]:
encoder.inverse_transform(new_output)

array(['Sam', 'Han', 'Hermione', 'Pippin'], dtype='<U8')