In [1]:
import numpy as np
import torch
import sys
from helpers import one_hot, prepare_batches
from random import seed, shuffle
import torch.nn as nn
from torch.nn.utils.rnn import pad_packed_sequence

from importlib import reload
import helpers

<module 'helpers' from '/Users/nikku/projects/vates/helpers.py'>

#### Data Extraction

In [2]:
with open('data/t8.shakespeare.txt') as f:
    text = f.read()

In [3]:
shakespeare = []
#start after the header
skip = False
for line in text.split("\n")[244:]:
    if line[:2] == "<<":
        skip = True
    elif line[-2:] == ">>":
        skip = False
    if skip or line == "":
        continue
    shakespeare.append(line)

In [4]:
len(shakespeare)

113282

In [5]:
sys.getsizeof(shakespeare)

927568

#### Data Preprocessing

In [6]:
flatten = lambda l: [item for sublist in l for item in sublist]
flattened = flatten(shakespeare)

In [7]:
chars = tuple(set(flattened))
int2char = dict(enumerate(chars))
char2int = {value: key for key, value in int2char.items()}

In [8]:
len(chars)

83

In [9]:
#roughly 90% of sequences are shorter than 70 characters, so we'll make the max sequence length 70.
sorted([len(l) for l in shakespeare], reverse=True)[1100]

69

In [10]:
#90 % are longer than 39 characters... maybe we should have a shortest length too?
[len(l) for l in shakespeare][1100]

39

In [11]:
#simple character to number encoding
#truncate sequences longer than 70
numeric_sequences = [[char2int[char] for char in line][:70] for line in shakespeare]

In [12]:
seed(1609)
#randomly shuffle the sequences
shuffle(numeric_sequences)

#### Split into Training and Validation Sets

In [13]:
#90 - 10 training / validation split
n_training_sequences = int(.9 * len(numeric_sequences))
training = numeric_sequences[:n_training_sequences]
validation = numeric_sequences[n_training_sequences:]

In [14]:
len(validation), len(training)

(11329, 101953)

In [15]:
training_input = [sequence[:-1] for sequence in training]
training_target = [sequence[1:] for sequence in training]

In [16]:
reload(helpers)

<module 'helpers' from '/Users/nikku/projects/vates/helpers.py'>

In [17]:
packed_batches = helpers.prepare_batches(training_input, batch_size = 20, n_states = len(chars), sequence_length = 69)

In [18]:
lines = []

for packed_batch in packed_batches:
    
    unpacked_sequences, sequence_lengths = pad_packed_sequence(packed_batch)
    
    for i in range(len(sequence_lengths)):
        
        length = sequence_lengths[i]
        sequence = unpacked_sequences[:,i,:][:length]
        
        numbers_sequence = [helpers.decode_one_hot(vec) for vec in sequence]
        
        lines.append([int2char[num] for num in numbers_sequence])

In [19]:
for i in range(15):
    print(''.join(lines[i]) + '\n')

    interim be but a se'nnight, Time's pace is so hard that it seems 

and three or four FOLLOWERS accordingly, with PORTIA, NERISSA, and tr

Alarum. Excursions. Enter the King, the Prince, Lord John of Lancaste

    John Doit of Staffordshire, and black George Barnes, and Francis 

  Ham. Let me see. [Takes the skull.] Alas, poor Yorick! I knew him, 

GLOUCESTER offers to put up a bill; WINCHESTER snatches it, and tears

Enter King, Queen, Polonius, Ophelia, Rosencrantz, Guildenstern, and 

  SPEED. Marry, by these special marks: first, you have learn'd, like

    never did such deeds in arms as I have done this day. I have paid

Trumpets, sennet, and cornets. Enter two VERGERS, with short silver w

  PANDARUS. Good boy, tell him I come.                       Exit Boy

    it with security. I look'd 'a should have sent me two and twenty 

    thus much for greeting. Now, my spruce companions, is all ready, 

  LAUNCE. Out with that too; it was Eve's legacy, and cannot be ta'en

    yo

#### Training the Model

In [73]:
del nn

In [97]:
import torch.nn as nn

In [85]:
import pdb

In [99]:
from torch.nn import _VF

In [113]:
nn.LSTM.forward_impl = monkey_patch

In [None]:
def monkey_patch(self, input, hx, batch_sizes, max_batch_size, sorted_indices):
    # type: (Tensor, Optional[Tuple[Tensor, Tensor]], Optional[Tensor], int, Optional[Tensor]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]  # noqa
    if hx is None:
        num_directions = 2 if self.bidirectional else 1
        zeros = torch.zeros(self.num_layers * num_directions,
                            max_batch_size, self.hidden_size,
                            dtype=input.dtype, device=input.device)
        hx = (zeros, zeros)
    else:
        # Each batch of the hidden state should match the input sequence that
        # the user believes he/she is passing in.
        hx = self.permute_hidden(hx, sorted_indices)

    self.check_forward_args(input, hx, batch_sizes)
    if batch_sizes is None:
        result = _VF.lstm(input, hx, self._get_flat_weights(), self.bias, self.num_layers,
                          self.dropout, self.training, self.bidirectional, self.batch_first)
    else:
        result = _VF.lstm(input, batch_sizes, hx, self._get_flat_weights(), bool(self.bias),
                          self.num_layers, self.dropout, self.training, self.bidirectional)
    output = result[0]
    hidden = result[1:]

    return output, hidden

In [None]:
class ShakespeareNN(nn.Module):
    
    def __init__(self, n_chars, hidden_size, n_rnn_layers=1, dropout=0):
        
        super().__init__()
        
        self.n_layers = n_rnn_layers
        self.n_hidden = hidden_size
        #input size corresponds to the number of unique characters
        self.lstm = nn.LSTM(n_chars, hidden_size, n_rnn_layers, dropout)
        
        #decoder layer?
        self.dense = nn.Linear(hidden_size, n_chars)
        
        
    def forward(self, seq, hx):
        
        #ignore hidden 
        recurrent_output, _ = self.lstm(seq, hx)
        
        X, _ = pad_packed_sequence(recurrent_output)
        
        pdb.set_trace()
        
        out = self.dense(recurrent_output)
        
        return out
    
    def init_hidden(self, batch_size):
        
        weight0 = next(self.parameters()).data
        hidden = (weight0.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                  weight0.new(self.n_layers, batch_size, self.n_hidden).zero_())

In [None]:
william = ShakespeareNN(83, 100)

In [None]:
hx = william.init_hidden(20)
william(packed_batch, hx)

In [22]:
packed_batch = packed_batches[0]
with torch.no_grad():
    hx = william.init_hidden(20)
    william(packed_batch, hx)

TypeError: lstm() received an invalid combination of arguments - got (Tensor, Tensor, tuple, list, int, int, float, bool, bool), but expected one of:
 * (Tensor data, Tensor batch_sizes, tuple of Tensors hx, tuple of Tensors params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional)
      didn't match because some of the arguments have invalid types: ([32;1mTensor[0m, [32;1mTensor[0m, [31;1mtuple[0m, [31;1mlist[0m, [31;1mint[0m, [32;1mint[0m, [32;1mfloat[0m, [32;1mbool[0m, [32;1mbool[0m)
 * (Tensor input, tuple of Tensors hx, tuple of Tensors params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first)
      didn't match because some of the arguments have invalid types: ([32;1mTensor[0m, [31;1mTensor[0m, [31;1mtuple[0m, [31;1mlist[0m, [32;1mint[0m, [32;1mint[0m, [31;1mfloat[0m, [32;1mbool[0m, [32;1mbool[0m)


In [35]:
input_size = len(chars)

In [36]:
lstm = nn.LSTM(input_size, hidden_size = 100, num_layers=2, dropout=0.5)

In [37]:
lstm(packed_batch)

(PackedSequence(data=tensor([[-0.0139,  0.0061, -0.0217,  ..., -0.0060, -0.0010, -0.0222],
         [-0.0155, -0.0032, -0.0221,  ..., -0.0153, -0.0020, -0.0266],
         [-0.0197,  0.0005, -0.0210,  ..., -0.0144,  0.0002, -0.0240],
         ...,
         [-0.0099, -0.0041, -0.0365,  ..., -0.0332, -0.0240, -0.0397],
         [-0.0022, -0.0014, -0.0188,  ..., -0.0083, -0.0060, -0.0291],
         [-0.0360,  0.0112, -0.0251,  ..., -0.0252, -0.0176, -0.0272]],
        grad_fn=<CatBackward>), batch_sizes=tensor([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]), sorted_indices=None, unsorted_indices=None),
 (tensor([[[-0.0758, -0.0262,  0.0271,  ...,  0.0043, -0.0169, -0.0479],
           [-0.0966, -0.0695,  0.0326,  ..., -0.0045,  0.0137, -

In [126]:
loss_function = nn.NLLLoss()