In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.optim as optim

In [86]:
class AdditionDataset(Dataset):
    """Randomly generated samples for the addition problem.

    The task was introduced in the original LSTM paper; this implementation
    follows p.11 of 'On the difficulty of training recurrent neural networks'.

    Every sample is a pair ``(sequence, target)``:

    * ``sequence`` is a ``2 x t_dash`` float tensor. Row 0 holds numbers drawn
      with ``torch.rand``; row 1 is zero everywhere except for two 1.0 markers.
    * ``target`` is a 1-element float tensor containing the sum of the two
      row-0 numbers at the marked positions.

    Samples are drawn fresh on every access, so the index given to
    ``__getitem__`` has no influence on the result.
    """

    def __init__(self, dataset_length, len_sequence):
        """Store the dataset configuration.

        Args:
            dataset_length: Value reported by ``len()``; it only controls how
                many samples a loader draws per pass.
            len_sequence: Minimum sequence length ``t``. Must be at least 10.

        Raises:
            AssertionError: If ``len_sequence`` is smaller than 10.
        """
        self.dataset_length = dataset_length  # Returned by __len__ below
        self.t = len_sequence  # Length of sequence
        # The first marker is sampled from [0, int(t_dash / 10)). For lengths
        # below 10 that range is empty and np.random.randint would raise, so
        # 10 is the smallest usable length (and matches the message).
        assert self.t >= 10, 'Sequence length must be at least 10'

    def __len__(self):
        """Return the configured number of samples."""
        return self.dataset_length

    def __getitem__(self, dummy_index):
        """Draw one random ``(sequence, target)`` sample.

        Args:
            dummy_index: Required by the DataLoader protocol but ignored,
                because every sample is generated randomly.

        Returns:
            Tuple ``(sequence, target)`` as described in the class docstring.
        """
        # NOTE(review): positions come from NumPy's global RNG while values
        # come from torch's RNG; with DataLoader worker processes NumPy's
        # state may be duplicated across workers - confirm whether a
        # worker_init_fn is needed.

        # Sample the sequence length from [t, int(1.1 * t)) (upper bound exclusive)
        t_dash = np.random.randint(self.t, int(self.t * 11.0 / 10.0))
        # First marker lies in the first tenth of the sequence ...
        t_1 = np.random.randint(0, int(t_dash / 10.0))
        # ... and the second between the first tenth and the middle, so the
        # two positions can never coincide.
        t_2 = np.random.randint(int(t_dash / 10.0), int(t_dash / 2.0))

        # Numbers are sampled uniformly, as depicted in Figure 2 of
        # "Learning Recurrent Neural Networks with Hessian-Free Optimization";
        # "On the difficulty of training recurrent neural networks" does not
        # give the sampling details.
        sequence = torch.zeros((2, t_dash))  # Row 0: values, row 1: markers
        sequence[0, :] = torch.rand((1, t_dash))

        # Flag the two positions whose values have to be added
        sequence[1, t_1] = 1.0
        sequence[1, t_2] = 1.0

        # The target is the sum of the two flagged values
        target = torch.Tensor([sequence[0, t_1] + sequence[0, t_2]])

        return (sequence, target)

In [87]:
def addition_problem(train_dataset_length, test_data_length, len_sequence, batch_size=4, num_workers=4):
    """Build the data loaders for the addition problem.

    Args:
        train_dataset_length: Length of the dataset behind the training loader.
        test_data_length: Length of the dataset behind the test loader.
        len_sequence: Sequence length passed on to ``AdditionDataset``.
        batch_size: Batch size used by both loaders.
        num_workers: Number of worker processes used by both loaders.

    Returns:
        train_loader    Loads training data
        test_loader     Loads test data

    """
    # Both loaders share the same settings and differ only in dataset length
    loader_options = dict(batch_size=batch_size, num_workers=num_workers)
    train_loader, test_loader = (
        DataLoader(AdditionDataset(n_samples, len_sequence), **loader_options)
        for n_samples in (train_dataset_length, test_data_length)
    )
    return train_loader, test_loader

In [239]:
# Stand-alone dataset that is indexed directly later on; it is a separate
# instance from the datasets wrapped by the two loaders.
train_data = AdditionDataset(dataset_length=8, len_sequence=11)

# Loaders with a minibatch size of 1
train_loader, test_loader = addition_problem(
    train_dataset_length=8,
    test_data_length=9,
    len_sequence=11,
    batch_size=1,
)

In [98]:
# Dump every batch of the test loader: its index, then the [data, target] pair
for i_batch, sample_batched in enumerate(test_loader):
    print(i_batch, sample_batched, sep='\n')

0
[
(0 ,.,.) = 

Columns 0 to 8 
   0.5405  0.7344  0.6877  0.7288  0.9935  0.7193  0.8915  0.9268  0.7231
  1.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.4768  0.3068
  0.0000  0.0000

(1 ,.,.) = 

Columns 0 to 8 
   0.7075  0.4802  0.4804  0.0686  0.6688  0.0107  0.5450  0.2223  0.1243
  1.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.0064  0.2569
  0.0000  0.0000
[torch.FloatTensor of size 2x2x11]
, 
 1.5340
 1.1877
[torch.FloatTensor of size 2x1]
]
1
[
(0 ,.,.) = 

Columns 0 to 8 
   0.0733  0.2384  0.2285  0.2221  0.9158  0.4494  0.0554  0.6146  0.8529
  1.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.6943  0.5793
  0.0000  0.0000

(1 ,.,.) = 

Columns 0 to 8 
   0.9373  0.7821  0.4976  0.7325  0.4952  0.5159  0.6168  0.6693  0.3526
  1.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.0727  0.1611
  0

In [268]:
class RNN(nn.Module):
    """Simple recurrent cell built from two linear layers.

    At every step the current input is concatenated with the previous hidden
    state; one linear layer followed by a sigmoid produces the next hidden
    state and a second linear layer produces the output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features of a single time step.
            hidden_size: Number of hidden units.
            output_size: Number of output features.
        """
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        # Both layers read the concatenation [input, hidden]
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)

    def forward(self, input, hidden):
        """Run a single time step.

        Args:
            input: Tensor of shape ``[input_size]`` or ``[batch, input_size]``.
            hidden: Tensor of shape ``[hidden_size]`` or
                ``[batch, hidden_size]``, matching ``input``.

        Returns:
            Tuple ``(output, hidden)`` with the step output and the new
            hidden state.
        """
        # Concatenate along the feature axis. For 1-D inputs this is the same
        # as the default dim 0, but it also keeps batched inputs valid.
        combined = torch.cat((input, hidden), dim=-1)
        output = self.i2o(combined)
        # torch.sigmoid replaces the deprecated nn.functional.sigmoid
        hidden = torch.sigmoid(self.i2h(combined))
        return output, hidden

    def initHidden(self, batch_size=None):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: If None (default) the state has shape
                ``[hidden_size]``; otherwise ``[batch_size, hidden_size]``.
        """
        if batch_size is None:
            return Variable(torch.zeros(self.hidden_size))
        return Variable(torch.zeros(batch_size, self.hidden_size))

In [269]:
n_hidden = 5  # Number of hidden units
# Two input features per time step (value and marker), one output (the sum)
rnn = RNN(input_size=2, hidden_size=n_hidden, output_size=1)

In [126]:
# Feature sizes for a single time step and for the prediction
input_size, output_size = 2, 1

In [140]:
# Draw a batch here instead of relying on a `data` variable left over from a
# later cell, so the cell also runs on a fresh kernel. Distinct names keep
# `data`/`target` used by other cells untouched.
batch_data, batch_target = next(iter(test_loader))
batch_data = Variable(batch_data)  # [1 x 2 x t]
print(batch_data[0, :, :])  # First sequence of the batch, [2 x t]

Variable containing:

Columns 0 to 9 
 0.4207  0.7871  0.8007  0.9347  0.1911  0.2069  0.5466  0.2533  0.8444  0.5589
 1.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 10 to 10 
 0.4797
 0.0000
[torch.FloatTensor of size 2x11]



In [267]:
# Draw one sample here so the cell does not depend on `data`/`target` being
# left over from a cell further down (it would fail on a fresh kernel).
data, target = next(iter(test_loader))
data = Variable(data[0, :, :])  # Drop the batch dimension: [2 x t]
target = Variable(target)  # [1 x 1]

print(data)
print(target)

# Reproduce the first step of the RNN by hand
input = data[:, 0]  # [2]
hidden = Variable(torch.zeros(n_hidden))  # [h]

combined = torch.cat((input, hidden))  # [h+2]

# Freshly initialised layers with the same sizes as the ones inside `rnn`
i2h = nn.Linear(input_size + n_hidden, n_hidden)
i2o = nn.Linear(input_size + n_hidden, output_size)

hidden_new = i2h(combined)

# Last expression: shows (output, hidden) of the actual model for this step
rnn(input, hidden)

Variable containing:

Columns 0 to 9 
 0.7378  0.4488  0.2960  0.5442  0.6075  0.5622  0.1364  0.9256  0.2062  0.3828
 1.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 10 to 10 
 0.8429
 0.0000
[torch.FloatTensor of size 2x11]

Variable containing:
 1.3453
[torch.FloatTensor of size 1x1]



(Variable containing:
  0.1093
 [torch.FloatTensor of size 1], Variable containing:
  0.5285
  0.5342
  0.4726
  0.5195
  0.5662
 [torch.FloatTensor of size 5])

In [94]:
# Step size for SGD: too high and training might explode, too low and it
# might not learn
learning_rate = 0.005

# Mean squared error between prediction and target, minimised with plain SGD
criterion = nn.MSELoss()
optimizer = optim.SGD(params=rnn.parameters(), lr=learning_rate)

In [266]:
# Take only the first batch of the test loader and stop
for i_batch, (data, target) in enumerate(test_loader):
    # Drop the batch dimension: [b x 2 x t] -> [2 x t]
    data, target = Variable(data[0]), Variable(target)
    break

In [298]:
rnn.train()
for epoch in range(100):
    # Fit on the training loader; the test loader is kept for evaluation
    for i_batch, (data, target) in enumerate(train_loader):

        data = Variable(data)  # [1 x 2 x t]
        # Flatten the target so it has the same shape as the model output
        target = Variable(target).view(-1)  # [1 x 1] -> [1]

        # Get rid of zeroth dimension, since the minibatch is of size 1
        data = data[0, :, :]  # [2 x t]

        # Every sequence starts from a fresh hidden state and fresh gradients
        hidden = rnn.initHidden()  # [h]
        rnn.zero_grad()

        # Feed the sequence one time step at a time; only the output after
        # the last step is compared against the target
        for i in range(data.size()[1]):
            input = data[:, i]  # [2]
            output, hidden = rnn(input, hidden)

        # Reuse the shared criterion instead of building a new loss module
        loss = criterion(output, target)
        if i_batch == 0:
            # Report the loss of the first batch of every epoch
            print(loss)
        loss.backward()

        optimizer.step()


Variable containing:
 0.1087
[torch.FloatTensor of size 1]

Variable containing:
 0.2544
[torch.FloatTensor of size 1]

Variable containing:
 0.4633
[torch.FloatTensor of size 1]

Variable containing:
 0.5047
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  4.0774
[torch.FloatTensor of size 1]

Variable containing:
 0.3634
[torch.FloatTensor of size 1]

Variable containing:
 0.2198
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-03 *
  6.2119
[torch.FloatTensor of size 1]

Variable containing:
 0.2463
[torch.FloatTensor of size 1]

Variable containing:
 2.1970
[torch.FloatTensor of size 1]

Variable containing:
 0.7279
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  5.0633
[torch.FloatTensor of size 1]

Variable containing:
 0.3690
[torch.FloatTensor of size 1]

Variable containing:
 1.2820
[torch.FloatTensor of size 1]

Variable containing:
 0.2403
[torch.FloatTensor of size 1]

Variable containing:
 2.0804
[torch.FloatTensor of size

In [296]:
rnn.eval()

# Draw one sample directly from the dataset (no batch dimension)
data, target = (Variable(part) for part in train_data[0])  # [2 x t], [1]
hidden = rnn.initHidden()

# Run the whole sequence through the network, one time step at a time
for i in range(data.size(1)):
    input = data[:, i]
    output, hidden = rnn(input, hidden)

# Compare the prediction after the final step with the true sum
print('data: ', data)
print('target: ', float(target))
print('output: ', float(output))

data:  Variable containing:

Columns 0 to 9 
 0.1647  0.1292  0.8754  0.6599  0.8394  0.9164  0.3432  0.5971  0.9808  0.1611
 1.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 10 to 10 
 0.3637
 0.0000
[torch.FloatTensor of size 2x11]

target:  1.0041024684906006
output:  0.29795801639556885
