In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.optim as optim

In [2]:
class AdditionDataset(Dataset):
    """Randomly generated samples for the addition problem.

    The task is the addition problem introduced in the original LSTM paper,
    in the variant described on p.11 of 'On the difficulty of training
    recurrent neural networks'.

    Every call to ``__getitem__`` generates a fresh random sample, which is a
    dict with two entries:

        'sequence': tensor of shape (2, t_dash). Row 0 holds numbers drawn
                    uniformly from [0, 1); row 1 is zero except for two ones
                    that mark the numbers to be added.
        'target':   sum of the two marked numbers.

    Args:
        dataset_length: Value reported by ``len()``; it only controls how many
            samples a DataLoader draws per pass.
        len_sequence: Minimum sequence length (must be at least 10). The
            actual length of each sample is drawn from
            [len_sequence, int(len_sequence * 1.1)).
    """

    def __init__(self, dataset_length, len_sequence):
        self.dataset_length = dataset_length  # This is what is returned by len(), see def __len__(self) below
        self.t = len_sequence  # Minimum length of a sequence
        # For lengths below 10, int(t * 11 / 10) == t, so the range the
        # actual sequence length is sampled from would be empty.
        assert self.t >= 10, 'Sequence length must be at least 10'

    def __len__(self):
        return self.dataset_length

    @staticmethod
    def _randint(low, high):
        """Return an integer drawn uniformly from [low, high) using torch's RNG.

        torch's RNG is used instead of numpy's so that all randomness of a
        sample is controlled by ``torch.manual_seed`` and by the per-worker
        torch seed that DataLoader sets up. numpy's global state may be
        identical in every worker process, which would make workers repeat
        the same draws.
        """
        return int(torch.LongTensor(1).random_(low, high)[0])

    def __getitem__(self, dummy_index):
        # The dummy index is required for the dataloader to work,
        # but since we are sampling data randomly it has no effect

        # Sample the length of the sequence and positions of numbers to add
        t_dash = self._randint(self.t, int(self.t * 11.0 / 10.0))  # Length of the sequence
        # Position of the first number: somewhere in the first tenth
        # (always 0 while t_dash < 20, because the range is then [0, 1))
        t_1 = self._randint(0, int(t_dash / 10.0))
        # Position of the second number: between the first tenth and the middle
        t_2 = self._randint(int(t_dash / 10.0), int(t_dash / 2.0))

        # We generate random numbers uniformly sampled from [0,1]
        # as depicted in Figure 2 of
        # "Learning Recurrent Neural Networks with Hessian-Free Optimization"
        # Details of how to sample the numbers was not given in
        # "On the difficulty of training recurrent neural networks"
        sequence = torch.zeros((2, t_dash))
        sequence[0, :] = torch.rand(t_dash)  # Row 0: the random numbers

        # Row 1: indicator of which two numbers to add
        sequence[1, t_1] = 1.0
        sequence[1, t_2] = 1.0

        # The target is the sum of the two marked numbers
        target = sequence[0, t_1] + sequence[0, t_2]

        return {'sequence': sequence, 'target': target}

In [3]:
def addition_problem(train_dataset_length, test_data_length, len_sequence, batch_size=4, num_workers=4):
    """Build the training and test data loaders for the addition problem.

    Args:
        train_dataset_length: Number of samples the training dataset reports via len().
        test_data_length: Number of samples the test dataset reports via len().
        len_sequence: Sequence length handed to both AdditionDataset instances.
        batch_size: Batch size used by both loaders.
        num_workers: Number of worker processes used by both loaders.

    Returns:
        train_loader    Loads training data
        test_loader     Loads test data

    """
    # Both loaders share the same batching / worker configuration.
    loader_options = dict(batch_size=batch_size, num_workers=num_workers)

    train_set = AdditionDataset(train_dataset_length, len_sequence)
    test_set = AdditionDataset(test_data_length, len_sequence)

    return DataLoader(train_set, **loader_options), DataLoader(test_set, **loader_options)

In [4]:
# Dataset object for drawing single samples directly (without a loader).
train_data = AdditionDataset(len_sequence=11, dataset_length=8)

# Loaders over freshly generated data, one sample per batch.
train_loader, test_loader = addition_problem(
    len_sequence=11,
    train_dataset_length=8,
    test_data_length=9,
    batch_size=1,
)

In [5]:
# Show every batch produced by the test loader, preceded by its index.
for i_batch, sample_batched in enumerate(test_loader):
    print(i_batch, sample_batched, sep='\n')

0
{'sequence': 
(0 ,.,.) = 

Columns 0 to 8 
   0.8043  0.3458  0.6361  0.3467  0.9410  0.5093  0.1975  0.0848  0.5405
  1.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.8104  0.1169
  0.0000  0.0000
[torch.FloatTensor of size 1x2x11]
, 'target': 
 1.1501
[torch.DoubleTensor of size 1]
}
1
{'sequence': 
(0 ,.,.) = 

Columns 0 to 8 
   0.8069  0.3209  0.4774  0.6750  0.0925  0.2210  0.9256  0.6101  0.5587
  1.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.9583  0.7564
  0.0000  0.0000
[torch.FloatTensor of size 1x2x11]
, 'target': 
 1.1278
[torch.DoubleTensor of size 1]
}
2
{'sequence': 
(0 ,.,.) = 

Columns 0 to 8 
   0.3990  0.8373  0.1908  0.8315  0.5879  0.8295  0.5340  0.8592  0.8446
  1.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.7993  0.9168
  0.0000  0.0000
[torch.FloatTensor of size 1x2x11]
, 'target': 
 1.2364
[torch.DoubleTensor of size 1]


In [6]:
class RNN(nn.Module):
    """Minimal recurrent cell that is applied one time step at a time.

    At each step the current input and the previous hidden state are
    concatenated; one linear layer followed by a sigmoid produces the next
    hidden state, and a second linear layer produces the output.

    Args:
        input_size: Number of features in the input at each time step.
        hidden_size: Number of features in the hidden state.
        output_size: Number of features in the output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the network by one time step.

        Args:
            input: Tensor whose last dimension has ``input_size`` features,
                either 1-D or with leading batch dimensions.
            hidden: Previous hidden state with the same leading dimensions
                as ``input`` and ``hidden_size`` features in the last one.

        Returns:
            (output, hidden): the output for this step and the next hidden
            state. Note that the output is computed from the *previous*
            hidden state (via the concatenated vector), not the new one.
        """
        # Concatenate along the feature (last) dimension so that both 1-D
        # and batched inputs are supported.
        combined = torch.cat((input, hidden), dim=-1)
        hidden = torch.sigmoid(self.i2h(combined))
        output = self.i2o(combined)
        return output, hidden

    def initHidden(self, batch_size=None):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: If None (default) the state is 1-D with
                ``hidden_size`` entries; otherwise it has shape
                (batch_size, hidden_size).
        """
        if batch_size is None:
            return Variable(torch.zeros(self.hidden_size))
        return Variable(torch.zeros(batch_size, self.hidden_size))

In [7]:
# Each time step supplies 2 input features (one column of the 2-row sequence)
# and the network emits a single number.
n_hidden = 10
rnn = RNN(input_size=2, hidden_size=n_hidden, output_size=1)

In [11]:
# Sanity check: feed the first time step of one sample through the network.
sample = train_data[0]
input = Variable(sample['sequence'][:, 0])  # First column of the sequence (2 values)
hidden = Variable(torch.zeros(n_hidden))

output, next_hidden = rnn(input, hidden)
print(output, next_hidden, sep='\n')

Variable containing:
-0.1429
[torch.FloatTensor of size 1]

Variable containing:
 0.5770
 0.4841
 0.6535
 0.3875
 0.5885
 0.5603
 0.4657
 0.3724
 0.4168
 0.6209
[torch.FloatTensor of size 10]



In [12]:
# Mean squared error between prediction and target, minimised with plain SGD.
criterion = nn.MSELoss()
learning_rate = 0.005  # Too high and training may explode; too low and it may not learn
optimizer = optim.SGD(params=rnn.parameters(), lr=learning_rate)

In [14]:
# Train the network with one SGD step per sample.
#
# Each batch from the loader holds 'sequence' with a leading batch dimension
# (batch, 2, T) and 'target' with one entry per sample; the samples of a batch
# are processed one after another because the network is unrolled manually.
rnn.train()
for epoch in range(100):
    for i_batch, sample_batched in enumerate(train_loader):
        sequences = sample_batched['sequence']
        # The collated target may come back as a double tensor; the network
        # output is float, so cast before computing the loss.
        targets = sample_batched['target'].float()

        for i_sample in range(sequences.size(0)):
            sequence = Variable(sequences[i_sample])  # (2, T)
            target = Variable(targets[i_sample:i_sample + 1])  # 1 element, like the output

            hidden = rnn.initHidden()
            rnn.zero_grad()

            # Unroll the network over the time steps; only the output of the
            # final step is compared with the target.
            for i in range(sequence.size()[1]):
                input = sequence[:, i]
                output, hidden = rnn(input, hidden)

            loss = criterion(output, target)
            loss.backward()

            optimizer.step()


In [37]:
# Evaluate on one freshly generated sample and compare prediction with target.
rnn.eval()
sample = train_data[0]
sequence = Variable(sample['sequence'])
target = Variable(torch.Tensor([sample['target']]))

# Run the whole sequence through the network, starting from a zero state;
# the output of the last time step is the prediction.
hidden = rnn.initHidden()
for i in range(sequence.size(1)):
    input = sequence[:, i]
    output, hidden = rnn(input, hidden)

print('sequence: ', sequence)
print('target: ', float(target))
print('output: ', float(output))

sequence:  Variable containing:

Columns 0 to 9 
 0.8030  0.2471  0.0761  0.3242  0.9985  0.7212  0.0496  0.3132  0.2303  0.5832
 1.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 10 to 10 
 0.5846
 0.0000
[torch.FloatTensor of size 2x11]

target:  1.050081491470337
output:  1.544363260269165
