In [200]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.optim as optim

In [None]:
t = 10

t_dash = np.random.randint(t, int(t*11.0/10.0))
t_1 = np.random.randint(0, int(t_dash/10.0))
t_2 = np.random.randint(int(t_dash/10.0), int(t_dash/2.0))

print(t_dash)
print(t_1)
print(t_2)

# We generate random numbers uniformly sampled from [0,1] 
# as depicted in Figure 2 of "Learning Recurrent Neural Networks with Hessian-Free Optimization"
# Details of how to sample the numbers was not given in "On the difficulty of training recurrent neural networks"

sequence = np.zeros((2,t_dash))
sequence[0,:] = np.random.rand(t_dash)
sequence[1,[t_1,t_2]] = 1.0
print(sequence)

In [None]:
class AdditionDataset(Dataset):
    """Addition dataset."""

    def __init__(self, num_sequences, len_sequence):
        self.num_sequences = num_sequences
        self.t = len_sequence

    def __len__(self):
        return self.num_sequences

    def __getitem__(self, dummy_index):
        t_dash = np.random.randint(self.t, int(t*11.0/10.0))
        t_1 = np.random.randint(0, int(t_dash/10.0))
        t_2 = np.random.randint(int(t_dash/10.0), int(t_dash/2.0))

        sequence = np.zeros((2,t_dash))
        sequence[0,:] = np.random.rand(t_dash)
        sequence[1,[t_1,t_2]] = 1.0

        return sequence

In [None]:
addition_training_dataset = AdditionDataset(5,10)
training_dataloader = DataLoader(addition_training_dataset, batch_size=4, num_workers=4)

In [None]:
for i_batch, sample_batched in enumerate(training_dataloader):
    print(i_batch)
    print(sample_batched)

In [214]:
class AdditionDataset(Dataset):
    """Addition dataset as introduced in the original LSTM paper.
    This implementation is from p.11 of 'On the difficulty of training recurrent neural networks' """

    def __init__(self, dataset_length, len_sequence):
        self.dataset_length = dataset_length  # This is what is returned by len(), see def __len__(self) below
        self.t = len_sequence  # Length of sequence
        # Check that sequence length is at least 10
        # If not, there is no randomness in the position of the first number to be added
        assert(self.t > 10), 'Sequence length must be at least 10'

    def __len__(self):
        return self.dataset_length

    def __getitem__(self, dummy_index):
        # The dummy index is required for the dataloader to work,
        # but since we are sampling data randomly it has no effect

        # Sample the length of the sequence and positions of numbers to add
        t_dash = np.random.randint(self.t, int(self.t * 11.0 / 10.0) )  # Length of the sequence
        t_1 = np.random.randint(0, int(t_dash / 10.0))  # Indicator of position of first number to add
        t_2 = np.random.randint(int(t_dash / 10.0), int(t_dash / 2.0))  # Indicator of position of second number to add

        # We generate random numbers uniformly sampled from [0,1]
        # as depicted in Figure 2 of
        # "Learning Recurrent Neural Networks with Hessian-Free Optimization"
        # Details of how to sample the numbers was not given in
        # "On the difficulty of training recurrent neural networks"
        sequence = torch.zeros((2, t_dash))  # Initialize empty sequence
        sequence[0, :] = torch.rand((1, t_dash))  # Make first row random numbers
        
        # Set second row to indicate which numbers to add
        sequence[1, t_1] = 1.0 
        sequence[1, t_2] = 1.0  

        # Calculate target
        target = sequence[0, t_1] + sequence[0, t_2]
        
        # Collect sequence and target into a sample
        sample = {'sequence': sequence, 'target': target}
        
        return sample

In [215]:
t_dash = 5
sequence = torch.zeros((2, t_dash))
sequence[0,:] = torch.rand((1, t_dash))
print(sequence)


 0.5541  0.5260  0.7278  0.2966  0.8558
 0.0000  0.0000  0.0000  0.0000  0.0000
[torch.FloatTensor of size 2x5]



In [216]:
def addition_problem(train_dataset_length, test_data_length, len_sequence, batch_size=4, num_workers=4):
    """This is the addition problem

    Args:
        T: Sequence length

    Returns:
        train_loader    Loads training data
        test_loader     Loads test data

    """

    train_loader = DataLoader(AdditionDataset(train_dataset_length, len_sequence),
                              batch_size=batch_size,
                              num_workers=num_workers)
    test_loader = DataLoader(AdditionDataset(test_data_length, len_sequence),
                             batch_size=batch_size,
                             num_workers=num_workers)
    return train_loader, test_loader

In [217]:
train_data = AdditionDataset(dataset_length=8, len_sequence=11)
train_loader, test_loader = addition_problem(train_dataset_length=8, test_data_length=9, len_sequence=11, batch_size=1)

In [218]:
for i_batch, sample_batched in enumerate(test_loader):
    print(i_batch)
    print(sample_batched)

0
{'sequence': 
(0 ,.,.) = 

Columns 0 to 8 
   0.8345  0.9610  0.8239  0.4113  0.1251  0.7004  0.1098  0.6241  0.5064
  1.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.8276  0.4372
  0.0000  0.0000
[torch.FloatTensor of size 1x2x11]
, 'target': 
 0.9596
[torch.DoubleTensor of size 1]
}
1
{'sequence': 
(0 ,.,.) = 

Columns 0 to 8 
   0.8337  0.2175  0.6819  0.0203  0.5571  0.0284  0.9252  0.8844  0.7368
  1.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.0038  0.4473
  0.0000  0.0000
[torch.FloatTensor of size 1x2x11]
, 'target': 
 1.3908
[torch.DoubleTensor of size 1]
}
2
{'sequence': 
(0 ,.,.) = 

Columns 0 to 8 
   0.6179  0.1068  0.1715  0.4557  0.1782  0.8736  0.0532  0.2064  0.6394
  1.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000

Columns 9 to 10 
   0.4200  0.5955
  0.0000  0.0000
[torch.FloatTensor of size 1x2x11]
, 'target': 
 0.7961
[torch.DoubleTensor of size 1]


In [219]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden))
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        return output, hidden

    def initHidden(self):
        return Variable(torch.zeros(self.hidden_size))

In [220]:
n_hidden = 10
rnn = RNN(2, n_hidden, 1)

In [221]:
sample = train_data[0]

In [222]:
input = Variable(sample['sequence'][:,0]) # Variable(torch.rand(2))
hidden = Variable(torch.zeros(n_hidden))

output, next_hidden = rnn(input, hidden)
print(output)
print(next_hidden)

Variable containing:
 0.1326
[torch.FloatTensor of size 1]

Variable containing:
-0.3639
-0.2442
-0.0588
 0.2449
-0.0737
 0.0707
 0.4350
-0.0146
 0.0985
-0.1212
[torch.FloatTensor of size 10]



In [224]:
criterion = nn.MSELoss()
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn
optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)

In [230]:

hidden = rnn.initHidden()
rnn.zero_grad()

sequence = Variable(sample['sequence'], requires_grad=False)
target = Variable(torch.Tensor([sample['target']]), requires_grad=False)

for i in range(t_dash):
    input = sequence[:,i]
    output, hidden = rnn(input, hidden)
    
print(output) 
print(target)
loss = criterion(output, target)
loss.backward()


Variable containing:
1.00000e-02 *
 -4.9688
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]



In [233]:
rnn.train()
for i_batch, sample_batched in enumerate(test_loader):
    sequence = Variable(sample['sequence'], requires_grad=False)
    target = Variable(torch.Tensor([sample['target']]), requires_grad=False)
    
    hidden = rnn.initHidden()
    rnn.zero_grad()
    
    for i in range(t_dash):
        input = sequence[:,i]
        output, hidden = rnn(input, hidden)
        
    print(output)
#     optimizer.zero_grad()
#     output = model(data)
#     loss = F.nll_loss(output, target)
#     loss.backward()

#     # Clip gradients
#     # As implemented in https://github.com/pytorch/examples/blob/master/word_language_model/main.py#L162-L164
#     if Hyperparameters.clipping_threshold != 0:
#         clip_grad_norm(model.parameters(), Hyperparameters.clipping_threshold)

#     optimizer.step()
#     if batch_idx % 1000 == 0:
#         print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
#             epoch, batch_idx * len(data), len(train_loader.dataset),
#                    100.0 * batch_idx / len(train_loader), loss.data[0]))

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

Variable containing:
 0.5874
[torch.FloatTensor of size 1]

