# RNN for Seq2Seq

In [2]:
import torch
import torch.nn as nn
import numpy as np

## 数据生成

In [3]:
def generate_sequences(n=128, variable_len=False, seed=13):
    """Generate noisy sequences of square corners.

    Each sequence walks the corners of the square with vertices at
    (+-1, +-1), starting from a random corner and going in a random
    direction, with Gaussian noise (standard deviation 0.1) added to
    every point.

    Args:
        n: Number of sequences to generate.
        variable_len: If True, each sequence has a random length between
            2 and 4 points; otherwise every sequence has 4 points.
        seed: Value passed to ``np.random.seed``; note that this reseeds
            NumPy's global random state.

    Returns:
        A tuple ``(points, directions)`` where ``points`` is a list of
        ``n`` float arrays of shape ``(length, 2)`` and ``directions`` is
        an integer array of ``n`` values in {0, 1}; 0 means the corner
        order was reversed, 1 means it was kept.
    """
    corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])
    np.random.seed(seed)

    # The order of the random draws below determines the generated data.
    bases = np.random.randint(4, size=n)
    lengths = (np.random.randint(3, size=n) + 2) if variable_len else [4] * n
    directions = np.random.randint(2, size=n)

    points = []
    for base, direction, length in zip(bases, directions, lengths):
        # Visit all four corners cyclically, starting at ``base``.
        walk = corners[(base + np.arange(4)) % 4]
        if direction == 0:
            # Direction 0 traverses the same four corners in reverse.
            walk = walk[::-1]
        noise = np.random.randn(length, 2) * 0.1
        points.append(walk[:length] + noise)
    return points, directions

## Encoder

In [66]:
class Encoder(nn.Module):
    """GRU encoder that turns a source sequence into a sequence of hidden states.

    Args:
        hidden_dim: Size of the GRU hidden state.
        n_features: Number of features per time step of the input.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, hidden_dim, n_features, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_features = n_features
        self.num_layers = num_layers
        # Final hidden state of the most recent forward pass (None until then).
        self.hidden = None
        self.rnn = nn.GRU(
            input_size=self.n_features,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            batch_first=True,
        )

    def forward(self, X):
        """Run the GRU over ``X`` and return the per-step outputs.

        The GRU is batch-first, so ``X`` is laid out as
        (batch, seq_len, n_features). The final hidden state returned by
        the GRU is kept in ``self.hidden``.
        """
        rnn_out, final_hidden = self.rnn(X)
        self.hidden = final_hidden
        return rnn_out

In [5]:
# One noise-free square as a single batch item: shape (1, 4, 2).
full_seq = torch.tensor(
    [[-1, -1], [-1, 1], [1, 1], [1, -1]],
    dtype=torch.float32,
).unsqueeze(0)
# Split along the sequence axis: the first two corners are the source,
# the last two corners are the target.
source_seq, target_seq = full_seq[:, :2], full_seq[:, 2:]

In [6]:
# Display the full sequence together with its source and target halves.
full_seq, source_seq, target_seq

(tensor([[[-1., -1.],
          [-1.,  1.],
          [ 1.,  1.],
          [ 1., -1.]]]),
 tensor([[[-1., -1.],
          [-1.,  1.]]]),
 tensor([[[ 1.,  1.],
          [ 1., -1.]]]))

In [67]:
# Seed before building the model so its randomly initialised weights are reproducible.
torch.manual_seed(21)
encoder = Encoder(hidden_dim=2, n_features=2)
# Encode the source corners; the slice keeps only the last time step
# while preserving the sequence axis.
hidden_seq = encoder(source_seq)
hidden_final = hidden_seq[:, -1:, :]

In [39]:
# Display the encoder output at the last time step of the source sequence.
hidden_final

tensor([[[ 0.3105, -0.5263]]], grad_fn=<SliceBackward0>)

## Decoder

In [40]:
class Decoder(nn.Module):
    """GRU decoder that predicts one point at a time.

    Args:
        hidden_dim: Size of the GRU hidden state.
        n_features: Number of features per time step (input and output).
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, hidden_dim, n_features, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_features = n_features
        self.num_layers = num_layers
        # Running hidden state; set by init_hidden() and updated by forward().
        self.hidden = None
        self.rnn = nn.GRU(
            input_size=self.n_features,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            batch_first=True,
        )
        # Maps a hidden state back to feature space.
        self.regression = nn.Linear(self.hidden_dim, self.n_features)

    def init_hidden(self, hidden_seq):
        """Initialise the hidden state from the last step of ``hidden_seq``.

        ``hidden_seq`` is indexed as (batch, seq_len, hidden); the last
        step is kept and its first two axes are swapped, giving the
        (1, batch, hidden) layout stored in ``self.hidden``.
        """
        last_step = hidden_seq[:, -1:]
        self.hidden = last_step.permute(1, 0, 2)

    def forward(self, X):
        """Advance the GRU with ``X`` and return the prediction for its last step.

        The hidden state in ``self.hidden`` is used as the initial state
        and replaced by the GRU's new final state. The result has shape
        (batch, 1, n_features).
        """
        rnn_out, self.hidden = self.rnn(X, self.hidden)
        prediction = self.regression(rnn_out[:, -1:])
        return prediction.view(-1, 1, self.n_features)

In [41]:
# Seed before building the decoder so the printed values are reproducible.
torch.manual_seed(21)
decoder = Decoder(hidden_dim=2, n_features=2)
# The decoder starts from the last step of the encoder's hidden sequence.
decoder.init_hidden(hidden_seq)

target_len = 2
# The first decoder input is the last point of the source sequence.
inputs = source_seq[:, -1:, :]
for i in range(target_len):
    print(f'Hidden {decoder.hidden}')
    out = decoder(inputs)
    print(f'Output {out}\n')
    # Feed the prediction back in as the next input.
    inputs = out

Hidden tensor([[[ 0.3105, -0.5263]]], grad_fn=<PermuteBackward0>)
Output tensor([[[-0.2339,  0.4702]]], grad_fn=<ViewBackward0>)

Hidden tensor([[[ 0.3913, -0.6853]]], grad_fn=<StackBackward0>)
Output tensor([[[-0.0226,  0.4628]]], grad_fn=<ViewBackward0>)



## Teacher forcing

In [47]:
# Seed before building the decoder so the printed values are reproducible.
torch.manual_seed(21)
decoder = Decoder(hidden_dim=2, n_features=2)
decoder.init_hidden(hidden_seq)

teacher_forcing_prob = 0.5
target_len = 2
# The first decoder input is the last point of the source sequence.
inputs = source_seq[:, -1:, :]
for i in range(target_len):
    print(f'Hidden {decoder.hidden}')
    out = decoder(inputs)
    print(f'Output {out}\n')

    # With probability teacher_forcing_prob, feed the true target point
    # as the next input; otherwise feed back the decoder's own prediction.
    inputs = target_seq[:, i:i+1] if torch.rand(1) <= teacher_forcing_prob else out

Hidden tensor([[[ 0.3105, -0.5263]]], grad_fn=<PermuteBackward0>)
Output tensor([[[-0.2339,  0.4702]]], grad_fn=<ViewBackward0>)

Hidden tensor([[[ 0.3913, -0.6853]]], grad_fn=<StackBackward0>)
Output tensor([[[-0.0226,  0.4628]]], grad_fn=<ViewBackward0>)



## Encoder + Decoder

In [87]:
class EncoderDecoder(nn.Module):
    """Sequence-to-sequence model that chains an encoder and a decoder.

    Args:
        encoder: Callable that maps the source sequence to a sequence of
            hidden states; must expose an ``n_features`` attribute.
        decoder: Callable with an ``init_hidden(hidden_seq)`` method that
            maps one input step to one predicted step.
        input_len: Number of leading time steps of ``X`` used as source.
        target_len: Number of time steps to predict.
        teacher_forcing_prob: Probability, at each decoding step during
            training, of feeding the true target point instead of the
            model's own prediction as the next decoder input.
    """

    def __init__(self, encoder, decoder, input_len, target_len, teacher_forcing_prob):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.input_len = input_len
        self.target_len = target_len
        self.teacher_forcing_prob = teacher_forcing_prob
        # Buffer holding the predictions of the most recent forward pass.
        self.outputs = None

    def init_outputs(self, batch_size, device=None, dtype=None):
        """Allocate a zero-filled (batch_size, target_len, n_features) buffer.

        Args:
            batch_size: Size of the leading (batch) dimension.
            device: Device for the buffer; ``None`` uses PyTorch's default.
            dtype: Dtype for the buffer; ``None`` uses PyTorch's default.
        """
        self.outputs = torch.zeros(
            batch_size,
            self.target_len,
            self.encoder.n_features,
            device=device,
            dtype=dtype,
        )

    def store_output(self, i, out):
        """Write the prediction ``out`` for decoding step ``i`` into the buffer."""
        self.outputs[:, i:i+1, :] = out

    def forward(self, X):
        """Encode the source part of ``X`` and decode ``target_len`` steps.

        Args:
            X: Tensor indexed as (batch, seq, features). The first
                ``input_len`` steps are the source; any remaining steps
                are the target, which is only read for teacher forcing
                in training mode.

        Returns:
            The predictions, shaped (batch, target_len, n_features).
        """
        source_seq = X[:, :self.input_len, :]
        target_seq = X[:, self.input_len:, :]
        # Allocate the buffer alongside the input so that storing the
        # predictions neither crosses devices nor silently changes dtype.
        self.init_outputs(X.shape[0], device=X.device, dtype=X.dtype)

        hidden_seq = self.encoder(source_seq)
        self.decoder.init_hidden(hidden_seq)

        # The first decoder input is the last point of the source sequence.
        dec_inputs = source_seq[:, -1:]

        for i in range(self.target_len):
            out = self.decoder(dec_inputs)
            self.store_output(i, out)

            # Teacher forcing is a training-only device. Gating on
            # self.training (rather than comparing a draw from [0, 1)
            # against 0) guarantees the target is never read in eval
            # mode, where X may contain only the source sequence.
            use_teacher_forcing = (
                self.training
                and torch.rand(1).item() <= self.teacher_forcing_prob
            )
            if use_teacher_forcing:
                dec_inputs = target_seq[:, i:i+1, :]
            else:
                dec_inputs = out
        return self.outputs

In [88]:
# Combine the encoder and decoder built above: two source points in,
# two predicted points out, teacher forcing half of the time in training.
encdec = EncoderDecoder(
    encoder,
    decoder,
    input_len=2,
    target_len=2,
    teacher_forcing_prob=0.5,
)

In [89]:
# Switch to training mode (teacher forcing is only applied while training),
# then run the model on the full four-point sequence.
encdec.train()
encdec(full_seq)

tensor([[[-0.2339,  0.4702],
         [ 0.2265,  0.4529]]], grad_fn=<CopySlices>)

## 附录

## 张量运算

In [79]:
# Random 3-D tensor of shape (5, 10, 5) used to illustrate slicing along dim 1.
tensor_3d = torch.randn(5, 10, 5)
tensor_3d

tensor([[[-0.4564, -0.7840,  0.1247, -0.9606, -0.6569],
         [-0.7334,  0.1145, -1.0212,  0.1640,  1.6930],
         [ 0.0648,  0.4329, -0.1878, -1.6598,  0.8662],
         [ 2.1388, -0.9376, -1.1139,  0.0666,  0.0629],
         [ 0.8676,  0.4623, -0.1737, -0.4213,  0.6469],
         [-0.9211, -0.1064, -0.9334,  0.9722,  1.2287],
         [-0.0681, -1.2094,  0.0790,  0.3298, -0.7183],
         [-1.6490,  2.4531,  1.4808, -0.9764, -1.3599],
         [-0.4461, -0.8618, -0.1400, -1.4759,  1.7178],
         [ 2.1376,  1.3808, -0.5966, -0.5782, -0.0857]],

        [[-1.5045, -0.0218,  2.0812,  2.8357, -0.4534],
         [-0.6158, -1.6141,  1.2543,  0.5157,  0.4364],
         [-0.2901,  0.6084,  1.3221, -0.0471,  0.4305],
         [-0.5746, -0.1764, -0.0164,  2.0941,  0.2338],
         [ 1.8353, -0.1671, -1.1604, -0.6550, -0.4926],
         [ 0.7152,  0.0294,  0.6620, -0.2393,  1.1722],
         [ 0.0397,  1.8118, -1.2269, -0.4904,  1.2061],
         [ 0.4260, -0.3638,  0.9938, -1.6244, 

In [82]:
tensor_3d[:,-1] # take the last entry along dim 1 for every batch element; integer indexing drops that dimension

tensor([[ 2.1376,  1.3808, -0.5966, -0.5782, -0.0857],
        [ 0.4172,  0.9692,  1.6687,  0.6110, -1.4135],
        [ 0.6346, -0.0921, -0.2599,  1.8829,  1.0246],
        [-0.8092, -1.5832, -0.7156,  0.2863,  0.8670],
        [ 1.1259, -0.7252, -2.4818, -2.1175, -0.1668]])

In [86]:
tensor_3d[:, -1:,:] # take the last entry along dim 1 for every batch element while keeping that dimension; equivalent to tensor_3d[:, -1:]

tensor([[[ 2.1376,  1.3808, -0.5966, -0.5782, -0.0857]],

        [[ 0.4172,  0.9692,  1.6687,  0.6110, -1.4135]],

        [[ 0.6346, -0.0921, -0.2599,  1.8829,  1.0246]],

        [[-0.8092, -1.5832, -0.7156,  0.2863,  0.8670]],

        [[ 1.1259, -0.7252, -2.4818, -2.1175, -0.1668]]])

In [92]:
# Take the first three entries along dim 1 for every batch element; all dimensions are kept.
tensor_3d[:,0:3,:]

tensor([[[-0.4564, -0.7840,  0.1247, -0.9606, -0.6569],
         [-0.7334,  0.1145, -1.0212,  0.1640,  1.6930],
         [ 0.0648,  0.4329, -0.1878, -1.6598,  0.8662]],

        [[-1.5045, -0.0218,  2.0812,  2.8357, -0.4534],
         [-0.6158, -1.6141,  1.2543,  0.5157,  0.4364],
         [-0.2901,  0.6084,  1.3221, -0.0471,  0.4305]],

        [[-0.5241,  2.4863, -0.3260,  0.4464, -0.0639],
         [-0.5079,  1.5866, -0.3553, -0.1205, -0.3859],
         [-1.6351,  0.2871,  0.7942, -0.8115, -0.1827]],

        [[ 0.1964,  0.3782,  0.5305, -1.0835, -0.1533],
         [-0.0122, -1.3114,  0.1299,  0.8989,  0.0366],
         [ 0.6419,  0.0457, -0.3778,  0.7756,  0.4564]],

        [[ 0.5502, -1.1774,  0.1485,  0.1360,  0.8631],
         [ 1.2926, -0.0440, -1.4618, -0.7746, -0.8272],
         [ 0.5184,  1.3051, -0.1559, -0.7755, -0.4430]]])