# Sequence to Sequence Modeling: Many to Many

Echoing signal $n$ steps is an example of synchronized many-to-many task.

In [14]:
from res.sequential_tasks import EchoData
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

torch.manual_seed(1)
np.random.seed(3)

## 1. Generate Toy Sequence to Sequence Dataset: Signal Echo

In [15]:
batch_size = 5
echo_step = 3
series_length = 20000
BPTT_T = 20

# EchoData provides input and target data for training a network to
# echo a `series_length`-long stream of data. `.x_batch` contains the input series,
# it has shape `[batch_size, series_length]`; `.y_batch` contains the target data,
# it has the same shape as `.x_batch`.
#
# Unlike other training data in this course, successive batches from a single `EchoData`
# object draw from the same stream. For example, in 08-seq_classification, training data
# has the following format:
#
#   [[S11 S12...S1N], [S21 S22...S2N], ..., [SM1 SM2...SMN]]
#
# where `SIJ` represents the `j`th sample drawn from the `i`th stream. 
#
# However, `EchoData` output has the following format (slicing along the batch dimension):
#
#   [[S11 S21...S1N], [S1(N+1) S1(N+2)...S2(2N)], ..., [S1(MN) S1(MN+1)...SM(MNN)]]
#
# This means that successive batches of data drawn from the same `EchoData` object
# are not independent.
train_data = EchoData(
    echo_step=echo_step,
    batch_size=batch_size,
    series_length=series_length,
    truncated_length=BPTT_T
)
total_values_in_one_chunck = batch_size * BPTT_T
train_size = len(train_data)

test_data = EchoData(
    echo_step=echo_step,
    batch_size=batch_size,
    series_length=series_length,
    truncated_length=BPTT_T,
)
test_size = len(test_data)

In [16]:
# Let's print first 20 timesteps of the first sequences to see the echo data:
print('(1st input sequence)  x:', *train_data.x_batch[0, :20], '... ')
print('(1st target sequence) y:', *train_data.y_batch[0, :20], '... ')

(1st input sequence)  x: 1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 ... 
(1st target sequence) y: 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0 ... 


In [17]:
# batch_size different sequences are created:
print('x_batch:', *(str(d)[1:-1] + ' ...' for d in train_data.x_batch[:, :20]), sep='\n')
print('x_batch size:', train_data.x_batch.shape)
print()
print('y_batch:', *(str(d)[1:-1] + ' ...' for d in train_data.y_batch[:, :20]), sep='\n')
print('y_batch size:', train_data.y_batch.shape)

x_batch:
1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 ...
0 1 0 1 1 0 0 0 1 1 0 0 1 0 1 1 0 0 1 0 ...
1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1 ...
0 1 0 1 0 1 0 0 1 0 1 1 0 1 0 1 1 0 1 0 ...
0 0 1 1 1 1 0 1 1 1 1 0 0 1 1 1 0 0 1 1 ...
x_batch size: (5, 20000)

y_batch:
0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0 ...
0 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 1 1 0 ...
0 0 0 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0 ...
0 0 0 0 1 0 1 0 1 0 0 1 0 1 1 0 1 0 1 1 ...
0 0 0 0 0 1 1 1 1 0 1 1 1 1 0 0 1 1 1 0 ...
y_batch size: (5, 20000)


In [18]:
# In order to use RNNs data is organized into temporal
# chunks of size [batch_size, T, feature_dim]
print('x_chunk:', *train_data.x_chunks[0].squeeze(), sep='\n')
print('1st x_chunk size:', train_data.x_chunks[0].shape)
print()
print('y_chunk:', *train_data.y_chunks[0].squeeze(), sep='\n')
print('1st y_chunk size:', train_data.y_chunks[0].shape)

x_chunk:
[1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0]
[0 1 0 1 1 0 0 0 1 1 0 0 1 0 1 1 0 0 1 0]
[1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1]
[0 1 0 1 0 1 0 0 1 0 1 1 0 1 0 1 1 0 1 0]
[0 0 1 1 1 1 0 1 1 1 1 0 0 1 1 1 0 0 1 1]
1st x_chunk size: (5, 20, 1)

y_chunk:
[0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0]
[0 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 1 1 0]
[0 0 0 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0]
[0 0 0 0 1 0 1 0 1 0 0 1 0 1 1 0 1 0 1 1]
[0 0 0 0 0 1 1 1 1 0 1 1 1 1 0 0 1 1 1 0]
1st y_chunk size: (5, 20, 1)


## 2. Define Sequence Model

In [19]:
class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            nonlinearity='relu',
            batch_first=True
        )
        self.linear = nn.Linear(
            in_features=hidden_size,
            out_features=output_size
        )

    def forward(self, x, hidden):
        # In order to model the fact that successive batches belong to the same stream of data,
        # we share the hidden state across successive invocations.
        y, hidden = self.rnn(x, hidden)
        y = self.linear(y)
        return y, hidden
    
# class SimpleGRU(nn.Module):
#     def __init__(self, input_size, hidden_size, output_size, num_layers=1):
#         super().__init__()
#         self.gru = nn.GRU(
#             input_size=input_size,
#             hidden_size=hidden_size,
#             num_layers=num_layers,
#             batch_first=True
#         )
#         self.linear = nn.Linear(
#             in_features=hidden_size,
#             out_features=output_size
#         )

#     def forward(self, x, hidden):
#         # In order to model the fact that successive batches belong to the same stream of data,
#         # we share the hidden state across successive invocations.
#         y, hidden = self.gru(x, hidden)
#         y = self.linear(y)
#         return y, hidden

## 3. Train and Test Function

In [20]:
def train(model, train_data_gen, criterion, optimizer, device):
    model.train()
    
    # New epoch --> fresh hidden state
    hidden = None   
    correct = 0
    for batch_idx in range(train_size):
        data, target = train_data_gen[batch_idx]
        data, target = torch.from_numpy(data).float().to(device), torch.from_numpy(target).float().to(device)
        
        # Forward pass
        if hidden is not None: hidden.detach_()
        # logits, hidden = model(data, hidden)
        logits, _ = model(data, hidden)
        # logits, _ = model(data)
        
        loss = criterion(logits, target)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        pred = (torch.sigmoid(logits) > 0.5)
        correct += (pred == target.byte()).int().sum().item()/total_values_in_one_chunck
        
    return correct, loss.item()

In [21]:
def test(model, test_data_gen, device):
    model.eval()   
    correct = 0
    # New epoch --> fresh hidden state
    hidden = None
    with torch.no_grad():
        for batch_idx in range(test_size):
            data, target = test_data_gen[batch_idx]
            data, target = torch.from_numpy(data).float().to(device), torch.from_numpy(target).float().to(device)
#             logits, hidden = model(data, hidden)
            if hidden is not None: hidden.detach_()
            logits, hidden = model(data, hidden)
            
            pred = (torch.sigmoid(logits) > 0.5)
            correct += (pred == target.byte()).int().sum().item()/total_values_in_one_chunck

    return correct

## 4. Define RNN model

#### Simple RNN

In [22]:
feature_dim = 1 #since we have a scalar series
h_units = 4
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = SimpleRNN(
    input_size=1,
    hidden_size=h_units,
    output_size=feature_dim
).to(device)
        
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.001)

#### Simple GRU

In [23]:
# feature_dim = 1 #since we have a scalar series
# h_units = 4

# model_gru = SimpleGRU(
#     input_size=1,
#     hidden_size=h_units,
#     output_size=feature_dim
# ).to(device)
        
# criterion = nn.BCEWithLogitsLoss()
# optimizer = optim.RMSprop(model_gru.parameters(), lr=0.001)

## 5.Training the model

In [24]:
# training
n_epochs = 5

for epoch in range(1, n_epochs+1):
    correct, loss = train(model, train_data, criterion, optimizer, device)
    train_accuracy = float(correct)*100/ train_size
    print(f'Train Epoch: {epoch}/{n_epochs}, loss: {loss:.3f}, accuracy {train_accuracy:.1f}%')

TypeError: SimpleRNN.forward() missing 1 required positional argument: 'hidden'

In [None]:
#test
correct = test(model, test_data, device)
test_accuracy = float(correct) * 100 / test_size
print(f'Test accuracy: {test_accuracy:.1f}%')

Test accuracy: 100.0%


In [None]:
# Let's try some echoing
my_input = torch.empty(1, 100, 1).random_(2)
hidden = None
my_out, _ = model(my_input.to(device), hidden)
my_pred = torch.where(my_out > .5, 
                      torch.ones_like(my_out), 
                      torch.zeros_like(my_out)).cpu()


# print(my_input.view(1, -1).byte(), my_pred.view(1, -1).byte(), sep='\n')

# Calculate the expected output for our random input
expected = np.roll(my_input, echo_step)
expected[:, :echo_step] = 0
correct = expected == my_pred.numpy()

print(f'Input: {my_input.view(1, -1).byte()}')
print(f'Target: {np.uint(expected.reshape(1, -1))}')
print(f'Prediction: {my_pred.view(1, -1).byte()}')

print(np.ndarray.flatten(correct))

Input: tensor([[1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,
         0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1,
         0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0,
         0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
         1, 1, 0, 0]], dtype=torch.uint8)
Target: [[0 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 0 1 1 0 0 1 0 0 1 1 1 0 1 0 0 1 1 0 1 1
  1 1 0 0 1 1 0 0 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 0 1 1 1
  1 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1]]
Prediction: tensor([[1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
         1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
         1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
         1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
         1, 0, 0, 1]], dtype=torch.uint8)
[False False  True  True  True  T