<a href="https://colab.research.google.com/github/ariahosseini/DeepML/blob/main/033_PyTorch_Proj_ThirtyThree_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim


**RNN:**
*   The input tensor of shape (seq_len, batch, input_size) if batch_first=False or (batch, seq_len, input_size) if batch_first=True

*   The term hx (Tensor, optional): Initial hidden state for each element in the batch. Defaults to zero if not provided. It should have the shape (num_layers * num_directions, batch, hidden_size)


*   The output (Tensor): Output features from the last layer of the RNN for each time step. Shape is (seq_len, batch, num_directions * hidden_size) if batch_first=False or (batch, seq_len, num_directions * hidden_size) if batch_first=True

*   The term hn (Tensor): Hidden state for t=seq_len. Shape is (num_layers * num_directions, batch, hidden_size)

## RNN One to One

In [None]:
class RNN_OnetoOne(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN_OnetoOne, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        hx = torch.zeros(1, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, hx)
        out = self.fc(out[:, -1, :])
        return out

In [None]:
# hyperparameters
input_size = 1
hidden_size = 20
output_size = 1
num_epochs = 21
learning_rate = 0.01

In [None]:
# synthetic data
time_steps = torch.linspace(0, 10, 100)
data = torch.sin(time_steps).unsqueeze(1).unsqueeze(1)  # shape: (100, 1, 1)
data.shape, data[0], data[1], data[-1]

(torch.Size([100, 1, 1]),
 tensor([[0.]]),
 tensor([[0.1008]]),
 tensor([[-0.5440]]))

In [None]:
# model, loss function, optimizer
model = RNN_OnetoOne(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
data[:-1].shape, data[1:].shape, data[:-1].size(), data[:-1].size(0)

(torch.Size([99, 1, 1]), torch.Size([99, 1, 1]), torch.Size([99, 1, 1]), 99)

In [None]:
# training loop
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(data[:-1])
    loss = criterion(outputs, data[1:])
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/21], Loss: 0.4471
Epoch [20/21], Loss: 0.4463


  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
# test the model
test_input = torch.tensor([0.1]).unsqueeze(-1).unsqueeze(-1)  # shape: (1, 1, 1)
model.eval()
with torch.no_grad():
    predicted = model(test_input)
    print(f'Predicted value: {predicted.item():.4f}')

Predicted value: 0.1619


## RNN One to Many

In [None]:
class RNN_OnetoMany(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, seq_length):
        super(RNN_OnetoMany, self).__init__()
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        hx = torch.zeros(1, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, hx)
        out = self.fc(out)
        return out

In [None]:
# hyperparameters
input_size = 1
hidden_size = 20
output_size = 1
seq_length = 10
num_epochs = 21
learning_rate = 0.01

In [None]:
# synthetic data
time_steps = torch.linspace(0, 10, 100)
data = torch.sin(time_steps).unsqueeze(1)  # shape: (100, 1)
input_data = data[:-seq_length]
target_data = torch.stack([data[i:i+seq_length] for i in range(len(data)-seq_length)])

In [None]:
data.shape, input_data.shape, target_data.shape

(torch.Size([100, 1]), torch.Size([90, 1]), torch.Size([90, 10, 1]))

In [None]:
# model, loss function, optimizer
model = RNN_OnetoMany(input_size, hidden_size, output_size, seq_length)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# training loop
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(input_data.unsqueeze(1))
    loss = criterion(outputs, target_data)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/21], Loss: 0.2778
Epoch [20/21], Loss: 0.1272


  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
# test the model
test_input = torch.tensor([0.20, 0.20, 0.5]).unsqueeze(-1).unsqueeze(0)  # shape: (1, 3, 1)
model.eval()
with torch.no_grad():
    predicted = model(test_input)
    print(f'Predicted sequence: {predicted.squeeze().numpy()}')

Predicted sequence: [0.22378036 0.12255333 0.45099804]


## RNN Many to One

In [None]:
# define the RNN model
class SentimentRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(SentimentRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        hx = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, hn = self.rnn(x, hx)
        out = self.fc(out[:, -1, :])  # take the output of the last time step
        return out

In [None]:
# hyperparameters
input_size = 50   # example: size of word embeddings
hidden_size = 100
output_size = 1   # sentiment score (e.g., positive or negative)
num_layers = 1
seq_length = 10   # example: length of the sentence
batch_size = 16
num_epochs = 100
learning_rate = 0.01

In [None]:
# synthetic data
x = torch.randn(batch_size, seq_length, input_size)
y = torch.randn(batch_size, output_size)

In [None]:
# initialize the model, loss function, and optimizer
model = SentimentRNN(input_size, hidden_size, output_size, num_layers)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# training loop
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.0078
Epoch [20/100], Loss: 0.0064
Epoch [30/100], Loss: 0.0027
Epoch [40/100], Loss: 0.0005
Epoch [50/100], Loss: 0.0001
Epoch [60/100], Loss: 0.0001
Epoch [70/100], Loss: 0.0000
Epoch [80/100], Loss: 0.0000
Epoch [90/100], Loss: 0.0000
Epoch [100/100], Loss: 0.0000


In [None]:
# test the model
model.eval()
with torch.no_grad():
    test_input = torch.randn(batch_size, seq_length, input_size)
    test_output = model(test_input)
    print(f'Test output: {test_output}')

Test output: tensor([[ 0.1488],
        [-0.0536],
        [-0.0390],
        [-0.3128],
        [-0.1177],
        [ 0.4725],
        [-0.6706],
        [-0.0905],
        [-0.1791],
        [ 0.8394],
        [-0.7296],
        [-0.7030],
        [-0.2583],
        [-0.7808],
        [-0.2949],
        [-0.8092]])


## RNN Many to Many Aligned

In [None]:
class RNN_ManyToManyAligned(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN_ManyToManyAligned, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        hx = torch.zeros(1, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, hx)
        out = self.fc(out)
        return out

In [None]:
# hyperparameters
input_size = 50  # example: word embedding size
hidden_size = 100
output_size = 10  # example: number of POS tags
seq_length = 7  # example: sentence length
num_epochs = 2
learning_rate = 0.01

In [None]:
# synthetic data
x = torch.randn(20, seq_length, input_size)  # 20 sentences, each of length 7 with 50 features
y = torch.randint(0, output_size, (20, seq_length))  # POS tags for each word
x.shape, y.shape

(torch.Size([20, 7, 50]), torch.Size([20, 7]))

In [None]:
y

tensor([[6, 8, 6, 8, 1, 7, 3],
        [4, 1, 6, 5, 6, 2, 7],
        [4, 2, 5, 1, 3, 2, 1],
        [9, 0, 8, 9, 4, 0, 7],
        [0, 8, 4, 8, 6, 9, 3],
        [2, 5, 6, 2, 1, 5, 7],
        [5, 7, 8, 9, 6, 9, 0],
        [5, 4, 8, 7, 0, 8, 5],
        [6, 7, 3, 0, 3, 1, 9],
        [3, 8, 0, 6, 4, 1, 5],
        [7, 9, 4, 9, 7, 9, 5],
        [2, 0, 9, 7, 6, 4, 9],
        [3, 8, 3, 6, 5, 6, 9],
        [4, 3, 1, 1, 5, 4, 2],
        [1, 1, 0, 2, 0, 7, 1],
        [8, 2, 4, 8, 8, 5, 8],
        [5, 3, 4, 8, 0, 3, 8],
        [7, 8, 5, 3, 5, 8, 8],
        [7, 6, 0, 8, 8, 1, 8],
        [2, 6, 2, 9, 4, 4, 8]])

In [None]:
y.view(-1)

tensor([6, 8, 6, 8, 1, 7, 3, 4, 1, 6, 5, 6, 2, 7, 4, 2, 5, 1, 3, 2, 1, 9, 0, 8,
        9, 4, 0, 7, 0, 8, 4, 8, 6, 9, 3, 2, 5, 6, 2, 1, 5, 7, 5, 7, 8, 9, 6, 9,
        0, 5, 4, 8, 7, 0, 8, 5, 6, 7, 3, 0, 3, 1, 9, 3, 8, 0, 6, 4, 1, 5, 7, 9,
        4, 9, 7, 9, 5, 2, 0, 9, 7, 6, 4, 9, 3, 8, 3, 6, 5, 6, 9, 4, 3, 1, 1, 5,
        4, 2, 1, 1, 0, 2, 0, 7, 1, 8, 2, 4, 8, 8, 5, 8, 5, 3, 4, 8, 0, 3, 8, 7,
        8, 5, 3, 5, 8, 8, 7, 6, 0, 8, 8, 1, 8, 2, 6, 2, 9, 4, 4, 8])

In [None]:
# model, loss function, optimizer
model = RNN_ManyToManyAligned(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# training loop
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs.view(-1, output_size), y.view(-1))
    print(outputs.shape, outputs.view(-1, output_size).shape, y.view(-1).shape)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

torch.Size([20, 7, 10]) torch.Size([140, 10]) torch.Size([140])
torch.Size([20, 7, 10]) torch.Size([140, 10]) torch.Size([140])


In [None]:
# test the model
test_input = torch.randn(1, seq_length, input_size)
model.eval()
with torch.no_grad():
    predicted = model(test_input)
    predicted_tags = torch.argmax(predicted, dim=2)
    print(f'Predicted POS tags: {predicted_tags}')

Predicted POS tags: tensor([[3, 5, 8, 5, 8, 0, 5]])


## RNN Many to Many Non-Aligned

In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)

    def forward(self, x):
        hx = torch.zeros(1, x.size(0), self.hidden_size)
        out, hidden = self.rnn(x, hx)
        return out, hidden

class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(output_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        return out, hidden

In [None]:
# hyperparameters
input_size = 50  # example: word embedding size for source language
hidden_size = 100
output_size = 50  # example: word embedding size for target language
seq_length_in = 7  # example: source sentence length
seq_length_out = 5  # example: target sentence length
num_epochs = 100
learning_rate = 0.01

In [None]:
# generate synthetic data
encoder_input = torch.randn(20, seq_length_in, input_size)  # 20 source sentences
decoder_input = torch.randn(20, seq_length_out, output_size)  # 20 target sentences
encoder_input.shape, decoder_input.shape

(torch.Size([20, 7, 50]), torch.Size([20, 5, 50]))

In [None]:
# model, loss function, optimizer
encoder = EncoderRNN(input_size, hidden_size)
decoder = DecoderRNN(hidden_size, output_size)
criterion = nn.MSELoss()
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

In [None]:
# training loop
for epoch in range(num_epochs):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    encoder_output, encoder_hidden = encoder(encoder_input)
    decoder_output, _ = decoder(decoder_input, encoder_hidden)
    loss = criterion(decoder_output, decoder_input)
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.1496
Epoch [20/100], Loss: 0.0415
Epoch [30/100], Loss: 0.0135
Epoch [40/100], Loss: 0.0046
Epoch [50/100], Loss: 0.0016
Epoch [60/100], Loss: 0.0006
Epoch [70/100], Loss: 0.0002
Epoch [80/100], Loss: 0.0001
Epoch [90/100], Loss: 0.0000
Epoch [100/100], Loss: 0.0000


In [None]:
# test the model
test_input = torch.randn(1, seq_length_in, input_size)
test_decoder_input = torch.randn(1, seq_length_out, output_size)
encoder.eval()
decoder.eval()
with torch.no_grad():
    test_encoder_output, test_encoder_hidden = encoder(test_input)
    predicted, _ = decoder(test_decoder_input, test_encoder_hidden)
    print(f'Predicted sequence: {predicted.squeeze().numpy()}')

Predicted sequence: [[-9.58063245e-01 -2.45522428e+00 -4.06330258e-01  7.63738394e-01
  -3.15582603e-01 -1.42913544e+00  1.24821448e+00  8.91585469e-01
   6.63285077e-01  3.54497463e-01  2.33842790e-01 -1.31666267e+00
  -1.44796324e+00 -1.48879087e+00  1.47157431e+00  2.63503581e-01
  -8.45830142e-01 -1.11829735e-01  1.10252976e+00 -8.63244653e-01
  -3.97329498e-03  7.90493309e-01  1.13649726e+00 -1.30242378e-01
   1.17026579e+00 -1.25789240e-01 -1.34844527e-01 -7.98585340e-02
  -5.54546192e-02 -4.72856045e-01  4.10403386e-02 -7.20104337e-01
  -3.84937853e-01  1.38411522e+00 -3.14527124e-01  1.72361463e-01
   2.28510022e-01  7.45017529e-01 -1.89043653e+00  1.33439553e+00
  -8.93124491e-02 -6.20789409e-01  4.37665552e-01  5.19401908e-01
  -1.94509768e+00  3.64615321e-01 -6.85808420e-01 -9.45431590e-01
  -4.51202363e-01 -1.25475675e-01]
 [ 2.09453776e-01 -3.06074679e-01 -1.46475032e-01 -4.80144560e-01
   2.64177561e-01 -4.76834893e-01  5.97176373e-01 -7.20708430e-01
  -4.89589155e-01 -1.

## RNN Multi Layer

In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=2):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, hidden = self.rnn(x, h0)
        return out, hidden

class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers=2):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(output_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        return out, hidden

In [None]:
# hyperparameters
input_size = 50   # example: size of word embeddings for the input sequence
hidden_size = 100
output_size = 50  # example: size of word embeddings for the output sequence
num_layers = 2    # multi-layer RNN
seq_length_in = 7  # length of the input sequence
seq_length_out = 5  # length of the output sequence
batch_size = 16
num_epochs = 100
learning_rate = 0.01

In [None]:
# synthetic data
encoder_input = torch.randn(batch_size, seq_length_in, input_size)  # batch of input sequences
decoder_input = torch.randn(batch_size, seq_length_out, output_size)  # batch of output sequences

In [None]:
# initialize the encoder and decoder
encoder = EncoderRNN(input_size, hidden_size, num_layers)
decoder = DecoderRNN(hidden_size, output_size, num_layers)

In [None]:
# loss function and optimizer
criterion = nn.MSELoss()
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

In [None]:
# training loop
for epoch in range(num_epochs):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    # forward pass through the encoder
    encoder_output, encoder_hidden = encoder(encoder_input)
    # use the encoder's final hidden state from all layers as the decoder's initial hidden state
    decoder_output, _ = decoder(decoder_input, encoder_hidden)
    # compute loss
    loss = criterion(decoder_output, decoder_input)
    loss.backward()
    # update parameters
    encoder_optimizer.step()
    decoder_optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.1439
Epoch [20/100], Loss: 0.0334
Epoch [30/100], Loss: 0.0096
Epoch [40/100], Loss: 0.0034
Epoch [50/100], Loss: 0.0011
Epoch [60/100], Loss: 0.0004
Epoch [70/100], Loss: 0.0002
Epoch [80/100], Loss: 0.0001
Epoch [90/100], Loss: 0.0002
Epoch [100/100], Loss: 0.0001


In [None]:
# test the model
test_input = torch.randn(1, seq_length_in, input_size)
test_decoder_input = torch.randn(1, seq_length_out, output_size)
encoder.eval()
decoder.eval()
with torch.no_grad():
    test_encoder_output, test_encoder_hidden = encoder(test_input)
    test_decoder_output, _ = decoder(test_decoder_input, test_encoder_hidden)
    print(f'Test output: {test_decoder_output}')

Test output: tensor([[[ 0.0783, -1.2449,  0.5573,  0.3132,  0.6333, -0.1096,  0.3452,
          -0.8341, -1.2519, -0.2495, -1.5950, -0.2431,  0.8112,  0.3485,
          -0.9840,  1.1266,  0.7343, -0.1157, -1.4637,  0.2465,  0.2509,
          -0.5281,  0.4140, -1.0209, -0.0131, -1.4702,  0.8495,  0.6432,
          -0.3199,  0.5975, -0.0654,  0.3880,  1.6320,  0.4710, -0.0626,
           0.0529,  3.1643,  0.8532, -1.3011, -0.5377,  0.7130, -1.0162,
          -0.0842, -0.8379, -1.2955, -1.2935, -0.8112, -2.0623, -0.0325,
          -0.9914],
         [ 0.0707, -0.3950, -1.1536,  0.6344,  0.3251, -1.3754,  0.2431,
          -0.3241,  0.6557, -1.5561,  0.5470,  0.5343,  1.8054, -0.4889,
           0.1326, -0.2897, -0.1422, -0.7454,  1.7426, -0.7533,  1.0297,
          -0.1369,  0.1271, -0.9549,  1.4835,  1.0893, -1.6190,  0.2928,
          -0.8291, -0.1619,  0.1448, -1.0044,  1.0291,  0.2951,  2.0484,
           1.9221, -0.2311,  0.0637,  0.3052, -0.1799, -0.3693, -1.4337,
           0.4723,