<a href="https://colab.research.google.com/github/algodigger/playground/blob/main/RNN_vs_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The purpose of the notebook to demonstrate differences between RNN and GRU

In [21]:
import torch
import torch.nn as nn

In [28]:
class RNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, num_layers):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers,  batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x):
    x, _ = self.rnn(x)
    x = self.fc(x)
    return x


In [29]:
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
      x, _ = self.gru(x)
      x = self.fc(x)
      return x

#### Generate data

In [30]:
data = []
for i in range(1000):
  data.append((list(range(i, i+10)), list(range(i+1, i+1+10))))
  # data = [(list(range(10)), list(range(1, 11))), (list(range(10, 20)), list(range(11, 21)))]

random.shuffle(data)

print(data[0])


([674, 675, 676, 677, 678, 679, 680, 681, 682, 683], [675, 676, 677, 678, 679, 680, 681, 682, 683, 684])


In [87]:
def train_RNN_GRU(input_size, hidden_size, output_size, num_layers, lr, epochs, data):
    # Initialize models
    rnn_model = RNN(input_size, hidden_size, output_size, num_layers=num_layers)
    gru_model = GRU(input_size, hidden_size, output_size, num_layers=num_layers)

    # Define loss function and optimizer
    criterion = nn.MSELoss()
    rnn_optimizer = torch.optim.Adam(rnn_model.parameters(), lr=lr)
    gru_optimizer = torch.optim.Adam(gru_model.parameters(), lr=lr)

    best_rnn_model_state = None
    best_gru_model_state = None
    best_rnn_loss = float('inf')
    best_gru_loss = float('inf')

    for epoch in range(epochs):
        rnn_total_loss = 0
        gru_total_loss = 0
        for input_sequence, target in data:
            input_sequence = torch.Tensor(input_sequence).view(len(input_sequence), 1, -1)
            target = torch.Tensor(target).view(len(target), -1)
            rnn_optimizer.zero_grad()
            output_seq = rnn_model(input_sequence)
            rnn_loss = criterion(output_seq, target)
            rnn_loss.backward()
            rnn_optimizer.step()
            rnn_total_loss += rnn_loss.item()

            gru_optimizer.zero_grad()
            output_seq = gru_model(input_sequence)
            gru_loss = criterion(output_seq, target)
            gru_loss.backward()
            gru_optimizer.step()
            gru_total_loss += gru_loss.item()

        rnn_avg_loss = rnn_total_loss / len(data)
        gru_avg_loss = gru_total_loss / len(data)

        # Save best models based on validation loss
        if rnn_avg_loss < best_rnn_loss:
            best_rnn_loss = rnn_avg_loss
            best_rnn_model_state = rnn_model.state_dict()

        if gru_avg_loss < best_gru_loss:
            best_gru_loss = gru_avg_loss
            best_gru_model_state = gru_model.state_dict()

        print(f"Epoch {epoch+1}, RNN Loss: {rnn_avg_loss}, GRU Loss: {gru_avg_loss}")

    rnn_model.load_state_dict(best_rnn_model_state)
    gru_model.load_state_dict(best_gru_model_state)

    return rnn_model, gru_model


In [88]:
# Test the model
def test(model, data, criterion = nn.MSELoss()):
  total_loss = 0
  for input_sequence, target in data:
    input_sequence = torch.Tensor(input_sequence).view(len(input_sequence), 1, -1)
    target = torch.Tensor(target).view(len(target), -1)
    output = model(input_sequence)
    total_loss += criterion(output, target).item()
    return total_loss / len(data)
  print(f"Test loss is {total_loss}")

In [89]:
def predict(rnn_model, gru_model, seuqence=list(range(10, 20))):
  input_sequence = torch.Tensor(seuqence).view(10, 1, -1)

  rnn_output = rnn_model(input_sequence)
  gru_output = gru_model(input_sequence)

  # print(rnn_output)
  # print(rnn_output)


  print(f"Sequence {seuqence}")
  print(f'RNN Predicted next number: {rnn_output[-1].item():.4f}')
  print(f'GRU Predicted next number: {gru_output[-1].item():.4f}')


#### Train with different sets of hyperparams


In [93]:
input_size = 1
hidden_size = 32
output_size = 1
number_layers = 1
lr = 0.001
epochs = 100
rnn_model_l1, gru_model_l2 = train_RNN_GRU(input_size, hidden_size, output_size, number_layers, lr, epochs, data)
predict(rnn_model_l1, gru_model_l2)

Epoch 1, RNN Loss: 324846.49424635124, GRU Loss: 323921.0062001715
Epoch 2, RNN Loss: 298276.3157689858, GRU Loss: 295318.265740613
Epoch 3, RNN Loss: 273870.03074251843, GRU Loss: 270655.40552967455
Epoch 4, RNN Loss: 251327.6367607479, GRU Loss: 248202.42705220127
Epoch 5, RNN Loss: 230458.514427166, GRU Loss: 227499.39215038682
Epoch 6, RNN Loss: 211124.49441254424, GRU Loss: 208339.30668241787
Epoch 7, RNN Loss: 193189.3069345684, GRU Loss: 190577.19956401348
Epoch 8, RNN Loss: 176548.91586934854, GRU Loss: 174118.41173921202
Epoch 9, RNN Loss: 161135.39280129145, GRU Loss: 158899.16072394658
Epoch 10, RNN Loss: 146890.49494703102, GRU Loss: 144824.33179019546
Epoch 11, RNN Loss: 133707.37434609796, GRU Loss: 131801.87572152805
Epoch 12, RNN Loss: 121559.41763469887, GRU Loss: 119807.37838124276
Epoch 13, RNN Loss: 110366.97046214485, GRU Loss: 108765.98821656418
Epoch 14, RNN Loss: 100051.6121596775, GRU Loss: 98588.0135194397
Epoch 15, RNN Loss: 90617.64483088112, GRU Loss: 89277

In [96]:
input_size = 1
hidden_size = 32
output_size = 1
number_layers = 5
lr = 0.01
epochs = 100
rnn_model_l5, gru_model_l5 = train_RNN_GRU(input_size, hidden_size, output_size, number_layers, lr, epochs, data)
predict(rnn_model_l5, gru_model_l5)

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, RNN Loss: 247270.5941183157, GRU Loss: 226722.547326581
Epoch 2, RNN Loss: 126064.52194473839, GRU Loss: 117949.37899271488
Epoch 3, RNN Loss: 91461.65756077194, GRU Loss: 259824.19182653047
Epoch 4, RNN Loss: 84566.93563735866, GRU Loss: 313696.14662215137
Epoch 5, RNN Loss: 83577.81072402191, GRU Loss: 305167.4303747692
Epoch 6, RNN Loss: 83466.45307060433, GRU Loss: 297163.34994507313
Epoch 7, RNN Loss: 83456.52640641308, GRU Loss: 289451.19838191033
Epoch 8, RNN Loss: 83456.18189370251, GRU Loss: 281957.446958395
Epoch 9, RNN Loss: 83456.37611361886, GRU Loss: 274656.01950138283
Epoch 10, RNN Loss: 83456.47313938999, GRU Loss: 267536.6931286602
Epoch 11, RNN Loss: 83456.50681081772, GRU Loss: 260595.29507275962
Epoch 12, RNN Loss: 83456.51857797241, GRU Loss: 253829.47724011232
Epoch 13, RNN Loss: 83456.52030560684, GRU Loss: 247237.83882396508
Epoch 14, RNN Loss: 83456.52202852344, GRU Loss: 240818.95086821652
Epoch 15, RNN Loss: 83456.52110721874, GRU Loss: 234571.508359

In [59]:
import torch
import torch.nn as nn
import random

# Define the model
class NextNumberPredictor(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(NextNumberPredictor, self).__init__()
    self.lstm = nn.LSTM(input_size, hidden_size)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x):
    x, _ = self.lstm(x)
    x = self.fc(x)
    return x

# Train the model
def train(model, data, loss_fn, optimizer, num_epochs):
  for epoch in range(num_epochs):
    for input_sequence, target in data:
      input_sequence = torch.Tensor(input_sequence).view(len(input_sequence), 1, -1)
      target = torch.Tensor(target).view(len(target), -1)

      # Forward pass
      output = model(input_sequence)
      loss = loss_fn(output, target)

      # Backward pass
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    print(f"Loss {loss}")

# Test the model
def test(model, data, loss_fn):
  total_loss = 0
  for input_sequence, target in data:
    input_sequence = torch.Tensor(input_sequence).view(len(input_sequence), 1, -1)
    target = torch.Tensor(target).view(len(target), -1)
    output = model(input_sequence)
    total_loss += loss_fn(output, target).item()
    return total_loss / len(data)


# Setup the model, data, loss function and optimizer
model = NextNumberPredictor(1, 32, 1)
data = []
for i in range(1000):
  data.append((list(range(i, i+10)), list(range(i+1, i+1+10))))
  # data = [(list(range(10)), list(range(1, 11))), (list(range(10, 20)), list(range(11, 21)))]

random.shuffle(data)

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

# Train the model
train(model, data, loss_fn, optimizer, num_epochs=100)

# Use the model to make predictions
input_sequence = torch.Tensor(list(range(10, 20))).view(10, 1, -1)
output = model(input_sequence)
prediction = output[-1].item()
print(output)
print(f'Predicted next number: {prediction:.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Loss 425070.625
Loss 393745.40625
Loss 363516.875
Loss 335637.3125
Loss 308894.125
Loss 281507.6875
Loss 256652.34375
Loss 233518.015625
Loss 211867.375
Loss 191599.59375
Loss 171252.078125
Loss 153077.09375
Loss 136268.6875
Loss 120735.8125
Loss 106401.40625
Loss 93225.8125
Loss 81173.3828125
Loss 70096.2421875
Loss 60069.15625
Loss 51006.3984375
Loss 42868.25390625
Loss 35617.8671875
Loss 29212.080078125
Loss 23564.115234375
Loss 18723.8828125
Loss 14592.8037109375
Loss 11103.7548828125
Loss 8298.5341796875
Loss 6108.85693359375
Loss 4475.17138671875
Loss 3477.737548828125
Loss 2625.169677734375
Loss 2127.0048828125
Loss 2353.9716796875
Loss 2658.392822265625
Loss 1649.530029296875
Loss 1414.6949462890625
Loss 1160.3497314453125
Loss 982.2234497070312
Loss 1121.247314453125
Loss 994.4168090820312
Loss 810.0857543945312
Loss 685.956298828125
Loss 590.5377197265625
Loss 900.513916015625
Loss 778.13623046875
Loss 635.4649047851562
Loss 796.5457763671875
Loss 667.1177978515625
Loss 635.4