In [None]:
import torch
from torch import nn
from torch import Tensor
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Value handed to train() as `seq_length` by find_best(): the number of
# input samples in each generated sine-wave training sequence.
SEQ_LENGTH = 20

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
class RNN(nn.Module):
    def __init__(
        self,
        input_size: int = 1,
        output_size: int = 1,
        hidden_dim: int = 32,
        n_layers: int = 4
    ) -> None:
        super(RNN, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x: Tensor, hidden: Tensor) -> tuple[Tensor, Tensor]:
        r_out, hidden = self.rnn(x, hidden)
        r_out = r_out.view(-1, self.hidden_dim)  
        
        output = self.fc(r_out)
        
        return output, hidden

In [70]:
def train(
    *,
    rnn: nn.Module,
    n_steps: int,
    optimizer: torch.optim.Optimizer,
    criterion: nn.MSELoss,
    seq_length: int,
    print_every: int | None = None,
) -> tuple[nn.Module, list[float]]:
    rnn.to(DEVICE)
    losses = []
    hidden = None

    for step in range(n_steps):
        time_steps = np.linspace(step * np.pi, step * np.pi + 10, seq_length + 1)
        data = np.sin(time_steps)
        data.resize((seq_length + 1, 1))

        x = data[:-1]
        y = data[1:]

        x_tensor = torch.Tensor(x).unsqueeze(0).to(DEVICE)
        y_tensor = torch.Tensor(y).to(DEVICE)

        prediction, hidden = rnn(x_tensor, hidden)

        hidden = hidden.detach()

        loss = criterion(prediction, y_tensor)
        optimizer.zero_grad()
        loss.backward()

        torch.nn.utils.clip_grad_norm_(rnn.parameters(), max_norm=5)
        optimizer.step()

        losses.append(loss.item())

        if print_every is not None and (step % print_every == 0):
            print('Loss: ', loss.item())
            plt.plot(time_steps[1:], x, 'r.')
            plt.plot(time_steps[1:], prediction.cpu().data.numpy().flatten(), 'b.')
            plt.show()

    return rnn, losses


In [74]:
def find_best(
    *,
    hidden_dim: list[int],
    n_layers: list[int],
    lr: list[float],
    budget: int = 1,
    print_every: int | None = None,
):
    results: list[tuple[int, int, float, float]] = []
    for hd in hidden_dim:
        for nl in n_layers:
            for l in lr:
                n_steps = int(budget // l)
                print(f"... hidden dim: {hd}, Layers: {nl}, LR: {l}, Steps: {n_steps}")

                rnn = RNN(hidden_dim=hd, n_layers=nl)
                criterion = nn.MSELoss()
                optimizer = torch.optim.Adam(rnn.parameters(), lr=l)

                trained_rnn, losses = train(rnn=rnn, n_steps=n_steps, print_every=print_every, optimizer=optimizer, criterion=criterion, seq_length=SEQ_LENGTH)
                min_loss = min(losses)

                results.append((hd, nl, l, min_loss))

    for r in sorted(results, key=lambda x: x[3]):
        print(f"Hidden dim: {r[0]}, Layers: {r[1]}, LR: {r[2]}, Loss: {r[3]}")

    return results


# Sweep two hidden sizes, three depths and three learning rates with the
# default budget. The call is left as a bare expression so the notebook
# displays the returned result list.
find_best(
    hidden_dim=[32, 64],
    n_layers=[1, 2, 4],
    lr=[0.01, 0.001, 0.0001],
)


... hidden dim: 32, Layers: 1, LR: 0.01, Steps: 99
... hidden dim: 32, Layers: 1, LR: 0.001, Steps: 999
... hidden dim: 32, Layers: 1, LR: 0.0001, Steps: 9999
... hidden dim: 32, Layers: 2, LR: 0.01, Steps: 99
... hidden dim: 32, Layers: 2, LR: 0.001, Steps: 999
... hidden dim: 32, Layers: 2, LR: 0.0001, Steps: 9999
... hidden dim: 32, Layers: 4, LR: 0.01, Steps: 99
... hidden dim: 32, Layers: 4, LR: 0.001, Steps: 999
... hidden dim: 32, Layers: 4, LR: 0.0001, Steps: 9999
... hidden dim: 64, Layers: 1, LR: 0.01, Steps: 99
... hidden dim: 64, Layers: 1, LR: 0.001, Steps: 999
... hidden dim: 64, Layers: 1, LR: 0.0001, Steps: 9999
... hidden dim: 64, Layers: 2, LR: 0.01, Steps: 99
... hidden dim: 64, Layers: 2, LR: 0.001, Steps: 999
... hidden dim: 64, Layers: 2, LR: 0.0001, Steps: 9999
... hidden dim: 64, Layers: 4, LR: 0.01, Steps: 99
... hidden dim: 64, Layers: 4, LR: 0.001, Steps: 999
... hidden dim: 64, Layers: 4, LR: 0.0001, Steps: 9999
Hidden dim: 64, Layers: 4, LR: 0.001, Loss: 1.422892546543153e-05

[(32, 1, 0.01, 0.005776212550699711),
 (32, 1, 0.001, 0.0034782779403030872),
 (32, 1, 0.0001, 0.002519418252632022),
 (32, 2, 0.01, 0.0004842009802814573),
 (32, 2, 0.001, 1.7583452063263394e-05),
 (32, 2, 0.0001, 0.00037158626946620643),
 (32, 4, 0.01, 0.001130763441324234),
 (32, 4, 0.001, 0.0005662312032654881),
 (32, 4, 0.0001, 0.004026580136269331),
 (64, 1, 0.01, 0.007574848365038633),
 (64, 1, 0.001, 0.0038084990810602903),
 (64, 1, 0.0001, 0.0014218128053471446),
 (64, 2, 0.01, 0.003138075117021799),
 (64, 2, 0.001, 0.00014805930550210178),
 (64, 2, 0.0001, 0.00021140110038686544),
 (64, 4, 0.01, 0.0024483713787049055),
 (64, 4, 0.001, 1.422892546543153e-05),
 (64, 4, 0.0001, 0.0006910807569511235)]