In [90]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class RNNModel(nn.Module):
    """Token-level language model: embedding -> stacked ``nn.RNN`` -> linear decoder.

    Args:
        input_size: Vocabulary size. Used as the number of embedding rows and
            as the number of logits produced per time step.
        hidden_size: Hidden-state width of each RNN layer (per direction).
        num_layers: Number of stacked RNN layers.
        is_bidirectional: Whether the RNN also processes the sequence in reverse.
        device: Device on which every sub-module's parameters are allocated.
    """

    def __init__(self, input_size, hidden_size, num_layers, is_bidirectional, device):
        super().__init__()
        self.emb_size = 50
        self.input_size = input_size
        self.hidden_size = hidden_size
        # A bidirectional RNN concatenates forward and backward features, so
        # the decoder must accept twice as many inputs in that case.
        self.num_directions = 2 if is_bidirectional else 1

        # Allocate all three sub-modules on the same device; previously only
        # the RNN honoured `device`.
        self.emb = nn.Embedding(input_size, self.emb_size, device=device)
        self.rnn = nn.RNN(input_size=self.emb_size, hidden_size=hidden_size,
                          batch_first=True, num_layers=num_layers,
                          bidirectional=is_bidirectional, device=device)
        self.linear = nn.Linear(hidden_size * self.num_directions, input_size,
                                device=device)

    def forward(self, X, H=None):
        """Compute next-token logits for every position in ``X``.

        Args:
            X: Integer token indices, either batched ``(batch, steps)``
                (the RNN is built with ``batch_first=True``) or unbatched
                ``(steps,)``.
            H: Optional initial hidden state forwarded to ``nn.RNN``; ``None``
                lets the RNN start from its default initial state.

        Returns:
            Tuple ``(O, H)``: ``O`` holds the logits with all leading
            dimensions flattened, shape ``(N, input_size)``; ``H`` is the final
            hidden state returned by the RNN.
        """
        X = self.emb(X)
        Y, H = self.rnn(X, H)
        # Flatten every leading dimension but keep the true feature width
        # (hidden_size * num_directions) so rows stay one-per-time-step.
        O = self.linear(Y.reshape(-1, Y.shape[-1]))
        return O, H

In [91]:
from tqdm import tqdm
import torch.optim as optim
from d2l import torch as d2l
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer used to record the per-step training curve.
writer = SummaryWriter()

# Data: minibatches of character indices from "The Time Machine".
batch_size, num_steps, use_random_iter = 32, 35, True
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps, use_random_iter)
epochs, lr , num_hidden, num_feature = 500, 0.001, 500, len(vocab)

device = 'cpu'
# `.to(device)` guarantees every sub-module (not just the RNN) lives on `device`.
net = RNNModel(input_size=num_feature, hidden_size=num_hidden,
               num_layers=2, is_bidirectional=False, device=device).to(device)

updater = optim.Adam(net.parameters(), lr=lr)
criteria = nn.CrossEntropyLoss()

# Make the cell safe to re-run after a later cell has switched to eval mode.
net.train()

step = 0
bar = tqdm(range(epochs))
for epoch in bar:
    for x, y in train_iter:
        # Inputs and targets must sit on the same device as the model.
        x, y = x.to(device), y.to(device)

        # No hidden state is carried between minibatches: each call starts
        # from the RNN's default initial state.
        y_hat, _ = net(x)
        # `y_hat` is already (batch * steps, num_feature); flatten targets to match.
        loss = criteria(y_hat, y.reshape(-1))
        updater.zero_grad()
        loss.backward()

        d2l.grad_clipping(net, 1)
        updater.step()

        step += 1
        # The reported quantity is exp(cross-entropy), i.e. perplexity; it is
        # computed once and reused for both TensorBoard and the progress bar.
        # The "loss" tag is kept so existing TensorBoard runs stay comparable.
        perplexity = torch.exp(loss.detach()).cpu()
        writer.add_scalar("loss", perplexity.item(), step)
        bar.set_postfix_str(str(perplexity))

# Push any buffered events to disk so the curve is complete in TensorBoard.
writer.flush()

100%|██████████| 500/500 [03:08<00:00,  2.66it/s, tensor(1.2884)]


In [116]:
prefix = 'the time machine'
num_preds = 100  # number of characters to generate after the prefix

# Encode the prefix as token indices on the same device as the model.
new_prefix = torch.tensor(vocab[list(prefix)],
                          device=next(net.parameters()).device)

# `output` starts with the last prefix token, which seeds the first prediction.
output = [new_prefix[-1]]

net.eval()
# Inference only: skip autograd bookkeeping so the carried hidden state does
# not keep a growing computation graph alive across generation steps.
with torch.no_grad():
    # Warm up the hidden state on every prefix token except the last one.
    # The 1-D index tensor is handled by the RNN as a single unbatched sequence.
    H = None
    if len(new_prefix) > 1:
        Y, H = net(new_prefix[:-1])  # only H is used from this call
    for _step in range(num_preds):
        # Feed the most recent token and greedily pick the most likely next one.
        Y, H = net(output[-1].reshape(-1), H)
        output.append(torch.argmax(Y, dim=1))

print(prefix + ''.join(vocab.to_tokens(output[1:])))

the time machine by h g wellsithe time traveller smiled are you sure we can move freely inspace right and left we ca


In [114]:
# Inspect the trained model's parameters: list the name of every entry in the
# state dict, then show the input-to-hidden bias of the first RNN layer.
# (Iterate over net.named_parameters() instead to look at per-tensor flags
# such as requires_grad.)
params = net.state_dict()
print(params.keys())

print(params['rnn.bias_ih_l0'])

odict_keys(['emb.weight', 'rnn.weight_ih_l0', 'rnn.weight_hh_l0', 'rnn.bias_ih_l0', 'rnn.bias_hh_l0', 'rnn.weight_ih_l1', 'rnn.weight_hh_l1', 'rnn.bias_ih_l1', 'rnn.bias_hh_l1', 'linear.weight', 'linear.bias'])
tensor([-7.0853e-03, -1.2100e-01, -2.6153e-02,  3.0991e-02, -2.1006e-02,
         4.4128e-02, -4.9681e-02,  2.8095e-02,  1.2596e-03, -4.6422e-02,
         4.0742e-02,  9.4246e-02,  2.4218e-02, -1.7016e-02, -6.3222e-02,
        -3.8537e-03, -2.3097e-03,  5.7354e-02,  4.7982e-02, -7.9586e-02,
         2.5663e-02, -5.3639e-02,  4.8687e-03,  1.0056e-02,  4.1611e-02,
         2.7002e-02, -8.7209e-03, -7.4684e-02,  1.0778e-01, -3.5348e-02,
        -5.5436e-02, -4.2964e-02, -7.6158e-03, -1.0479e-01,  1.4320e-02,
        -2.0866e-02,  1.8573e-02,  1.4275e-01,  2.1370e-02, -3.8222e-02,
         5.2647e-02,  6.3794e-02, -9.9914e-02,  8.8622e-02, -3.6916e-02,
        -6.0078e-02,  5.8160e-02,  8.4744e-03,  9.7092e-02, -5.5710e-02,
        -3.5376e-02,  1.1966e-02, -6.8065e-02, -2.3710e-03,