In [1]:
import torch
import torch.nn as nn
import torch.autograd as autograd

class RNN(nn.Module):
    """Minimal single-layer vanilla RNN built directly from parameter tensors.

    For every time step ``t`` the module computes::

        h_t = tanh(x_t @ Wxh + h_{t-1} @ Whh + bh)
        y_t = h_t @ Why + by
        p_t = softmax(y_t)        # over the last dimension

    Args:
        input_size: Size of the last dimension of each input step.
        hidden_size: Size of the last dimension of the hidden state.
        output_size: Size of the last dimension of each output step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Creation order is kept stable: it determines both the order of
        # ``parameters()`` and how the global RNG is consumed.
        self.Wxh = torch.nn.Parameter(torch.randn(input_size, hidden_size))
        self.Whh = torch.nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.Why = torch.nn.Parameter(torch.randn(hidden_size, output_size))
        self.bh = torch.nn.Parameter(torch.randn(hidden_size))
        self.by = torch.nn.Parameter(torch.randn(output_size))

    def forward(self, inputs, hprev):
        """Unroll the RNN over ``inputs`` starting from ``hprev``.

        Args:
            inputs: Tensor indexed along its first dimension by time step;
                each ``inputs[t]`` must have ``input_size`` as last dimension.
            hprev: Initial hidden state whose last dimension is
                ``hidden_size``. It may be 1-D or carry leading dimensions;
                those broadcast against ``inputs[t] @ Wxh``.

        Returns:
            A tuple ``(ps, h)`` where ``ps`` is a dict mapping each step
            index ``t`` to the softmax output of that step, and ``h`` is the
            hidden state after the last step (``hprev`` itself when
            ``inputs`` is empty).
        """
        ps = {}
        h = hprev
        for t in range(len(inputs)):
            h = torch.tanh(
                torch.matmul(inputs[t], self.Wxh)
                + torch.matmul(h, self.Whh)
                + self.bh
            )
            y = torch.matmul(h, self.Why) + self.by
            # Normalise over the output dimension, which is always the last
            # one. A fixed ``dim=1`` fails for a 1-D hidden state and picks
            # the wrong axis for hidden states with more than two dimensions.
            ps[t] = torch.softmax(y, dim=-1)
        return ps, h

# Demo configuration.
seq_len = 25       # number of time steps fed to the RNN
hidden_size = 128  # width of the hidden state
# NOTE: the RNN class in this notebook is single-layer; this value is only
# used as the leading dimension of the initial hidden state below.
num_layers = 2
vocab_size = 65    # used as both the input size and the output size

# Plain tensors are enough: autograd.Variable has been deprecated since
# PyTorch 0.4 and merely returns a Tensor.
inputs = torch.randn(seq_len, vocab_size)
hprev = torch.randn(num_layers, hidden_size)

rnn = RNN(vocab_size, hidden_size, vocab_size)
# hprev is rebound to the hidden state after the last time step.
ps, hprev = rnn(inputs, hprev)

# create the graph
# NOTE: every softmax row sums to 1, so this scalar does not actually vary
# with the parameters and the gradient values in `g` are ~0. Only the graph
# structure (kept alive by create_graph=True) is of interest here.
scalar_output = torch.sum(ps[len(inputs)-1])
g = autograd.grad(scalar_output, rnn.parameters(), create_graph=True)


In [3]:
from torchviz import make_dot
# Render the autograd graph behind the gradients `g` to rnn_dag.png.
# Passing the named parameters lets torchviz label the parameter leaf nodes
# (Wxh, Whh, Why, bh, by) instead of leaving them anonymous.
make_dot(g, params=dict(rnn.named_parameters())).render('rnn_dag', format='png')

'rnn_dag.png'