In [11]:
import torch
import torch.nn as nn
from torch.autograd import Variable

# Seed torch's global RNG so the randomly initialised layer weights and the
# torch.randn inputs/gradients drawn in later cells are repeatable.
torch.manual_seed(7)

<torch._C.Generator at 0x7efffbb45310>

In [12]:
class ForwardNet(nn.Module):
    """Stack of four linear layers whose backward pass is driven by hand.

    ``forward`` cuts the autograd graph in front of every layer, so each layer
    ends up with its own independent graph rooted at a fresh leaf tensor.
    ``backward`` then walks the layers from last to first, feeding the gradient
    collected on layer ``i + 1``'s input into layer ``i``'s output.

    Attributes:
        layers: the four ``nn.Linear`` modules (3 -> 4 -> 5 -> 6 -> 7). The same
            modules are also reachable as ``fc1`` .. ``fc4``.
        input: leaf tensors fed to each layer during the most recent ``forward``.
        output: tensors produced by each layer during the most recent ``forward``.
    """

    def __init__(self):
        super(ForwardNet, self).__init__()
        self.fc1 = nn.Linear(3, 4)
        self.fc2 = nn.Linear(4, 5)
        self.fc3 = nn.Linear(5, 6)
        self.fc4 = nn.Linear(6, 7)
        self.layers = nn.ModuleList([self.fc1, self.fc2, self.fc3, self.fc4])
        # Created up front so backward() can tell that forward() has not run
        # yet instead of failing on a missing attribute.
        self.input = []
        self.output = []

    def forward(self, x):
        """Run ``x`` through every layer, recording per-layer inputs and outputs.

        Args:
            x: tensor whose last dimension is 3 (the first layer's in_features).

        Returns:
            The output of the last layer. It is connected to the last layer's
            graph only; gradients never reach the tensor passed in as ``x``.
        """
        self.input = []
        self.output = []
        for layer in self.layers:
            # Detach from previous history: a new leaf shares the values but
            # starts a separate graph, so each layer is differentiated alone.
            x = x.detach().requires_grad_(True)
            self.input.append(x)
            x = layer(x)
            self.output.append(x)
        return x

    def backward(self, g):
        """Back-propagate ``g`` through the recorded layers, last to first.

        For each layer the gradient w.r.t. its recorded input is populated and
        the layer index plus the sum of that gradient is printed. Parameter
        ``.grad`` buffers are not cleared here.

        Args:
            g: gradient w.r.t. the final output; must match its shape.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if not self.output:
            raise RuntimeError("backward() called before forward()")
        last = len(self.output) - 1
        for i in range(last, -1, -1):
            # The last layer receives g; every other layer receives the
            # gradient that was just accumulated on the next layer's input.
            upstream = g if i == last else self.input[i + 1].grad
            self.output[i].backward(upstream)
            print(i, self.input[i].grad.sum())

In [13]:
# Run one forward pass through ForwardNet, then push a random upstream gradient
# back through it with the manual layer-by-layer backward().
model = ForwardNet()
model.eval()
# A plain tensor is enough; the deprecated Variable wrapper adds nothing here.
inp = torch.randn(4, 3)
output = model(inp)
gradients = torch.randn(*output.size())
model.backward(gradients)

3 tensor(-2.2411)
2 tensor(-0.2933)
1 tensor(-0.3095)
0 tensor(-0.1896)


In [14]:
class FeedbackNet(nn.Module):
    """Four linear layers with a 0/1 gate on every layer input.

    ``forward`` multiplies each layer's input elementwise by the gate ``z[i]``
    before applying the layer. ``backward`` propagates gradients layer by layer
    and, for each layer, recomputes the gate as the indicator of a strictly
    positive input gradient, then masks that gradient with the new gate before
    handing it to the layer below.

    Note: the gradient on input ``i`` already carries the factor ``z[i]`` from
    the forward multiply, so an entry of ``z[i]`` that has become 0 produces a
    zero gradient on later passes and therefore stays 0.

    Attributes:
        layers: the four ``nn.Linear`` modules (3 -> 4 -> 5 -> 6 -> 7).
        z: list with one gate tensor per layer, initially all ones with shape
            ``(batch_size, layer.in_features)``. Kept as a plain list, not as
            registered buffers.
        input: leaf tensors fed to each layer during the most recent ``forward``.
        output: tensors produced by each layer during the most recent ``forward``.
    """

    def __init__(self, batch_size):
        """Build the layers and open every gate.

        Args:
            batch_size: number of rows in the initial gate tensors.
        """
        super(FeedbackNet, self).__init__()
        self.layers = nn.ModuleList([
            nn.Linear(3, 4),
            nn.Linear(4, 5),
            nn.Linear(5, 6),
            nn.Linear(6, 7),
        ])
        self.z = [torch.ones(batch_size, layer.in_features) for layer in self.layers]
        # Created up front so backward() can tell that forward() has not run
        # yet instead of failing on a missing attribute.
        self.input = []
        self.output = []

    def forward(self, x):
        """Run ``x`` through every gated layer, recording inputs and outputs.

        Args:
            x: tensor that multiplies elementwise with ``z[0]`` and whose last
                dimension is 3.

        Returns:
            The output of the last layer, connected to the last layer's graph
            only.
        """
        self.input = []
        self.output = []
        for gate, layer in zip(self.z, self.layers):
            # Detach from previous history so each layer owns a separate graph.
            x = x.detach().requires_grad_(True)
            self.input.append(x)
            # Apply the hidden gate, then the layer itself.
            x = layer(x * gate)
            self.output.append(x)
        return x

    def backward(self, g):
        """Back-propagate ``g`` last-to-first while refreshing the gates.

        For each layer: populate the gradient on its recorded input, set
        ``z[i]`` to 1 where that gradient is strictly positive and 0 elsewhere,
        overwrite the input gradient with its gated version, and print the
        layer index and the sum of the gated gradient. Parameter ``.grad``
        buffers are not cleared here.

        Args:
            g: gradient w.r.t. the final output; must match its shape.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if not self.output:
            raise RuntimeError("backward() called before forward()")
        last = len(self.output) - 1
        for i in range(last, -1, -1):
            # The last layer receives g; lower layers receive the already
            # gated gradient stored on the next layer's input.
            upstream = g if i == last else self.input[i + 1].grad
            self.output[i].backward(upstream)
            alpha = self.input[i].grad
            self.z[i] = (alpha > 0).float()
            self.input[i].grad = self.z[i] * alpha
            print(i, self.input[i].grad.sum())

In [15]:
# Run FeedbackNet for several forward/backward rounds on the same input so the
# gates recomputed in each backward() are applied by the following forward().
model = FeedbackNet(4)
model.eval()
# A plain tensor is enough; the deprecated Variable wrapper adds nothing here.
inp = torch.randn(4, 3)
# Size the upstream gradient from this cell's own model and input rather than
# from an `output` variable left over from an earlier cell.
gradients = torch.randn(inp.size(0), model.layers[-1].out_features)
for _ in range(5):
    output = model(inp)
    model.backward(gradients)

3 tensor(6.3751)
2 tensor(1.4822)
1 tensor(0.3731)
0 tensor(1.00000e-02 *
       3.2556)
3 tensor(6.3751)
2 tensor(1.4822)
1 tensor(0.3731)
0 tensor(1.00000e-02 *
       3.2556)
3 tensor(6.3751)
2 tensor(1.4822)
1 tensor(0.3731)
0 tensor(1.00000e-02 *
       3.2556)
3 tensor(6.3751)
2 tensor(1.4822)
1 tensor(0.3731)
0 tensor(1.00000e-02 *
       3.2556)
3 tensor(6.3751)
2 tensor(1.4822)
1 tensor(0.3731)
0 tensor(1.00000e-02 *
       3.2556)
