# Reproduce the paper example with a linear layer followed by a sigmoid

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

## Set up the linear layer with the weights and bias in the paper

In [None]:
# Build a 2-input, 1-output linear layer, then overwrite its random
# initialisation in place with the weights and bias used in the paper example.
linear = nn.Linear(in_features=2, out_features=1)
with torch.no_grad():
    linear.weight.copy_(torch.tensor([[2., -3.]]))
    linear.bias.copy_(torch.tensor([-3.]))

In [None]:
# Sanity check: print the layer summary, its weights and its bias on separate
# lines so they can be compared against the paper by eye.
print('\n'.join(map(str, (linear, linear.weight.data, linear.bias.data))))

Linear(in_features=2, out_features=1, bias=True)
tensor([[ 2., -3.]])
tensor([-3.])


## Your code goes here: finding gradients
1. Create a tensor called `inputs` here (refer to [this documentation](https://pytorch.org/docs/master/generated/torch.tensor.html) for a reminder on how to create a tensor).
2. Use the linear layer followed by a sigmoid to predict the `output`. See the [documentation on predicting with a model here](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.forward) (to call a layer or module, add `()` after its name and pass the input as the single argument). Also see the [documentation on `sigmoid` here](https://pytorch.org/docs/stable/nn.functional.html#sigmoid) (note the equation). You can call it as `F.sigmoid` or, equivalently, `torch.sigmoid`.
3. Call the `backward` method on the `output`.
4. Check `linear.weight.grad` and `linear.bias.grad`. Do they reflect your calculations?

In [None]:
# Single example x = (-1, -2), stored as one row so the tensor has
# shape (1, 2): one sample with two features.
inputs = torch.tensor([-1., -2.]).unsqueeze(0)

In [None]:
# Forward pass: apply the linear layer first, then squash its result
# through a sigmoid to get the prediction.
logits = linear(inputs)
output = torch.sigmoid(logits)

In [None]:
# call backward
# `output` holds a single element, so `backward()` needs no explicit gradient
# argument. The call fills `linear.weight.grad` and `linear.bias.grad` with the
# derivatives of `output` with respect to each parameter.
output.backward()

In [None]:
# Show the gradients autograd stored on the layer's parameters
# (weight first, then bias) as the cell's displayed value.
tuple(param.grad for param in linear.parameters())

(tensor([[-0.1966, -0.3932]]), tensor([0.1966]))

## Applying over batches

### Zero out the grads before introducing the loss

In [None]:
# zero the gradients from above
# Autograd adds into `.grad` on every backward pass, so the values left by the
# earlier `output.backward()` call are cleared in place before the next one.
linear.weight.grad.zero_()
# As the cell's last expression, this call's result is what the notebook echoes.
linear.bias.grad.zero_()

tensor([0.])

In [None]:
# Ten gradient-descent steps on the single example in `inputs`, driving the
# sigmoid output towards a target of 1.
#
# `loss` is the signed error (output - 1), so `loss.backward()` leaves
# d(output)/d(param) in each `.grad`. Scaling that by the error gives the
# gradient of the squared error 0.5 * (output - 1)**2, which is what each
# step descends.

# Step size applied to the error-scaled gradient. 1.0 keeps each step the same
# size as in the recorded run below; use a smaller value for gentler updates.
learning_rate = 1.0

for epoch in range(10):

    output = torch.sigmoid(linear(inputs))
    loss = output - 1.
    loss.backward()

    # Keep a handle on this step's weight gradient for the log line; the
    # parameter's `.grad` attribute is reset further down.
    grad = linear.weight.grad

    # Reduce the single-element error to a plain float so that scaling a
    # gradient never broadcasts it to a new shape (the bias must stay 1-D).
    step = learning_rate * loss.item()

    # Update the existing parameters in place, outside autograd, so the layer
    # keeps the same Parameter objects from one step to the next.
    with torch.no_grad():
        linear.weight -= step * linear.weight.grad
        linear.bias -= step * linear.bias.grad

    # Drop the used gradients so the next backward pass starts from scratch
    # instead of accumulating on top of them.
    linear.weight.grad = None
    linear.bias.grad = None

    print('loss:', loss, 'weights:', linear.weight.data, 'grad:', grad, 'model output:', torch.sigmoid(linear(inputs)))

loss: tensor([[-0.2689]], grad_fn=<SubBackward0>) weights: tensor([[ 1.9471, -3.1058]]) grad: tensor([[-0.1966, -0.3932]]) model output: tensor([[0.7887]], grad_fn=<SigmoidBackward>)
loss: tensor([[-0.2113]], grad_fn=<SubBackward0>) weights: tensor([[ 1.9119, -3.1762]]) grad: tensor([[-0.1666, -0.3333]]) model output: tensor([[0.8218]], grad_fn=<SigmoidBackward>)
loss: tensor([[-0.1782]], grad_fn=<SubBackward0>) weights: tensor([[ 1.8858, -3.2284]]) grad: tensor([[-0.1465, -0.2929]]) model output: tensor([[0.8436]], grad_fn=<SigmoidBackward>)
loss: tensor([[-0.1564]], grad_fn=<SubBackward0>) weights: tensor([[ 1.8652, -3.2696]]) grad: tensor([[-0.1320, -0.2639]]) model output: tensor([[0.8592]], grad_fn=<SigmoidBackward>)
loss: tensor([[-0.1408]], grad_fn=<SubBackward0>) weights: tensor([[ 1.8482, -3.3037]]) grad: tensor([[-0.1210, -0.2419]]) model output: tensor([[0.8711]], grad_fn=<SigmoidBackward>)
loss: tensor([[-0.1289]], grad_fn=<SubBackward0>) weights: tensor([[ 1.8337, -3.3326]