<a href="https://colab.research.google.com/github/dlmacedo/ml-dl-notebooks/blob/master/notebooks/deep-learning/PYTORCH_two_layer_net_module.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%matplotlib inline


PyTorch: Custom nn Modules
--------------------------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing the squared Euclidean distance between its predictions and y.

This implementation defines the model as a custom Module subclass. Whenever you
want a model more complex than a simple sequence of existing Modules, you will
need to define it this way.



In [2]:
import torch


class TwoLayerNet(torch.nn.Module):
    """Fully-connected network with a single hidden layer and ReLU activation.

    Args:
        D_in: number of features in each input sample.
        H: number of units in the hidden layer.
        D_out: number of features in each output sample.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        # Assigning Modules as attributes registers them as submodules, so
        # their weights and biases are exposed through self.parameters().
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Compute predictions for a batch of inputs.

        Args:
            x: Tensor whose last dimension has size D_in.

        Returns:
            Tensor of predictions whose last dimension has size D_out.
        """
        hidden = self.linear1(x)
        # Clamping at zero from below is the ReLU non-linearity.
        activated = torch.clamp(hidden, min=0)
        return self.linear2(activated)


# Problem dimensions: batch size, input features, hidden units, output features.
N = 64
D_in = 1000
H = 100
D_out = 10

# Random inputs and random targets for the network to fit.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Instantiate the custom Module defined above.
model = TwoLayerNet(D_in, H, D_out)

# Squared error summed over every element (not averaged), optimized with plain
# SGD. model.parameters() supplies the learnable parameters of the two
# nn.Linear members of the model.
criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-4)

for t in range(500):
    # Drop gradients left over from the previous step; backward() would
    # otherwise accumulate into them.
    optimizer.zero_grad()

    # Forward pass: predictions for the whole batch, then the loss.
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print(t, loss.item())

    # Backward pass, followed by one SGD update of the weights.
    loss.backward()
    optimizer.step()

0 693.9177856445312
1 642.2050170898438
2 597.47412109375
3 558.2883911132812
4 524.0391845703125
5 493.39422607421875
6 465.6888427734375
7 440.2690124511719
8 416.77215576171875
9 394.9810485839844
10 374.5417785644531
11 355.3805236816406
12 337.3973693847656
13 320.3566589355469
14 304.140625
15 288.7266845703125
16 274.0251770019531
17 260.00701904296875
18 246.59213256835938
19 233.74514770507812
20 221.48757934570312
21 209.79330444335938
22 198.6196746826172
23 187.9658660888672
24 177.79689025878906
25 168.08602905273438
26 158.8075714111328
27 149.9390869140625
28 141.51988220214844
29 133.5194854736328
30 125.90557098388672
31 118.66537475585938
32 111.80897521972656
33 105.30065155029297
34 99.1418685913086
35 93.29566955566406
36 87.76183319091797
37 82.53511810302734
38 77.59910583496094
39 72.94221496582031
40 68.55552673339844
41 64.42570495605469
42 60.53345489501953
43 56.87703323364258
44 53.44154357910156
45 50.211219787597656
46 47.175148010253906
47 44.31939315795