In [None]:
%matplotlib inline


PyTorch: Custom nn Modules
--------------------------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

This implementation defines the model as a custom subclass of torch.nn.Module.
Whenever you want a model more complex than a simple sequence of existing
Modules, you will need to define your model this way.



In [9]:
import torch


class TwoLayerNet(torch.nn.Module):
    """
    Fully-connected network with a single hidden layer: an affine map, a ReLU
    non-linearity, and a second affine map.
    """

    def __init__(self, D_in, H, D_out):
        """
        Create the two nn.Linear layers and store them as attributes.

        D_in is the input dimension, H is the hidden dimension and D_out is
        the output dimension.
        """
        # Zero-argument super() resolves the base class (torch.nn.Module)
        # without naming it; the base initialiser runs before any submodule
        # is assigned.
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        Map a Tensor of inputs to a Tensor of predictions.

        The input goes through the first linear layer, is clamped below at
        zero (which is exactly the ReLU function), and is then passed through
        the second linear layer.
        """
        hidden = self.linear1(x)
        activated = torch.clamp(hidden, min=0)  # ReLU expressed as a clamp
        return self.linear2(activated)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Construct our loss function and an Optimizer. model.parameters() hands the
# SGD constructor the learnable parameters of the two nn.Linear modules which
# are members of the model.
# reduction='sum' adds up the squared errors over all elements instead of
# averaging them; it is the replacement for the deprecated size_average=False.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(500):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss; .item() extracts the Python number from the
    # loss Tensor.
    loss = criterion(y_pred, y)
    print(t, loss.item())

    # Zero the gradients left over from the previous iteration, perform a
    # backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 733.1392211914062
1 679.57958984375
2 633.4454956054688
3 593.1846923828125
4 557.6541137695312
5 525.43603515625
6 495.6590576171875
7 468.17242431640625
8 442.7628479003906
9 419.0447692871094
10 396.766845703125
11 375.8233337402344
12 355.98284912109375
13 337.1553649902344
14 319.1751708984375
15 302.02911376953125
16 285.7063903808594
17 270.1624450683594
18 255.29916381835938
19 241.13906860351562
20 227.7084503173828
21 214.90213012695312
22 202.7025146484375
23 191.0825958251953
24 180.00120544433594
25 169.4767608642578
26 159.51158142089844
27 150.0541229248047
28 141.09194946289062
29 132.6044464111328
30 124.57601928710938
31 116.96768951416016
32 109.78400421142578
33 103.01072692871094
34 96.6224136352539
35 90.58547973632812
36 84.8809814453125
37 79.52014923095703
38 74.49844360351562
39 69.77801513671875
40 65.33128356933594
41 61.1781120300293
42 57.289031982421875
43 53.65111541748047
44 50.247013092041016
45 47.0635986328125
46 44.08720397949219
47 41.30294799804