In [1]:
%matplotlib inline


PyTorch: nn
-----------

**AIM:**

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

**Method:**

This implementation uses the `nn` package from PyTorch to build the network.

PyTorch `autograd` makes it easy to define computational graphs and take gradients,

but raw `autograd` can be a bit too **low-level** for defining complex neural networks.


This is where the `nn` package can help. 

The `nn` package defines a set of Modules, which you can think of as a neural network layer that has produces output from **input** and may have some **trainable weights**.



In [2]:
import torch

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)


In [3]:

# Use the nn package to define our model as a sequence of layers. 

# nn.Sequential is a Module which contains other Modules, 
# and applies them in sequence to produce its output. 
# Each Linear Module computes output from input using a linear function, 
# and holds internal Tensors for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H), # w1:(D_in, H)
    torch.nn.ReLU(),          # ReLU
    torch.nn.Linear(H, D_out),# w2:(H, D_out) 
)



In [16]:


# The `nn` package also contains definitions of popular loss functions; 
# in this case 
# we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss() # not reduction



In [17]:

learning_rate = 1e-4

for t in range(500):
    # Forward pass: 
    # compute predicted y by passing x to the model. 
    # Module objects override the __call__ operator 
    # so you can call them like functions. 
    # When doing so you pass a Tensor of input data to the Module 
    # and it produces a Tensor of output data.
    y_pred = model(x)

    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the
    # loss.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Zero the gradients before running the backward pass.
    # gradients is d(loss)/d(w1), or d(loss)/d(w2)
    # set it to zero before the backward pass
    model.zero_grad()

    # Backward pass: 
    # compute gradient of the loss with respect to 
    # all the learnable parameters of the model. 
    # Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, 
    # so this call will compute gradients for all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. Each parameter is a Tensor, so
    # we can access its gradients like we did before.
    with torch.no_grad():
        for param in model.parameters(): # param: w1 and w2
            param -= learning_rate * param.grad

0 1.0910389423370361
1 1.0909075736999512
2 1.090775489807129
3 1.090644121170044
4 1.0905132293701172
5 1.0903816223144531
6 1.0902498960494995
7 1.090118646621704
8 1.0899873971939087
9 1.0898566246032715
10 1.0897254943847656
11 1.0895953178405762
12 1.0894638299942017
13 1.089333176612854
14 1.0892020463943481
15 1.0890710353851318
16 1.0889408588409424
17 1.0888090133666992
18 1.0886787176132202
19 1.088547945022583
20 1.0884170532226562
21 1.0882854461669922
22 1.0881550312042236
23 1.0880248546600342
24 1.0878931283950806
25 1.0877630710601807
26 1.0876328945159912
27 1.0875022411346436
28 1.0873714685440063
29 1.0872410535812378
30 1.0871111154556274
31 1.086979627609253
32 1.0868501663208008
33 1.0867195129394531
34 1.086588978767395
35 1.086458683013916
36 1.0863277912139893
37 1.0861976146697998
38 1.0860679149627686
39 1.0859379768371582
40 1.0858073234558105
41 1.0856773853302002
42 1.0855475664138794
43 1.0854172706604004
44 1.0852876901626587
45 1.0851573944091797
46 1.0

398 1.0412371158599854
399 1.0411176681518555
400 1.04099702835083
401 1.0408774614334106
402 1.0407578945159912
403 1.0406372547149658
404 1.0405170917510986
405 1.0403974056243896
406 1.040277361869812
407 1.0401575565338135
408 1.040037989616394
409 1.0399178266525269
410 1.0397976636886597
411 1.0396780967712402
412 1.0395584106445312
413 1.0394388437271118
414 1.0393178462982178
415 1.039198637008667
416 1.0390795469284058
417 1.0389583110809326
418 1.0388399362564087
419 1.038719654083252
420 1.0386000871658325
421 1.038480281829834
422 1.0383611917495728
423 1.0382411479949951
424 1.0381221771240234
425 1.038001537322998
426 1.0378823280334473
427 1.0377628803253174
428 1.0376436710357666
429 1.0375245809555054
430 1.0374048948287964
431 1.0372856855392456
432 1.0371663570404053
433 1.0370476245880127
434 1.0369280576705933
435 1.0368082523345947
436 1.03669011592865
437 1.0365700721740723
438 1.0364515781402588
439 1.0363315343856812
440 1.0362133979797363
441 1.036094427108764