<a href="https://colab.research.google.com/github/Vonewman/Deep_Learning_models_with_pytorch/blob/master/Pytorch_with_examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Tensors

In [1]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is ouput dimension
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)


# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 35069218.74221803
1 33716935.619632915
2 34098766.87823916
3 30692547.205413725
4 22775656.176628392
5 13769639.1450512
6 7392896.60144085
7 3953671.7828749632
8 2334634.490582628
9 1567962.4144430454
10 1171756.456125942
11 936934.7377380374
12 778499.2548000654
13 660932.1957512037
14 568639.943042245
15 493546.003421121
16 431194.95335960033
17 378719.1291224356
18 334179.8047761933
19 296156.18078170443
20 263450.3535144046
21 235152.80106570278
22 210579.99274520425
23 189119.61170935986
24 170303.32723778405
25 153746.66607700102
26 139127.6763448151
27 126180.97955101835
28 114671.91143203089
29 104414.2873770755
30 95255.58464726456
31 87055.05272227735
32 79699.04323073375
33 73072.9286660165
34 67088.65456334257
35 61675.57157641074
36 56770.78951528587
37 52316.85038535405
38 48266.06389614387
39 44576.85451404563
40 41211.12612126819
41 38134.96473231612
42 35320.89482624522
43 32742.35613255601
44 30380.81851684616
45 28215.038041075724
46 26224.23617013584
47 24391.8765

### Pytorch: Tensors

In [2]:
import torch


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Randomly initialize weights
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update weights using gradient descent
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

99 864.3027954101562
199 7.5834150314331055
299 0.08866354078054428
399 0.0014138497645035386
499 0.00011059808457503095


## Autograd
### Pytorch: Tensor autograd