# Learning PyTorch with Examples

## Tensors

### Warm-up: NumPy

In [5]:
import numpy as np
import math
import time

In [6]:
# Fit y = sin(x) on [-pi, pi] with the cubic polynomial
#     y_pred = a + b x + c x^2 + d x^3
# using plain NumPy and hand-derived gradients (no autograd).

# Create input and output data
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change during training, so compute them once here
# instead of re-evaluating x**2 and x**3 twice per iteration.
x_sq = x**2
x_cu = x**3

# Randomly initialize weights (no seed is set in this cell, so each run
# starts from a different point and prints slightly different numbers)
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
# perf_counter() is the clock intended for measuring short durations;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start = time.perf_counter()
for t in range(2000):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute the loss (sum of squared errors) and print it every 100th step
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of the loss with respect to a, b, c, d.
    # d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule gives the rest.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights with one step of gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d
end = time.perf_counter()

print(f"Result: y = {a} + {b}x + {c}x^2 + {d}x^3")
print(f"Execution time: {end - start}")

99 2932.727550468763
199 1984.8080274591046
299 1345.5554617458304
399 914.0458853943688
499 622.4801262682479
599 425.27354981348924
699 291.75085567798465
799 201.25120398810375
899 139.8459723970504
999 98.13616929138729
1099 69.77318427976725
1199 50.4645077572468
1299 37.30485086267191
1399 28.32577273952717
1499 22.19215668872163
1599 17.997460875558094
1699 15.125466003892214
1799 13.156830763413055
1899 11.805868477542944
1999 10.87772649396241
Result: y = -0.03794718289642867 + 0.8296735978350522x + 0.0065465204934829204x^2 + -0.08948030910903447x^3
Execution time: 0.32587099075317383


### PyTorch tensors

In [7]:
import torch
import math

In [11]:
# Same cubic fit of sin(x) as the NumPy version, but on PyTorch tensors.
# Gradients are still derived and applied by hand; autograd is not used here.
dtype = torch.float
device = torch.device("cpu")

# Create input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once here
# instead of re-evaluating x ** 2 and x ** 3 twice per iteration.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights as 0-dimensional (scalar) tensors
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
# perf_counter() is the clock intended for measuring short durations;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start = time.perf_counter()
for t in range(2000):
    # Forward pass: compute predicted y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute the loss (sum of squared errors) as a Python float and print
    # it every 100th step
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of the loss with respect to a, b, c, d.
    # d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule gives the rest.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in place on the weight tensors)
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d
end = time.perf_counter()

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
print(f"Execution time: {end - start}")

99 2178.083251953125
199 1469.6435546875
299 993.4724731445312
399 673.1423950195312
499 457.4585266113281
599 312.1025695800781
699 214.0500946044922
799 147.843017578125
899 103.09439086914062
999 72.81845092773438
1099 52.31305694580078
1199 38.410369873046875
1299 28.974037170410156
1399 22.562240600585938
1499 18.200740814208984
1599 15.230567932128906
1699 13.205599784851074
1799 11.82345962524414
1899 10.878966331481934
1999 10.232831954956055
Result: y = 0.02989010140299797 + 0.8325701951980591 x + -0.00515653844922781 x^2 + -0.08989232033491135 x^3
Execution time: 0.2824528217315674


## Autograd

### PyTorch: tensors and autograd

In [12]:
import torch
import math 

In [13]:
# Cubic fit of sin(x) where the backward pass is computed by autograd
# instead of hand-written gradient formulas.
dtype = torch.float
device = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE: this changes the default device for tensors created afterwards
# without an explicit device= argument, including in cells run after this one.
torch.set_default_device(device)

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)

# The powers of x are constant across iterations and x does not require
# grad, so compute them once here rather than on every forward pass.
x_sq = x ** 2
x_cu = x ** 3

# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.randn((), dtype=dtype, requires_grad=True)
b = torch.randn((), dtype=dtype, requires_grad=True)
c = torch.randn((), dtype=dtype, requires_grad=True)
d = torch.randn((), dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y using operations on Tensors.
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print loss using operations on Tensors.
    # sum() with no dim argument reduces over every element, so loss is a
    # 0-dimensional (scalar) Tensor with shape ().
    # loss.item() gets the scalar value held in the loss.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
    # the gradient of the loss with respect to a, b, c, d respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually reset the gradients after updating weights; backward()
        # accumulates into .grad, so stale values would otherwise be added
        # to the next iteration's gradients.
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1286.6337890625
199 859.4053955078125
299 575.2542114257812
399 386.194091796875
499 260.3535461425781
599 176.55799865722656
699 120.73539733886719
799 83.5302734375
899 58.72169876098633
999 42.170711517333984
1099 31.122879028320312
1199 23.74423599243164
1299 18.813232421875
1399 15.515870094299316
1499 13.309537887573242
1599 11.832205772399902
1699 10.842317581176758
1799 10.178573608398438
1899 9.733172416687012
1999 9.43402099609375
Result: y = -0.0138046033680439 + 0.8361940383911133 x + 0.0023815250024199486 x^2 + -0.09040778130292892 x^3
