<a href="https://colab.research.google.com/github/chrishare/colab_deeplearning/blob/master/pytorch_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Some pytorch basics - first, a simple numpy-based regression problem
# that attempts to learn a, b, c, d such that a + b*x + c*x^2 + d*x^3
# closely models sin(x) between -pi and +pi
import numpy as np
import math

# Settings a reader might want to tune, gathered in one place
SEED = 0          # fixes the initial weights so Restart & Run All is reproducible
N_SAMPLES = 2000  # number of points sampled on [-pi, +pi]
N_STEPS = 2000    # number of full-batch gradient descent iterations
LOG_EVERY = 100   # print the loss once every LOG_EVERY iterations

# Create input and output data
# Get a 1-dim array of linearly spaced numbers between -pi and +pi
x = np.linspace(-math.pi, math.pi, N_SAMPLES)
# Get sin(x_i) for each x_i in x - these are the targets (labels)
y = np.sin(x)
# Check the 'size' of x - it must be (N_SAMPLES,), a 1-dim vector.
# (A bare `x.shape` in the middle of a cell displays nothing, so assert instead.)
assert x.shape == (N_SAMPLES,)

# The powers of x never change between iterations, so compute them once
x_squared = x ** 2
x_cubed = x ** 3

# Randomly initialize weights - get 4 individual floats that are drawn from
# a standard normal distribution, using a seeded generator
rng = np.random.default_rng(SEED)
a = rng.standard_normal()
b = rng.standard_normal()
c = rng.standard_normal()
d = rng.standard_normal()

# Set learning rate to be 1 / 10^6
learning_rate = 1e-6

# Run N_STEPS iterations; every iteration uses the whole dataset
for t in range(N_STEPS):
    # Forward pass: compute predicted y
    # y_pred = a + b x + c x^2 + d x^3
    # y_pred has one entry per sample, based on the current weights
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Compute and print loss by calculating the square of every difference
    # between the prediction and actual answer (label), and summing.
    # So loss is a scalar measuring the error on the whole dataset
    residual = y_pred - y
    loss = np.square(residual).sum()
    if t % LOG_EVERY == LOG_EVERY - 1:
        print(t, loss)

    # Backprop to compute gradients of the loss with respect to a, b, c, d
    # d(loss)/d(y_pred) = 2 * (y_pred - y) for each sample
    grad_y_pred = 2.0 * residual
    # grad_a = sum(2 * (y_pred - y)) over all samples
    grad_a = grad_y_pred.sum()
    # grad_b = sum(2 * (y_pred - y) * x) over all samples
    grad_b = (grad_y_pred * x).sum()
    # grad_c = sum(2 * (y_pred - y) * x^2) over all samples
    grad_c = (grad_y_pred * x_squared).sum()
    # grad_d = sum(2 * (y_pred - y) * x^3) over all samples
    grad_d = (grad_y_pred * x_cubed).sum()

    # Update weights - subtract the gradient of the loss * learning rate
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 18.09889106562996
199 15.105687454472273
299 13.082607103470345
399 11.713518430560395
499 10.786040027133247
599 10.157057297824208
699 9.730041053359837
799 9.439820602584472
899 9.242353029135224
999 9.10784353510595
1099 9.0161151779539
1199 8.95348972519423
1299 8.910684511709542
1399 8.881393000359349
1499 8.86132584235104
1599 8.847562388040501
1699 8.838111690051994
1799 8.831615010272614
1899 8.827144003023928
1999 8.824063654947992
Result: y = 0.0022636170507037193 + 0.8582230470700756 x + -0.0003905116079966977 x^2 + -0.09354121965192531 x^3


In [None]:
# Next, a pytorch example - the same cubic fit of sin(x), done with torch
# tensors instead of numpy arrays (on the CPU by default)

import torch
import math

# Settings a reader might want to tune, gathered in one place
SEED = 0          # fixes the initial weights so Restart & Run All is reproducible
N_SAMPLES = 2000  # number of points sampled on [-pi, +pi]
N_STEPS = 2000    # number of full-batch gradient descent iterations
LOG_EVERY = 100   # print the loss once every LOG_EVERY iterations

# Seed torch's random number generator before drawing the initial weights
torch.manual_seed(SEED)

# Get a reference to the torch float datatype
dtype = torch.float
# Get a reference to the local CPU device - you can get a cuda GPU too
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Create input and output data on the chosen device, using torch.float
# (the inputs are evenly spaced, not random)
x = torch.linspace(-math.pi, math.pi, N_SAMPLES, device=device, dtype=dtype)
# Get the ground truth sin(x) for every input x
y = torch.sin(x)

# The powers of x never change between iterations, so compute them once
x_squared = x ** 2
x_cubed = x ** 3

# Randomly initialize weights as 0-dim tensors, again on the chosen device
# using torch.float
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

# The rest is as per numpy - the computation runs on the torch device and
# with the dtype chosen when the tensors were created
learning_rate = 1e-6
for t in range(N_STEPS):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Compute and print loss; .item() converts the 0-dim tensor to a Python float
    residual = y_pred - y
    loss = residual.pow(2).sum().item()
    if t % LOG_EVERY == LOG_EVERY - 1:
        print(t, loss)

    # Backprop to compute gradients of the loss with respect to a, b, c, d
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_squared).sum()
    grad_d = (grad_y_pred * x_cubed).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')