## Using Autograd in PyTorch to Solve a Regression Problem

In [2]:
# Build a 1-D tensor from a Python list of integers and inspect it.
# Integer literals produce an int64 tensor.
import torch
x = torch.tensor([1, 2, 3])
# Show the tensor, its shape and its element type, one per line
print(x, x.shape, x.dtype, sep="\n")

tensor([1, 2, 3])
torch.Size([3])
torch.int64


In [4]:
# Re-create the vector so that autograd can track operations on it.
# The values are written as floats because differentiation needs
# floating-point numbers, not integers.
x = torch.tensor([1., 2., 3.], requires_grad=True)
# Show the tensor, its shape and its element type, one per line
for item in (x, x.shape, x.dtype):
    print(item)

tensor([1., 2., 3.], requires_grad=True)
torch.Size([3])
torch.float32


In [6]:
# Differentiate y = x * x at x = 3.6 with autograd.
# backward() computes dy/dx (= 2x) and stores it in x.grad as a tensor.
x = torch.tensor(3.6, requires_grad=True)
y = x * x
y.backward()
for label, value in (("x =", x), ("y =", y), ("x.grad =", x.grad)):
    print(label, value)

x = tensor(3.6000, requires_grad=True)
y = tensor(12.9600, grad_fn=<MulBackward0>)
x.grad = tensor(7.2000)


In [7]:
# Define the fixed quadratic 1*x^2 + 2*x + 3 as a NumPy polynomial.
# Coefficients are listed from the highest power down to the constant term.
import numpy as np
coefficients = [1, 2, 3]
polynomial = np.poly1d(coefficients)
print(polynomial)

   2
1 x + 2 x + 3


In [8]:
# A poly1d object is callable: evaluate the polynomial at x = 1.5
value_at_1_5 = polynomial(1.5)
print(value_at_1_5)

8.25


In [9]:
# Generate a number of samples from the polynomial using NumPy.
N = 20  # number of samples
SEED = 42  # fixed seed so the same samples are drawn on every run
rng = np.random.default_rng(SEED)  # local generator; global NumPy RNG state is untouched
# Standard-normal draws scaled by 5, i.e. roughly between -10 and +10.
# Shape (N, 1): one sample per row.
X = rng.standard_normal((N, 1)) * 5
Y = polynomial(X)

In [12]:
# Fit the three polynomial coefficients by gradient descent.
# Assume samples X and Y (NumPy arrays of shape (N, 1)) are prepared elsewhere.
# Design matrix with columns [x^2, x, 1], so that XX @ w evaluates the quadratic
XX = np.hstack([X*X, X, np.ones_like(X)])
print(XX)
torch.manual_seed(42)  # fixed seed so the random starting point is reproducible
w = torch.randn(3, 1, requires_grad=True)  # the 3 coefficients
x = torch.tensor(XX, dtype=torch.float32)  # input sample
y = torch.tensor(Y, dtype=torch.float32)  # output sample
optimizer = torch.optim.NAdam([w], lr=0.01)
print(w)  # initial guess
for _ in range(1000):
    y_pred = x @ w
    mse = torch.mean(torch.square(y - y_pred))
    # clear gradients left over from the previous iteration before
    # computing the gradient of the mean square error w.r.t. w
    optimizer.zero_grad()
    mse.backward()
    optimizer.step()
print(w)  # coefficients after 1000 optimizer steps

[[ 4.57594547e+01  6.76457351e+00  1.00000000e+00]
 [ 1.29655260e+01  3.60076742e+00  1.00000000e+00]
 [ 2.35134331e+00  1.53340905e+00  1.00000000e+00]
 [ 9.42143619e+01 -9.70640829e+00  1.00000000e+00]
 [ 1.82703899e+01  4.27438767e+00  1.00000000e+00]
 [ 3.01218200e+01  5.48833490e+00  1.00000000e+00]
 [ 8.40333330e+01  9.16696967e+00  1.00000000e+00]
 [ 9.76666517e+00 -3.12516642e+00  1.00000000e+00]
 [ 7.67137033e+00 -2.76972387e+00  1.00000000e+00]
 [ 3.39243606e+01  5.82446225e+00  1.00000000e+00]
 [ 5.73529688e-02 -2.39484799e-01  1.00000000e+00]
 [ 3.17514686e+01  5.63484415e+00  1.00000000e+00]
 [ 3.64399643e+00  1.90892547e+00  1.00000000e+00]
 [ 1.68687764e+02  1.29879854e+01  1.00000000e+00]
 [ 1.37417485e+02  1.17225204e+01  1.00000000e+00]
 [ 1.91185906e+01 -4.37248105e+00  1.00000000e+00]
 [ 3.86324659e+01 -6.21550207e+00  1.00000000e+00]
 [ 4.09241599e+01 -6.39719938e+00  1.00000000e+00]
 [ 2.51993148e+01 -5.01989191e+00  1.00000000e+00]
 [ 5.90639674e+01 -7.68530854e+

In [15]:
# the complete code: generate samples from a known quadratic and recover
# its coefficients with autograd and the NAdam optimizer
SEED = 42  # fixed seed so samples and initial weights are reproducible
rng = np.random.default_rng(SEED)  # local NumPy generator for the samples
torch.manual_seed(SEED)  # seeds the torch RNG used by torch.randn below

polynomial = np.poly1d([1, 2, 3])
N = 20  # number of samples
# Generate random samples roughly between -10 to +10
X = rng.standard_normal((N, 1)) * 5
Y = polynomial(X)
# Prepare input as an array of shape (N,3) with columns [x^2, x, 1]
XX = np.hstack([X*X, X, np.ones_like(X)])
# Prepare tensors
w = torch.randn(3, 1, requires_grad=True)  # the 3 coefficients
x = torch.tensor(XX, dtype=torch.float32)  # input sample
y = torch.tensor(Y, dtype=torch.float32)  # output sample
optimizer = torch.optim.NAdam([w], lr=0.01)
print(w)  # initial guess
# Run optimizer
for _ in range(1000):
    # clear gradients left over from the previous iteration
    optimizer.zero_grad()
    y_pred = x @ w
    mse = torch.mean(torch.square(y - y_pred))
    # derive the gradient, i.e., the rate of change of the mean
    # square error with respect to the coefficients w
    # using the backward() function
    mse.backward()
    optimizer.step()
print(w)  # coefficients after 1000 optimizer steps

tensor([[ 0.6253],
        [-0.1860],
        [ 0.2265]], requires_grad=True)
tensor([[1.0013],
        [2.0027],
        [2.9445]], requires_grad=True)
