## PyTorch Basics

In [1]:
import torch    # pytorch library
import numpy as np

In [2]:
# torch.tensor() is a factory function that builds a tensor from the given data
# and infers the dtype from that data (integer here, as the output shows)

torch.tensor([2])

tensor([2])

In [3]:
# torch.Tensor is the main tensor class; calling it with data also builds a
# tensor, but it does not infer the dtype - it always uses the default type
# (float32), which is why the result prints as 2. rather than 2

torch.Tensor([2])

tensor([2.])

In [4]:
# a bare Python number (no list) gives a 0-dimensional tensor holding that value
torch.tensor(2)

tensor(2)

In [5]:
# NOTE: with an int argument torch.Tensor treats it as a size, not as data:
# this allocates 2 uninitialized elements, so the values shown are arbitrary
torch.Tensor(2)

tensor([-8.0606e-30,  3.0693e-41])

In [6]:
# Build a tensor through the class constructor, then show its value and its Python type.
x = torch.Tensor([2])
print(x, type(x), sep="\n")

tensor([2.])
<class 'torch.Tensor'>


In [7]:
# The factory function returns an object of the very same class (torch.Tensor).
y = torch.tensor(3.)
print(y, type(y), sep="\n")

tensor(3.)
<class 'torch.Tensor'>


In [8]:
# Element-wise multiplication with the * operator.
# torch.tensor() is the recommended way to build a tensor from existing data;
# the float literals make the dtype float32.
x = torch.tensor([2., 3.])   # built from a Python list
y = torch.tensor([4., 5.])

print(x * y)

tensor([ 8., 15.])


In [9]:
# Element-wise multiplication with the functional form torch.mul (equivalent to x * y).
# torch.tensor() is the recommended way to build a tensor from existing data;
# the float literals make the dtype float32.
x = torch.tensor([2., 3.])
y = torch.tensor([4., 5.])

print(torch.mul(x, y))

tensor([ 8., 15.])


In [10]:
# 2x3 tensor filled with zeros, followed by its shape.
x = torch.zeros(2, 3)
print(x, x.shape, sep="\n")

tensor([[0., 0., 0.],
        [0., 0., 0.]])
torch.Size([2, 3])


In [11]:
# 3x4 tensor of random values; no seed is set, so the numbers change on every run
y = torch.rand([3,4])
y

tensor([[0.2002, 0.8856, 0.0617, 0.2418],
        [0.6024, 0.9803, 0.1704, 0.5947],
        [0.0665, 0.1776, 0.3001, 0.9500]])

In [12]:
y[:,0]      # first column only (all rows, column index 0)

tensor([0.2002, 0.6024, 0.0665])

In [13]:
y.view([2,6])       # reshape the 3x4 tensor to 2x6 (same 12 values, same order)

tensor([[0.2002, 0.8856, 0.0617, 0.2418, 0.6024, 0.9803],
        [0.1704, 0.5947, 0.0665, 0.1776, 0.3001, 0.9500]])

In [14]:
y.view([-1,3])       # -1 lets PyTorch infer that dimension (4 here: 12 values / 3 columns)

tensor([[0.2002, 0.8856, 0.0617],
        [0.2418, 0.6024, 0.9803],
        [0.1704, 0.5947, 0.0665],
        [0.1776, 0.3001, 0.9500]])

In [15]:
x = torch.empty(1) # 1D tensor with a single element (not a 0-dim scalar); values are uninitialized
print(x)

tensor([-8.0607e-30])


In [16]:
x = torch.empty(3) # vector, 1D: 3 uninitialized elements (arbitrary values)
print(x)

tensor([-8.0608e-30,  3.0693e-41,  2.3694e-38])


In [17]:
x = torch.empty(2,3) # matrix, 2D: 2 rows x 3 columns, uninitialized
print(x)

tensor([[-8.0608e-30,  3.0693e-41, -5.1592e-33],
        [ 3.0693e-41,  3.3631e-44,  0.0000e+00]])


In [18]:
x = torch.empty(2,2,3) # tensor, 3D: shape 2x2x3, uninitialized
print(x)

tensor([[[-8.0607e-30,  3.0693e-41,  2.3694e-38],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],

        [[ 1.1578e+27,  1.1362e+30,  7.1547e+22],
         [ 4.5828e+30,  1.2121e+04,  7.1846e+22]]])


In [19]:
# NumPy array -> tensor. The array's dtype (float64) carries over to the tensor,
# as the printed result shows.

a = np.ones(5)
b = torch.from_numpy(a)
print(a, b, sep="\n")

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [20]:
# NumPy has no native GPU support. Tensors are created on the CPU by default
# and can be created on, or moved to, the GPU when one is available.

if not torch.cuda.is_available():
    print('No GPU')
else:
    device = torch.device("cuda")               # a CUDA device object
    y = torch.ones([2,3], device=device)        # created directly on the GPU
    x = torch.ones([2,3]).to(device)            # created on the CPU, then moved (.to("cuda") also works)
    z = x + y
    print(z)
    # z.numpy() would fail here because NumPy cannot handle GPU tensors,
    # so the result is moved back to the CPU first
    print(z.cpu())

tensor([[2., 2., 2.],
        [2., 2., 2.]], device='cuda:0')
tensor([[2., 2., 2.],
        [2., 2., 2.]])


## Autograd / Backpropagation

In [21]:
# plain tensor for comparison: mixed int/float data is stored as float, and no gradient is tracked
torch.tensor([5.5, 3])

tensor([5.5000, 3.0000])

In [22]:
# requires_grad=True tells PyTorch that gradients with respect to this tensor
# will be needed later, during the optimization steps,
# i.e. this is a variable in your model that you want to optimize.
# The flag shows up in the printed tensor.

x = torch.tensor([5.5, 3], requires_grad=True)
print(x)

tensor([5.5000, 3.0000], requires_grad=True)


In [23]:
# indexing a requires_grad tensor is itself a tracked operation, hence the grad_fn in the output
x[0]

tensor(5.5000, grad_fn=<SelectBackward>)

In [24]:
# x is a plain input; w and b are additionally flagged with requires_grad,
# so autograd will track them.

x = torch.tensor(3.)
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)
print(x, w, b, sep="\n")

# linear model: (4 * 3) + 5 = 17
y = w * x + b
print(y)

tensor(3.)
tensor(4., requires_grad=True)
tensor(5., requires_grad=True)
tensor(17., grad_fn=<AddBackward0>)


In [25]:
# backward() has not been called yet, so every .grad is still None
print('dy/dx', x.grad)
print('dy/dw', w.grad)
print('dy/db', b.grad)

dy/dx None
dy/dw None
dy/db None


In [26]:
# backward() must be called before any gradient values are available
y.backward()    # computes the derivatives of y = w*x + b and stores them in .grad

print('dy/dx', x.grad)
print('dy/dw', w.grad)
print('dy/db', b.grad)

# manual cross-check: dy/dw = x = 3 and dy/db = 1;
# x.grad stays None because x was created without requires_grad

dy/dx None
dy/dw tensor(3.)
dy/db tensor(1.)


In [8]:
# Toy optimisation loop: the "model" output is sum(3 * weights), so the
# gradient with respect to every weight is 3.
weights = torch.tensor([1, 2, 3, 4], dtype=torch.float32, requires_grad=True)
print(weights)

for epoch in range(3):
    print('\nLoop: ', epoch)

    # forward pass
    model_output = torch.sum(weights * 3)
    print(model_output)

    # backward pass: fills weights.grad with d(model_output)/d(weights) = 3
    model_output.backward()
    print(weights.grad)

    # gradient-descent step, done under no_grad so the in-place update
    # is not recorded by autograd
    with torch.no_grad():
        weights.sub_(0.1 * weights.grad)

    # reset the gradient so the next backward() starts from zero
    weights.grad.zero_()
    print(weights)

print()
print(weights, model_output, sep="\n")

tensor([1., 2., 3., 4.], requires_grad=True)

Loop:  0
tensor(30., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor([0.7000, 1.7000, 2.7000, 3.7000], requires_grad=True)

Loop:  1
tensor(26.4000, grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor([0.4000, 1.4000, 2.4000, 3.4000], requires_grad=True)

Loop:  2
tensor(22.8000, grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor([0.1000, 1.1000, 2.1000, 3.1000], requires_grad=True)

tensor([0.1000, 1.1000, 2.1000, 3.1000], requires_grad=True)
tensor(22.8000, grad_fn=<SumBackward0>)


In [9]:
# Same toy loop, but WITHOUT resetting the gradient between iterations,
# to show what happens when zero_() is forgotten.
weights = torch.tensor([1, 2, 3, 4], dtype=torch.float32, requires_grad=True)
print(weights)

for epoch in range(3):
    print('\nLoop: ', epoch)

    # forward pass
    model_output = torch.sum(weights * 3)
    print(model_output)

    # backward pass
    model_output.backward()
    print(weights.grad)

    # gradient-descent step, kept out of the autograd graph
    with torch.no_grad():
        weights.sub_(0.1 * weights.grad)

    # NOTE: weights.grad.zero_() is deliberately left out here. backward() adds
    # to the existing .grad, so the printed gradient grows 3 -> 6 -> 9 and the
    # update step gets larger on every iteration.
    print(weights)

print()
print(weights, model_output, sep="\n")

tensor([1., 2., 3., 4.], requires_grad=True)

Loop:  0
tensor(30., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor([0.7000, 1.7000, 2.7000, 3.7000], requires_grad=True)

Loop:  1
tensor(26.4000, grad_fn=<SumBackward0>)
tensor([6., 6., 6., 6.])
tensor([0.1000, 1.1000, 2.1000, 3.1000], requires_grad=True)

Loop:  2
tensor(19.2000, grad_fn=<SumBackward0>)
tensor([9., 9., 9., 9.])
tensor([-0.8000,  0.2000,  1.2000,  2.2000], requires_grad=True)

tensor([-0.8000,  0.2000,  1.2000,  2.2000], requires_grad=True)
tensor(19.2000, grad_fn=<SumBackward0>)


In [4]:
# an operation on a requires_grad tensor yields a result that carries a grad_fn
# (MulBackward0 for this multiplication, as the output shows)
weights = torch.tensor([1,2,3,4], dtype=torch.float32, requires_grad=True)
x = weights*3
x

tensor([ 3.,  6.,  9., 12.], grad_fn=<MulBackward0>)

In [28]:
# One training step for a single-parameter linear model y_predicted = w * x.
x = torch.tensor(1.0)
y = torch.tensor(2.0)

# w is the weight we want to optimize
w = torch.tensor(1.0, requires_grad=True)

# forward pass: squared error, loss = (w*x - y)^2 = (1 - 2)^2 = 1
y_predicted = w * x
loss = (y_predicted - y).pow(2)
print(loss)

# backward pass: d(loss)/dw = 2*x*(w*x - y) = 2*w*x^2 - 2*x*y = -2
loss.backward()
print(w.grad)

# weight update; run under no_grad so that this operation does not become
# part of the computational graph
with torch.no_grad():
    w.sub_(0.01 * w.grad)

# zero the gradient before any further backward() call
w.grad.zero_()

print(w.grad, w, sep="\n")

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)
tensor(0.)
tensor(1.0200, requires_grad=True)
