<a href="https://colab.research.google.com/github/inderpreetsingh01/PyTorch/blob/main/Pytorch_basics1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Basics


*   Deep Learning Framework
*   Two core components: Tensors and Autograd



## 1. Tensors


*   Similar to numpy ndarray
*   Can run on GPUs and other hardware accelerators
*   Optimized for automatic differentiation  



In [2]:
import torch
import numpy as np

### Initializing a Tensor

In [19]:
# Build a tensor straight from a nested Python list (one inner list per row).
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)
x_data

tensor([[1, 2],
        [3, 4]])

In [20]:
# Build a NumPy array from the same nested list, then turn it into a tensor
# with torch.from_numpy.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
x_np

tensor([[1, 2],
        [3, 4]])

In [21]:
# The opposite direction: .numpy() converts the tensor back into a NumPy array.
np_array1 = x_np.numpy()
np_array1

array([[1, 2],
       [3, 4]])

In [5]:
# The *_like constructors create a new tensor modelled on an existing one.
x_ones = torch.ones_like(x_data)  # keeps x_data's shape and integer dtype (see the output)
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float)  # same shape, but the dtype is overridden to float
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.1457, 0.8351],
        [0.8301, 0.8338]]) 



In [9]:
# Factory functions accept the desired shape as a tuple: (rows, columns).
shape = (2, 3)

# Same shape, three different fill strategies: uniform random, all ones, all zeros.
rand_tensor, ones_tensor, zeros_tensor = (
    factory(shape) for factory in (torch.rand, torch.ones, torch.zeros)
)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.6243, 0.4676, 0.2537],
        [0.3343, 0.1680, 0.1327]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [27]:
# torch.rand draws every element from the uniform distribution on [0, 1).
tensor = torch.rand((3, 4))

# Three attributes describe a tensor: its shape, its element type and the device holding it.
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")
tensor

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


tensor([[0.8281, 0.8012, 0.4599, 0.2640],
        [0.5334, 0.5765, 0.9848, 0.9437],
        [0.6165, 0.9634, 0.6925, 0.8485]])

In [34]:
# Indexing a single position yields a tensor holding one value; .item() unwraps it
# into a plain Python number. .item() only works for tensors with exactly one element.
print(tensor[1,2])
print(tensor[1,2].item())

tensor(0.9848)
0.9847682118415833


In [13]:
# torch.randn fills the tensor with samples from the standard normal distribution
# (mean 0, variance 1). The dtype is requested explicitly and is visible in the repr
# displayed below, so no separate `tensor.dtype` line is needed: a bare expression
# that is not the last line of a cell is evaluated and then discarded.
tensor = torch.randn(3, 4, dtype=torch.float16)
tensor

tensor([[ 0.3140, -0.0063, -0.3108, -0.7734],
        [-0.1412,  0.3564, -0.9941, -0.1168],
        [ 2.2246, -1.0771, -0.3667, -0.6318]], dtype=torch.float16)

In [14]:
# Gradient tracking is off unless it is requested (the output is False).
tensor.requires_grad

False

In [15]:
# Methods with a trailing underscore operate in place: requires_grad_ switches on
# gradient tracking for this very tensor (the repr now shows requires_grad=True).
tensor.requires_grad_(True)

tensor([[ 0.3140, -0.0063, -0.3108, -0.7734],
        [-0.1412,  0.3564, -0.9941, -0.1168],
        [ 2.2246, -1.0771, -0.3667, -0.6318]], dtype=torch.float16,
       requires_grad=True)

In [16]:
# Confirms that the in-place call above changed the flag on `tensor` itself.
tensor.requires_grad

True

In [12]:
# When gradients of some function with respect to this tensor will be needed,
# gradient tracking can be requested directly at creation time.
# The repr displayed below already reports requires_grad=True, so the flag is not
# evaluated on a separate line (a mid-cell bare expression would be discarded).
tensor = torch.randn(3, 4, requires_grad=True)
tensor

tensor([[-0.7069, -0.5498, -0.5748,  0.1080],
        [ 0.5567, -1.5654,  0.0708, -0.7920],
        [ 1.8638,  0.7804, -2.3891,  0.1494]], requires_grad=True)

In [6]:
# Uniform random values in a new tensor with the same shape as `tensor`.
torch.rand_like(tensor)

tensor([[0.6829, 0.2803, 0.1260, 0.5950],
        [0.6112, 0.2964, 0.9883, 0.0797],
        [0.3127, 0.1052, 0.5009, 0.6591]])

In [21]:
# torch.randint(low, high, size): random integers drawn uniformly from
# low (inclusive) to high (exclusive). The resulting dtype is int64 (see the output).
tensor = torch.randint(0, 5, (2,3))
tensor.dtype

torch.int64

In [7]:
# torch.empty only allocates the tensor; the values are not initialized, which is
# why the output shows arbitrary numbers (even nan).
torch.empty((2,3))

tensor([[3.5913e-35, 0.0000e+00, 3.3631e-44],
        [0.0000e+00,        nan, 6.4460e-44]])

In [18]:
# Uninitialized tensor modelled on `tensor`; as with torch.empty, the displayed
# values are arbitrary.
torch.empty_like(tensor)

tensor([[1.1120e-34, 0.0000e+00, 3.3631e-44, 0.0000e+00],
        [       nan, 6.4460e-44, 1.1578e+27, 1.1362e+30],
        [7.1547e+22, 4.5828e+30, 1.2121e+04, 7.1846e+22]])

In [8]:
# torch.arange(start, end, step): values from start up to, but not including, end
# (10 is absent from the output).
torch.arange(2,10,2)

tensor([2, 4, 6, 8])

In [15]:
# torch.linspace(start, end, steps): `steps` evenly spaced values, with both
# endpoints included (2 and 10 both appear in the output).
torch.linspace(2,10,3)

tensor([ 2.,  6., 10.])

In [16]:
# Three values evenly spaced on a log scale: the exponents 0, 5 and 10 give
# 10**0, 10**5 and 10**10 (see the output).
torch.logspace(0, 10, 3)

tensor([1.0000e+00, 1.0000e+05, 1.0000e+10])

In [17]:
# 3x3 identity matrix: ones on the diagonal, zeros elsewhere.
torch.eye(3)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [19]:
# 2x3 tensor with every element set to the fill value 5 (an integer tensor here).
torch.full((2,3), 5)

tensor([[5, 5, 5],
        [5, 5, 5]])

In [22]:
# Same idea as torch.full, but the shape is taken from the existing `tensor`.
torch.full_like(tensor, 3)

tensor([[3, 3, 3],
        [3, 3, 3]])

In [24]:
 # Quantize a float tensor to unsigned 8-bit (quint8) with scale=0.1 and zero_point=10.
 # Values are snapped to multiples of the scale. The lowest stored integer is 0, which
 # represents (0 - 10) * 0.1 = -1.0, so -1.12345 is clamped and displayed as -1.0.
 # NOTE(review): this cell is indented by one space; the notebook ran it anyway, but the
 # line would be rejected as plain Python if exported to a script.
 torch.quantize_per_tensor(torch.tensor([-1.12345, 0.4567, 1.23142, 2.567345]), 0.1, 10, torch.quint8)

tensor([-1.0000,  0.5000,  1.2000,  2.6000], size=(4,), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.1, zero_point=10)

tensor([0.0000, 0.5000, 1.0000])

### Operations on Tensors



* By default tensors are created on CPU
* We can move them to GPU, but first we should check if resources are available
* Copying large tensors can be expensive in terms of time and memory   





In [22]:
# Pick the device once: use the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"device: {device}")

device: cpu


In [23]:
# .to(device) returns the tensor on the requested device; the name is rebound to the
# result. `device` is 'cpu' in the recorded run, so the tensor stays on the CPU.
tensor = tensor.to(device)
tensor.device

device(type='cpu')

In [26]:
# torch.is_tensor(obj) returns True if obj is a PyTorch tensor.
torch.is_tensor(tensor)

True

In [40]:
# torch.is_storage(obj) returns True if obj is a PyTorch storage object.
# A tensor is not itself a storage object, hence False here.
torch.is_storage(tensor)

False

In [41]:
# .storage() exposes the flat, one-dimensional buffer that holds the tensor's elements.
# NOTE(review): the recorded output lists 12 float32 values, so this cell was run while
# `tensor` was a 3x4 float tensor. On a top-to-bottom run, `tensor` is the 2x3 integer
# tensor created by the randint cell, so the output will differ.
storage = tensor.storage()
storage

 0.4336516857147217
 0.17743903398513794
 0.30724143981933594
 0.9208609461784363
 0.5096129179000854
 0.9258978366851807
 0.42516469955444336
 0.6234511137008667
 0.7978058457374573
 0.3086753487586975
 0.8816702961921692
 0.004713296890258789
[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 12]

In [42]:
# The object returned by .storage() is a storage object, so this is True.
torch.is_storage(storage)

True

In [43]:
# Stride: how many elements to step over in storage to move one position along each
# dimension. (4, 1) in the recorded output: 4 elements to the next row, 1 to the next column.
tensor.stride()

(4, 1)

In [44]:
# Returns True if the data type of the tensor is a floating point data type.
# NOTE(review): the recorded True comes from a float tensor; if `tensor` is still the
# integer tensor from the randint cell when this runs, the result will be False.
tensor.is_floating_point()

True

In [48]:
# is_nonzero() returns True if the tensor has a single element and that element is
# not equal to zero after type conversions. [1] qualifies, so the result is True.
tensor = torch.tensor([1])
tensor.is_nonzero()

True

In [49]:
# Sets the default floating point dtype to float64.
# This is a global setting: float tensors created afterwards without an explicit dtype
# use it (the next cell prints torch.float64).
torch.set_default_dtype(torch.float64)

In [50]:
# No dtype is passed here, so the tensor picks up the default floating point dtype
# that was set in the previous cell.
tensor = torch.rand(3,4)
print(f"Datatype of tensor: {tensor.dtype}")

Datatype of tensor: torch.float64


In [51]:
# Reads back the current default floating point dtype.
torch.get_default_dtype()

torch.float64

In [52]:
# numel() returns the total number of elements in the tensor: 2 * 3 = 6 here.
tensor = torch.rand((2,3))
tensor.numel()

6

In [53]:
# len() reports only the size of the first dimension (2 rows), unlike numel().
len(tensor)

2

In [4]:
# Heaviside step function: 0 for negative inputs, 1 for positive inputs, and the
# supplied `values` entry wherever the input is exactly 0.
# The first argument is named step_input rather than `input` so that Python's
# built-in input() is not shadowed for the rest of the notebook.
step_input = torch.tensor([-1.5, 0, 2.0])
values = torch.tensor([0.5])
torch.heaviside(step_input, values)

tensor([0.0000, 0.5000, 1.0000])

In [24]:
x = torch.tensor([1, 2, 3])
y = torch.tensor([1, 1, 1])

# Two equivalent spellings of element-wise addition; both build a new tensor
# and leave x and y untouched.
z = x + y            # operator form
z = torch.add(x, y)  # function form
print(z)

tensor([2, 3, 4])


In [25]:
# x is unchanged by the out-of-place addition above.
x

tensor([1, 2, 3])

In [26]:
# add_ (trailing underscore) adds y into x in place, so x itself now holds the sum.
# Re-running this cell adds y again.
x.add_(y)
print(x)

tensor([2, 3, 4])


In [38]:
x = torch.randint(0, 10, (4, 4))

# view() reinterprets the same 16 elements under a new shape. Use it only when the
# data is available in contiguous memory; otherwise use reshape().
y = x.view(-1, 8)  # -1 lets PyTorch work out that dimension: 16 / 8 = 2 rows
z = x.view(16)     # flattened to one dimension

print(x, y, z, sep="\n")

tensor([[0, 0, 5, 3],
        [9, 6, 1, 2],
        [3, 3, 4, 8],
        [3, 1, 7, 5]])
tensor([[0, 0, 5, 3, 9, 6, 1, 2],
        [3, 3, 4, 8, 3, 1, 7, 5]])
tensor([0, 0, 5, 3, 9, 6, 1, 2, 3, 3, 4, 8, 3, 1, 7, 5])


In [39]:
# In-place transpose: swaps dimensions 0 and 1 of x itself (rows become columns).
# Because it is in place, running this cell a second time swaps them back.
x.transpose_(0, 1)

tensor([[0, 9, 3, 3],
        [0, 6, 3, 1],
        [5, 1, 4, 7],
        [3, 2, 8, 5]])

In [40]:
# The underlying storage still lists the elements in their original row-by-row order:
# the transpose did not move any data.
x.storage()

 0
 0
 5
 3
 9
 6
 1
 2
 3
 3
 4
 8
 3
 1
 7
 5
[torch.storage._TypedStorage(dtype=torch.int64, device=cpu) of size 16]

In [41]:
# What the transpose changed is the stride: (1, 4) means stepping along dimension 0 moves
# 1 element in storage and stepping along dimension 1 moves 4.
x.stride()

(1, 4)

In [46]:
# x was transposed in place above, so its elements are no longer laid out in memory in
# the order view() needs, and flattening it with view() fails. The error is the point of
# this cell, so it is caught and printed instead of being left to stop "Run All".
try:
    x.view(-1)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: ignored

In [47]:
# reshape() succeeds where view() failed: it returns the elements in the transposed
# (logical) order, as the output shows.
x.reshape(-1)

tensor([0, 9, 3, 3, 0, 6, 3, 1, 5, 1, 4, 7, 3, 2, 8, 5])

In [48]:
# view() works here: y is a freshly created tensor that has not been transposed,
# so it can be flattened without any problem.
y = torch.rand(4, 4)
y.view(-1)

tensor([0.7507, 0.0428, 0.8301, 0.4243, 0.0777, 0.5267, 0.8210, 0.1818, 0.7118,
        0.9457, 0.9060, 0.1534, 0.2696, 0.8160, 0.9858, 0.6565])

In [18]:
# Concatenate three copies along dim=1: the copies are placed side by side, so the
# number of columns triples while the number of rows stays the same.
# NOTE(review): the recorded output is for a 3x4 `tensor` that no visible cell above
# creates; on a top-to-bottom run `tensor` is the 2x3 tensor from the numel cell.
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

tensor([[0.0782, 0.0000, 0.1778, 0.2728, 0.0782, 0.0000, 0.1778, 0.2728, 0.0782,
         0.0000, 0.1778, 0.2728],
        [0.0291, 0.0000, 0.7571, 0.1950, 0.0291, 0.0000, 0.7571, 0.1950, 0.0291,
         0.0000, 0.7571, 0.1950],
        [0.3686, 0.0000, 0.3048, 0.1938, 0.3686, 0.0000, 0.3048, 0.1938, 0.3686,
         0.0000, 0.3048, 0.1938]])


In [19]:
# Concatenate three copies along dim=0: the copies are stacked on top of each other,
# so the number of rows triples while the number of columns stays the same.
t1 = torch.cat([tensor, tensor, tensor], dim=0)
print(t1)

tensor([[0.0782, 0.0000, 0.1778, 0.2728],
        [0.0291, 0.0000, 0.7571, 0.1950],
        [0.3686, 0.0000, 0.3048, 0.1938],
        [0.0782, 0.0000, 0.1778, 0.2728],
        [0.0291, 0.0000, 0.7571, 0.1950],
        [0.3686, 0.0000, 0.3048, 0.1938],
        [0.0782, 0.0000, 0.1778, 0.2728],
        [0.0291, 0.0000, 0.7571, 0.1950],
        [0.3686, 0.0000, 0.3048, 0.1938]])


### Autograd

In [49]:
# Addition: y = x + 2, so dy/dx = 1 whatever the value of x is.
x = torch.randn(1, requires_grad=True)
print(x)

y = x + 2
print(y)

# backward() computes the gradient of y with respect to x and stores it in x.grad
y.backward()
x.grad

tensor([0.3306], requires_grad=True)
tensor([2.3306], grad_fn=<AddBackward0>)


tensor([1.])

In [56]:
# Multiplication: y2 = x * x, so dy2/dx = 2 * x.
x = torch.randn(1, requires_grad=True)
print(x)

y2 = x * x
print(y2)

y2.backward()
x.grad

tensor([-0.9693], requires_grad=True)
tensor([0.9395], grad_fn=<MulBackward0>)


tensor([-1.9385])

In [57]:
# Exponential: y3 = exp(x), and the derivative of exp(x) is exp(x) itself,
# so x.grad ends up equal to the printed value.
x = torch.randn(1, requires_grad=True)
y3 = torch.exp(x)
print(y3)
y3.backward()
x.grad

tensor([1.4318], grad_fn=<ExpBackward0>)


tensor([1.4318])

In [58]:
# backward() without arguments only works when the output is a single (scalar) value.
# y3 has two elements here, so the call raises a RuntimeError. That error is what this
# cell demonstrates, so it is caught and printed to let the rest of the notebook run.
x = torch.randn(2, requires_grad=True)
print(torch.exp(x))
y3 = torch.exp(x)
try:
    y3.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")
x.grad  # still None: the failed call computed no gradient

tensor([0.7293, 0.4255], grad_fn=<ExpBackward0>)


RuntimeError: ignored

In [62]:
# For a non-scalar output, backward() needs a `gradient` argument with the same shape
# as the output: the weights applied to each output element.
x = torch.randn(2, requires_grad=True)
print(x)
print(torch.exp(x))
y3 = torch.exp(x)
print(y3)
# ones_like(y3) gives all-ones weights that already match y3's shape, dtype and device,
# instead of an integer literal that depends on implicit casting and lives on the CPU.
y3.backward(torch.ones_like(y3))
x.grad

tensor([-1.3690, -0.8001], requires_grad=True)
tensor([0.2544, 0.4493], grad_fn=<ExpBackward0>)
tensor([0.2544, 0.4493], grad_fn=<ExpBackward0>)


tensor([0.2544, 0.4493])

In [None]:
# To not compute gradients with respect to x, any of the following three options can be
# used. Each print shows False, i.e. the result is not tracked by autograd.

# 1. torch.no_grad(): operations inside the block are not recorded.
#    (A `with` statement needs a body; an empty one is a syntax error.)
with torch.no_grad():
    y_no_grad = x * 2
print(y_no_grad.requires_grad)

# 2. detach(): returns a new tensor cut off from the graph. x itself is unchanged,
#    so the result has to be assigned to be of any use.
x_detached = x.detach()
print(x_detached.requires_grad)

# 3. requires_grad_(False): switches tracking off on x itself, in place.
x.requires_grad_(False)
print(x.requires_grad)

In [64]:
# z is built as (x * x) * x, i.e. z = [x1^3, x2^3, x3^3, x4^3], and every step is tracked.
x = torch.arange(4, dtype=torch.float32, requires_grad=True)
y = x * x
u = y  # plain alias: u is the very same tracked tensor as y
z = u * x

# Reduce to a scalar first so that backward() can be called without arguments.
z.sum().backward()

# x.grad holds the gradient with respect to x: [3*x1^2, 3*x2^2, 3*x3^2, 3*x4^2]
print(x, z, x.grad, sep="\n")

tensor([0., 1., 2., 3.], requires_grad=True)
tensor([ 0.,  1.,  8., 27.], grad_fn=<MulBackward0>)
tensor([ 0.,  3., 12., 27.])


In [66]:
# Same computation as before, except that u is detached from the graph.
x = torch.arange(4, dtype=torch.float32, requires_grad=True)
y = x * x
u = y.detach()  # same values as y, but treated as a constant by autograd
z = u * x       # z = [u1*x1, u2*x2, u3*x3, u4*x4]

z.sum().backward()

# With u constant, the gradient of u_i * x_i with respect to x_i is just u_i:
# x.grad = [u1, u2, u3, u4]
print(x, u, z, x.grad, sep="\n")

tensor([0., 1., 2., 3.], requires_grad=True)
tensor([0., 1., 4., 9.])
tensor([ 0.,  1.,  8., 27.], grad_fn=<MulBackward0>)
tensor([0., 1., 4., 9.])


In [68]:
# PyTorch adds each new gradient onto whatever is already stored in .grad, so the
# stored gradient has to be cleared before the next backward pass.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

    # reset in place; without this the printed gradient would grow every epoch
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
