In [1]:
import torch
import numpy as np
import os

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

device

'cuda'

In [3]:
tensor1 = torch.tensor([[1,2,3],[4,5,6],[7,8,9]])
tensor1

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [4]:
tensor1.dtype

torch.int64

In [5]:
tensor1.device  # gpu or cpu

device(type='cpu')

In [6]:
tensor1.shape

torch.Size([3, 3])

In [7]:
tensor1.requires_grad  # whether autograd tracks this tensor, i.e. whether backpropagation will compute a gradient for it

False

In [8]:
tensor2 = torch.tensor([[1,2,3],[4,5,6],[7,8,9]],
                      dtype=torch.float,
                      device=device,
                      requires_grad=True)

In [9]:
tensor2.dtype

torch.float32

In [10]:
tensor2.device

device(type='cuda', index=0)

In [11]:
tensor2.requires_grad

True

## Common uses of tensors

In [12]:
torch.empty(size=(3,3))  # allocates WITHOUT initializing: contents are arbitrary, the zeros shown below are incidental

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [13]:
torch.zeros(size=(3,3))

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [14]:
torch.ones(size=(3,3))

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [15]:
torch.rand(size=(3,3))

tensor([[0.6214, 0.8245, 0.7604],
        [0.6223, 0.3062, 0.3468],
        [0.7658, 0.7139, 0.8306]])

In [16]:
torch.eye(3)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [17]:
torch.arange(start=0, end=10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [18]:
torch.linspace(start=0, end=10, steps=3)

tensor([ 0.,  5., 10.])

In [19]:
torch.rand(size=(3,3)).normal_(mean=0, std=1)  # trailing underscore = in-place: the uniform samples are overwritten with draws from N(0, 1)

tensor([[-2.0368,  0.5181,  1.3644],
        [-0.3609,  0.0513, -0.7439],
        [ 0.4024,  0.1684,  1.9201]])

In [20]:
torch.rand(size=(3,3)).uniform_(4,5)  # in-place: values are overwritten with uniform draws between 4 and 5

tensor([[4.4719, 4.1968, 4.0745],
        [4.7132, 4.7965, 4.1740],
        [4.6586, 4.6576, 4.1117]])

In [21]:
torch.diag(torch.ones(3))

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

## Conversions

In [22]:
tensor1

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [23]:
tensor1.bool()  # 0 will be false

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

In [24]:
tensor1.int()  # int32

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]], dtype=torch.int32)

In [25]:
tensor1.short()  # int16

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]], dtype=torch.int16)

In [26]:
tensor1.long()  # int64

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [27]:
tensor1.float()  # float32

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [28]:
tensor1.half()  # float16

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]], dtype=torch.float16)

In [29]:
tensor1.double()  # float64

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]], dtype=torch.float64)

### numpy array to tensor

In [30]:
array1 = np.array([[1,2,3],[4,5,6]])

array1

array([[1, 2, 3],
       [4, 5, 6]])

In [31]:
torch.from_numpy(array1)

tensor([[1, 2, 3],
        [4, 5, 6]], dtype=torch.int32)

### tensor to numpy array

In [32]:
tensor1.numpy()

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]], dtype=int64)

# Math operations

In [33]:
# Two integer ranges with a stride of 3; the end value is exclusive.
tensor1 = torch.arange(1, 11, 3)    # 1, 4, 7, 10
tensor2 = torch.arange(10, 20, 3)   # 10, 13, 16, 19

In [34]:
print(tensor1)
print('----------------')
print(tensor2)

tensor([ 1,  4,  7, 10])
----------------
tensor([10, 13, 16, 19])


In [35]:
tensor1 + tensor2

tensor([11, 17, 23, 29])

In [36]:
tensor1 * tensor2

tensor([ 10,  52, 112, 190])

In [37]:
tensor1 / tensor2

tensor([0.1000, 0.3077, 0.4375, 0.5263])

In [38]:
tensor3 = torch.ones(4)

In [39]:
tensor3.add_(tensor1)

tensor([ 2.,  5.,  8., 11.])

In [40]:
tensor1.pow(2)  # power

tensor([  1,  16,  49, 100])

In [41]:
tensor1**2

tensor([  1,  16,  49, 100])

In [42]:
tensor1 > 2

tensor([False,  True,  True,  True])

### Matrix Multiplication

In [43]:
# Matrix Multiplication
a = torch.rand(size=(3,5))
b = torch.rand(size=(5,4))

In [44]:
print(a)
print(b)

tensor([[0.9121, 0.3435, 0.3277, 0.2103, 0.2541],
        [0.2704, 0.6196, 0.1759, 0.5234, 0.5501],
        [0.7416, 0.2273, 0.1243, 0.7071, 0.0258]])
tensor([[0.4494, 0.2733, 0.6924, 0.6498],
        [0.2362, 0.8880, 0.8116, 0.1950],
        [0.7685, 0.4375, 0.8668, 0.1345],
        [0.2244, 0.4511, 0.4587, 0.5087],
        [0.4856, 0.6645, 0.7165, 0.0185]])


In [45]:
a @ b

tensor([[0.9134, 0.9614, 1.4729, 0.8154],
        [0.7877, 1.3028, 1.4769, 0.5967],
        [0.6537, 0.7951, 1.1486, 0.9031]])

In [46]:
a.matmul(b)

tensor([[0.9134, 0.9614, 1.4729, 0.8154],
        [0.7877, 1.3028, 1.4769, 0.5967],
        [0.6537, 0.7951, 1.1486, 0.9031]])

In [47]:
a = torch.rand(size=(3,3))
print(a)
a.matrix_exp()  # matrix exponential e^A = I + A + (A @ A)/2! + ... ; this is NOT an element-wise power

tensor([[0.4533, 0.3710, 0.7956],
        [0.6005, 0.5879, 0.5985],
        [0.2269, 0.0801, 0.8262]])


tensor([[1.9594, 0.7391, 1.8520],
        [1.2280, 2.0764, 1.7733],
        [0.5134, 0.2569, 2.5566]])

In [48]:
a.matrix_power(2)

tensor([[0.6088, 0.4500, 1.2400],
        [0.7611, 0.6164, 1.3242],
        [0.3384, 0.1974, 0.9111]])

In [49]:
# Inner (dot) product of two 1-D tensors: 1*3 + 2*4 + 3*5
a, b = torch.tensor([1, 2, 3]), torch.tensor([3, 4, 5])

torch.dot(a, b)

tensor(26)

### Broadcasting

In [50]:
print(a)
a+1

tensor([1, 2, 3])


tensor([2, 3, 4])

In [51]:
print(a)
a*5

tensor([1, 2, 3])


tensor([ 5, 10, 15])

In [52]:
b = torch.tensor([[1, 2, 3],[1, 2, 3],[1, 2, 3]])
b

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])

In [53]:
b + a  # this will result in row wise operation

tensor([[2, 4, 6],
        [2, 4, 6],
        [2, 4, 6]])

In [54]:
c = torch.tensor([[1],[2],[3]])

In [55]:
print(b)
print(c)
b + c  # This will result in column wise operation

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])
tensor([[1],
        [2],
        [3]])


tensor([[2, 3, 4],
        [3, 4, 5],
        [4, 5, 6]])

### max min

In [56]:
b

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])

In [57]:
b.min()

tensor(1)

In [58]:
b.max()

tensor(3)

In [59]:
b.min(1)  # dim=1: reduce across the columns -> one minimum (plus its column index) per row

torch.return_types.min(
values=tensor([1, 1, 1]),
indices=tensor([0, 0, 0]))

In [60]:
b.min(0)  # dim=0: reduce across the rows -> one minimum (plus its row index) per column

torch.return_types.min(
values=tensor([1, 2, 3]),
indices=tensor([0, 0, 0]))

In [61]:
b.max(1)

torch.return_types.max(
values=tensor([3, 3, 3]),
indices=tensor([2, 2, 2]))

In [62]:
b.float().mean()

tensor(2.)

In [63]:
b.median()

tensor(2)

In [64]:
print(tensor1)
tensor1.argmax()

tensor([ 1,  4,  7, 10])


tensor(3)

In [65]:
print(tensor2)
tensor2.argmin()  # index of the smallest element of the tensor printed above

tensor([10, 13, 16, 19])


tensor(0)

In [66]:
print(b)
b.float().mean(0)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])


tensor([1., 2., 3.])

In [67]:
print(b)
b.float().mean(1)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])


tensor([2., 2., 2.])

### Sort

In [68]:
a.sort(descending=True)

torch.return_types.sort(
values=tensor([3, 2, 1]),
indices=tensor([2, 1, 0]))

In [69]:
a.clamp(2)  # values less than given in clamp will be converted to given value

tensor([2, 2, 3])

# Indexing and Slicing

In [70]:
a = torch.rand(size=(3,4))
print(a)

tensor([[0.4839, 0.7349, 0.1390, 0.8870],
        [0.0030, 0.0640, 0.6783, 0.6581],
        [0.5533, 0.8784, 0.7565, 0.6288]])


In [71]:
a[0,:]  

tensor([0.4839, 0.7349, 0.1390, 0.8870])

In [72]:
a[:,0]

tensor([0.4839, 0.0030, 0.5533])

In [73]:
a[0:2, 1:4]

tensor([[0.7349, 0.1390, 0.8870],
        [0.0640, 0.6783, 0.6581]])

In [74]:
a[0,0]

tensor(0.4839)

In [75]:
a[0,0] = 1  # can use direct assignment

In [76]:
a

tensor([[1.0000, 0.7349, 0.1390, 0.8870],
        [0.0030, 0.0640, 0.6783, 0.6581],
        [0.5533, 0.8784, 0.7565, 0.6288]])

### Concatenation

In [77]:
a = torch.rand(size=(3,4))
b = torch.rand(size=(3,4))

print(a)
print(b)

tensor([[0.7599, 0.7629, 0.3551, 0.3250],
        [0.6384, 0.9303, 0.7343, 0.5234],
        [0.7888, 0.0869, 0.6986, 0.2195]])
tensor([[0.2076, 0.0493, 0.6947, 0.6893],
        [0.4354, 0.6819, 0.5594, 0.8233],
        [0.3711, 0.4964, 0.5877, 0.3718]])


In [78]:
torch.concat([a,b], dim=0)  # row wise concatenation

tensor([[0.7599, 0.7629, 0.3551, 0.3250],
        [0.6384, 0.9303, 0.7343, 0.5234],
        [0.7888, 0.0869, 0.6986, 0.2195],
        [0.2076, 0.0493, 0.6947, 0.6893],
        [0.4354, 0.6819, 0.5594, 0.8233],
        [0.3711, 0.4964, 0.5877, 0.3718]])

In [79]:
torch.concat([a,b], dim=1)  # column wise concatenation

tensor([[0.7599, 0.7629, 0.3551, 0.3250, 0.2076, 0.0493, 0.6947, 0.6893],
        [0.6384, 0.9303, 0.7343, 0.5234, 0.4354, 0.6819, 0.5594, 0.8233],
        [0.7888, 0.0869, 0.6986, 0.2195, 0.3711, 0.4964, 0.5877, 0.3718]])

### Reshaping

In [80]:
print(a.shape)
print(a)

torch.Size([3, 4])
tensor([[0.7599, 0.7629, 0.3551, 0.3250],
        [0.6384, 0.9303, 0.7343, 0.5234],
        [0.7888, 0.0869, 0.6986, 0.2195]])


In [81]:
a.reshape(6,2)  # no. of elements should match

tensor([[0.7599, 0.7629],
        [0.3551, 0.3250],
        [0.6384, 0.9303],
        [0.7343, 0.5234],
        [0.7888, 0.0869],
        [0.6986, 0.2195]])

### Flattening

In [82]:
a

tensor([[0.7599, 0.7629, 0.3551, 0.3250],
        [0.6384, 0.9303, 0.7343, 0.5234],
        [0.7888, 0.0869, 0.6986, 0.2195]])

In [83]:
a.view(-1)  # flatten into a 1-D tensor; -1 lets PyTorch infer the length (12 elements here)

tensor([0.7599, 0.7629, 0.3551, 0.3250, 0.6384, 0.9303, 0.7343, 0.5234, 0.7888,
        0.0869, 0.6986, 0.2195])

In [84]:
# Batch flattening: keep the leading batch dimension and merge every
# remaining dimension into one, so (batch, 2, 3) becomes (batch, 6).
batch = 5
torch.rand((batch, 2, 3)).flatten(start_dim=1)

tensor([[2.5108e-01, 7.2918e-01, 7.4709e-01, 5.9789e-01, 2.0356e-01, 1.2639e-01],
        [8.5224e-01, 4.7902e-01, 7.9157e-01, 3.6056e-01, 8.0428e-01, 2.3492e-01],
        [3.1007e-01, 6.3845e-01, 6.6712e-01, 8.0264e-02, 3.5299e-01, 3.6586e-01],
        [3.3430e-01, 6.1342e-02, 2.0774e-01, 6.5668e-01, 1.8418e-01, 4.5853e-01],
        [2.7627e-01, 8.3460e-01, 4.1640e-01, 3.6645e-04, 8.1767e-01, 9.6333e-01]])

# Advanced math

## AutoGrad

Neural networks (NNs) are a collection of nested functions that are executed on some input data. These functions are defined by parameters (consisting of weights and biases), which in PyTorch are stored in tensors.

Training a NN happens in two steps:

Forward Propagation: In forward prop, the NN makes its best guess about the correct output. It runs the input data through each of its functions to make this guess.

Backward Propagation: In backprop, the NN adjusts its parameters proportionate to the error in its guess. It does this by traversing backwards from the output, collecting the derivatives of the error with respect to the parameters of the functions (gradients), and optimizing the parameters using gradient descent.

### Derivative

In [85]:
x = torch.tensor(5.0,
                 requires_grad=True)
print(x)

tensor(5., requires_grad=True)


In [87]:
f = x ** 2
print(f)

tensor(25., grad_fn=<PowBackward0>)


In [88]:
f.backward()

x.grad

tensor(10.)

$f(x) = x^2 \Rightarrow \frac{df}{dx} = 2x$

$f'(x=5.0) = 2 \times 5 = 10$

### Partial Derivative

In [89]:
x = torch.tensor(3.0, requires_grad=True)
y = torch.tensor(4.0, requires_grad=True)

In [90]:
f = x**2 + y**2  # function

In [91]:
f.backward()  # backpropagate from f: this is what fills x.grad and y.grad, so it must run before reading them

In [92]:
f.grad_fn  # this gives what is the last operation done in function

<AddBackward0 at 0x1fc54218040>

In [93]:
x.grad  # partial derivative of f wrt x, i.e. 2x, evaluated at x=3, y=4 -> 6

tensor(6.)

In [94]:
y.grad  # partial derivative of f wrt y, i.e. 2y, evaluated at x=3, y=4 -> 8

tensor(8.)

In [95]:
x = torch.tensor(3.0, requires_grad=True)
y = torch.tensor(4.0, requires_grad=True)

In [96]:
f2 = x**2 * y**2

In [97]:
f2.backward()

In [98]:
f2.grad_fn

<MulBackward0 at 0x1fc54218310>

In [99]:
x.grad

tensor(96.)

In [100]:
y.grad

tensor(72.)

### Successive derivative

In [101]:
from torch.autograd import grad

def nth_derivative(f, wrt, n=2):
    """Return the n-th derivative of ``f`` with respect to ``wrt``.

    Each pass differentiates the current function with ``create_graph=True``
    so that the resulting gradient is itself differentiable, then sums that
    gradient to obtain a scalar for the next pass.

    Args:
        f: scalar tensor produced from ``wrt`` (the function value).
        wrt: tensor with ``requires_grad=True`` to differentiate against.
        n: order of the derivative, must be >= 1 (default 2).

    Returns:
        Tensor shaped like ``wrt`` holding the n-th derivative. When ``wrt``
        is not a scalar, every order after the first differentiates the SUM
        of the previous gradient, which matches the element-wise n-th
        derivative only if ``f`` acts on each element independently.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be >= 1, got {n}")

    grads = None
    for order in range(n):
        # Once a gradient no longer depends on anything tracked by autograd
        # it is a constant, so every higher-order derivative is exactly zero.
        if order > 0 and not f.requires_grad:
            return torch.zeros_like(wrt)

        # The first pass keeps the strict behaviour (an `f` unrelated to `wrt`
        # is an error); later passes tolerate a gradient that no longer
        # involves `wrt`, which autograd reports as None.
        grads = grad(f, wrt, create_graph=True, allow_unused=order > 0)[0]
        if grads is None:
            return torch.zeros_like(wrt)

        f = grads.sum()

    return grads

In [102]:
x

tensor(3., requires_grad=True)

In [103]:
f = x**2 + x**3

# double derivative
nth_derivative(f,x)

tensor(20., grad_fn=<AddBackward0>)

$f(x) = x^2 + x^3$

$f'(x) = 2x + 3x^2$

$f''(x) = 2 + 6x$

$f''(x=3) = 2 + 6(3)$

$f''(x=3) = 20$