## Pytorch Tensors

In [1]:
import torch

In [2]:
python_list = [1, 2, 3, 4, 5]
tensor = torch.tensor(python_list)
print(tensor)

tensor([1, 2, 3, 4, 5])


In [3]:
python_list_2d = [[1, 2, 3], [4, 5, 6]]
tensor_2d = torch.tensor(python_list_2d)
print(tensor_2d)

tensor([[1, 2, 3],
        [4, 5, 6]])


In [7]:
# print(dir(tensor))
print(tensor.shape)
print(tensor.dtype)
print(tensor.device)
print(tensor.size())

print('---')
print(tensor_2d.shape)
print(tensor_2d.dtype)
print(tensor_2d.device)
print(tensor_2d.size())

torch.Size([5])
torch.int64
cpu
torch.Size([5])
---
torch.Size([2, 3])
torch.int64
cpu
torch.Size([2, 3])


In [8]:
torch_tensor = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32, device='cpu')
print(torch_tensor)

tensor([1., 2., 3., 4., 5.])


In [9]:
torch_size = torch.Size([3, 4, 5])
print(torch_size)

torch.Size([3, 4, 5])


## Computations

In [70]:
X = torch.tensor([[1.0, 4.22323, 7.0], [-2.0, 3.0, 6.0]])
X

tensor([[ 1.0000,  4.2232,  7.0000],
        [-2.0000,  3.0000,  6.0000]])

In [71]:
print(f"abs values:\n{torch.abs(X)} \n")
print(f"max value:\n{torch.max(X)} \n")
print(f"min value:\n{torch.min(X)} \n")
print(f"mean value:\n{torch.mean(X)} \n")
print(f"sum value:\n{torch.sum(X)} \n")
print(f"standard deviation:\n{torch.std(X)} \n")
print(f"variance:\n{torch.var(X)} \n")
print(f"argmax:\n{torch.argmax(X)} \n")
print(f"argmin:\n{torch.argmin(X)} \n")
print(f"Cos val:\n{torch.cos(X)} \n")
print(f"Sin val:\n{torch.sin(X)} \n")
print(f"Tan val:\n{torch.tan(X)} \n")
print(f"log val:\n{torch.log(X)} \n")
print(f"square val:\n{torch.square(X)} \n")
print(f"prod val:\n{torch.prod(X)} \n")
print(f"round val:\n{torch.round(X)} \n")
print(f"sgn val:\n{torch.sign(X)} \n")
print(f"sqrt val:\n{torch.sqrt(X)} \n")
print(f"exp val:\n{torch.exp(X)} \n")

abs values:
tensor([[1.0000, 4.2232, 7.0000],
        [2.0000, 3.0000, 6.0000]]) 

max value:
7.0 

min value:
-2.0 

mean value:
3.203871726989746 

sum value:
19.223230361938477 

standard deviation:
3.324061155319214 

variance:
11.049382209777832 

argmax:
2 

argmin:
3 

Cos val:
tensor([[ 0.5403, -0.4699,  0.7539],
        [-0.4161, -0.9900,  0.9602]]) 

Sin val:
tensor([[ 0.8415, -0.8827,  0.6570],
        [-0.9093,  0.1411, -0.2794]]) 

Tan val:
tensor([[ 1.5574,  1.8786,  0.8714],
        [ 2.1850, -0.1425, -0.2910]]) 

log val:
tensor([[0.0000, 1.4406, 1.9459],
        [   nan, 1.0986, 1.7918]]) 

square val:
tensor([[ 1.0000, 17.8357, 49.0000],
        [ 4.0000,  9.0000, 36.0000]]) 

prod val:
-1064.25390625 

round val:
tensor([[ 1.,  4.,  7.],
        [-2.,  3.,  6.]]) 

sgn val:
tensor([[ 1.,  1.,  1.],
        [-1.,  1.,  1.]]) 

sqrt val:
tensor([[1.0000, 2.0550, 2.6458],
        [   nan, 1.7321, 2.4495]]) 

exp val:
tensor([[2.7183e+00, 6.8254e+01, 1.0966e+03],
       

In [15]:
# for row wise operations
print(f"actual tensor:\n{X} \n ")
print(f"row wise max:\n{torch.max(X, dim=1)} \n")
print(f"row wise min:\n{torch.min(X, dim=1)} \n")
print(f"row wise mean:\n{torch.mean(X, dim=1)} \n")
print(f"row wise sum:\n{torch.sum(X, dim=1)} \n")  
print(f"row wise std:\n{torch.std(X, dim=1)} \n")
print(f"row wise var:\n{torch.var(X, dim=1)} \n")
# for column wise operations
print(f"column wise max:\n{torch.max(X, dim=0)} \n")
print(f"column wise min:\n{torch.min(X, dim=0)} \n")
print(f"column wise mean:\n{torch.mean(X, dim=0)} \n")
print(f"column wise sum:\n{torch.sum(X, dim=0)} \n")  
print(f"column wise std:\n{torch.std(X, dim=0)} \n")
print(f"column wise var:\n{torch.var(X, dim=0)} \n")

actual tensor:
tensor([[ 1.,  4.,  7.],
        [-2.,  3.,  6.]]) 
 
row wise max:
torch.return_types.max(
values=tensor([7., 6.]),
indices=tensor([2, 2])) 

row wise min:
torch.return_types.min(
values=tensor([ 1., -2.]),
indices=tensor([0, 0])) 

row wise mean:
tensor([4.0000, 2.3333]) 

row wise sum:
tensor([12.,  7.]) 

row wise std:
tensor([3.0000, 4.0415]) 

row wise var:
tensor([ 9.0000, 16.3333]) 

column wise max:
torch.return_types.max(
values=tensor([1., 4., 7.]),
indices=tensor([0, 0, 0])) 

column wise min:
torch.return_types.min(
values=tensor([-2.,  3.,  6.]),
indices=tensor([1, 1, 1])) 

column wise mean:
tensor([-0.5000,  3.5000,  6.5000]) 

column wise sum:
tensor([-1.,  7., 13.]) 

column wise std:
tensor([2.1213, 0.7071, 0.7071]) 

column wise var:
tensor([4.5000, 0.5000, 0.5000]) 



## numpy <--> Tensor

In [16]:
import numpy as np

In [17]:
X.numpy()

array([[ 1.,  4.,  7.],
       [-2.,  3.,  6.]], dtype=float32)

In [18]:
torch.tensor(np.array([1, 2, 3, 4, 5]))

tensor([1, 2, 3, 4, 5])

In [29]:
X = torch.rand(3, 3)
print(X)

tensor([[0.3982, 0.5601, 0.8153],
        [0.2624, 0.8761, 0.0280],
        [0.1548, 0.3135, 0.8533]])


In [30]:
X[:, 0]  # first column

tensor([0.3982, 0.2624, 0.1548])

In [31]:
X[:, 1] = -99
X

tensor([[ 3.9816e-01, -9.9000e+01,  8.1527e-01],
        [ 2.6244e-01, -9.9000e+01,  2.7971e-02],
        [ 1.5478e-01, -9.9000e+01,  8.5327e-01]])

## Pytorch activation functions

In [32]:
print(f'relu activation:\n{torch.relu(X)}   \n')
print(f'sigmoid activation:\n{torch.sigmoid(X)}   \n')
print(f'tanh activation:\n{torch.tanh(X)}   \n')
print(f'softmax activation:\n{torch.softmax(X, dim=1)}   \n')

relu activation:
tensor([[0.3982, 0.0000, 0.8153],
        [0.2624, 0.0000, 0.0280],
        [0.1548, 0.0000, 0.8533]])   

sigmoid activation:
tensor([[0.5982, 0.0000, 0.6932],
        [0.5652, 0.0000, 0.5070],
        [0.5386, 0.0000, 0.7013]])   

tanh activation:
tensor([[ 0.3784, -1.0000,  0.6725],
        [ 0.2566, -1.0000,  0.0280],
        [ 0.1536, -1.0000,  0.6928]])   

softmax activation:
tensor([[3.9721e-01, 2.6625e-44, 6.0279e-01],
        [5.5835e-01, 4.3440e-44, 4.4165e-01],
        [3.3215e-01, 2.9427e-44, 6.6785e-01]])   



In [33]:
transposed_X = X.T
transposed_X

tensor([[ 3.9816e-01,  2.6244e-01,  1.5478e-01],
        [-9.9000e+01, -9.9000e+01, -9.9000e+01],
        [ 8.1527e-01,  2.7971e-02,  8.5327e-01]])

In [34]:
print(f"addition:\n{X + 10}   \n")
print(f"subtraction:\n{X - 10}   \n")
print(f"multiplication:\n{X * 10}   \n")
print(f"division:\n{X / 10}   \n")
print(f"exponentiation:\n{X ** 2}   \n")
print(f"matrix addition:\n{X + transposed_X}   \n")
print(f"matrix element-wise multiplication:\n{X * transposed_X}   \n")
print(f"matrix multiplication:\n{X @ transposed_X}   \n")

addition:
tensor([[ 10.3982, -89.0000,  10.8153],
        [ 10.2624, -89.0000,  10.0280],
        [ 10.1548, -89.0000,  10.8533]])   

subtraction:
tensor([[  -9.6018, -109.0000,   -9.1847],
        [  -9.7376, -109.0000,   -9.9720],
        [  -9.8452, -109.0000,   -9.1467]])   

multiplication:
tensor([[ 3.9816e+00, -9.9000e+02,  8.1527e+00],
        [ 2.6244e+00, -9.9000e+02,  2.7971e-01],
        [ 1.5478e+00, -9.9000e+02,  8.5327e+00]])   

division:
tensor([[ 3.9816e-02, -9.9000e+00,  8.1527e-02],
        [ 2.6244e-02, -9.9000e+00,  2.7971e-03],
        [ 1.5478e-02, -9.9000e+00,  8.5327e-02]])   

exponentiation:
tensor([[1.5853e-01, 9.8010e+03, 6.6466e-01],
        [6.8874e-02, 9.8010e+03, 7.8239e-04],
        [2.3956e-02, 9.8010e+03, 7.2806e-01]])   

matrix addition:
tensor([[   0.7963,  -98.7376,    0.9700],
        [ -98.7376, -198.0000,  -98.9720],
        [   0.9700,  -98.9720,    1.7065]])   

matrix element-wise multiplication:
tensor([[ 1.5853e-01, -2.5981e+01,  1.2618

## Autograd
PyTorch comes with an efficient implementation of reverse-mode auto-differentiation called autograd, which stands for automatic gradients. It is quite easy to use. For example, consider a simple function, f(x) = x^2. Differential calculus tells us that the derivative of this function is fâ€™(x) = 2x. If we evaluate f(5) and f'(5), we get 25 and 10, respectively. 

In [46]:
polynomial = torch.tensor([2, 0, 3], requires_grad=True, dtype=torch.float32)  # Represents 2 + 0*x + 3*x^2
f = polynomial ** 2  # f(x) = (2 + 0*x + 3*x^2)^2
f

tensor([4., 0., 9.], grad_fn=<PowBackward0>)

In [47]:
loss = f.sum()
print(loss)
loss.backward()
print(polynomial.grad)

tensor(13., grad_fn=<SumBackward0>)
tensor([4., 0., 6.])


In [None]:
# df/dx = 2*(2 + 0*x + 3*x^2) * (0 + 6*x) evaluated at x=0 => 2*2*0 = 0

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [41]:
x = torch.tensor(5.0, requires_grad=True)
f = x ** 2
f

tensor(25., grad_fn=<PowBackward0>)

In [42]:
f.backward()

In [43]:
x.grad

tensor(10.)

## polynomial equations and gradients

In [55]:
coeffs = torch.tensor([2.0, 0.0, 3.0, 1.0], requires_grad=True)
coeffs

tensor([2., 0., 3., 1.], requires_grad=True)

In [61]:
x = torch.tensor(2.0)
f = coeffs[0] + coeffs[1] * x + coeffs[2] * x ** 2 + coeffs[3] * x ** 3
f

tensor(22., grad_fn=<AddBackward0>)

In [62]:
f.backward()

In [58]:
coeffs.grad

tensor([1., 2., 4., 8.])

In [67]:
coeffs[0].item()

2.0

In [None]:
learning_rate = 0.1
x = torch.tensor(5.0, requires_grad=True)
for iteration in range(100):
    f = x ** 3  # forward pass
    print(f"Iteration {iteration}: f(x) = {f.item():.4f}, x = {x.item():.4f}")
    f.backward()  # backward pass
    print(f"x.grad: {x.grad}\n") # 3*x**2
    with torch.no_grad():
        x -= learning_rate * x.grad  # gradient descent step
        print(x) # reset the gradients

    x.grad.zero_()  # reset the gradients
    print(f"x.grad zeroed: {x.grad}\n")

Iteration 0: f(x) = 125.0000, x = 5.0000
x.grad: 75.0

tensor(-2.5000, requires_grad=True)
x.grad zeroed: 0.0

Iteration 1: f(x) = -15.6250, x = -2.5000
x.grad: 18.75

tensor(-4.3750, requires_grad=True)
x.grad zeroed: 0.0

Iteration 2: f(x) = -83.7402, x = -4.3750
x.grad: 57.421875

tensor(-10.1172, requires_grad=True)
x.grad zeroed: 0.0

Iteration 3: f(x) = -1035.5698, x = -10.1172
x.grad: 307.07244873046875

tensor(-40.8244, requires_grad=True)
x.grad zeroed: 0.0

Iteration 4: f(x) = -68039.3984, x = -40.8244
x.grad: 4999.90283203125

tensor(-540.8147, requires_grad=True)
x.grad zeroed: 0.0

Iteration 5: f(x) = -158177776.0000, x = -540.8147
x.grad: 877441.625

tensor(-88284.9766, requires_grad=True)
x.grad zeroed: 0.0

Iteration 6: f(x) = -688114024579072.0000, x = -88284.9766
x.grad: 23382710272.0

tensor(-2.3384e+09, requires_grad=True)
x.grad zeroed: 0.0

Iteration 7: f(x) = -12785971354604844136253095936.0000, x = -2338359296.0000
x.grad: 1.6403772304464544e+19

tensor(-1.6404e

In [64]:
x

tensor(1.0185e-09, requires_grad=True)