# pytorch_basics

> This is the beginning of my learning sessions for PyTorch. 2023 Goals!

In [10]:
#| default_exp pytorch_basics

In [11]:
#| hide
from nbdev.showdoc import *

In [12]:
#| hide
import nbdev; nbdev.nbdev_export()

In [13]:
import numpy as np
import torch

In [14]:
# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# A 2x3 float32 tensor created directly on `device`, with autograd tracking on.
my_tensor = torch.tensor(
    [[1, 2, 3], [4, 5, 6]],
    dtype=torch.float32,
    device=device,
    requires_grad=True,
)

In [15]:
print(my_tensor)

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)


In [16]:
print(my_tensor.dtype)

torch.float32


In [17]:
print(my_tensor.device)

cpu


In [18]:
print(my_tensor.shape)

torch.Size([2, 3])


In [19]:
print(my_tensor.requires_grad)

True


In [20]:
# Other common initialization methods

x = torch.empty(size = (3, 3))  # uninitialized memory: values are whatever was already there, not random samples

In [21]:
x

tensor([[ 0.0000e+00,  8.5899e+09, -4.2382e+31],
        [ 8.5920e+09,  9.8091e-45,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])

In [22]:
x = torch.zeros((3, 3))
x

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [23]:
x = torch.rand((3, 3))  # uniform distribution (0, 1)
x

tensor([[0.6410, 0.8879, 0.9022],
        [0.6368, 0.0999, 0.1754],
        [0.4208, 0.4480, 0.3392]])

In [24]:
x = torch.ones((3, 3))
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [25]:
x = torch.eye(5) # I, eye
x

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [26]:
x = torch.arange(start=0, end=5, step = 1)
x

tensor([0, 1, 2, 3, 4])

In [27]:
x = torch.linspace(start = 0.1, end = 1, steps = 10)
x

tensor([0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
        1.0000])

In [28]:
x = torch.empty(size=(1, 5)).normal_(mean=0, std=1)
x

tensor([[ 1.7559, -0.2665,  0.2907,  0.0128, -0.6279]])

In [29]:
x = torch.empty(size=(1, 5)).uniform_(0, 1)
x

tensor([[0.4725, 0.9797, 0.1570, 0.3379, 0.5222]])

In [30]:
x = torch.diag(torch.ones(3))
x

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [31]:
tensor = torch.arange(4)

In [32]:
tensor

tensor([0, 1, 2, 3])

In [33]:
tensor.bool()

tensor([False,  True,  True,  True])

In [34]:
 tensor.dtype

torch.int64

In [35]:
tensor.short()

tensor([0, 1, 2, 3], dtype=torch.int16)

In [36]:
tensor.long() #int64, important

tensor([0, 1, 2, 3])

In [37]:
tensor.half()

tensor([0., 1., 2., 3.], dtype=torch.float16)

In [38]:
tensor.float() # float32 important

tensor([0., 1., 2., 3.])

In [39]:
tensor.double()

tensor([0., 1., 2., 3.], dtype=torch.float64)

In [40]:
# NumPy -> PyTorch. `torch.from_numpy` does not copy: the tensor and the array
# share the same memory, so a write to one is visible through the other. The
# tensor also keeps NumPy's dtype (float64 here), not PyTorch's default float32.
np_array = np.zeros((5, 5))
tensor = torch.from_numpy(np_array)
tensor

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]], dtype=torch.float64)

In [41]:
np_array_back = tensor.numpy()
np_array_back

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

## Tensor Math & Comparison Operations

In [42]:
x = torch.tensor([1, 2, 3])
y = torch.tensor([9, 8, 7])

### Addition

In [43]:
x + y

tensor([10, 10, 10])

In [44]:
# Addition

# `out=` writes the sum into the preallocated tensor instead of returning a new
# one. z1 comes from torch.empty(3) with no dtype given, so the integer sum of
# x and y is stored as floats (note the `10.` values in the output).
z1 = torch.empty(3)
torch.add(x, y, out=z1)
z1

tensor([10., 10., 10.])

In [45]:
z2 = torch.add(x, y)
z = x + y 

### Subtraction

In [46]:
z = x - y
z

tensor([-8, -6, -4])

### Division

In [47]:
z = torch.true_divide(x, y)
z

tensor([0.1111, 0.2500, 0.4286])

In [48]:
# Methods ending in an underscore (here `add_`) modify the tensor in place:
# `t` changes without being reassigned.
t = torch.zeros(3)
t.add_(x)
t

tensor([1., 2., 3.])

In [49]:
t = t+x
t

tensor([2., 4., 6.])

In [50]:
z = x.pow(2)
z

tensor([1, 4, 9])

In [51]:
x**2

tensor([1, 4, 9])

In [52]:
# Simple Comparison
z = x > 0
z

tensor([True, True, True])

In [53]:
z = x < 0
z

tensor([False, False, False])

In [54]:
# Matrix Mul
x1 = torch.rand((2, 5))
x2 = torch.rand((5, 3))
x3 = torch.mm(x1, x2) # 2X3

In [55]:
x3

tensor([[0.9657, 0.9245, 1.0498],
        [1.1735, 1.0772, 1.1241]])

In [56]:
x3 = x1.mm(x2)
x3

tensor([[0.9657, 0.9245, 1.0498],
        [1.1735, 1.0772, 1.1241]])

In [57]:
matrix_exp = torch.rand(5, 5)
print(matrix_exp.matrix_power(3))

tensor([[3.4061, 3.3285, 4.6868, 4.7045, 4.3597],
        [4.0044, 3.5835, 4.6551, 5.1733, 5.3702],
        [4.2547, 3.5539, 4.7585, 5.2007, 5.7601],
        [5.2401, 4.6679, 6.3958, 6.7681, 6.9491],
        [3.6504, 3.6664, 4.9608, 4.9669, 4.8504]])


In [58]:
# element wise multiply
z = x * y
z

tensor([ 9, 16, 21])

In [59]:
# dot product
z = torch.dot(x,y)
z

tensor(46)

In [60]:
# Batch Matrix Multiplication
# Each of the `batch` (n x m) matrices in tensor1 is multiplied with the
# matching (m x p) matrix in tensor2.

batch, n, m, p = 32, 10, 20, 30

tensor1 = torch.rand(batch, n, m)
tensor2 = torch.rand(batch, m, p)
out_bmm = tensor1.bmm(tensor2)  # shape: (batch, n, p)


In [61]:
# Example of broadcasting
# x2 has shape (1, 5): its single row is lined up against each of the 5 rows
# of x1, so the subtraction works row by row even though the shapes differ.

x1 = torch.rand((5, 5))
x2 = torch.rand((1, 5))

z = x1-x2
z

tensor([[ 0.0517, -0.0518,  0.0576,  0.4054,  0.2466],
        [ 0.0380,  0.2111,  0.0332,  0.0230, -0.0932],
        [ 0.0586,  0.5420, -0.3055,  0.3387,  0.5463],
        [ 0.4310,  0.1885, -0.0734, -0.2408, -0.3285],
        [ 0.7844,  0.1728, -0.5003,  0.1454, -0.1766]])

In [62]:
z = x1**x2
z

tensor([[0.9244, 0.6636, 0.8768, 0.9402, 0.8372],
        [0.9192, 0.8212, 0.8567, 0.7176, 0.6221],
        [0.9268, 0.9449, 0.5635, 0.9058, 0.9711],
        [0.9759, 0.8107, 0.7679, 0.5010, 0.3230],
        [0.9936, 0.8031, 0.3751, 0.7965, 0.5458]])

In [63]:
sum_x = torch.sum(x, dim=0)

In [64]:
sum_x

tensor(6)

In [65]:
x

tensor([1, 2, 3])

In [66]:
value, indices = torch.max(x, dim=0)  # x.max(dim=0)
value, indices

(tensor(3), tensor(2))

In [67]:
value, indices = torch.min(x, dim=0)
value, indices

(tensor(1), tensor(0))

In [68]:
abs_x = torch.abs(x)
abs_x

tensor([1, 2, 3])

In [69]:
z = torch.argmax(x, dim=0)
z

tensor(2)

In [70]:
z = torch.argmin(x, dim=0)
z

tensor(0)

In [71]:
# torch.mean needs a floating-point (or complex) input, so calling it on the
# int64 tensor `x` raises. The failure is the point of this cell, so catch it
# and print it instead of letting it abort "Restart & Run All".
try:
    mean_x = torch.mean(x, dim=0)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [72]:
mean_x = torch.mean(x.float(), dim=0)
mean_x

tensor(2.)

In [75]:
z = torch.eq(x, y)
x, y, z

(tensor([1, 2, 3]), tensor([9, 8, 7]), tensor([False, False, False]))

In [77]:
torch.eq(torch.tensor([1, 2, 3]), torch.tensor([1, 2, 3]))

tensor([True, True, True])

In [80]:
sorted_y, indices = torch.sort(y, dim=0, descending=False)
sorted_y, indices

(tensor([7, 8, 9]), tensor([2, 1, 0]))

In [82]:
z = torch.clamp(x, min=2, max=10)
z

tensor([2, 2, 3])

In [84]:
x = torch.tensor([1, 0, 1, 1, 1], dtype=torch.bool)
z = torch.any(x)
z

tensor(True)

In [86]:
z = torch.all(x)
z

tensor(False)

### Indexing in the tensor

In [87]:
batch_size = 10
features = 25
x = torch.rand((batch_size, features))

In [90]:
x[0].shape


torch.Size([25])

In [92]:
x[0,:]

tensor([5.0281e-02, 8.0261e-01, 4.5414e-01, 7.8423e-01, 5.4225e-01, 8.7366e-03,
        4.7087e-01, 7.5007e-01, 1.1418e-01, 2.0105e-04, 9.3874e-01, 4.5055e-01,
        4.3165e-01, 4.1808e-01, 6.4711e-01, 3.1144e-01, 1.5807e-01, 6.2771e-01,
        3.7119e-01, 5.6732e-01, 1.2717e-01, 3.3985e-01, 4.8032e-01, 2.8782e-01,
        8.5632e-02])

In [93]:
x[:, 0]

tensor([0.0503, 0.5784, 0.5308, 0.7473, 0.8517, 0.0040, 0.9692, 0.6234, 0.0279,
        0.5623])

In [94]:
x[2, :10]  # third example first 10 elements

tensor([0.5308, 0.2278, 0.0590, 0.2630, 0.2816, 0.3264, 0.3253, 0.1864, 0.4376,
        0.1643])

In [95]:
x[0, 0] = 100

In [97]:
x = torch.arange(10)
indices = [2, 5, 8]

In [98]:
x[indices]

tensor([2, 5, 8])

In [102]:
x = torch.rand((3, 5))
rows = torch.tensor([1, 0])
cols = torch.tensor([4, 0])
x[rows, cols].shape

torch.Size([2])

In [103]:
x = torch.arange(10)
x[(x < 2) | (x > 8)]

tensor([0, 1, 9])

In [104]:
# No value is both < 2 and > 8, so `&` (element-wise AND) selects nothing here;
# the empty result is expected.
x[(x < 2) & (x > 8)]

tensor([], dtype=torch.int64)

In [105]:
x[x.remainder(2) == 0]

tensor([0, 2, 4, 6, 8])

In [106]:
torch.where(x > 5, x, x * 2)

tensor([ 0,  2,  4,  6,  8, 10,  6,  7,  8,  9])

In [107]:
torch.tensor([0, 0, 1, 1, 2, 3, 4, 5, 5]).unique()

tensor([0, 1, 2, 3, 4, 5])

In [108]:
x.ndimension()

1

### Tensor Reshaping

In [109]:
x = torch.arange(9)

In [110]:
x_3x3 = x.view(3, 3)  # works on contiguous arrays

In [111]:
x_3x3

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [112]:
x_3x3 = x.reshape(3, 3)
x_3x3

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [113]:
y = x_3x3.t()

In [114]:
y

tensor([[0, 3, 6],
        [1, 4, 7],
        [2, 5, 8]])

In [115]:
# `y` is a transpose, so its elements are no longer laid out contiguously in
# memory. `view` never copies and therefore cannot flatten it; `reshape` (or
# `.contiguous().view(...)`) copies when it has to. The failure is the point of
# this cell, so catch it and print it instead of aborting "Restart & Run All".
try:
    y.view(9)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [116]:
y.reshape(9)

tensor([0, 3, 6, 1, 4, 7, 2, 5, 8])

In [117]:
y.contiguous().view(9)

tensor([0, 3, 6, 1, 4, 7, 2, 5, 8])

In [118]:
x1 = torch.rand((2, 5))
x2 = torch.rand((2, 5))

torch.cat((x1, x2), dim = 0).shape

torch.Size([4, 5])

In [119]:
torch.cat((x1, x2), dim = 1).shape

torch.Size([2, 10])

In [122]:
z = x1.view(-1)
z.shape

torch.Size([10])

In [123]:
batch = 64
x = torch.rand(batch, 2, 5, 5)
z = x.view(batch, -1)
z.shape

torch.Size([64, 50])

In [124]:
# How to switch the axis

x = torch.rand((batch, 2, 5))
z = x.permute(0, 2, 1)
z.shape

torch.Size([64, 5, 2])

In [127]:
x = torch.arange(10)
x.shape

torch.Size([10])

In [126]:
x.unsqueeze(0).shape # (1-dimensional to 2-dimensional)

torch.Size([1, 10])

In [128]:
x.unsqueeze(1).shape # (1-dimensional to 2-dimensional)

torch.Size([10, 1])

In [130]:
x = torch.arange(10).unsqueeze(0).unsqueeze(1) # 1x1x10
x.shape 

torch.Size([1, 1, 10])

In [131]:
z = x.squeeze(1)
z.shape

torch.Size([1, 10])