# PyTorch tutorial - basics

[Video](https://www.youtube.com/playlist?app=desktop&list=PLhhyoLH6IjfxeoooqP9rhU3HJIAVAJ3Vz)

[GitHub](https://github.com/aladdinpersson/Machine-Learning-Collection)

In [42]:
import torch

print(torch.__version__)  # show which PyTorch version produced the outputs below

1.7.1+cu101


In [43]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build a 2x3 float32 tensor on that device and enable gradient tracking
my_tensor = torch.tensor(
    [[1, 2, 3], [4, 5, 6]],
    dtype=torch.float32,
    device=device,
    requires_grad=True,
)
print(my_tensor)

# Show the main attributes, one per line
for attr_name in ("dtype", "device", "shape", "requires_grad"):
    print(getattr(my_tensor, attr_name))

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)
torch.float32
cpu
torch.Size([2, 3])
True


In [44]:
# Other common ways to create tensors
x1 = torch.empty(size=(3, 3))  # uninitialized memory: contents are arbitrary
x2 = torch.zeros(size=(3, 3))  # filled with zeros
x3 = torch.rand(size=(3, 3))  # uniform random samples from [0, 1)
x4 = torch.empty(size=(1, 5)).uniform_(0, 1)  # in-place uniform fill, same distribution as torch.rand
x5 = torch.ones(size=(3, 3))  # filled with ones
x6 = torch.eye(3, 4)  # ones on the main diagonal (identity-like, here 3x4)
x7 = torch.arange(start=1, end=6, step=2)  # range with a step: 1, 3, 5
x8 = torch.linspace(start=0.1, end=1, steps=5)  # 5 evenly spaced values from 0.1 to 1
x9 = torch.normal(mean=0, std=1, size=(1, 5))  # standard normal random samples
x10 = torch.empty(size=(1, 5)).normal_(mean=0, std=1)  # in-place normal fill, same distribution as torch.normal
x11 = torch.diag(torch.arange(1, 4))  # square matrix with 1, 2, 3 on the diagonal

# Print every tensor in the order it was created
for created in (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11):
    print(created)

tensor([[-1.8307e+30,  3.0906e-41,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0.0979, 0.9953, 0.4948],
        [0.9748, 0.1958, 0.7254],
        [0.6927, 0.0022, 0.6694]])
tensor([[0.3619, 0.5125, 0.7839, 0.5680, 0.9033]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.]])
tensor([1, 3, 5])
tensor([0.1000, 0.3250, 0.5500, 0.7750, 1.0000])
tensor([[ 0.8910, -0.5620,  0.6136,  0.3580, -0.3728]])
tensor([[-1.8806,  0.4408,  0.3450,  0.2282, -1.1250]])
tensor([[1, 0, 0],
        [0, 2, 0],
        [0, 0, 3]])


In [45]:
# Converting a tensor to other dtypes
t = torch.arange(4)
print(t)

conversions = (
    t.bool(),    # convert to bool: zero -> False, non-zero -> True
    t.short(),   # int16
    t.long(),    # int64
    t.float(),   # float32
    t.half(),    # float16 (half precision)
    t.double(),  # float64
)
for converted in conversions:
    print(converted)

tensor([0, 1, 2, 3])
tensor([False,  True,  True,  True])
tensor([0, 1, 2, 3], dtype=torch.int16)
tensor([0, 1, 2, 3])
tensor([0., 1., 2., 3.])
tensor([0., 1., 2., 3.], dtype=torch.float16)
tensor([0., 1., 2., 3.], dtype=torch.float64)


In [46]:
# Round trip: NumPy array -> tensor -> NumPy array
import numpy as np

n = np.arange(4)
t = torch.from_numpy(n)  # tensor built from the array
n_back = t.numpy()  # and back to an array

for shown in (n, t, n_back):
    print(shown)

[0 1 2 3]
tensor([0, 1, 2, 3])
[0 1 2 3]


In [47]:
# Tensor math: three ways to add two tensors element-wise
x = torch.tensor([1, 2, 3])
y = torch.tensor([9, 8, 7])

# 1) operator form
z1 = x + y

# 2) function form writing into a preallocated tensor; z2 is created without
#    an explicit dtype, which is why its result is printed as floats
z2 = torch.empty(x.shape)
torch.add(x, y, out=z2)

# 3) function form returning a new tensor
z3 = torch.add(x, y)

for total in (z1, z2, z3):
    print(total)

tensor([10, 10, 10])
tensor([10., 10., 10.])
tensor([10, 10, 10])


In [48]:
# Each operation is shown in operator form followed by its function form
arithmetic_results = (
    # Subtraction
    x - y,
    torch.sub(x, y),
    # Division by another tensor
    x/y,
    torch.true_divide(x, y),
    # Division by a scalar
    x/2,
    torch.true_divide(x, 2),
    # Element-wise multiplication
    x * y,
)
for result in arithmetic_results:
    print(result)

tensor([-8, -6, -4])
tensor([-8, -6, -4])
tensor([0.1111, 0.2500, 0.4286])
tensor([0.1111, 0.2500, 0.4286])
tensor([0.5000, 1.0000, 1.5000])
tensor([0.5000, 1.0000, 1.5000])
tensor([ 9, 16, 21])


In [49]:
# In-place operations: methods whose name ends with "_" modify the tensor itself.
t = torch.zeros(3)
t.add_(x)
print(t)
t += x  # augmented assignment is in-place as well
# t = t + x  # not in-place: creates a new tensor first
print(t)

tensor([1., 2., 3.])
tensor([2., 4., 6.])


In [50]:
# Exponentiation: method form and operator form
squares = (x.pow(2), x**2)

# Simple comparison: the result is a boolean tensor
comparisons = (x > 1, x <= 2)

for shown in squares + comparisons:
    print(shown)

tensor([1, 4, 9])
tensor([1, 4, 9])
tensor([False,  True,  True])
tensor([ True,  True, False])


In [51]:
# Matrix multiplication: (2, 5) times (5, 3) gives (2, 3)
x1, x2 = torch.rand((2, 5)), torch.rand((5, 3))

x3 = torch.mm(x1, x2)  # function form, 2x3 shape

# The method form x1.mm(x2) prints the same values
for product in (x3, x1.mm(x2)):
    print(product)

tensor([[0.8915, 0.9073, 0.7461],
        [2.1568, 2.1722, 1.4329]])
tensor([[0.8915, 0.9073, 0.7461],
        [2.1568, 2.1722, 1.4329]])


In [52]:
# Matrix exponentiation: raise a square matrix to the third power
x4 = torch.rand((3, 3))
cubed = x4.matrix_power(3)
print(cubed)
print(x4.size(0))  # length of the first dimension

tensor([[0.6179, 0.2030, 0.1900],
        [0.8602, 0.7680, 0.6012],
        [0.4138, 0.3906, 0.3039]])
3


In [53]:
# Dot product: built-in call, then spelled out as multiply-and-sum
dot_builtin = torch.dot(x, y)
dot_manual = torch.sum(x * y)
print(dot_builtin)
print(dot_manual)

tensor(46)
tensor(46)


In [54]:
# Batch matrix multiplication: one (n, m) x (m, p) product per batch entry
batch, n, m, p = 32, 10, 20, 30

t1 = torch.rand((batch, n, m))
t2 = torch.rand((batch, m, p))
out_bmm = torch.bmm(t1, t2)
print(out_bmm.shape)  # (batch, n, p)

torch.Size([32, 10, 30])


In [55]:
# Broadcasting: the single row of x2 is applied to every row of x1
x1 = torch.ones((5, 5))
x2 = torch.rand((1, 5))

broadcast_results = (
    x1 - x2,
    x2 - x1,
    x2 ** x1,
    x1 ** x2,
)
for broadcasted in broadcast_results:
    print(broadcasted)

tensor([[0.1166, 0.5048, 0.3982, 0.7542, 0.2776],
        [0.1166, 0.5048, 0.3982, 0.7542, 0.2776],
        [0.1166, 0.5048, 0.3982, 0.7542, 0.2776],
        [0.1166, 0.5048, 0.3982, 0.7542, 0.2776],
        [0.1166, 0.5048, 0.3982, 0.7542, 0.2776]])
tensor([[-0.1166, -0.5048, -0.3982, -0.7542, -0.2776],
        [-0.1166, -0.5048, -0.3982, -0.7542, -0.2776],
        [-0.1166, -0.5048, -0.3982, -0.7542, -0.2776],
        [-0.1166, -0.5048, -0.3982, -0.7542, -0.2776],
        [-0.1166, -0.5048, -0.3982, -0.7542, -0.2776]])
tensor([[0.8834, 0.4952, 0.6018, 0.2458, 0.7224],
        [0.8834, 0.4952, 0.6018, 0.2458, 0.7224],
        [0.8834, 0.4952, 0.6018, 0.2458, 0.7224],
        [0.8834, 0.4952, 0.6018, 0.2458, 0.7224],
        [0.8834, 0.4952, 0.6018, 0.2458, 0.7224]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])


In [56]:
# Other useful tensor operations: summing over one dimension removes it
s1 = t1.sum(dim=0)
s2 = t1.sum(dim=1)
for summed in (s1, s2):
    print(summed.shape)

torch.Size([10, 20])
torch.Size([32, 20])


In [57]:
t = torch.rand((3, 3))
print(t)

# Run each reduction along the given dimension; every call returns both the
# selected values and the indices where they were found
for reducer, dim in ((torch.max, 0), (torch.max, 1), (torch.min, 0)):
    values, indices = reducer(t, dim=dim)
    print()
    print('values =', values)
    print('indices =', indices)

tensor([[0.5569, 0.1972, 0.5379],
        [0.0478, 0.2776, 0.8224],
        [0.1490, 0.8188, 0.5611]])

values = tensor([0.5569, 0.8188, 0.8224])
indices = tensor([0, 2, 1])

values = tensor([0.5569, 0.8224, 0.8188])
indices = tensor([0, 2, 1])

values = tensor([0.0478, 0.1972, 0.5379])
indices = tensor([1, 0, 0])


In [58]:
norm = torch.normal(mean=0, std=1, size=(1, 5))
print(norm)

summaries = (
    torch.abs(norm),            # absolute values
    torch.argmax(norm, dim=1),  # index of max value
    torch.argmin(norm, dim=1),  # index of min value
)
for summary in summaries:
    print(summary)

tensor([[ 0.9182,  0.4683,  0.7068, -0.5870,  2.4510]])
tensor([[0.9182, 0.4683, 0.7068, 0.5870, 2.4510]])
tensor([4])
tensor([3])


In [59]:
# Average over dim 1 (cast to float first, then use the method form of mean)
norm_as_float = norm.float()
mean = norm_as_float.mean(dim=1)
print(mean)

tensor([0.7915])


In [60]:
# Compare two tensors element by element
a = torch.tensor([1, 2, 3])
b = torch.tensor([1, 2, 4])
elementwise_equal = torch.eq(a, b)
print(elementwise_equal)

tensor([ True,  True, False])


In [61]:
# Sorting: ascending first, then descending
for descending in (False, True):
    v, i = norm.sort(dim=1, descending=descending)
    if descending:
        print()  # blank line between the two results
    print('values =', v)
    print('indices =', i)

values = tensor([[-0.5870,  0.4683,  0.7068,  0.9182,  2.4510]])
indices = tensor([[3, 1, 2, 0, 4]])

values = tensor([[ 2.4510,  0.9182,  0.7068,  0.4683, -0.5870]])
indices = tensor([[4, 0, 2, 1, 3]])


In [62]:
c = torch.tensor([1, 2, 3, 4, 5, 6, 7])

clamped = (
    c.clamp(min=3),               # raise everything below 3 up to 3
    torch.clamp(c, max=5),        # cap everything above 5 at 5
    torch.clamp(c, min=3, max=5), # both bounds at once
)
for bounded in clamped:
    print(bounded)

tensor([3, 3, 3, 4, 5, 6, 7])
tensor([1, 2, 3, 4, 5, 5, 5])
tensor([3, 3, 3, 4, 5, 5, 5])


In [63]:
# Boolean reductions
a = torch.tensor([1, 0, 1, 1, 1], dtype=torch.bool)
has_any_true = a.any()  # True when at least one value is True
all_true = a.all()  # True only when every value is True
print(has_any_true)
print(all_true)

tensor(True)
tensor(False)


In [64]:
# Indexing
batch_size, features = 10, 25
x = torch.rand((batch_size, features))

first_row = x[0]  # same as x[0, :]
first_column = x[:, 0]
print(first_row.shape)
print(first_column.shape)

x[2, 0] = 3  # overwrite a single element
print(x[2, 0:5])

torch.Size([25])
torch.Size([10])
tensor([3.0000, 0.0726, 0.5314, 0.5512, 0.5918])


In [65]:
# Fancy indexing: pick several positions at once with a list of indices
x = torch.arange(start=2, end=12)
indices = [2, 5, 8]
picked = x[indices]
print(picked)

tensor([ 4,  7, 10])


In [66]:
x = torch.rand((3, 5))
print(x)

# Index tensors are paired up: this selects x[1, 4] and x[2, 0]
rows = torch.tensor([1, 2])
cols = torch.tensor([4, 0])
selected = x[rows, cols]
print(selected)

tensor([[0.5910, 0.7710, 0.9252, 0.8779, 0.7468],
        [0.6535, 0.6709, 0.2584, 0.7095, 0.7978],
        [0.4410, 0.9899, 0.1236, 0.3846, 0.6510]])
tensor([0.7978, 0.4410])


In [67]:
# More advanced indexing: select elements with boolean masks
x = torch.arange(10)

below_2_or_above_8 = (x < 2) | (x > 8)
below_2_and_above_8 = (x < 2) & (x > 8)  # no value satisfies both, so the selection is empty
is_even = x.remainder(2) == 0  # get all even numbers

for mask in (below_2_or_above_8, below_2_and_above_8, is_even):
    print(x[mask])

tensor([0, 1, 9])
tensor([], dtype=torch.int64)
tensor([0, 2, 4, 6, 8])


In [68]:
# Useful operations: keep x where x > 5, otherwise use x squared
keep_unchanged = x > 5
print(torch.where(keep_unchanged, x, x**2))

tensor([ 0,  1,  4,  9, 16, 25,  6,  7,  8,  9])


In [69]:
# Get unique values: duplicates are dropped
t = torch.tensor([0, 0, 1, 1, 2, 3, 3, 3, 4])
distinct_values = t.unique()
print(distinct_values)

tensor([0, 1, 2, 3, 4])


In [70]:
d3 = torch.zeros((2, 3, 4))
dimension_count = d3.ndimension()  # number of dimensions
print(dimension_count)

3


In [71]:
element_count = d3.numel()  # count the number of elements
print(element_count)

24


In [72]:
x = torch.arange(12)

# view: faster, but needs the data to be contiguous in memory
x_3x4 = x.view(3, 4)
# reshape: safer, makes a copy if necessary
x_reshaped = x.reshape(3, 4)

for arranged in (x_3x4, x_reshaped):
    print(arranged)

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [73]:
y = x_3x4.t()  # transpose: 3x4 becomes 4x3
print(y)

# y.view(2, 6) would be an error here: y is not contiguous in memory,
# so make a contiguous copy before calling view
y_contiguous = y.contiguous()
print(y_contiguous.view(2, 6))

tensor([[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]])
tensor([[ 0,  4,  8,  1,  5,  9],
        [ 2,  6, 10,  3,  7, 11]])


In [74]:
x1 = torch.rand((2, 3))
x2 = torch.rand((2, 3))

# Concatenate along rows (dim=0), then along columns (dim=1)
for dim in (0, 1):
    print(torch.cat((x1, x2), dim=dim))

tensor([[0.4174, 0.7489, 0.3582],
        [0.0237, 0.5542, 0.5416],
        [0.6618, 0.0204, 0.7045],
        [0.3342, 0.0436, 0.1071]])
tensor([[0.4174, 0.7489, 0.3582, 0.6618, 0.0204, 0.7045],
        [0.0237, 0.5542, 0.5416, 0.3342, 0.0436, 0.1071]])


In [75]:
# Unroll into one dimension: -1 lets PyTorch work out the length
flat_via_view = y.contiguous().view(-1)
flat_via_reshape = y.reshape(-1)
print(flat_via_view)
print(flat_via_reshape)

tensor([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])
tensor([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])


In [76]:
batch = 64
x = torch.rand(batch, 3, 5)

# Keep the batch dimension and flatten everything else
for flattened in (x.view(batch, -1), x.reshape(batch, -1)):
    print(flattened.shape)

print()
print(x.shape)
swapped = x.permute(0, 2, 1)  # permute dimensions: swap the last two
print(swapped.shape)

torch.Size([64, 15])
torch.Size([64, 15])

torch.Size([64, 3, 5])
torch.Size([64, 5, 3])


In [77]:
# Insert a new size-1 dimension at the end (-1), then at the front (0)
for new_axis in (-1, 0):
    print(x.unsqueeze(new_axis).shape)

torch.Size([64, 3, 5, 1])
torch.Size([1, 64, 3, 5])


In [78]:
# Add size-1 dimensions at the front and back, then remove them again
x = torch.arange(10).unsqueeze(0).unsqueeze(-1)

shapes = (
    x.shape,
    x.squeeze(-1).shape,  # drop only the last dimension
    x.squeeze().shape,    # drop every size-1 dimension
)
for shape in shapes:
    print(shape)

torch.Size([1, 10, 1])
torch.Size([1, 10])
torch.Size([10])
