# Basics of PyTorch

## Import the dependencies

In [1]:
import random
import numpy as np
import torch

## Set the seed for reproducibility

In [2]:
def set_seed(seed: int) -> None:
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and possibly pick different
    # algorithms from run to run, which defeats reproducibility.
    torch.backends.cudnn.benchmark = False

In [3]:
set_seed(31415)

## Elementary algebra

In [4]:
# Two scalars: an integer and a float
A = 1
B = 2.5
# sum, difference, product and quotient, printed on a single line
print(A + B, A - B, A * B, A / B, sep=' ')

3.5 -1.5 2.5 0.4


## Constants

In [5]:
1j, np.e, np.exp(1), np.pi, np.inf

(1j, 2.718281828459045, 2.718281828459045, 3.141592653589793, inf)

## Prohibited expressions

In [6]:
# Each of these divisions by zero must raise ZeroDivisionError in plain Python
expressions = ['1/0', '-1/0', '0/0']
for e in expressions:
    try:
        eval(e)  # the strings are hard-coded literals, so eval is harmless here
    except ZeroDivisionError:
        # expected outcome: move on to the next expression
        continue
    # reaching this point means the expression evaluated without an error
    raise NotImplementedError('Should not be here!')

## Vectors and matrices in PyTorch

In [7]:
torch.tensor([1,2,3])  # row vector

tensor([1, 2, 3])

In [8]:
torch.tensor([[1],[2],[3]])  # column vector

tensor([[1],
        [2],
        [3]])

In [9]:
torch.tensor([1,2,3]).reshape(3,1)  # column vector from the row vector

tensor([[1],
        [2],
        [3]])

In [10]:
torch.tensor([1,2,3]).reshape(3,-1)  # exactly the same as above,
                                     # but the last dimension is inferred

tensor([[1],
        [2],
        [3]])

In [11]:
torch.tensor([[1,2],[3,4]])  # a matrix

tensor([[1, 2],
        [3, 4]])

In [12]:
torch.arange(1,3)  # a range, values on the interval [1,3)

tensor([1, 2])

In [13]:
torch.arange(1,11,step=3)  # a range, but with a step size 

tensor([ 1,  4,  7, 10])

In [14]:
torch.arange(1,-10,step=-3)  # a reverse range

tensor([ 1, -2, -5, -8])

In [15]:
torch.rand(size=(3,2))   # a random matrix

tensor([[0.3296, 0.2113],
        [0.8446, 0.9235],
        [0.5002, 0.4071]])

In [16]:
torch.ones(size=(3, 2))   # a matrix of all ones

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [17]:
torch.zeros(size=(3, 2))   # a matrix of all zeros

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [18]:
torch.eye(3)  # identity matrix

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [19]:
torch.eye(4,2)  # non-square "identity"

tensor([[1., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.]])

In [20]:
torch.rand(3)   # a random vector

tensor([0.5099, 0.4392, 0.7209])

In [21]:
torch.ones(3)   # a vector of ones

tensor([1., 1., 1.])

In [22]:
torch.zeros(3)  # a vector of zeros

tensor([0., 0., 0.])

In [23]:
# Target dimensions of the tensor
shape = (3, 2, 3)
# Consecutive integers 0 .. prod(shape)-1 arranged as an order-3 tensor
A = torch.arange(np.prod(shape)).reshape(*shape)
A

tensor([[[ 0,  1,  2],
         [ 3,  4,  5]],

        [[ 6,  7,  8],
         [ 9, 10, 11]],

        [[12, 13, 14],
         [15, 16, 17]]])

In [24]:
A[0,:,:]  # a slice of the tensor above

tensor([[0, 1, 2],
        [3, 4, 5]])

In [25]:
A[:,0,:]  # slice at index 0 of the second dimension

tensor([[ 0,  1,  2],
        [ 6,  7,  8],
        [12, 13, 14]])

In [26]:
A[:,:,0]  # slice at index 0 of the third (last) dimension

tensor([[ 0,  3],
        [ 6,  9],
        [12, 15]])

## Matrix operations

In [27]:
A = torch.rand((3,2)); B = torch.rand((2,4))

In [28]:
A @ B  # matrix multiplication

tensor([[0.0908, 0.1058, 0.8519, 0.7139],
        [0.0722, 0.1007, 0.6356, 0.5250],
        [0.1219, 0.1782, 1.0526, 0.8653]])

In [29]:
shape = (2, 2)
# Build both matrices directly as float64 instead of casting afterwards
A = torch.arange(int(np.prod(shape)), dtype=torch.float64).reshape(shape)
B = (torch.arange(int(np.prod(shape)), dtype=torch.float64) + 3).reshape(shape)
A, B

(tensor([[0., 1.],
         [2., 3.]], dtype=torch.float64),
 tensor([[3., 4.],
         [5., 6.]], dtype=torch.float64))

In [30]:
A+B, A-B, A*B, A/B  # element-wise operations

(tensor([[3., 5.],
         [7., 9.]], dtype=torch.float64),
 tensor([[-3., -3.],
         [-3., -3.]], dtype=torch.float64),
 tensor([[ 0.,  4.],
         [10., 18.]], dtype=torch.float64),
 tensor([[0.0000, 0.2500],
         [0.4000, 0.5000]], dtype=torch.float64))

## Matrix Inverse

In [31]:
# A=CB => C=AB^(-1) => A-CB=0
C = A @ torch.pinverse(B)  # for invertible square matrices one can use just torch.inverse
A - C @ B  # should be zero up to numerical precision

tensor([[-3.5527e-15,  0.0000e+00],
        [ 1.7764e-15,  5.3291e-15]], dtype=torch.float64)

## Size commands

In [32]:
A = torch.rand((3,2))

In [33]:
A.shape, A.size(), A.size(0), A.numel()  # shape, the same via size(), size of dim 0, number of elements

(torch.Size([3, 2]), torch.Size([3, 2]), 3, 6)

## Transpose and Hermitian

In [34]:
shape = (2,2)
A = torch.rand(shape) + 1j*torch.rand(shape)  # a random complex matrix
B = torch.rand(shape, dtype=torch.cfloat)       # a random complex matrix
                                                # (alternative)
A, B

(tensor([[0.5617+0.4708j, 0.2980+0.8029j],
         [0.0865+0.7965j, 0.0658+0.0790j]]),
 tensor([[0.2839+0.1560j, 0.0029+0.4112j],
         [0.7281+0.9481j, 0.5525+0.1749j]]))

In [35]:
H = A + A.conj().T  # Hermitian matrix
H

tensor([[1.1234+0.0000j, 0.3846+0.0064j],
        [0.3846-0.0064j, 0.1317+0.0000j]])

In [36]:
H.T, H.conj().T  # plain transpose and conjugate (Hermitian) transpose

(tensor([[1.1234+0.0000j, 0.3846-0.0064j],
         [0.3846+0.0064j, 0.1317+0.0000j]]),
 tensor([[1.1234+-0.0000j, 0.3846+0.0064j],
         [0.3846-0.0064j, 0.1317+-0.0000j]]))

In [37]:
H.conj().T  # conjugate transpose

tensor([[1.1234+-0.0000j, 0.3846+0.0064j],
        [0.3846-0.0064j, 0.1317+-0.0000j]])

In [38]:
np.conj(1j) # complex conjugation for a number

-1j

## Tensor slicing

In [39]:
shape = (3,5)
A = torch.rand(shape)
A

tensor([[0.0225, 0.7661, 0.7259, 0.2711, 0.7052],
        [0.1349, 0.6725, 0.6366, 0.1626, 0.4018],
        [0.4128, 0.6438, 0.0160, 0.8131, 0.9429]])

In [40]:
A[1,3]  # element at row index 1, column index 3 (0-based): 2nd row, 4th column

tensor(0.1626)

In [41]:
A[:,2]  # elements of the 3rd column (column index 2)

tensor([0.7259, 0.6366, 0.0160])

In [42]:
A[:,2].reshape(-1,1)  # reshaping to a column vector 

tensor([[0.7259],
        [0.6366],
        [0.0160]])

In [43]:
A[0:3,1:3]  # first three rows of the second and third columns

tensor([[0.7661, 0.7259],
        [0.6725, 0.6366],
        [0.6438, 0.0160]])

In [44]:
A

tensor([[0.0225, 0.7661, 0.7259, 0.2711, 0.7052],
        [0.1349, 0.6725, 0.6366, 0.1626, 0.4018],
        [0.4128, 0.6438, 0.0160, 0.8131, 0.9429]])

In [45]:
A[-2:,-2:]  # the last two rows and last two columns

tensor([[0.1626, 0.4018],
        [0.8131, 0.9429]])

In [46]:
A[1,1] = 10  # reassigning the value
A

tensor([[ 0.0225,  0.7661,  0.7259,  0.2711,  0.7052],
        [ 0.1349, 10.0000,  0.6366,  0.1626,  0.4018],
        [ 0.4128,  0.6438,  0.0160,  0.8131,  0.9429]])

In [47]:
A[:2,:2] = 1.0  # reassigning multiple values
A

tensor([[1.0000, 1.0000, 0.7259, 0.2711, 0.7052],
        [1.0000, 1.0000, 0.6366, 0.1626, 0.4018],
        [0.4128, 0.6438, 0.0160, 0.8131, 0.9429]])

In [48]:
A[:,:] = 0  # reassigning all values
A

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [49]:
A = torch.rand(3,3, dtype=torch.cfloat)
A

tensor([[0.1814+0.5839j, 0.5615+0.9333j, 0.3104+0.7233j],
        [0.0416+0.7359j, 0.8710+0.2497j, 0.0947+0.3995j],
        [0.1085+0.6504j, 0.0276+0.7532j, 0.6545+0.1453j]])

In [50]:
A[0]  # a single index selects along the first dimension (the first row), not a linear element index

tensor([0.1814+0.5839j, 0.5615+0.9333j, 0.3104+0.7233j])

In [51]:
shape = (3,2,3)
A = torch.rand(shape)
A

tensor([[[0.2124, 0.6912, 0.9369],
         [0.3774, 0.8324, 0.3773]],

        [[0.0043, 0.9230, 0.8115],
         [0.1299, 0.3255, 0.5572]],

        [[0.8675, 0.7110, 0.0338],
         [0.1243, 0.7369, 0.1481]]])

In [52]:
A[0]  # consistent for a multidimensional array: the first slice along dim 0

tensor([[0.2124, 0.6912, 0.9369],
        [0.3774, 0.8324, 0.3773]])

## Reshape and permute matrices

In [53]:
A = torch.arange(6)
A

tensor([0, 1, 2, 3, 4, 5])

In [54]:
B = A.reshape(3,2)
B

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [55]:
B.permute(1,0)  # same as transpose for matrices

tensor([[0, 2, 4],
        [1, 3, 5]])

## Sum and product

In [56]:
A = torch.arange(1,10)
A, A.sum(), A.prod()

(tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]), tensor(45), tensor(362880))

In [57]:
A = A.reshape(3,3)
A, A.sum(0), A.sum(1)  # summing along a specific dimension

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([12, 15, 18]),
 tensor([ 6, 15, 24]))

In [58]:
A, A.prod(0), A.prod(1)  # product along a specific dimension

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([ 28,  80, 162]),
 tensor([  6, 120, 504]))

## Eigenvalues and eigenvectors

In [59]:
def is_hermitian(A):
    """Return True if ``A`` is a square matrix equal to its conjugate transpose.

    Args:
        A: Tensor to test. Anything that is not a 2-D square tensor is
            reported as non-Hermitian.

    Returns:
        bool: True when ``A`` matches ``A.conj().T`` within the default
        tolerances of ``torch.allclose``; False otherwise.
    """
    # Guard on the rank first: indexing shape[1] on a 0-D/1-D tensor raises
    # IndexError, and such inputs cannot be Hermitian matrices anyway.
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        return False
    return torch.allclose(A, A.conj().T)

In [60]:
shape = (2,2)
A = torch.rand(shape, dtype=torch.complex128)
A = (A + A.conj().T)  # Hermitian
A, is_hermitian(A)

(tensor([[0.8235+0.0000j, 0.9416-0.4705j],
         [0.9416+0.4705j, 0.0874+0.0000j]], dtype=torch.complex128),
 True)

In [61]:
D, V = torch.linalg.eigh(A)  # eigenvalues and eigenvectors
D, V

(tensor([-0.6596,  1.5705], dtype=torch.float64),
 tensor([[-0.5788+-0.0000j, -0.8155+0.0000j],
         [ 0.7295+0.3645j, -0.5177-0.2587j]], dtype=torch.complex128))

In [62]:
B = V @ D.to(V.dtype).diag() @ V.conj().T
torch.allclose(B, A)  # should be close to the original

True

In [63]:
B = V.conj().T @ V # left unitarity
torch.allclose(B, torch.eye(B.shape[0], dtype=torch.complex128))

True

## Singular value decomposition

In [64]:
shape = (3,2)
A = torch.rand(shape, dtype=torch.complex128)  # non-Hermitian 
A, is_hermitian(A)

(tensor([[0.5158+0.0465j, 0.6354+0.0024j],
         [0.7792+0.5281j, 0.4283+0.7780j],
         [0.6249+0.7593j, 0.9958+0.3477j]], dtype=torch.complex128),
 False)

In [65]:
U,S,Vh = torch.linalg.svd(A, full_matrices=False)  #  SVD
U, S, Vh

(tensor([[-0.3978-0.0379j, -0.1286-0.0198j],
         [-0.3877-0.4666j,  0.6495-0.3665j],
         [-0.5499-0.4123j, -0.4248+0.4965j]], dtype=torch.complex128),
 tensor([2.0452, 0.4933], dtype=torch.float64),
 tensor([[-0.6904+0.0000j, -0.7201+0.0688j],
         [ 0.7234+0.0000j, -0.6873+0.0657j]], dtype=torch.complex128))

In [66]:
torch.allclose(U @ S.to(U.dtype).diag() @ Vh, A)  # the reconstruction should match A up to numerical precision

True

In [67]:
torch.allclose(U.conj().T @ U, torch.eye(U.shape[1], dtype=torch.complex128))  # identity

True

In [68]:
torch.allclose(Vh @ Vh.conj().T, torch.eye(Vh.shape[0], dtype=torch.complex128))  # identity

True

## QR decomposition

In [69]:
shape = (4,3)
A = torch.rand(shape, dtype=torch.complex128)

In [70]:
Q,R = torch.linalg.qr(A, mode='reduced')  # A=Q*R. Default mode is 'reduced'

In [71]:
Q, R

(tensor([[-0.5137-0.0296j,  0.3741-0.3864j,  0.6028+0.2553j],
         [-0.4997-0.4105j, -0.0464+0.1453j, -0.0801-0.5317j],
         [-0.0023-0.2212j, -0.6830-0.3944j, -0.0666+0.3969j],
         [-0.3612-0.3709j, -0.1283-0.2212j, -0.3412-0.0629j]],
        dtype=torch.complex128),
 tensor([[-1.9250+0.0000j, -1.2290+0.4509j, -1.6596-0.2966j],
         [ 0.0000+0.0000j, -1.2934+0.0000j, -0.6753-0.7547j],
         [ 0.0000+0.0000j,  0.0000+0.0000j, -0.4149+0.0000j]],
        dtype=torch.complex128))

In [72]:
torch.allclose(Q @ R, A)  # Q @ R should reproduce A up to numerical precision

True

In [73]:
Q.T @ Q  # plain transpose (no conjugation): not the identity for a complex Q

tensor([[ 0.2882+0.7097j, -0.2422+0.4133j, -0.2924+0.3127j],
        [-0.2422+0.4133j,  0.2502+0.2929j,  0.6371-0.2856j],
        [-0.2924+0.3127j,  0.6371-0.2856j, -0.0186+0.3830j]],
       dtype=torch.complex128)

In [74]:
torch.allclose(Q.conj().T @ Q, torch.eye(Q.shape[1], dtype=torch.complex128))  # columns of Q are orthonormal under the conjugate transpose

True

## Time counter

In [75]:
from timeit import default_timer as timer

In [76]:
start = timer()  # timestamp taken before the work starts
A = torch.rand(size=(10,10)) @ torch.rand(size=(10,10))
timer()-start  # elapsed seconds; includes generating the two random matrices

0.0004894530000001396

## Masked selection

In [77]:
shape = (4,3)
A = torch.rand(shape)
A

tensor([[0.5417, 0.5293, 0.0345],
        [0.9850, 0.1409, 0.6582],
        [0.5572, 0.3236, 0.9068],
        [0.3798, 0.5328, 0.4019]])

In [78]:
A.masked_select(A<0.2)  # return only the values that are < 0.2

tensor([0.0345, 0.1409])

In [79]:
mask = A.lt(0.2)  # alternatively, build the boolean mask first (strict <, as in A<0.2)
mask, A.masked_select(mask)  # show the mask and the values it selects

(tensor([[False, False,  True],
         [False,  True, False],
         [False, False, False],
         [False, False, False]]),
 tensor([0.0345, 0.1409]))