# Basics of PyTorch

## Import the dependencies

In [10]:
import random
import numpy as np
import torch

## Set the seed for reproducibility

In [19]:
def set_seed(seed: int) -> None:
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), and configures cuDNN for deterministic runs.

    Args:
        seed: Value handed to each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every GPU, not only the current one; this is a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run,
    # so it has to be switched off for reproducible results.
    torch.backends.cudnn.benchmark = False

In [20]:
set_seed(31415)  # fix the seeds of all RNGs via the helper defined in the previous cell

## Elementary algebra

In [21]:
A=1; B=2.5
print(A+B, A-B, A*B, A/B)

3.5 -1.5 2.5 0.4


## Constants

In [80]:
1j, np.e, np.exp(1), np.pi, np.inf

(1j, 2.718281828459045, 2.718281828459045, 3.141592653589793, inf)

## Prohibited expressions

In [4]:
# Each expression divides by zero, so evaluating it must raise ZeroDivisionError.
expressions = ['1/0', '-1/0', '0/0']
for expr in expressions:
    try:
        eval(expr)
    except ZeroDivisionError:
        continue  # expected outcome, move on to the next expression
    # Reaching this line means the division silently succeeded.
    raise NotImplementedError('Should not be here!')

## Vectors and matrices in PyTorch

In [22]:
torch.tensor([1,2,3])  # row vector

tensor([1, 2, 3])

In [23]:
torch.tensor([[1],[2],[3]])  # column vector

tensor([[1],
        [2],
        [3]])

In [24]:
torch.tensor([1,2,3]).reshape(3,1)  # column vector from the row vector

tensor([[1],
        [2],
        [3]])

In [25]:
torch.tensor([1,2,3]).reshape(3,-1)  # exactly the same as above,
                                     # but the last dimension is inferred

tensor([[1],
        [2],
        [3]])

In [26]:
torch.tensor([[1,2],[3,4]])  # a matrix

tensor([[1, 2],
        [3, 4]])

In [27]:
torch.arange(1,3)  # a range, values on the interval [1,3)

tensor([1, 2])

In [28]:
torch.arange(1,11,step=3)  # a range, but with a step size 

tensor([ 1,  4,  7, 10])

In [29]:
torch.arange(1,-10,step=-3)  # a reverse range

tensor([ 1, -2, -5, -8])

In [30]:
torch.rand(size=(3,2))   # a random matrix

tensor([[0.3296, 0.2113],
        [0.8446, 0.9235],
        [0.5002, 0.4071]])

In [31]:
torch.ones(size=(3, 2))   # a matrix of all ones

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [32]:
torch.zeros(size=(3, 2))   # a matrix of all zeros

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [33]:
torch.eye(3)  # identity matrix

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [37]:
torch.eye(4,2)  # non-square "identity"

tensor([[1., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.]])

In [38]:
torch.rand(3)   # a random vector

tensor([0.5099, 0.4392, 0.7209])

In [39]:
torch.ones(3)   # a vector of ones

tensor([1., 1., 1.])

In [40]:
torch.zeros(3)  # a vector of zeros

tensor([0., 0., 0.])

In [48]:
shape = (3,2,3)  # shape of a tensor
A = torch.arange(np.prod(shape)).reshape(shape) # an order-3 tensor of shape (3,2,3)
A

tensor([[[ 0,  1,  2],
         [ 3,  4,  5]],

        [[ 6,  7,  8],
         [ 9, 10, 11]],

        [[12, 13, 14],
         [15, 16, 17]]])

In [43]:
A[0,:,:]  # a slice of the tensor above

tensor([[0, 1, 2],
        [3, 4, 5]])

In [44]:
A[:,0,:]

tensor([[ 0,  1,  2],
        [ 6,  7,  8],
        [12, 13, 14]])

In [45]:
A[:,:,0]

tensor([[ 0,  3],
        [ 6,  9],
        [12, 15]])

## Matrix operations

In [47]:
A = torch.rand((3,2)); B = torch.rand((2,4))

In [96]:
A @ B  # matrix multiplication

tensor([[0.2016, 0.2191, 0.0886, 0.0267],
        [0.2173, 0.2395, 0.0960, 0.0303],
        [0.5292, 0.4758, 0.2159, 0.0267]])

In [50]:
# Two float64 matrices filled row by row: A holds 0..3, B is the same layout shifted by 3.
shape = (2, 2)
A = torch.arange(int(np.prod(shape)), dtype=torch.float64).reshape(shape)
B = A + 3
A, B

(tensor([[0., 1.],
         [2., 3.]], dtype=torch.float64),
 tensor([[3., 4.],
         [5., 6.]], dtype=torch.float64))

In [51]:
A+B, A-B, A*B, A/B  # element-wise operations

(tensor([[3., 5.],
         [7., 9.]], dtype=torch.float64),
 tensor([[-3., -3.],
         [-3., -3.]], dtype=torch.float64),
 tensor([[ 0.,  4.],
         [10., 18.]], dtype=torch.float64),
 tensor([[0.0000, 0.2500],
         [0.4000, 0.5000]], dtype=torch.float64))

## Matrix Inverse

In [52]:
# A = C B  =>  C = A B^(-1)  =>  A - C B = 0 (up to rounding error)
C = A @ torch.pinverse(B)  # for square matrices one can use just torch.inverse
A - C @ B

tensor([[-3.5527e-15,  0.0000e+00],
        [ 1.7764e-15,  5.3291e-15]], dtype=torch.float64)

## Size commands

In [54]:
A = torch.rand((3,2))

In [53]:
A.shape, A.size(), A.size(0), A.numel()

(torch.Size([3, 2]), torch.Size([3, 2]), 3, 6)

## Transpose and Hermitian

In [55]:
shape = (2,2)
A = torch.rand(shape) + 1j*torch.rand(shape)  # a random complex matrix
B = torch.rand(shape, dtype=torch.cfloat)       # a random complex matrix
                                                # (alternative)
A, B

(tensor([[0.5525+0.7259j, 0.1749+0.2711j],
         [0.0225+0.7052j, 0.7661+0.1349j]]),
 tensor([[0.6725+0.6366j, 0.1626+0.4018j],
         [0.4128+0.6438j, 0.0160+0.8131j]]))

In [56]:
H = A + A.conj().T  # Hermitian matrix
H

tensor([[1.1050+0.0000j, 0.1974-0.4340j],
        [0.1974+0.4340j, 1.5321+0.0000j]])

In [57]:
H.T, H.conj().T  # plain transpose and conjugate (Hermitian) transpose

(tensor([[1.1050+0.0000j, 0.1974+0.4340j],
         [0.1974-0.4340j, 1.5321+0.0000j]]),
 tensor([[1.1050+-0.0000j, 0.1974-0.4340j],
         [0.1974+0.4340j, 1.5321+-0.0000j]]))

In [58]:
H.conj().T  # conjugate transpose

tensor([[1.1050+-0.0000j, 0.1974-0.4340j],
        [0.1974+0.4340j, 1.5321+-0.0000j]])

In [59]:
np.conj(1j) # complex conjugation for a number

-1j

## Tensor slicing

In [62]:
shape = (3,5)
A = torch.rand(shape)
A

tensor([[0.9429, 0.1814, 0.5839, 0.5615, 0.9333],
        [0.3104, 0.7233, 0.0416, 0.7359, 0.8710],
        [0.2497, 0.0947, 0.3995, 0.1085, 0.6504]])

In [63]:
A[1,3]  # element at row index 1, column index 3 (0-based): 2nd row, 4th column

tensor(0.7359)

In [64]:
A[:,2]  # all elements of the column with index 2 (0-based), i.e. the 3rd column

tensor([0.5839, 0.0416, 0.3995])

In [65]:
A[:,2].reshape(-1,1)  # reshaping to a column vector 

tensor([[0.5839],
        [0.0416],
        [0.3995]])

In [66]:
A[0:3,1:3]  # first three rows of the second and third columns

tensor([[0.1814, 0.5839],
        [0.7233, 0.0416],
        [0.0947, 0.3995]])

In [67]:
A

tensor([[0.9429, 0.1814, 0.5839, 0.5615, 0.9333],
        [0.3104, 0.7233, 0.0416, 0.7359, 0.8710],
        [0.2497, 0.0947, 0.3995, 0.1085, 0.6504]])

In [68]:
A[-2:,-2:]  # the last two rows and last 2 columns

tensor([[0.7359, 0.8710],
        [0.1085, 0.6504]])

In [69]:
A[1,1] = 10  # reassigning the value
A

tensor([[ 0.9429,  0.1814,  0.5839,  0.5615,  0.9333],
        [ 0.3104, 10.0000,  0.0416,  0.7359,  0.8710],
        [ 0.2497,  0.0947,  0.3995,  0.1085,  0.6504]])

In [70]:
A[:2,:2] = 1.0  # reassigning multiple values
A

tensor([[1.0000, 1.0000, 0.5839, 0.5615, 0.9333],
        [1.0000, 1.0000, 0.0416, 0.7359, 0.8710],
        [0.2497, 0.0947, 0.3995, 0.1085, 0.6504]])

In [71]:
A[:,:] = 0  # reassigning all values
A

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [72]:
A = torch.rand(3,3, dtype=torch.cfloat)
A

tensor([[0.0276+0.7532j, 0.6545+0.1453j, 0.2124+0.6912j],
        [0.9369+0.3774j, 0.8324+0.3773j, 0.0043+0.9230j],
        [0.8115+0.1299j, 0.3255+0.5572j, 0.8675+0.7110j]])

In [73]:
A[0]  # a single index selects along the first dimension (the whole row 0), not a linear index

tensor([0.0276+0.7532j, 0.6545+0.1453j, 0.2124+0.6912j])

In [74]:
shape = (3,2,3)
A = torch.rand(shape)
A

tensor([[[0.0338, 0.1243, 0.7369],
         [0.1481, 0.0515, 0.7567]],

        [[0.1946, 0.1889, 0.3952],
         [0.2636, 0.3091, 0.3420]],

        [[0.3475, 0.6656, 0.4929],
         [0.6536, 0.5055, 0.6471]]])

In [75]:
A[0]  # consistency for the multidimensional array

tensor([[0.0338, 0.1243, 0.7369],
        [0.1481, 0.0515, 0.7567]])

## Reshape and permute matrices

In [76]:
A = torch.arange(6)
A

tensor([0, 1, 2, 3, 4, 5])

In [77]:
B = A.reshape(3,2)
B

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [78]:
B.permute(1,0)  # same as transpose for matrices

tensor([[0, 2, 4],
        [1, 3, 5]])

## Sum and product

In [79]:
A = torch.arange(1,10)
A, A.sum(), A.prod()

(tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]), tensor(45), tensor(362880))

In [80]:
A = A.reshape(3,3)
A, A.sum(0), A.sum(1)  # summing along a specific dimension

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([12, 15, 18]),
 tensor([ 6, 15, 24]))

In [81]:
A, A.prod(0), A.prod(1)  # product along a specific dimension

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([ 28,  80, 162]),
 tensor([  6, 120, 504]))

## Eigenvalues and eigenvectors

In [83]:
def is_hermitian(A):
    """Return True if ``A`` is a square matrix equal to its conjugate transpose.

    Args:
        A: Tensor to test. Input that is not a 2-D square tensor is reported
            as non-Hermitian instead of raising.

    Returns:
        bool: ``True`` when ``A`` is 2-D, square and matches ``A.conj().T``
        within the default ``torch.allclose`` tolerances; ``False`` otherwise.
    """
    # Check the rank first: A.shape[1] does not exist for 0-D/1-D input,
    # and .T is only meant for matrices.
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        return False
    return torch.allclose(A, A.conj().T)

In [85]:
shape = (2,2)
A = torch.rand(shape, dtype=torch.complex128)
A = (A + A.conj().T)  # Hermitian
A, is_hermitian(A)

(tensor([[1.5560+0.0000j, 1.1070+0.0070j],
         [1.1070-0.0070j, 0.1139+0.0000j]], dtype=torch.complex128),
 True)

In [86]:
D, V = torch.linalg.eigh(A)  # eigenvalues and eigenvectors
D, V

(tensor([-0.4862,  2.1561], dtype=torch.float64),
 tensor([[-0.4766+-0.0000j, -0.8791+0.0000j],
         [ 0.8791-0.0056j, -0.4766+0.0030j]], dtype=torch.complex128))

In [87]:
B = V @ D.to(V.dtype).diag() @ V.conj().T
torch.allclose(B, A)  # should be close to the original

True

In [61]:
B = V.conj().T @ V # left unitarity
torch.allclose(B, torch.eye(B.shape[0], dtype=torch.complex128))

True

## Singular value decomposition

In [88]:
shape = (3,2)
A = torch.rand(shape, dtype=torch.complex128)  # non-Hermitian 
A, is_hermitian(A)

(tensor([[0.3045+0.0493j, 0.0741+0.9620j],
         [0.7902+0.8593j, 0.0912+0.8819j],
         [0.9870+0.0044j, 0.4259+0.9860j]], dtype=torch.complex128),
 False)

In [99]:
U,S,Vh = torch.linalg.svd(A, full_matrices=False)  #  SVD
U, S, Vh

(tensor([[-0.5112-2.7459e-01j,  0.3585-2.3117e-01j, -0.1850+6.4428e-01j],
         [-0.4329-3.0769e-01j,  0.4065+2.4439e-01j,  0.4923-4.9339e-01j],
         [-0.3337-1.8159e-04j, -0.4809+1.5086e-02j, -0.0708-7.7650e-02j],
         [-0.4361-2.8215e-01j, -0.5816+1.5286e-01j, -0.2053-1.0804e-01j]],
        dtype=torch.complex128),
 tensor([3.0349, 0.5047, 0.3210], dtype=torch.float64),
 tensor([[-0.3979+0.0000j, -0.6488-0.0208j, -0.6214-0.1845j],
         [ 0.7160+0.0000j,  0.2222-0.0122j, -0.6396-0.1698j],
         [-0.5736+0.0000j,  0.7274-0.0008j, -0.3672-0.0839j]],
        dtype=torch.complex128))

In [90]:
torch.allclose(U @ S.to(U.dtype).diag() @ Vh, A)  # U @ diag(S) @ Vh should reproduce A up to numerical precision

True

In [91]:
torch.allclose(U.conj().T @ U, torch.eye(U.shape[1], dtype=torch.complex128))  # identity

True

In [92]:
torch.allclose(Vh @ Vh.conj().T, torch.eye(Vh.shape[0], dtype=torch.complex128))  # identity

True

## QR decomposition

In [93]:
shape = (4,3)
A = torch.rand(shape, dtype=torch.complex128)

In [94]:
Q,R = torch.linalg.qr(A, mode='reduced')  # A=Q*R. Default mode is 'reduced'

In [100]:
Q, R

(tensor([[-0.6131-0.1016j,  0.2226-0.6874j,  0.0362+0.2420j],
         [-0.4545-0.4323j, -0.2522+0.4236j,  0.5952-0.0444j],
         [-0.1902-0.0157j, -0.3406+0.0472j, -0.4195-0.1525j],
         [-0.2781-0.3264j, -0.3348+0.0676j, -0.6021-0.1486j]],
        dtype=torch.complex128),
 tensor([[-1.2739+0.0000j, -1.8647-0.0582j, -1.7133-0.5105j],
         [ 0.0000+0.0000j, -0.6841+0.0000j, -0.6744-0.1659j],
         [ 0.0000+0.0000j,  0.0000+0.0000j, -0.5642+0.0000j]],
        dtype=torch.complex128))

In [101]:
torch.allclose(Q @ R, A)  # Q @ R should reproduce A up to numerical precision

True

In [102]:
Q.T @ Q  # plain transpose without conjugation: not the identity for a complex Q

tensor([[ 0.3919+0.7051j,  0.2722+0.4021j, -0.0910-0.1157j],
        [ 0.2722+0.4021j, -0.3174-0.5971j,  0.4048+0.3335j],
        [-0.0910-0.1157j,  0.4048+0.3335j,  0.7882+0.2716j]],
       dtype=torch.complex128)

In [103]:
torch.allclose(Q.conj().T @ Q, torch.eye(Q.shape[1], dtype=torch.complex128))

True

## Time counter

In [104]:
from timeit import default_timer as timer

In [107]:
# Measure the elapsed time of a single 10x10 matrix product.
start = timer()
A = torch.rand(size=(10,10)) @ torch.rand(size=(10,10))
timer()-start  # difference of the two timer readings, displayed as the cell output

0.0005266590000019278

## Masked selection

In [108]:
shape = (4,3)
A = torch.rand(shape)
A

tensor([[0.7499, 0.3097, 0.6786],
        [0.3739, 0.8280, 0.5729],
        [0.7549, 0.3841, 0.7998],
        [0.0834, 0.6908, 0.3372]])

In [109]:
A.masked_select(A<0.2)  # return only the values that are < 0.2

tensor([0.0834])

In [110]:
mask = A.lt(0.2)  # alternatively, build the boolean mask first (lt is the strict "<" used above)
mask, A.masked_select(mask)

(tensor([[False, False, False],
         [False, False, False],
         [False, False, False],
         [ True, False, False]]),
 tensor([0.0834]))