# Numpy tutorial

In [119]:
!pip install numpy



In [120]:
# Report the NumPy version that this kernel actually imports. Reading it from
# the package avoids relying on a shell `grep` and on `pip` pointing at the
# same environment as the kernel.
import numpy
print("Version:", numpy.__version__)

Version: 1.19.5


In [121]:
import numpy as np

## Create new array and basic operations

Link to all NumPy data types: https://numpy.org/devdocs/user/basics.types.html

In [122]:
# The element type can be given either as a NumPy scalar type or as its
# string alias; both arrays below end up with dtype int32.
a = np.array(
    [[1, 2, 3, 4, 5],
     [6, 7, 8, 9, 10]],
    dtype=np.int32,
)
a1 = np.array(
    [[1, 2, 3, 4, 5],
     [6, 7, 8, 9, 10]],
    dtype='int32',
)
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]], dtype=int32)

In [123]:
# Tuple with the length of each axis: (rows, columns) for this 2-D array.
a.shape

(2, 5)

In [124]:
# Number of axes (dimensions) of the array.
a.ndim

2

In [125]:
# The dtype attribute is a numpy.dtype object; printing it shows the name of
# the element type.
print(type(a.dtype))
print(a.dtype)

<class 'numpy.dtype'>
int32


In [126]:
# Size in bytes of a single element (4 for int32).
a.itemsize

4

In [127]:
# Total number of bytes taken by the array's elements (a.size * a.itemsize).
a.nbytes

40

In [128]:
# Total number of elements in the array.
a.size

10

In [129]:
# Return a new array of given shape and type, without initializing entries.
# The displayed values are whatever was already in memory, so they are
# arbitrary and can differ from run to run.
c = np.empty((3, 2, 4), dtype=np.float64)
c

array([[[1.9948363e-316, 0.0000000e+000, 0.0000000e+000, 0.0000000e+000],
        [0.0000000e+000, 0.0000000e+000, 0.0000000e+000, 0.0000000e+000]],

       [[0.0000000e+000, 0.0000000e+000, 0.0000000e+000, 0.0000000e+000],
        [0.0000000e+000, 0.0000000e+000, 0.0000000e+000, 0.0000000e+000]],

       [[0.0000000e+000, 0.0000000e+000, 0.0000000e+000, 0.0000000e+000],
        [0.0000000e+000, 0.0000000e+000, 0.0000000e+000, 0.0000000e+000]]])

In [130]:
# Return a new array with the same shape and type as a given array.
# The data is uninitialized (arbitrary); only the shape and dtype are taken
# from the prototype `a`.
c1 = np.empty_like(a)
c1

array([[         0, 1076428800,          0, 1076690944,          0],
       [1076920320,          0, 1077051392,          0, 1077182464]],
      dtype=int32)

In [131]:
# Return a new array of given shape and type, filled with zeros.
# The shape tuple lists the length of each axis; here the result is 3-D.
d = np.zeros((2, 3, 4), dtype=np.int8)
d

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=int8)

In [132]:
# Array of zeros with the same shape and dtype as `a`.
d1 = np.zeros_like(a)
d1

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]], dtype=int32)

In [133]:
# Return a new array of given shape and type, filled with ones.
# The shape tuple may have any number of axes (four here).
e = np.ones((2, 2, 2, 5), dtype=np.int64)
e

array([[[[1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1]],

        [[1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1]]],


       [[[1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1]],

        [[1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1]]]])

In [134]:
# Array of ones with the same shape and dtype as `a`.
e1 = np.ones_like(a)
e1

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]], dtype=int32)

In [135]:
# Return a new array of given shape and type, filled with fill_value.
# The integer fill value 42 is stored as float32 because of the dtype argument.
f = np.full((2,2), 42, dtype=np.float32)
f

array([[42., 42.],
       [42., 42.]], dtype=float32)

In [136]:
# Array with the shape and dtype of `a`, filled with the given value.
# Because `a` is int32, the fill value 12.8 is cast to the integer 12.
f1 = np.full_like(a, 12.8)
f1

array([[12, 12, 12, 12, 12],
       [12, 12, 12, 12, 12]], dtype=int32)

In [137]:
# Return the identity array: a square n x n array with ones on the main
# diagonal and zeros everywhere else.
g = np.identity(10, dtype=np.float32)
g

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [138]:
# Return `num` evenly spaced samples over the closed interval [start, stop].
# The endpoint can optionally be excluded with endpoint=False.
# With 9 samples over [2, 10] the spacing is exactly 1, so the int16 result
# loses no precision.
h = np.linspace(start=2, stop=10, num=9, endpoint=True, dtype=np.int16)
h

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=int16)

In [139]:
# Return evenly spaced values within the half-open interval [start, stop).
# The last value is 20.5 because the next step (22.5) would pass stop=22.
i = np.arange(start=12.5, stop=22, step=2, dtype=np.float64)
i

array([12.5, 14.5, 16.5, 18.5, 20.5])

## Accessing/Changing specific elements, rows, columns, etc

In [140]:
# A 3-D float32 array: 2 sub-arrays, each holding 2 rows of 5 values.
arr = np.array(
    [
        [[1, 2, 3, 4, 5], [8, 9, 10, 11, 12]],
        [[4, 5, 6, 7, 8], [1, 2, 3, 4, 5]],
    ],
    dtype=np.float32,
)
print(arr, arr.shape, sep='\n')

[[[ 1.  2.  3.  4.  5.]
  [ 8.  9. 10. 11. 12.]]

 [[ 4.  5.  6.  7.  8.]
  [ 1.  2.  3.  4.  5.]]]
(2, 2, 5)


In [141]:
# Get the element at a given position: one index per axis, in axis order.
arr[0, 1, 3]

11.0

In [142]:
# Get a specific row. Trailing axes may be omitted, so `arr[1, 1]` and
# `arr[1, 1, :]` select the same row; the check makes that explicit instead of
# evaluating the short form and silently discarding its value.
assert np.array_equal(arr[1, 1], arr[1, 1, :])
arr[1, 1, :]

array([1., 2., 3., 4., 5.], dtype=float32)

In [143]:
# Slicing: keep the first two axes whole and take index 4 along the last axis.
arr[:, :, 4]

array([[ 5., 12.],
       [ 8.,  5.]], dtype=float32)

In [144]:
# [startindex:endindex:stepsize]
# 1::2 starts at index 1 and takes every second element up to the end of the axis.
arr[:, :, 1::2]

array([[[ 2.,  4.],
        [ 9., 11.]],

       [[ 5.,  7.],
        [ 2.,  4.]]], dtype=float32)

In [145]:
# Row 1 of every sub-array. `arr[:, 1]` and `arr[:, 1, :]` are equivalent
# because a trailing full slice may be omitted; the check makes that explicit
# instead of evaluating the short form and silently discarding its value.
assert np.array_equal(arr[:, 1], arr[:, 1, :])
arr[:, 1, :]

array([[ 8.,  9., 10., 11., 12.],
       [ 1.,  2.,  3.,  4.,  5.]], dtype=float32)

In [146]:
# Overwrite a whole row with one value: the scalar 5.5 is broadcast to every
# element selected by the slice. The assignment modifies `arr` in place.
arr[0, 1, :] = 5.5
arr

array([[[1. , 2. , 3. , 4. , 5. ],
        [5.5, 5.5, 5.5, 5.5, 5.5]],

       [[4. , 5. , 6. , 7. , 8. ],
        [1. , 2. , 3. , 4. , 5. ]]], dtype=float32)

In [147]:
# Replace 2 elements: position [0, 0] of each sub-array along the first axis
# receives 75 and 85 respectively.
arr[:, 0, 0] = [75, 85]
arr

array([[[75. ,  2. ,  3. ,  4. ,  5. ],
        [ 5.5,  5.5,  5.5,  5.5,  5.5]],

       [[85. ,  5. ,  6. ,  7. ,  8. ],
        [ 1. ,  2. ,  3. ,  4. ,  5. ]]], dtype=float32)

In [148]:
# Replace a row with a list whose length matches the row length.
arr[0, 0, :] = [11, 11, 11, 11, 11]
arr

array([[[11. , 11. , 11. , 11. , 11. ],
        [ 5.5,  5.5,  5.5,  5.5,  5.5]],

       [[85. ,  5. ,  6. ,  7. ,  8. ],
        [ 1. ,  2. ,  3. ,  4. ,  5. ]]], dtype=float32)

In [149]:
# Replace 2 rows: row 1 of each sub-array receives the corresponding inner list.
arr[:,1,:] = [[9,9,9,9,9],[4,4,4,4,4]]
arr

array([[[11., 11., 11., 11., 11.],
        [ 9.,  9.,  9.,  9.,  9.]],

       [[85.,  5.,  6.,  7.,  8.],
        [ 4.,  4.,  4.,  4.,  4.]]], dtype=float32)

## Where

In [150]:
# Seed the legacy global generator at the first use of randomness so that this
# and every later random cell give the same values on Restart & Run All.
# (Re-run the notebook once to refresh the stored outputs.)
np.random.seed(42)
w1 = np.random.randn(3,3)
w2 = np.random.randn(3,3)
print(w1)
print(w2)

[[-0.88351501 -0.26117721 -0.80661494]
 [ 0.53560437  0.64550501  1.13456921]
 [-0.09412003  1.53307569  0.50595754]]
[[-1.76166556  1.58996059  0.70586326]
 [ 0.02735162 -0.00363765  1.82395612]
 [ 0.30866773 -0.47334936  0.03078749]]


In [151]:
# Element-wise comparison: the result is a boolean array of the same shape.
c = w1 < w2
c

array([[False,  True,  True],
       [False, False,  True],
       [ True, False, False]])

In [152]:
# Return elements chosen from x or y depending on condition.
# Here: take w1 where the mask is True (w1 < w2), otherwise take w2.
np.where(c, w1, w2)

array([[-1.76166556, -0.26117721, -0.80661494],
       [ 0.02735162, -0.00363765,  1.13456921],
       [-0.09412003, -0.47334936,  0.03078749]])

In [153]:
# A scalar may be used for either branch; here every position where the mask
# is False becomes nan.
np.where(c, w1, np.nan)

array([[        nan, -0.26117721, -0.80661494],
       [        nan,         nan,  1.13456921],
       [-0.09412003,         nan,         nan]])

## Initialize random arrays

In [154]:
# Create an array of the given shape and populate it with random samples from
# a uniform distribution over [0, 1).
# Each positional argument is the length of one axis.
rd = np.random.rand(1,2,2,4)
rd

array([[[[0.85972021, 0.14503769, 0.59207671, 0.85893986],
         [0.03927955, 0.45276211, 0.37877428, 0.48265808]],

        [[0.80838425, 0.65241938, 0.58784771, 0.93290751],
         [0.40309753, 0.08785603, 0.81650875, 0.32993529]]]])

In [155]:
# Return random floats in the half-open interval [0.0, 1.0).
# Unlike np.random.rand, the shape is passed as a single tuple via `size`.
rd = np.random.random_sample(size=(1,2,2,4))
rd

array([[[[0.22484716, 0.40782271, 0.49268266, 0.79234478],
         [0.41608493, 0.77474181, 0.41685221, 0.49992129]],

        [[0.84033205, 0.71951319, 0.23043266, 0.13336904],
         [0.92242646, 0.81223532, 0.7611376 , 0.7109278 ]]]])

In [156]:
# Return a sample (or samples) from the "standard normal" distribution.
# As with np.random.rand, each positional argument is the length of one axis.
rd = np.random.randn(1,2,2,4)
rd

array([[[[-0.97702332,  0.2650154 ,  0.37316737,  0.22926563],
         [ 0.31621186, -0.76473889,  0.94858736,  0.76903978]],

        [[ 1.27882872, -0.35062435,  0.22364077,  0.82096681],
         [-1.34667634,  0.99956364,  0.26419185,  0.28031868]]]])

In [157]:
# Return random integers from low (inclusive) to high (exclusive).
# `size` sets the shape of the returned array.
rd = np.random.randint(low=12, high=58, size=(1,2,2,4))
rd

array([[[[44, 47, 49, 52],
         [45, 50, 33, 15]],

        [[51, 29, 16, 24],
         [40, 57, 23, 37]]]])

In [158]:
# Draw samples from a von Mises distribution (mu is the mode, kappa the
# concentration); the samples lie in the interval [-pi, pi].
# Many other distributions are available as well.
rd = np.random.vonmises(mu=15, kappa=1, size=(1,2,2,4))
rd

array([[[[-2.75405553, -1.13960914, -2.12831564, -1.22896764],
         [ 0.47701662,  2.97336337,  2.44556508, -2.83355057]],

        [[ 1.70827687, -3.04319494,  2.46066268,  1.97445572],
         [ 0.00505973,  2.04869289, -1.62183931,  2.97648506]]]])

In [159]:
# List the public lowercase names of np.random that contain no underscore,
# one per line.
print('\n'.join(name for name in dir(np.random) if '_' not in name and name.islower()))

beta
binomial
bytes
chisquare
choice
dirichlet
exponential
f
gamma
geometric
gumbel
hypergeometric
laplace
logistic
lognormal
logseries
mtrand
multinomial
normal
pareto
permutation
poisson
power
rand
randint
randn
random
ranf
rayleigh
sample
seed
shuffle
test
triangular
uniform
vonmises
wald
weibull
zipf


## Repeat/Tile an array

In [160]:
# Repeat a scalar 5 times; with axis=None the result is a flat 1-D array.
rep = np.repeat(a=10, repeats=5, axis=None)
rep

array([10, 10, 10, 10, 10])

In [161]:
# Repeat every row 3 times in place (axis=0): each row is followed by its own copies.
rep = np.repeat(np.array([[1, 2, 3], [4, 5, 6]]), repeats=3, axis=0)
rep

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3],
       [4, 5, 6],
       [4, 5, 6],
       [4, 5, 6]])

In [162]:
# Repeat every element 3 times along the columns (axis=1).
rep = np.repeat(np.array([[1, 2, 3], [4, 5, 6]]), repeats=3, axis=1)
rep

array([[1, 1, 1, 2, 2, 2, 3, 3, 3],
       [4, 4, 4, 5, 5, 5, 6, 6, 6]])

In [163]:
# np.tile builds an array by laying out copies of A; reps=(1, 3) means
# 1 copy vertically and 3 copies side by side.
tl = np.tile(A=np.array([[1, 2, 3], [4, 5, 6]]), reps=(1, 3))
tl

array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6]])

In [164]:
# reps=(3, 1): 3 copies stacked vertically, 1 copy horizontally.
tl = np.tile(A=np.array([[1, 2, 3], [4, 5, 6]]), reps=(3, 1))
tl

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

## Copy array

In [165]:
# Plain assignment only binds a second name to the same array object,
# whereas .copy() creates a separate array.
a = np.array([[1, 2, 3], [4, 5, 6]])
nocp, cp = a, a.copy()
print(nocp is a, cp is a, sep='\n')

True
False


## Mathematics and linear algebra
Link to list of functions: https://numpy.org/doc/stable/reference/routines.math.html

Most of these functions operate element-wise and do NOT modify their inputs in place by default.

out (function argument): A location into which the result is stored. If provided, it must have a shape that the inputs broadcast to. If not provided or None, a freshly-allocated array is returned. A tuple (possible only as a keyword argument) must have length equal to the number of outputs.

In [166]:
# Two float64 vectors of equal length used by the arithmetic examples below.
m = np.array([1, 2, 3], dtype=np.float64)
n = np.array([10, 20, 30], dtype=np.float64)
print(m, n, sep='\n')

[1. 2. 3.]
[10. 20. 30.]


In [167]:
# Adding a scalar returns a new array; `m` itself is left unchanged, and the
# result is a different object.
m1 = m + 2
print(m)
print(m1)
print(m is m1)

[1. 2. 3.]
[3. 4. 5.]
False


In [168]:
# Element-wise sum of two arrays of equal shape.
m + n

array([11., 22., 33.])

In [169]:
# The scalar is applied to every element: 10 is subtracted from each value.
m - 10

array([-9., -8., -7.])

In [170]:
# Element-wise difference of two arrays.
m - n

array([ -9., -18., -27.])

In [171]:
# Every element is multiplied by the scalar.
m * 4

array([ 4.,  8., 12.])

In [172]:
# Element-wise product (not a dot product): m[i] * n[i].
m * n

array([10., 40., 90.])

In [173]:
# Every element is divided by the scalar (true division).
m / 3

array([0.33333333, 0.66666667, 1.        ])

In [174]:
# Element-wise division: m[i] / n[i].
m / n

array([0.1, 0.1, 0.1])

In [175]:
# Every element is raised to the power 3.
m ** 3

array([ 1.,  8., 27.])

In [176]:
# Element-wise power: m[i] raised to n[i].
m ** n

array([1.00000000e+00, 1.04857600e+06, 2.05891132e+14])

In [177]:
# e raised to the power of each element.
np.exp(m)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [178]:
# Functions such as np.cos also return a new array by default: `m` keeps its
# values and the result is a different object.
m1 = np.cos(m)
print(m)
print(m1)
print(m is m1)

[1. 2. 3.]
[ 0.54030231 -0.41614684 -0.9899925 ]
False


In [179]:
# Demonstrate the `out` argument on a copy, so that `m` keeps its original
# values and this cell prints the same result every time it is re-run
# (writing into `m` itself would apply sinh again on each execution).
m_inplace = m.copy()
print(m_inplace)
m1 = np.sinh(m_inplace, out=m_inplace)  # the result is written into m_inplace itself
print(m_inplace)
print(m1)
print(m1 is m_inplace)  # True: the returned array is the very array passed as `out`

[1. 2. 3.]
[ 1.17520119  3.62686041 10.01787493]
[ 1.17520119  3.62686041 10.01787493]
True


In [180]:
# Element-wise remainder of division. Nested lists are accepted directly, and
# because some divisors are floats the result is a float array.
np.mod([[10,20,30], [40,50,60]], [[5,1,4], [8,7.5,3.3]])

array([[0. , 0. , 2. ],
       [0. , 5. , 0.6]])

In [181]:
# Returns the discrete, linear convolution of two one-dimensional sequences.
# With the default mode ('full') the output has len(a) + len(v) - 1 elements,
# i.e. 101 + 3 - 1 = 103 here.
np.convolve(np.linspace(0, 100, 101), [1, 2, 1])

array([  0.,   1.,   4.,   8.,  12.,  16.,  20.,  24.,  28.,  32.,  36.,
        40.,  44.,  48.,  52.,  56.,  60.,  64.,  68.,  72.,  76.,  80.,
        84.,  88.,  92.,  96., 100., 104., 108., 112., 116., 120., 124.,
       128., 132., 136., 140., 144., 148., 152., 156., 160., 164., 168.,
       172., 176., 180., 184., 188., 192., 196., 200., 204., 208., 212.,
       216., 220., 224., 228., 232., 236., 240., 244., 248., 252., 256.,
       260., 264., 268., 272., 276., 280., 284., 288., 292., 296., 300.,
       304., 308., 312., 316., 320., 324., 328., 332., 336., 340., 344.,
       348., 352., 356., 360., 364., 368., 372., 376., 380., 384., 388.,
       392., 396., 299., 100.])

### Linear algebra

In [182]:
# Matrix product of a (5, 2) and a (2, 3) matrix of random integers in [1, 10);
# the result has shape (5, 3).
mat1 = np.random.randint(low=1, high=10, size=(5, 2))
mat2 = np.random.randint(low=1, high=10, size=(2, 3))
np.matmul(mat1, mat2)

array([[26, 16, 35],
       [54, 42, 61],
       [28, 20, 34],
       [58, 47, 63],
       [18, 12, 23]])

In [183]:
# Dot product of two arrays. np.dot behaves differently depending on the
# dimensions of its arguments; for two 1-D arrays it returns a scalar.
mat1 = np.random.randint(low=1, high=10, size=10)
mat2 = np.random.randint(low=1, high=10, size=10)
np.dot(mat1, mat2)

318

In [184]:
# Return the dot product of two vectors.
# np.vdot flattens its arguments and conjugates the first one, which makes no
# difference for real integer vectors, hence the same value as np.dot.
np.vdot(mat1, mat2)

318

In [185]:
# Inner product of two arrays.
# For 1-D arrays this is the ordinary inner product, so the value matches np.dot.
np.inner(mat1, mat2)

318

In [186]:
# Compute the outer product of two vectors.
# result[i, j] = mat1[i] * mat2[j], so two length-10 vectors give a 10 x 10 array.
np.outer(mat1, mat2)

array([[18, 48, 24, 54, 36, 12, 30, 30, 18, 24],
       [27, 72, 36, 81, 54, 18, 45, 45, 27, 36],
       [12, 32, 16, 36, 24,  8, 20, 20, 12, 16],
       [27, 72, 36, 81, 54, 18, 45, 45, 27, 36],
       [27, 72, 36, 81, 54, 18, 45, 45, 27, 36],
       [ 9, 24, 12, 27, 18,  6, 15, 15,  9, 12],
       [ 3,  8,  4,  9,  6,  2,  5,  5,  3,  4],
       [21, 56, 28, 63, 42, 14, 35, 35, 21, 28],
       [15, 40, 20, 45, 30, 10, 25, 25, 15, 20],
       [12, 32, 16, 36, 24,  8, 20, 20, 12, 16]])

In [187]:
# Determinant of a square matrix (here a random 5 x 5 integer matrix).
# The value is computed in floating point, so it may carry a small rounding error.
mat1 = np.random.randint(low=1, high=10, size=(5, 5))
np.linalg.det(mat1)

-2258.0000000000014

In [188]:
# Compute the eigenvalues and right eigenvectors of a square array.
# eig returns (eigenvalues, eigenvectors): here `v` holds the 5 eigenvalues and
# each column of `w` is the eigenvector for the eigenvalue at the same index.
# NOTE: NumPy's own documentation uses the opposite letters (w for eigenvalues,
# v for eigenvectors).
v, w = np.linalg.eig(mat1)
print(v.shape)
print(w.shape)

(5,)
(5, 5)


In [189]:
# Multiplicative inverse of a square matrix.
# Raises numpy.linalg.LinAlgError if mat1 is singular.
np.linalg.inv(mat1)

array([[ 0.01594331,  0.10628875, -0.15057573,  0.04605846, -0.03985828],
       [-0.15899026,  0.10673162,  0.0571302 , -0.0704163 ,  0.14747564],
       [ 0.22232064,  0.14880425, -0.21080602, -0.13551816, -0.05580159],
       [ 0.13906112, -0.73959256,  0.13108946,  0.51284322,  0.15234721],
       [ 0.04428698, -0.0380868 ,  0.13728964,  0.01682905, -0.11071745]])

In [190]:
# Singular Value Decomposition: mat1 == u @ np.diag(s) @ vh.
# The singular values come back as a 1-D array `s`, not as a diagonal matrix.
u, s, vh = np.linalg.svd(mat1)
print(u.shape)
print(s.shape)
print(vh.shape)

(5, 5)
(5,)
(5, 5)


In [191]:
# Rank of the matrix (NumPy computes it from the singular values).
np.linalg.matrix_rank(mat1)

5

In [192]:
# List the public lowercase names of np.linalg that contain no underscore,
# one per line.
print('\n'.join(name for name in dir(np.linalg) if '_' not in name and name.islower()))

cholesky
cond
det
eig
eigh
eigvals
eigvalsh
inv
linalg
lstsq
norm
pinv
qr
slogdet
solve
svd
tensorinv
tensorsolve
test


### Min, max, sum, etc

In [193]:
# 3-D integer array of shape (2, 2, 3) used by the reduction examples below.
s = np.array(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[11, 22, 33], [44, 55, 66]],
    ]
)
s

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[11, 22, 33],
        [44, 55, 66]]])

In [194]:
# Minimum over the whole array (axis=None), then along each axis in turn.
for axis in (None, 0, 1, 2):
    print(np.min(s, axis=axis))

1
[[1 2 3]
 [4 5 6]]
[[ 1  2  3]
 [11 22 33]]
[[ 1  4]
 [11 44]]


In [195]:
# np.amin performs the same reduction as np.min; compare with the axis=0
# result printed in the previous cell.
np.amin(s, axis=0)

array([[1, 2, 3],
       [4, 5, 6]])

In [196]:
# Maximum of the array, or maximum along an axis. With axis=None the result
# is a scalar; with an axis given, the result has one dimension fewer
# (a.ndim - 1).
for axis in (None, 0, 1, 2):
    print(np.max(s, axis=axis))

66
[[11 22 33]
 [44 55 66]]
[[ 4  5  6]
 [44 55 66]]
[[ 3  6]
 [33 66]]


In [197]:
# np.amax performs the same reduction as np.max; compare with the axis=0
# result printed in the previous cell.
np.amax(s, axis=0)

array([[11, 22, 33],
       [44, 55, 66]])

In [198]:
# Sum of all elements (axis=None), then the sums along each axis in turn.
for axis in (None, 0, 1, 2):
    print(np.sum(s, axis=axis))

252
[[12 24 36]
 [48 60 72]]
[[ 5  7  9]
 [55 77 99]]
[[  6  15]
 [ 66 165]]


In [199]:
# Two random integer arrays of identical shape (2, 3, 1) for the element-wise
# maximum/minimum examples.
mat1 = np.random.randint(low=1, high=10, size=(2, 3, 1))
mat2 = np.random.randint(low=1, high=10, size=(2, 3, 1))
print(mat1, mat2, sep='\n')

[[[8]
  [3]
  [5]]

 [[8]
  [5]
  [3]]]
[[[4]
  [7]
  [4]]

 [[8]
  [1]
  [4]]]


In [200]:
# Element-wise maximum of array elements. Equivalent to
# np.where(x1 >= x2, x1, x2) when neither x1 nor x2 contains nans, but faster
# and with proper broadcasting.
# Unlike np.max, which reduces a single array, this compares two arrays.
np.maximum(mat1, mat2)

array([[[8],
        [7],
        [5]],

       [[8],
        [5],
        [4]]])

In [201]:
# Element-wise minimum of two arrays (the counterpart of np.maximum).
np.minimum(mat1, mat2)

array([[[4],
        [3],
        [4]],

       [[8],
        [1],
        [3]]])

## Horizontal and vertical stacking, concatenate

In [202]:
# Stack arrays vertically (row-wise). Equivalent to concatenating along the
# first axis after 1-D arrays of shape (N,) have been reshaped to (1, N), which
# is why the 1-D `v2` becomes the third row. np.vstack also rebuilds arrays
# that were divided by np.vsplit.
v1 = np.array([
    [1, 2, 3, 4],
    [1, 2, 3, 4],
])
v2 = np.array([5, 6, 7, 8])
np.vstack((v1, v2))

array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [203]:
# Stack arrays horizontally (column-wise). Equivalent to concatenating along
# the second axis, except for 1-D arrays, which are concatenated along the
# first axis. np.hstack also rebuilds arrays that were divided by np.hsplit.
v1 = np.array([
    [1, 2, 3, 4],
    [1, 2, 3, 4],
])
v2 = np.array([
    [5, 6, 7, 8],
    [500, 600, 700, 800],
])
np.hstack((v1, v2))

array([[  1,   2,   3,   4,   5,   6,   7,   8],
       [  1,   2,   3,   4, 500, 600, 700, 800]])

In [204]:
# Join a sequence of arrays along an existing axis.
# axis=0 appends the rows of v2 below those of v1; out=None (the default)
# returns a newly allocated array.
np.concatenate([v1, v2], axis=0, out=None)

array([[  1,   2,   3,   4],
       [  1,   2,   3,   4],
       [  5,   6,   7,   8],
       [500, 600, 700, 800]])

In [205]:
# axis=1 joins the arrays column-wise, which for these 2-D inputs gives the
# same result as np.hstack above.
np.concatenate([v1, v2], axis=1, out=None)

array([[  1,   2,   3,   4,   5,   6,   7,   8],
       [  1,   2,   3,   4, 500, 600, 700, 800]])