# Numpy

In [1]:
import numpy as np

## Create numpy arrays

In [2]:
np.array([1, 2]).shape

(2,)

In [3]:
np.array([ [1, 2], [3, 4] ]).shape

(2, 2)

In [4]:
np.array([ [1, 2], [3, 4], [5, 6] ]).shape

(3, 2)

In [5]:
np.zeros((2, 2))

array([[0., 0.],
       [0., 0.]])

In [6]:
np.ones((2, 2))

array([[1., 1.],
       [1., 1.]])

In [7]:
np.full((2, 2), 5)

array([[5, 5],
       [5, 5]])

In [8]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

## Generate data

In [9]:
np.random.random()

0.16322250497456814

In [10]:
np.random.randint(0, 10)

2

In [11]:
# Integer matrix of shape (num_rows, num_cols) with entries drawn from
# [lower_bound_value, upper_bound_value) -- the upper bound is exclusive.
lower_bound_value, upper_bound_value = 0, 100
num_rows, num_cols = 1000, 50
A = np.random.randint(
    low=lower_bound_value,
    high=upper_bound_value,
    size=(num_rows, num_cols),
)
A

array([[ 4, 82, 31, ..., 35, 97, 62],
       [36, 18, 18, ..., 98, 35, 84],
       [89, 88, 18, ..., 44, 60,  6],
       ...,
       [98, 12, 36, ..., 31, 19, 26],
       [35, 29, 26, ..., 96, 59, 80],
       [54,  6, 70, ..., 74, 82, 83]])

In [12]:
A.shape

(1000, 50)

In [13]:
A.min()

0

In [14]:
A.max()

99

In [15]:
v = np.random.uniform(size=4)
v

array([0.28575524, 0.27710292, 0.96089712, 0.74744013])

In [16]:
np.random.choice(v)

0.2857552374232847

In [17]:
np.random.choice(10, size=(3, 3))

array([[2, 6, 8],
       [3, 6, 7],
       [8, 9, 1]])

In [18]:
np.random.normal(size=4)

array([-1.04695407,  2.03204155,  0.32551179,  1.07881577])

In [19]:
# gaussian (normal) distribution, mean = 0 and variance = 1
np.random.randn(2, 3)

array([[-0.18387128,  1.50131601, -0.3037708 ],
       [ 0.40540408,  0.70965781, -0.12583727]])

## Numpy operations

In [20]:
# 3x3 integer matrix, written one row per line.
array = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [21]:
array[:]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [22]:
array[0]

array([1, 2, 3])

In [23]:
array[2]

array([7, 8, 9])

In [24]:
array[:, 0]

array([1, 4, 7])

In [25]:
array[:, 2:]

array([[3],
       [6],
       [9]])

In [26]:
array[1, 1]

5

In [27]:
array[-1, -1]

9

In [28]:
array_2 = np.concatenate([array, np.array([ [10, 11, 12] ])])
array_2

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [29]:
array_2[0, 0] = 0
array_2

array([[ 0,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

## Vectors, Matrices arithmetic and linear systems

In [30]:
array_1 = np.array([1, 2])
array_1

array([1, 2])

In [31]:
array_2 = np.array([3, 4])
array_2

array([3, 4])

In [32]:
array_1 + array_2

array([4, 6])

In [33]:
array_1 - array_2

array([-2, -2])

In [34]:
array_1 * 2

array([2, 4])

In [35]:
array_1 ** 3

array([1, 8])

In [36]:
array_1 * array_2

array([3, 8])

In [37]:
np.dot(array_1, array_2)

11

In [38]:
mat_1 = np.array([ [1, 2, 3], [4, 5, 6] ])
mat_1

array([[1, 2, 3],
       [4, 5, 6]])

In [39]:
mat_1.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [40]:
mat_2 = np.array([ [1, 2, 3], [4, 5, 6] ])
mat_2

array([[1, 2, 3],
       [4, 5, 6]])

In [41]:
mat_1 * 10

array([[10, 20, 30],
       [40, 50, 60]])

In [42]:
mat_1 * mat_2

array([[ 1,  4,  9],
       [16, 25, 36]])

In [43]:
np.dot(mat_1, mat_2.T)

array([[14, 32],
       [32, 77]])

In [44]:
mat_1 @ mat_2.T

array([[14, 32],
       [32, 77]])

In [45]:
np.linalg.inv(np.eye(3))

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [46]:
# 2x2 example matrix.
# NOTE(review): the NumPy docs no longer recommend np.matrix; a plain 2-D
# np.array would presumably serve the same purpose here -- confirm before switching.
mat_3 = np.matrix([ [1, 2], [3, 4] ])
mat_3

matrix([[1, 2],
        [3, 4]])

In [47]:
np.linalg.det(mat_3)

-2.0000000000000004

In [48]:
np.linalg.inv(mat_3)

matrix([[-2. ,  1. ],
        [ 1.5, -0.5]])

In [49]:
np.linalg.inv(mat_3).dot(mat_3)

matrix([[1.00000000e+00, 0.00000000e+00],
        [1.11022302e-16, 1.00000000e+00]])

In [50]:
np.trace(mat_3)

5

In [51]:
np.diag(mat_3)

array([1, 4])

In [52]:
np.diag([1, 4])

array([[1, 0],
       [0, 4]])

$$ a^T b = \vert\vert a \vert\vert \vert\vert b \vert\vert \cos(\theta) $$

$$ \cos \theta_{ab} = \frac{a^T b}{ \vert\vert a \vert\vert \vert\vert b \vert\vert} $$

$$ \vert\vert a \vert\vert = \sqrt{ \sum_{d=1}^{D} a^2_{d} } $$ 

In [53]:
a = np.array([1, 2])
b = np.array([3, 4])

In [54]:
a_mag = np.sqrt((a * a).sum())
a_mag

2.23606797749979

In [55]:
np.linalg.norm(a)

2.23606797749979

In [56]:
cos_theta = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
cos_theta

0.9838699100999074

In [57]:
# Angle between a and b, in radians.
# Floating-point rounding can push the cosine of (anti-)parallel vectors
# marginally outside [-1, 1], where np.arccos returns nan, so clamp the
# value to the valid domain first. In-range values are left unchanged.
angle = np.arccos(np.clip(cos_theta, -1.0, 1.0))
angle

0.17985349979247847

## Eigenvectors and eigenvalues

In [58]:
A =  np.matrix([ [1, 2], [3, 4] ])

In [59]:
eig_values, eig_vectors = np.linalg.eig(A)

In [60]:
eig_values

array([-0.37228132,  5.37228132])

In [61]:
eig_vectors

matrix([[-0.82456484, -0.41597356],
        [ 0.56576746, -0.90937671]])

In [62]:
eig_vectors[:, 0] * eig_values[0]

matrix([[ 0.30697009],
        [-0.21062466]])

In [63]:
A @ eig_vectors[:, 0]

matrix([[ 0.30697009],
        [-0.21062466]])

In [64]:
# Element-wise == is not reliably True here because of floating-point rounding
# in the two computations; use np.allclose for this comparison instead.
eig_vectors[:, 0] * eig_values[0] == A @ eig_vectors[:, 0]

matrix([[ True],
        [False]])

In [65]:
np.allclose(eig_vectors[:, 0] * eig_values[0], A @ eig_vectors[:, 0])

True

In [66]:
# Check all eigenpairs at once: eig_vectors @ diag(eig_values) scales column i
# of eig_vectors by eig_values[i], which should match A @ eig_vectors.
np.allclose(eig_vectors @ np.diag(eig_values), A @ eig_vectors)

True

## Broadcasting

Broadcasting lets NumPy perform element-wise arithmetic on arrays with different shapes.

The smaller array is broadcast (virtually repeated, without copying data) across the larger array so that both shapes match.

Rules:

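* shapes are compared dimension by dimension, starting from the trailing (rightmost) one; an array with fewer dimensions is treated as if its shape were padded with 1s on the left
* two dimensions are compatible when they are equal or when one of them is 1 (the size-1 dimension is stretched to match the other), e.g. `(5, 4)` with `(5, 1)` or `(4,)` works, but `(5, 4)` with `(5,)` does not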

In [67]:
mat_1 = np.arange(20).reshape(5, 4)
mat_1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [68]:
# mat_2 is 1-D with 5 elements; that length does not match the 4 columns of
# mat_1 (shape (5, 4)), so it cannot be added to mat_1 directly.
mat_2 = np.arange(5) # shape (5,), not 1x5
# Reshaped to a (5, 1) column, it broadcasts across the 4 columns of mat_1.
mat_3 = mat_2.reshape(5, 1)
mat_3

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [69]:
mat_1 + mat_3

array([[ 0,  1,  2,  3],
       [ 5,  6,  7,  8],
       [10, 11, 12, 13],
       [15, 16, 17, 18],
       [20, 21, 22, 23]])

In [70]:
mat_1 * mat_3

array([[ 0,  0,  0,  0],
       [ 4,  5,  6,  7],
       [16, 18, 20, 22],
       [36, 39, 42, 45],
       [64, 68, 72, 76]])

## Solve equations

In [71]:
mat_1 = np.array([ [2, 1], [1, -1] ])
mat_1

array([[ 2,  1],
       [ 1, -1]])

In [72]:
array = np.array([4, -1])
array

array([ 4, -1])

In [73]:
%%time
np.linalg.inv(mat_1).dot(array)

CPU times: user 121 µs, sys: 155 µs, total: 276 µs
Wall time: 215 µs


array([1., 2.])

In [74]:
%%time
np.linalg.solve(mat_1, array)

CPU times: user 89 µs, sys: 114 µs, total: 203 µs
Wall time: 94.7 µs


array([1., 2.])

In [75]:
inv_mat_1 = np.linalg.inv(mat_1)
inv_mat_1

array([[ 0.33333333,  0.33333333],
       [ 0.33333333, -0.66666667]])

In [76]:
inv_mat_1.dot(array)

array([1., 2.])

In [77]:
mat_1 = np.array([ [1, 2, 3], [4, 5, 2], [2, 8, 5] ])
mat_1

array([[1, 2, 3],
       [4, 5, 2],
       [2, 8, 5]])

In [78]:
array = np.array([5, 10, 15])
array

array([ 5, 10, 15])

In [79]:
np.linalg.solve(mat_1, array)

array([0.46511628, 1.39534884, 0.58139535])

## Statistical operations

In [80]:
# 5x4 integer matrix (5 rows, 4 columns), written one row per line.
mat_1 = np.array([
    [1, 2, 3, 4],
    [3, 4, 5, 6],
    [7, 8, 9, 6],
    [12, 7, 10, 9],
    [2, 11, 8, 10],
])
mat_1

array([[ 1,  2,  3,  4],
       [ 3,  4,  5,  6],
       [ 7,  8,  9,  6],
       [12,  7, 10,  9],
       [ 2, 11,  8, 10]])

In [81]:
mat_1.sum()

127

In [82]:
np.sum(mat_1)

127

In [83]:
mat_1.sum(axis=0) # column wise sum

array([25, 32, 35, 35])

In [84]:
mat_1.sum(axis=1) # row wise sum

array([10, 18, 30, 38, 31])

In [85]:
mat_1.mean()

6.35

In [86]:
mat_1.mean(axis=0)

array([5. , 6.4, 7. , 7. ])

In [87]:
mat_1.mean(axis=1)

array([2.5 , 4.5 , 7.5 , 9.5 , 7.75])

In [88]:
np.median(mat_1)

6.5

In [89]:
np.median(mat_1, axis=0)

array([3., 7., 8., 6.])

In [90]:
np.std(mat_1, axis=1)

array([1.11803399, 1.11803399, 1.11803399, 1.80277564, 3.49106001])

In [91]:
np.std(mat_1)

3.182373328193912

In [92]:
# percentile: value below which a given percentage of observations can be found
percentile = [25, 50, 75]
for p in percentile:
    print(f'Percentile {p}: {np.percentile(mat_1, p, axis=1)}')

Percentile 25: [1.75 3.75 6.75 8.5  6.5 ]
Percentile 50: [2.5 4.5 7.5 9.5 9. ]
Percentile 75: [ 3.25  5.25  8.25 10.5  10.25]


In [93]:
R = np.random.randn(10_000)

In [94]:
R.mean()

0.014934400041167384

In [95]:
R.var()

1.0045741926581078

In [96]:
R.std()

1.0022844868888812

In [97]:
np.sqrt(R.var())

1.0022844868888812

In [98]:
R = np.random.randn(10_000, 3)
R.mean(axis=0).shape

(3,)

In [99]:
R.mean(axis=1).shape

(10000,)

In [100]:
# Without rowvar=False, np.cov treats each ROW of R as a variable: the
# (10000, 3) array is read as 10000 variables with 3 observations each, so the
# result is 10000 x 10000 (roughly 800 MB if float64) -- usually not what is wanted.
np.cov(R).shape

(10000, 10000)

In [101]:
np.cov(R.T)

array([[ 0.97716958,  0.00889627,  0.01394901],
       [ 0.00889627,  0.99637251, -0.00101092],
       [ 0.01394901, -0.00101092,  0.99373545]])

In [102]:
# rowvar=False treats each COLUMN of R as a variable, giving the same 3x3
# covariance matrix as np.cov(R.T).
np.cov(R, rowvar=False)

array([[ 0.97716958,  0.00889627,  0.01394901],
       [ 0.00889627,  0.99637251, -0.00101092],
       [ 0.01394901, -0.00101092,  0.99373545]])