# numpy basics: part 2

In [1]:
import numpy as np

In [6]:
# A 1-D integer vector and a 3x3 integer matrix of ones, used by the cells below.
x = np.array([1, 2, 3, 4, 5])
X = np.ones((3, 3), dtype=int)
x, X

(array([1, 2, 3, 4, 5]),
 array([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]]))

In [None]:
# A plain Python list treats `* 2` as repetition (the list is concatenated with
# itself); it does not multiply each element by 2.
list1 = list(range(5))
list1 * 2

## numpy supports element-wise operations, including +, -, *, /, //, **, %, sin/cos/tan, exp, abs, power, log

In [11]:
# Unlike a Python list, a numpy array multiplies every element by the scalar.
x * 2

array([ 2,  4,  6,  8, 10])

In [12]:
# The scalar is added to every element of x.
x + 1

array([2, 3, 4, 5, 6])

In [17]:
# Mutates X in place: the following cells operate on this modified matrix
# (top-left entry 9, every other entry 1).
X[0,0] = 9
# Floor division is applied element by element.
X//2

array([[4, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [18]:
# Element-wise square (not the matrix product of X with itself).
X**2

array([[81,  1,  1],
       [ 1,  1,  1],
       [ 1,  1,  1]])

In [21]:
# Element-wise remainder after division by 3.
X%3

array([[0, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [22]:
# Universal functions such as np.sin are applied to each element (angles in
# radians); the result is a float array.
np.sin(X)

array([[0.41211849, 0.84147098, 0.84147098],
       [0.84147098, 0.84147098, 0.84147098],
       [0.84147098, 0.84147098, 0.84147098]])

In [23]:
# e raised to the power of each element.
np.exp(X)

array([[8.10308393e+03, 2.71828183e+00, 2.71828183e+00],
       [2.71828183e+00, 2.71828183e+00, 2.71828183e+00],
       [2.71828183e+00, 2.71828183e+00, 2.71828183e+00]])

In [24]:
# Function form of the ** operator: gives the same result as X**2.
np.power(X,2)

array([[81,  1,  1],
       [ 1,  1,  1],
       [ 1,  1,  1]])

In [26]:
# Natural logarithm (base e) of each element.
np.log(X)

array([[2.19722458, 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ]])

In [28]:
# Base-2 logarithm of each element.
np.log2(X)

array([[3.169925, 0.      , 0.      ],
       [0.      , 0.      , 0.      ],
       [0.      , 0.      , 0.      ]])

## Likewise, numpy also supports operations between matrices

In [30]:
# Two 2x2 integer matrices used by the matrix-operation examples below.
A = np.arange(4).reshape(2, 2)
# fill_value is 10 so that B agrees with the outputs shown in this section:
# A + B, A * B and A.dot(B) were all produced with a matrix of tens.
B = np.full(shape=(2, 2), fill_value=10)
A, B

(array([[0, 1],
        [2, 3]]),
 array([[10, 10],
        [10, 10]]))

In [31]:
# Matrices of the same shape are added entry by entry.
A + B

array([[10, 11],
       [12, 13]])

In [32]:
# NOTE: * multiplies corresponding entries (element-wise product); it is NOT the
# matrix product. Use A.dot(B) for matrix multiplication.
A * B

array([[ 0, 10],
       [20, 30]])

In [34]:
# Matrix product of A and B (rows of A dotted with columns of B).
A.dot(B)

array([[10, 10],
       [50, 50]])

In [35]:
# Transpose of A: rows become columns and vice versa.
A.T

array([[0, 2],
       [1, 3]])

In [37]:
# Adding a vector to a matrix is not defined in linear algebra, but numpy
# broadcasts the 1-D vector v across the rows of A, so v is added to every row.
v = np.array([1,2])
v + A

array([[1, 3],
       [3, 5]])

In [38]:
np.tile?

In [41]:
# np.tile(v, (2,1)) repeats v twice along the rows, building an explicit 2x2
# matrix; adding it to A gives the same result as the broadcast v + A.
np.tile(v, (2,1)) + A

array([[1, 3],
       [3, 5]])

In [42]:
# Broadcasting also applies to *: each row of A is multiplied element-wise by v.
v * A

array([[0, 2],
       [2, 6]])

In [50]:
print(v)
# v is 1-D, so it has no row/column orientation of its own. In A.dot(v) it is
# used as a column vector (each row of A is dotted with v); the result is 1-D.
A.dot(v)

[1 2]


array([2, 8])

In [44]:
# With v on the left it is used as a row vector: v is dotted with each column of A.
v.dot(A)

array([4, 7])

In [54]:
# An explicit column vector: two rows and one column, hence two dimensions.
t = np.array([1, 2]).reshape(2, 1)
t.ndim

2

In [58]:
# Only a square, non-singular matrix has an inverse.
invA = np.linalg.inv(A)
invA

array([[-1.5,  0.5],
       [ 1. ,  0. ]])

In [60]:
# Multiplying A by its inverse on either side gives the identity matrix.
A.dot(invA), invA.dot(A)

(array([[1., 0.],
        [0., 1.]]),
 array([[1., 0.],
        [0., 1.]]))

In [71]:
# A2 is 2x8, i.e. not square, so it has no ordinary inverse.
A2 = np.arange(16).reshape(2,8)
# np.linalg.inv(A2)  # left commented out: inv() only accepts square matrices
# Use pinv() to get the (Moore-Penrose) pseudo-inverse of a matrix
pinvA2 = np.linalg.pinv(A2)
pinvA2

array([[-1.35416667e-01,  5.20833333e-02],
       [-1.01190476e-01,  4.16666667e-02],
       [-6.69642857e-02,  3.12500000e-02],
       [-3.27380952e-02,  2.08333333e-02],
       [ 1.48809524e-03,  1.04166667e-02],
       [ 3.57142857e-02, -1.04083409e-17],
       [ 6.99404762e-02, -1.04166667e-02],
       [ 1.04166667e-01, -2.08333333e-02]])

In [72]:
# pinvA2.dot(A2) is an 8x8 matrix and is NOT the identity: A2 has only two rows,
# so the product has rank at most 2. (The product in the other order,
# A2.dot(pinvA2), is the 2x2 one and should be close to the identity.)
# Because of floating-point round-off, entries that would be exactly zero show
# up as tiny values such as 3.05e-16.
pinvA2.dot(A2)

array([[ 4.16666667e-01,  3.33333333e-01,  2.50000000e-01,
         1.66666667e-01,  8.33333333e-02,  3.05311332e-16,
        -8.33333333e-02, -1.66666667e-01],
       [ 3.33333333e-01,  2.73809524e-01,  2.14285714e-01,
         1.54761905e-01,  9.52380952e-02,  3.57142857e-02,
        -2.38095238e-02, -8.33333333e-02],
       [ 2.50000000e-01,  2.14285714e-01,  1.78571429e-01,
         1.42857143e-01,  1.07142857e-01,  7.14285714e-02,
         3.57142857e-02,  2.22044605e-16],
       [ 1.66666667e-01,  1.54761905e-01,  1.42857143e-01,
         1.30952381e-01,  1.19047619e-01,  1.07142857e-01,
         9.52380952e-02,  8.33333333e-02],
       [ 8.33333333e-02,  9.52380952e-02,  1.07142857e-01,
         1.19047619e-01,  1.30952381e-01,  1.42857143e-01,
         1.54761905e-01,  1.66666667e-01],
       [-8.32667268e-17,  3.57142857e-02,  7.14285714e-02,
         1.07142857e-01,  1.42857143e-01,  1.78571429e-01,
         2.14285714e-01,  2.50000000e-01],
       [-8.33333333e-02, -2.380952

# sum(), max(), min(), std(), mean(), var(), median(), percentile()

In [75]:
# 100 samples drawn uniformly from [0, 1). No seed is set, so the values
# differ on every run.
L = np.random.random(100)
L

array([0.19592953, 0.16758626, 0.72536581, 0.38817664, 0.29734573,
       0.74524748, 0.22716268, 0.47246473, 0.47541124, 0.7443643 ,
       0.49936368, 0.46768775, 0.52432548, 0.90295151, 0.54222074,
       0.78737437, 0.01849755, 0.84814806, 0.68506779, 0.06652979,
       0.57468254, 0.36911287, 0.05806102, 0.33760403, 0.97059526,
       0.9506378 , 0.52932408, 0.9761326 , 0.49863569, 0.71855444,
       0.20095111, 0.68571955, 0.52313136, 0.52200308, 0.43147882,
       0.50254729, 0.09171763, 0.1473037 , 0.59122053, 0.42978251,
       0.52655043, 0.24619331, 0.7676259 , 0.29011213, 0.61225046,
       0.37364003, 0.52181891, 0.68081709, 0.83872147, 0.30578737,
       0.75585444, 0.7083482 , 0.89332608, 0.64065632, 0.36697974,
       0.05269007, 0.36962237, 0.16408004, 0.08591885, 0.18176434,
       0.13579893, 0.12115811, 0.50559203, 0.99049397, 0.1233084 ,
       0.32867795, 0.14693247, 0.38708181, 0.8325207 , 0.17406836,
       0.64257991, 0.63475051, 0.24773781, 0.8079329 , 0.24404

In [80]:
# Compare Python's built-in sum() with np.sum() on one million values.
# Note: each timed statement also creates the random array, so that cost is
# included in both measurements.
%timeit sum(np.random.rand(1000000))
%timeit np.sum(np.random.rand(1000000))

111 ms ± 1.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
5 ms ± 14.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [82]:
# Equivalent to np.sum(L). The np.sum(L) spelling has the advantage of making
# it explicit that the function comes from numpy.
L.sum()

46.78396635640065

In [90]:
# reshape(4, -1): the -1 lets numpy infer the second dimension (4 here).
# NOTE: this rebinds X, replacing the 3x3 matrix used earlier.
X = np.arange(16).reshape(4, -1)
print(X)
# axis = 0 collapses the rows and yields one sum per column; axis = 1 yields one
# sum per row; axis = (0,1) or no axis at all sums every element.
np.sum(X, axis = 0), np.sum(X, axis = 1), np.sum(X, axis = (0,1)), np.sum(X)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


(array([24, 28, 32, 36]), array([ 6, 22, 38, 54]), 120, 120)

In [83]:
# Largest and smallest value in L.
np.max(L), np.min(L)

(0.9971755396441313, 0.018497551091968933)

In [91]:
# Product of all elements, of each column (axis = 0) and of each row (axis = 1).
np.prod(X), np.prod(X, axis = 0), np.prod(X, axis = 1)

(0, array([   0,  585, 1680, 3465]), array([    0,   840,  7920, 32760]))

In [92]:
np.mean(X)

7.5

In [93]:
np.median(X)

7.5

In [94]:
# The 50th percentile is the median.
np.percentile(X, q = 50)

7.5

In [95]:
# Minimum, the three quartiles and the maximum of X.
for percent in [0, 25, 50, 75, 100]:
    print(np.percentile(X, q = percent))

0.0
3.75
7.5
11.25
15.0


In [98]:
# One million samples from a normal distribution with mean 0 and standard
# deviation 1. NOTE: this rebinds x, which previously held a small integer vector.
x = np.random.normal(0, 1, size = 1000000)

In [99]:
# Variance of the samples; close to 1 because they were drawn with standard deviation 1.
np.var(x)

0.9992500743728923

In [100]:
np.std(x)

0.9996249668615187

# Add the arg prefix to some methods to get indices instead of values (e.g., argmin(), argsort())

In [104]:
# For a 2-D array these are indices into the flattened array.
np.argmin(X), np.argmax(X)

(0, 15)

In [103]:
# The index returned by argmin points at the minimum value.
np.min(x) == x[np.argmin(x)]

True

In [111]:
# The integers 0..14 in random order.
x = np.arange(15)
# np.random.shuffle permutes x in place (it returns None).
np.random.shuffle(x)
x

array([ 7,  4,  2,  6,  5, 10,  8, 11,  1, 12,  0,  9, 13, 14,  3])

In [112]:
# np.sort returns a sorted copy and leaves x untouched. The copy is discarded
# here, so the print below shows that x is still in shuffled order.
np.sort(x)
print(x)

[ 7  4  2  6  5 10  8 11  1 12  0  9 13 14  3]


In [113]:
# The ndarray method sort() sorts x in place.
x.sort()
print(x)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]


In [117]:
# For a 2-D array: random integers in [0, 10) arranged as a 4x4 matrix.
X = np.random.randint(10, size = (4, 4))
X

array([[3, 4, 2, 3],
       [0, 4, 9, 9],
       [8, 5, 4, 9],
       [1, 3, 9, 3]])

In [118]:
# Sort each row. np.sort sorts along the last axis by default (axis = -1),
# which is axis 1 for a 2-D array.
np.sort(X)

array([[2, 3, 3, 4],
       [0, 4, 9, 9],
       [4, 5, 8, 9],
       [1, 3, 3, 9]])

In [119]:
# Sort each column independently (the rows are not kept together).
np.sort(X, axis = 0)

array([[0, 3, 2, 3],
       [1, 4, 4, 3],
       [3, 4, 9, 9],
       [8, 5, 9, 9]])

In [120]:
# Shuffle x again (in place) for the argsort / partition examples.
np.random.shuffle(x)
x

array([10,  1, 14,  4,  2,  5,  6,  9, 11,  8,  7,  3, 12,  0, 13])

In [121]:
# Indices that would sort x, i.e. x[np.argsort(x)] is in ascending order.
np.argsort(x)

array([13,  1,  4, 11,  3,  5,  6, 10,  9,  7,  0,  8, 12, 14,  2])

In [124]:
# Partial sort: the element at index 8 ends up where it would be in sorted
# order; smaller values come before it and larger-or-equal values after it,
# each group in no particular order.
np.partition(x, 8)

array([ 1,  2,  0,  4,  3,  5,  6,  7,  8,  9, 10, 11, 12, 14, 13])

In [125]:
# Like np.partition, but returns the indices of the elements instead of the values.
np.argpartition(x, 3)

array([13,  1,  4, 11,  3,  5,  6, 10,  7,  9,  0,  8, 12,  2, 14])

# Fancy indexing

In [127]:
# Sort in place so that x[i] == i, which makes the indexing results easy to read.
x.sort()
ind = [2, 4, 7, 10]
# A list (or array) of arbitrary indices can be passed to retrieve several elements at once
x[ind]

array([ 2,  4,  7, 10])

In [128]:
# With a 2-D index array, the result takes the shape of the index array.
ind = np.array([[5, 6],
               [7, 8]])
x[ind]

array([[5, 6],
       [7, 8]])

In [133]:
X = np.arange(16).reshape(4, -1)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [141]:
# The two index lists are paired element by element, so this selects
# X[0, 0], X[1, 1] and X[2, 2] (not a 3x3 sub-matrix).
rowInd = [0, 1, 2]
colInd = [0, 1, 2]
X[rowInd, colInd]

array([ 0,  5, 10])

In [142]:
# Rows 1 and 3 of column 0.
X[[1,3], 0]

array([ 4, 12])

In [143]:
# A slice and an index list can be mixed: first two rows, columns 0, 1 and 2.
X[:2, colInd]

array([[0, 1, 2],
       [4, 5, 6]])

In [145]:
# A boolean list acts as a mask: only the columns marked True are kept.
# NOTE: colInd held integer indices in the previous cells and is rebound here.
colInd = [True, False, True, False]
X[:, colInd]

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10],
       [12, 14]])

In [146]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [148]:
# Comparisons are applied element-wise and return a boolean array.
x >= 2

array([False, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [149]:
x == 2

array([False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False])

In [152]:
# True counts as 1, so summing a boolean array counts the elements that satisfy the condition.
np.sum(x <= 7)

8

In [153]:
# Same count, obtained with count_nonzero.
np.count_nonzero(x <= 7)

8

In [159]:
# Number of even entries in each row.
np.sum(X%2 == 0, axis = 1)

array([2, 2, 2, 2])

In [169]:
# Boolean-mask indexing: keep only the even elements.
x[x%2 == 0]

array([ 0,  2,  4,  6,  8, 10, 12, 14])

# any(), all()

In [154]:
# True if at least one element satisfies the condition.
np.any(x > 20)

False

In [156]:
# True only if every element satisfies the condition.
np.all(x <= 15)

True

In [166]:
print(X)
# Evaluated per row: True where every entry of that row is greater than 3.
np.all(X > 3, axis = 1)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


array([False,  True,  True,  True])

In [167]:
# Conditions are combined with the element-wise & operator. The parentheses are
# required because & binds more tightly than the comparison operators.
np.sum((x > 5) & (x < 10))

4