# SECTION 26, NumPy

In [1]:
import random
import time
import numpy as np

In [2]:
a = []
for i in range(100000000):
    a.append(random.random())
t1 = time.time()

# Python processing
sum_py = sum(a)
t2 = time.time()

b = np.array(a)
t4 = time.time()

# NumPy processing
sum_np = np.sum(b)
t5 = time.time()

print(f'Python: {t2 - t1}, NumPy: {t5 - t4}')

Python: 2.7554380893707275, NumPy: 0.43738889694213867


In [3]:
# Different ways to create arrays
# 1. Directly from a list
list1 = [1, 2, 3, 4]
oneArray = np.array(list1)
print()
print(f'oneArray: {oneArray, type(oneArray)}')

t1 = np.array([1, 2, 3, 4])
print(f't1: {t1, type(t1)}')

# 2. From a range
t2 = np.array(range(10))
print(f't2: {t2, type(t2)}')

# 3. Using numpy's np.arange() to generate arrays
t3 = np.arange(0, 10, 2)
print(f't3: {t3, type(t3)}')


oneArray: (array([1, 2, 3, 4]), <class 'numpy.ndarray'>)
t1: (array([1, 2, 3, 4]), <class 'numpy.ndarray'>)
t2: (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), <class 'numpy.ndarray'>)
t3: (array([0, 2, 4, 6, 8]), <class 'numpy.ndarray'>)


In [4]:
# Two-dimensional array
list2 = [[1, 2], [3, 4], [5, 6]]
twoArray = np.array(list2)
print(twoArray)

[[1 2]
 [3 4]
 [5 6]]


In [5]:
# Get the dimensions of the array
print(twoArray.ndim)

# Get the shape of the array (rows, columns)
print(twoArray.shape)

# Get the number of elements in the array
print(twoArray.size)


2
(3, 2)
6


In [6]:
arr_1 = np.array([[1, 2, 3], [4, 5, 6]])

# Modify the existing array
arr_1.shape = (3, 2)
print(arr_1)

# Return a new array
arr_1 = arr_1.reshape(arr_1.shape)
print(f'\narr_1:\n{arr_1}')

# Flatten the multi-dimensional array into a one-dimensional array
arr_2 = arr_1.reshape((arr_1.size), order='F')
print(f'\narr_2:\n{arr_2}')
arr_3 = arr_1.flatten(order='F')
print(f'\narr_3:\n{arr_3}')

[[1 2]
 [3 4]
 [5 6]]

arr_1:
[[1 2]
 [3 4]
 [5 6]]

arr_2:
[1 3 5 2 4 6]

arr_3:
[1 3 5 2 4 6]


In [7]:
# Shape of the array
t = np.arange(24,)
print(f't:\n{t}')
print(t.shape)

# Convert to 2D
t1 = t.reshape((4, 6))
print(f'\nt1:\n{t1}')
print(t1.shape)

# Convert to 3D
t2 = t1.reshape((2, 3, 4))
print(f'\nt2:\n{t2}')
print(t2.shape)

t:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
(24,)

t1:
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
(4, 6)

t2:
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
(2, 3, 4)


In [8]:
# Convert array to list
a = np.array([9, 12, 88, 14, 25])
items = a.tolist()
print(items, type(items))

[9, 12, 88, 14, 25] <class 'list'>


In [9]:
arr = np.array([1, 2, 3, 4, 5], dtype=np.int16)
# Direct unit size of each element in the array
print(arr.itemsize)
# Get data type
print(arr.dtype)

# Adjust data type
arr_2 = arr.astype(np.int64)
print(arr_2.dtype)

# Generate random decimals
# Using Python syntax, rounded to two decimal places
print(round(random.random(), 2))
# Using NumPy to generate an array
arr_3 = np.round([random.random() for i in range(10)], 2)
print(arr_3)

2
int16
int64
0.01
[0.98 0.95 0.34 0.91 0.61 0.68 0.95 0.7  0.04 0.69]


In [10]:
t1 = np.arange(24).reshape((6, 4))
print('Original array:\n', t1)
print('Add 2:\n', t1 + 2)
print('Multiply by 2:\n', t1 * 2)
print('Divide by 2:\n', t1 / 2)

Original array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
Add 2:
 [[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]
 [14 15 16 17]
 [18 19 20 21]
 [22 23 24 25]]
Multiply by 2:
 [[ 0  2  4  6]
 [ 8 10 12 14]
 [16 18 20 22]
 [24 26 28 30]
 [32 34 36 38]
 [40 42 44 46]]
Divide by 2:
 [[ 0.   0.5  1.   1.5]
 [ 2.   2.5  3.   3.5]
 [ 4.   4.5  5.   5.5]
 [ 6.   6.5  7.   7.5]
 [ 8.   8.5  9.   9.5]
 [10.  10.5 11.  11.5]]


In [11]:
t1 = np.arange(24).reshape((6, 4))
t2 = np.arange(100, 124).reshape((6, 4))
print('Addition:\n', t1 + t2)
print('Multiplication:\n', t1 * t2)

Addition:
 [[100 102 104 106]
 [108 110 112 114]
 [116 118 120 122]
 [124 126 128 130]
 [132 134 136 138]
 [140 142 144 146]]
Multiplication:
 [[   0  101  204  309]
 [ 416  525  636  749]
 [ 864  981 1100 1221]
 [1344 1469 1596 1725]
 [1856 1989 2124 2261]
 [2400 2541 2684 2829]]


In [49]:
t1 = np.arange(24).reshape((4,6))
t2 = np.arange(18).reshape((3,6))
print(t1)
print(t2)
print(t1-t2)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]


ValueError: operands could not be broadcast together with shapes (4,6) (3,6) 

In [52]:
t1 = np.arange(24).reshape(4,6)
t2 = np.arange(0, 6)
print(t1)
print(t2)
print(t1 - t2)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[0 1 2 3 4 5]
[[ 0  0  0  0  0  0]
 [ 6  6  6  6  6  6]
 [12 12 12 12 12 12]
 [18 18 18 18 18 18]]


In [56]:
t1 = np.arange(24).reshape(4,6)
t2 = np.arange(4).reshape(4,1)
print(t1)
print(t2)
print(t1-t2)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[[0]
 [1]
 [2]
 [3]]
[[ 0  1  2  3  4  5]
 [ 5  6  7  8  9 10]
 [10 11 12 13 14 15]
 [15 16 17 18 19 20]]


In [59]:
a = np.array([[1,2,3], [4,5,6]])
print(np.sum(a, axis=0))
print(np.sum(a, axis=1))

print(np.sum(a))

[5 7 9]
[ 6 15]
21


In [62]:
a = np.arange(27).reshape(3,3,3)
print(a)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [64]:
print(np.sum(a, axis=0))

[[27 30 33]
 [36 39 42]
 [45 48 51]]


In [65]:
print(np.sum(a, axis=1))

[[ 9 12 15]
 [36 39 42]
 [63 66 69]]


In [66]:
print(np.sum(a, axis=2))

[[ 3 12 21]
 [30 39 48]
 [57 66 75]]


## Indexing and slicing

In [None]:
a = np.arange(10)
print(a[2:7:2])

[2 4 6]


In [69]:
print(a[2], a)

2 [0 1 2 3 4 5 6 7 8 9]


In [70]:
print(a[2:])

[2 3 4 5 6 7 8 9]


In [80]:
t1 = np.arange(24).reshape(4,6)
print(t1)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [81]:
print(t1[2])

[12 13 14 15 16 17]


In [83]:
print(t1[2,:])
print(t1[2:])

[12 13 14 15 16 17]
[[12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [101]:
print(t1[1:3])
print(t1[1:3,:])

[[ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]
[[ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]


In [88]:
print(t1[[0,2,3]])

[[ 0  1  2  3  4  5]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[[ 0  1  2  3  4  5]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [102]:
print(t1[2,3])

15


In [104]:
print(t1[,2])

SyntaxError: invalid syntax (3385441949.py, line 1)

In [105]:
print(t1[:,2])

[ 2  8 14 20]


In [106]:
print(t1[:, 2:])

[[ 2  3  4  5]
 [ 8  9 10 11]
 [14 15 16 17]
 [20 21 22 23]]


In [107]:
print(t1[:,[0,2,3]])

[[ 0  2  3]
 [ 6  8  9]
 [12 14 15]
 [18 20 21]]


In [108]:
print(t1[[0,1,1],[0,1,3]])

[0 7 9]


In [113]:
t1 = np.arange(24).reshape(4,6)
t1[1] = 0
print(t1)

[[ 0  1  2  3  4  5]
 [ 0  0  0  0  0  0]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [112]:
t1 = np.arange(24).reshape(4,6)
t1[:,1] = 0
print(t1)

[[ 0  0  2  3  4  5]
 [ 6  0  8  9 10 11]
 [12  0 14 15 16 17]
 [18  0 20 21 22 23]]


In [114]:
t1 = np.arange(24).reshape(4,6)
t1[1:3,1:4] = 0
print(t1)

[[ 0  1  2  3  4  5]
 [ 6  0  0  0 10 11]
 [12  0  0  0 16 17]
 [18 19 20 21 22 23]]


In [115]:
t1 = np.arange(24).reshape(4,6)
t1[[0,1],[0,3]] = 0
print(t1)

[[ 0  1  2  3  4  5]
 [ 6  7  8  0 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [123]:
t1 = np.arange(24).reshape(4,6)
t1[(2<t1)&(t1<12)] = 0
print(t1)

[[ 0  1  2  0  0  0]
 [ 0  0  0  0  0  0]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [124]:
t1 = np.arange(24).reshape(4,6)
t1[(np.logical_and(t1>2, t1<12))] = 0
print(t1)

[[ 0  1  2  0  0  0]
 [ 0  0  0  0  0  0]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [125]:
score = np.array([[80,88],[82,81],[75,81]])
result = np.where(score>80, True, False)
print(result)

[[False  True]
 [ True  True]
 [False  True]]


In [12]:
# Array addition
a = np.array([[1, 2, 3], [4, 5, 6]])
print(f'First array:\n{a}\n')
print(f'Add elements to array:\n{np.append(a, [7, 8, 9])}\n')
print(f'Add elements along axis 0:\n{np.append(a, [[7, 8, 9]], axis=0)}\n')
print(f'Add elements along axis 1:\n{np.append(a, [[5, 5, 5], [7, 8, 9]], axis=1)}\n')

First array:
[[1 2 3]
 [4 5 6]]

Add elements to array:
[1 2 3 4 5 6 7 8 9]

Add elements along axis 0:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Add elements along axis 1:
[[1 2 3 5 5 5]
 [4 5 6 7 8 9]]



In [13]:
a = np.array([[1, 2], [3, 4], [5, 6]])
print(f'First array:\n{a}\n')
print(f'Without axis parameter, input array will be flattened before insertion:\n{np.insert(a, 3, [11, 12])}\n')
print('\nWith axis parameter, value array will be broadcast to match input array')
print(f'Broadcast along axis 0:\n{np.insert(a, 1, [11], axis=0)}\n')
print(f'Broadcast along axis 0:\n{np.insert(a, 1, [12, 13], axis=0)}\n')
print(f'Broadcast along axis 1:\n{np.insert(a, 1, 11, axis=1)}\n')

First array:
[[1 2]
 [3 4]
 [5 6]]

Without axis parameter, input array will be flattened before insertion:
[ 1  2  3 11 12  4  5  6]


With axis parameter, value array will be broadcast to match input array
Broadcast along axis 0:
[[ 1  2]
 [11 11]
 [ 3  4]
 [ 5  6]]

Broadcast along axis 0:
[[ 1  2]
 [12 13]
 [ 3  4]
 [ 5  6]]

Broadcast along axis 1:
[[ 1 11  2]
 [ 3 11  4]
 [ 5 11  6]]



In [14]:
a = np.arange(12).reshape(3, 4)
print(f'First array:\n{a}\n')
print(f'Without axis parameter, input array will be flattened before deletion:\n{np.delete(a, 5)}\n')
print(f'Delete the second column from each row:\n{np.delete(a, 1, axis=1)}\n')

First array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

Without axis parameter, input array will be flattened before deletion:
[ 0  1  2  3  4  6  7  8  9 10 11]

Delete the second column from each row:
[[ 0  2  3]
 [ 4  6  7]
 [ 8 10 11]]



In [15]:
a = np.array([5, 2, 6, 2, 7, 5, 6, 7, 2, 9])
print(f'First array:\n{a}\n')
print(f'Distinct values of the first array:\n{np.unique(a)}\n')
print('Index array corresponding to the distinct array of the original array:')
u, indices = np.unique(a, return_index=True)
print(indices)
print('\nWe can see the corresponding values for each index in the original array:')
print(u)
print('\nIndex array of the distinct array in the original array:')
u, indices = np.unique(a, return_inverse=True)
print(u)
print(indices)
print('\nReturn the count of occurrences of each distinct element:')
u, indices = np.unique(a, return_counts=True)
print(u)
print(indices)

First array:
[5 2 6 2 7 5 6 7 2 9]

Distinct values of the first array:
[2 5 6 7 9]

Index array corresponding to the distinct array of the original array:
[1 0 2 4 9]

We can see the corresponding values for each index in the original array:
[2 5 6 7 9]

Index array of the distinct array in the original array:
[2 5 6 7 9]
[1 0 2 0 3 1 2 3 0 4]

Return the count of occurrences of each distinct element:
[2 5 6 7 9]
[3 2 2 2 1]


## Calculate

In [147]:
score = np.array([[80,88],[82,81],[75,81]])
score

array([[80, 88],
       [82, 81],
       [75, 81]])

In [148]:
result = np.max(score)
print(result)

88


In [150]:
result = np.max(score,axis=0)
print(result)

[82 88]


In [151]:
result = np.min(score)
print(result)

75


In [152]:
result = np.min(score,axis=1)
print(result)

[80 81 75]


In [153]:
result = np.maximum([-2, -1, 0, 1, 2], 0)
print(result)

[0 0 0 1 2]


In [154]:
result = np.minimum([-2, -1, 0, 1, 2], 0)
print(result)

[-2 -1  0  0  0]


In [155]:
result = np.maximum([-2, -1, 0, 1, 2], [1,2,3,4,5]) 
print(result)

[1 2 3 4 5]


In [156]:
result = np.mean(score)
print(result)

81.16666666666667


In [157]:
result = np.mean(score, axis=0)
print(result)

[79.         83.33333333]


In [161]:
t1 = np.array([[1,2,3],[4,5,6]])
print(t1)
print(t1.cumsum(0))

[[1 2 3]
 [4 5 6]]
[[1 2 3]
 [5 7 9]]


```
[1 2 3] --------> |1 	 |2 	|3      |
[5 7 9] --------> |5=1+4 |7=2+5 |9=3+6  |
```

In [159]:
print(t1.cumsum(1))

[[ 1  3  6]
 [ 4  9 15]]


```
[ 1  3  6] ------> |1   |3=2+1  |6=3+2+1    |
[ 4  9 15] ------> |4   |9=4+5  |15=4+5+6   |
```

In [168]:
result = np.argmin(score, axis=0)
print(result)
print(score)

[2 1]
[[80 88]
 [82 81]
 [75 81]]


In [170]:
score[2,1] = 64
result = np.argmin(score, axis=0)
print(result)

[2 2]


In [171]:
result = np.std(score, axis=0)
print(result)

[ 2.94392029 10.07747764]


In [174]:
result = np.ptp(score,axis=None) 
print(result)

24


## Splicing of arrays

In [175]:
a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])

print(np.concatenate((a,b), axis=0))

[[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [176]:
print(np.concatenate((a,b), axis=1))

[[1 2 5 6]
 [3 4 7 8]]


In [180]:
print(np.stack((a,b), axis=0))

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [179]:
print(np.stack((a,b), axis=1))

[[[1 2]
  [5 6]]

 [[3 4]
  [7 8]]]


In [181]:
v1 = [[0,1,2,3,4,5], [6,7,8,9,10,11]]
v2 = [[12,13,14,15,16,17],[18,19,20,21,22,23]]
result = np.vstack((v1, v2))
print(result)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [182]:
result = np.hstack((v1, v2))
print(result)

[[ 0  1  2  3  4  5 12 13 14 15 16 17]
 [ 6  7  8  9 10 11 18 19 20 21 22 23]]


In [16]:
arr = np.arange(9).reshape(3, 3)
print('Split the array into three equal-sized sub-arrays:')
b = np.split(arr, 3)
print(b)

Split the array into three equal-sized sub-arrays:
[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]


In [17]:
harr = np.floor(10 * np.random.random((2, 6)))
print(f'Original array:\n{harr}')
print(f'\nAfter horizontal split:\n{np.hsplit(harr, 3)}')

Original array:
[[1. 6. 9. 5. 1. 6.]
 [8. 1. 6. 2. 5. 0.]]

After horizontal split:
[array([[1., 6.],
       [8., 1.]]), array([[9., 5.],
       [6., 2.]]), array([[1., 6.],
       [5., 0.]])]


In [18]:
a = np.arange(16).reshape(4, 4)
print(f'First array:\n{a}')
print(f'\nAfter vertical split:\n{np.vsplit(a, 2)}')

First array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

After vertical split:
[array([[0, 1, 2, 3],
       [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])]


## `nan` and `inf`

In [190]:
a = np.nan
b = np.inf
print(a, type(a))
print(b, type(b))

nan <class 'float'>
inf <class 'float'>


In [191]:
t = np.arange(24,dtype=float).reshape(4,6)

In [192]:
print(np.count_nonzero(t))

23


In [194]:
t[3,4] = np.nan
print(t[3,4] != np.nan)

True


In [195]:
print(t)

[[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8.  9. 10. 11.]
 [12. 13. 14. 15. 16. 17.]
 [18. 19. 20. 21. nan 23.]]


In [196]:
print(np.count_nonzero(t != t))

1


In [197]:
print(np.sum(t,axis=0))

[36. 40. 44. 48. nan 56.]


In [19]:
# Exercise, handling nan in arrays
t = np.arange(24).reshape(4, 6).astype('float')

# Replace a portion of the array with nan
t[1, 3:] = np.nan
print(t)

[[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8. nan nan nan]
 [12. 13. 14. 15. 16. 17.]
 [18. 19. 20. 21. 22. 23.]]


In [20]:
# Try iterating over each column and checking if it contains `nan`

for i in range(t.shape[1]):
    # Get current column data
    temp_col = t[:, i]
    # Check if the current column contains nan
    nan_num = np.count_nonzero(temp_col != temp_col)

    # If condition is met, it contains nan
    if nan_num != 0:
        # Extract non-nan data from this column
        temp_col_not_nan = temp_col[temp_col == temp_col]

        # Replace nan with the mean of this column
        temp_col[np.isnan(temp_col)] = np.mean(temp_col_not_nan)

print(t)

[[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8. 13. 14. 15.]
 [12. 13. 14. 15. 16. 17.]
 [18. 19. 20. 21. 22. 23.]]


In [21]:
a = np.arange(12).reshape(3, 4)
print(f'Original array:\n{a}')
print(f'\nTransposed array:\n{np.transpose(a)}')

Original array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

Transposed array:
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [22]:
# Equivalent to transpose
a = np.arange(12).reshape(3, 4)
print(f'\nOriginal array:\n{a}')
print(f'\nTransposed array:\n{a.T}')


Original array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

Transposed array:
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [23]:
# Function to swap two axes of an array
t1 = np.arange(24).reshape(4, 6)
re = t1.swapaxes(1, 0)
print(f'\nOriginal array:\n{t1}')
print(f'\nArray after calling swapaxes function:\n{re}')


Original array:
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]

Array after calling swapaxes function:
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]


In [24]:
arr = np.arange(36).reshape(6,6)
re = arr.T