# NumPy

In [1]:
import random
import time
import numpy as np

# Build a large list of random floats with a plain Python loop. t1 is taken
# only after the list is complete, so this setup is excluded from both timings.
a = []
for i in range(100000000):
    a.append(random.random())
t1 = time.time()

# Pure-Python processing: built-in sum() over the list, measured as t2 - t1
sum_py = sum(a)
t2 = time.time()

# Convert the list to an ndarray. t4 is taken after the conversion, so the
# conversion cost is not part of the NumPy measurement.
b = np.array(a)
t4 = time.time()

# NumPy processing: np.sum over the ndarray, measured as t5 - t4
sum_np = np.sum(b)
t5 = time.time()

print(f'Python:{t2-t1}, NumPy:{t5-t4}')

Python:1.782839059829712, NumPy:0.2144303321838379


In [5]:
# Several ways to create an ndarray. All arrays are built first and then
# reported together with their type.

# 1. From a Python list (a named list, then a list literal)
list1 = [1, 2, 3, 4]
oneArray = np.array(list1)
t1 = np.array([1, 2, 3, 4])

# 2. From a range object
t2 = np.array(range(10))

# 3. With NumPy's own np.arange(start, stop, step)
t3 = np.arange(0, 10, 2)

# Leading blank line, then one line per array
print()
print(f'oneArray: {oneArray, type(oneArray)}')
print(f't1: {t1, type(t1)}')
print(f't2: {t2, type(t2)}')
print(f't3: {t3, type(t3)}')


oneArray: (array([1, 2, 3, 4]), <class 'numpy.ndarray'>)
t1: (array([1, 2, 3, 4]), <class 'numpy.ndarray'>)
t2: (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), <class 'numpy.ndarray'>)
t3: (array([0, 2, 4, 6, 8]), <class 'numpy.ndarray'>)


In [8]:
# Two-dimensional array built from a nested list: 3 rows, 2 columns
list2 = [
    [1, 2],
    [3, 4],
    [5, 6],
]
twoArray = np.array(list2)
print(twoArray)

[[1 2]
 [3 4]
 [5 6]]


In [9]:
# Number of dimensions (axes) of the array
print(twoArray.ndim)

# Shape of the array as a (rows, columns) tuple
print(twoArray.shape)

# Total number of elements in the array
print(twoArray.size)

2
(3, 2)
6


In [22]:
arr_1 = np.array([[1, 2, 3], [4, 5, 6]])

# Assigning to .shape changes the existing array object in place
arr_1.shape = (3, 2)
print(arr_1)

# reshape() hands back a new array object instead of modifying the receiver
arr_1 = arr_1.reshape(arr_1.shape)
print(f'\narr_1:\n{arr_1}')

# Collapse to one dimension; order='F' reads the elements column by column
arr_2 = arr_1.reshape(arr_1.size, order='F')
print(f'\narr_2:\n{arr_2}')
arr_3 = arr_1.flatten('F')
print(f'\narr_3:\n{arr_3}')

[[1 2]
 [3 4]
 [5 6]]

arr_1:
[[1 2]
 [3 4]
 [5 6]]

arr_2:
[1 3 5 2 4 6]

arr_3:
[1 3 5 2 4 6]


In [20]:
# Array shapes: start from a flat array of 24 elements
t = np.arange(24)
print(f't:\n{t}')
print(t.shape)

# Two dimensions: 4 rows x 6 columns
t1 = t.reshape(4, 6)
print(f'\nt1:\n{t1}')
print(t1.shape)

# Three dimensions: 2 blocks of 3 rows x 4 columns
t2 = t1.reshape(2, 3, 4)
print(f'\nt2:\n{t2}')
print(t2.shape)

t:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
(24,)

t1:
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
(4, 6)

t2:
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
(2, 3, 4)


In [24]:
# Convert an ndarray into a plain Python list and show the resulting type
a = np.array([9, 12, 88, 14, 25])
items = a.tolist()
print(items, type(items))

[9, 12, 88, 14, 25] <class 'list'>


In [38]:
arr = np.array([1, 2, 3, 4, 5], dtype=np.int16)
# Size in bytes of a single element
print(arr.itemsize)
# Data type of the elements
print(arr.dtype)

# astype() produces a converted array; `arr` itself keeps its dtype
arr_2 = arr.astype(np.int64)
print(arr_2.dtype)

# Random decimals
# Plain Python: a single random float rounded to two places
print(round(random.random(), 2))
# NumPy: round a whole list of random floats in one call, yielding an ndarray
arr_3 = np.round([random.random() for _ in range(10)], 2)
print(arr_3)

2
int16
int64
0.47
[0.97 0.81 0.1  0.23 0.66 0.98 0.06 0.44 0.33 0.14]


In [42]:
# Arithmetic between an array and a scalar is applied to every element
t1 = np.arange(24).reshape(6, 4)
print('原数组：\n', t1)
print('加2:\n', t1 + 2)
print('乘2:\n', t1 * 2)
print('除2:\n', t1 / 2)  # true division yields a float array

原数组：
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
加2:
 [[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]
 [14 15 16 17]
 [18 19 20 21]
 [22 23 24 25]]
乘2:
 [[ 0  2  4  6]
 [ 8 10 12 14]
 [16 18 20 22]
 [24 26 28 30]
 [32 34 36 38]
 [40 42 44 46]]
除2:
 [[ 0.   0.5  1.   1.5]
 [ 2.   2.5  3.   3.5]
 [ 4.   4.5  5.   5.5]
 [ 6.   6.5  7.   7.5]
 [ 8.   8.5  9.   9.5]
 [10.  10.5 11.  11.5]]


In [48]:
# Two arrays of identical shape are combined element by element
t1 = np.arange(24).reshape(6, 4)
t2 = np.arange(100, 124).reshape(6, 4)
print('相加:\n', t1 + t2)
print('相乘:\n', t1 * t2)

相加:
 [[100 102 104 106]
 [108 110 112 114]
 [116 118 120 122]
 [124 126 128 130]
 [132 134 136 138]
 [140 142 144 146]]
相乘:
 [[   0  101  204  309]
 [ 416  525  636  749]
 [ 864  981 1100 1221]
 [1344 1469 1596 1725]
 [1856 1989 2124 2261]
 [2400 2541 2684 2829]]


In [49]:
# Arrays whose shapes are incompatible cannot be combined element-wise.
t1 = np.arange(24).reshape((4, 6))
t2 = np.arange(18).reshape((3, 6))
print(t1)
print(t2)

# Broadcasting compares shapes from the trailing axis backwards. (4, 6) and
# (3, 6) agree on the last axis but clash on the first (4 != 3, neither is 1),
# so the subtraction raises ValueError. The error is the point of this cell,
# so it is caught and displayed instead of aborting a "Run All".
try:
    print(t1 - t2)
except ValueError as err:
    print(f'ValueError: {err}')

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]


ValueError: operands could not be broadcast together with shapes (4,6) (3,6) 

In [52]:
# A 1-D array of length 6 is broadcast across every row of a (4, 6) array
t1 = np.arange(24).reshape((4, 6))
t2 = np.arange(6)
print(t1)
print(t2)
print(t1 - t2)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[0 1 2 3 4 5]
[[ 0  0  0  0  0  0]
 [ 6  6  6  6  6  6]
 [12 12 12 12 12 12]
 [18 18 18 18 18 18]]


In [56]:
# A (4, 1) column is broadcast across every column of a (4, 6) array
t1 = np.arange(24).reshape((4, 6))
t2 = np.arange(4).reshape((4, 1))
print(t1)
print(t2)
print(t1 - t2)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[[0]
 [1]
 [2]
 [3]]
[[ 0  1  2  3  4  5]
 [ 5  6  7  8  9 10]
 [10 11 12 13 14 15]
 [15 16 17 18 19 20]]


In [59]:
a = np.array([[1, 2, 3],
              [4, 5, 6]])
# axis=0 adds down the rows (one total per column)
print(np.sum(a, axis=0))
# axis=1 adds across the columns (one total per row)
print(np.sum(a, axis=1))

# No axis: total of every element
print(np.sum(a))

[5 7 9]
[ 6 15]
21


In [62]:
# 3 x 3 x 3 array holding 0..26
a = np.arange(27).reshape((3, 3, 3))
print(a)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [64]:
# Sum over axis 0: the first axis is collapsed, the remaining axes are kept
print(np.sum(a, axis=0))

[[27 30 33]
 [36 39 42]
 [45 48 51]]


In [65]:
# Sum over axis 1: the second axis is collapsed, the remaining axes are kept
print(np.sum(a, axis=1))

[[ 9 12 15]
 [36 39 42]
 [63 66 69]]


In [66]:
# Sum over axis 2: the third axis is collapsed, the remaining axes are kept
print(np.sum(a, axis=2))

[[ 3 12 21]
 [30 39 48]
 [57 66 75]]


## 索引和切片

In [None]:
# Slice with start:stop:step -> indices 2, 4, 6 (the stop index 7 is excluded)
a = np.arange(10)
print(a[2:7:2])

[2 4 6]


In [69]:
# A single integer index returns one element; the whole array is printed next to it
print(a[2], a)

2 [0 1 2 3 4 5 6 7 8 9]


In [70]:
# Slice from index 2 through the end
print(a[2:])

[2 3 4 5 6 7 8 9]


In [80]:
# 4 x 6 grid holding 0..23, used to demonstrate 2-D indexing
t1 = np.arange(24).reshape((4, 6))
print(t1)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [81]:
# A single integer index selects the entry at position 2 along the first axis (a row)
print(t1[2])

[12 13 14 15 16 17]


In [83]:
# Row 2 with an explicit "all columns" slice
print(t1[2,:])
# Slice along the first axis: every row from index 2 onwards
print(t1[2:])

[12 13 14 15 16 17]
[[12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [101]:
# Rows 1 and 2 (the stop index 3 is excluded)
print(t1[1:3])
# Same rows, with the column slice written out explicitly
print(t1[1:3,:])

[[ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]
[[ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]


In [88]:
# A list of indices selects several, possibly non-adjacent, rows: 0, 2 and 3
print(t1[[0,2,3]])

[[ 0  1  2  3  4  5]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]
[[ 0  1  2  3  4  5]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [102]:
# Row index, column index -> the single element at row 2, column 3
print(t1[2,3])

15


In [104]:
# `t1[,2]` is not valid Python: a subscript cannot start with a comma, so the
# expression is rejected at parse time before any code runs. The snippet is
# compiled from a string here so the SyntaxError can be shown without aborting
# a "Run All". To select column 2 of every row, write `t1[:, 2]`.
try:
    compile('t1[,2]', '<demo>', 'eval')
except SyntaxError as err:
    print(f'SyntaxError: {err.msg}')

SyntaxError: invalid syntax (3385441949.py, line 1)

In [105]:
# All rows, column 2 -> the column comes back as a 1-D array
print(t1[:,2])

[ 2  8 14 20]


In [106]:
# All rows, every column from index 2 onwards
print(t1[:, 2:])

[[ 2  3  4  5]
 [ 8  9 10 11]
 [14 15 16 17]
 [20 21 22 23]]


In [107]:
# All rows, columns 0, 2 and 3 picked with a list of indices
print(t1[:,[0,2,3]])

[[ 0  2  3]
 [ 6  8  9]
 [12 14 15]
 [18 20 21]]


In [108]:
# Two index lists are paired up position by position: elements (0,0), (1,1), (1,3)
print(t1[[0,1,1],[0,1,3]])

[0 7 9]


In [113]:
# Overwrite an entire row: the scalar is written to every element of row 1
t1 = np.arange(24).reshape((4, 6))
t1[1] = 0
print(t1)

[[ 0  1  2  3  4  5]
 [ 0  0  0  0  0  0]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [112]:
# Overwrite an entire column: the scalar is written to column 1 of every row
t1 = np.arange(24).reshape((4, 6))
t1[:, 1] = 0
print(t1)

[[ 0  0  2  3  4  5]
 [ 6  0  8  9 10 11]
 [12  0 14 15 16 17]
 [18  0 20 21 22 23]]


In [114]:
# Overwrite a rectangular block: rows 1-2, columns 1-3
t1 = np.arange(24).reshape((4, 6))
t1[1:3, 1:4] = 0
print(t1)

[[ 0  1  2  3  4  5]
 [ 6  0  0  0 10 11]
 [12  0  0  0 16 17]
 [18 19 20 21 22 23]]


In [115]:
# Overwrite individual elements: the index lists pair up as (0, 0) and (1, 3)
t1 = np.arange(24).reshape((4, 6))
t1[[0, 1], [0, 3]] = 0
print(t1)

[[ 0  1  2  3  4  5]
 [ 6  7  8  0 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [123]:
# Boolean-mask assignment: zero every element strictly between 2 and 12
t1 = np.arange(24).reshape((4, 6))
t1[(t1 > 2) & (t1 < 12)] = 0
print(t1)

[[ 0  1  2  0  0  0]
 [ 0  0  0  0  0  0]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [124]:
# Same mask as the `&` form, built with np.logical_and
t1 = np.arange(24).reshape((4, 6))
t1[np.logical_and(t1 > 2, t1 < 12)] = 0
print(t1)

[[ 0  1  2  0  0  0]
 [ 0  0  0  0  0  0]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [125]:
score = np.array([
    [80, 88],
    [82, 81],
    [75, 81],
])
# np.where(condition, x, y): x where the condition holds, y elsewhere
result = np.where(score > 80, True, False)
print(result)

[[False  True]
 [ True  True]
 [False  True]]


In [129]:
# Appending to an array with np.append (a new array is returned each time)
a = np.array([[1,2,3], [4,5,6]])
print(f'第一个数组:\n{a}\n')
# No axis: both inputs are flattened and the result is 1-D
print(f'向数组添加元素:\n{np.append(a, [7,8,9])}\n')
# axis=0: the values are added as an extra row
print(f'沿轴0添加元素:\n{np.append(a, [[7,8,9]], axis=0)}\n')
# axis=1: the values are added as extra columns, one value row per existing row
print(f'沿轴1添加元素:\n{np.append(a, [[5,5,5],[7,8,9]], axis=1)}\n')

第一个数组:
[[1 2 3]
 [4 5 6]]

向数组添加元素:
[1 2 3 4 5 6 7 8 9]

沿轴0添加元素:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

沿轴1添加元素:
[[1 2 3 5 5 5]
 [4 5 6 7 8 9]]



In [135]:
# Inserting values with np.insert(arr, index, values, axis)
a = np.array([[1,2],[3,4],[5,6]])
print(f'第一个数组:\n{a}\n')
# No axis: the array is flattened first and the values go in before index 3
print(f'未传递Axis参数,在插入之前输入数组会被展开:\n{np.insert(a,3,[11,12])}\n')
print('\n传递了Axis参数,会广播值数组来配输入数组')
# axis=0 with one value: broadcast to a full row inserted before row 1
print(f'沿轴0广播:\n{np.insert(a,1,[11], axis=0)}\n')
# axis=0 with a full row of values: inserted as-is before row 1
print(f'沿轴0广播:\n{np.insert(a,1,[12,13], axis=0)}\n')
# axis=1 with a scalar: broadcast to a full column inserted before column 1
print(f'沿轴1广播:\n{np.insert(a,1,11, axis=1)}\n')

第一个数组:
[[1 2]
 [3 4]
 [5 6]]

未传递Axis参数,在插入之前输入数组会被展开:
[ 1  2  3 11 12  4  5  6]


传递了Axis参数,会广播值数组来配输入数组
沿轴0广播:
[[ 1  2]
 [11 11]
 [ 3  4]
 [ 5  6]]

沿轴0广播:
[[ 1  2]
 [12 13]
 [ 3  4]
 [ 5  6]]

沿轴1广播:
[[ 1 11  2]
 [ 3 11  4]
 [ 5 11  6]]



In [137]:
# Removing values with np.delete(arr, index, axis)
a = np.arange(12).reshape(3,4)
print(f'第一个数组:\n{a}\n')
# No axis: the array is flattened first and the element at index 5 is removed
print(f'未传递Axis参数。在删除之前输入数组会被展开:\n{np.delete(a, 5)}\n')
# axis=1: the column at index 1 (the second column) is removed from every row
print(f'删除每一行中的第二列：\n{np.delete(a,1,axis=1)}\n')

第一个数组:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

未传递Axis参数。在删除之前输入数组会被展开:
[ 0  1  2  3  4  6  7  8  9 10 11]

删除每一行中的第二列：
[[ 0  2  3]
 [ 4  6  7]
 [ 8 10 11]]



In [146]:
a = np.array([5, 2, 6, 2, 7, 5, 6, 7, 2, 9])
print(f'第一个数组:\n{a}\n')
print(f'第一个数组的去重值：\n{np.unique(a)}\n')

# return_index: position in `a` of the first occurrence of each unique value
print('去重数组对应原数组的索引下标数组：')
u, indices = np.unique(a, return_index=True)
print(indices)
print('\n我们可以看到每个和原数组下标对应的数值：')
print(u)

# return_inverse: for every element of `a`, its position inside `u`
print('\n原数组对应去重数组的下标数组：')
u, indices = np.unique(a, return_inverse=True)
print(u)
print(indices)

# return_counts: number of occurrences of each unique value
# (the name `indices` is reused here, but it now holds counts)
print('\n返回去重元素的重复数量：')
u, indices = np.unique(a, return_counts=True)
print(u)
print(indices)

第一个数组:
[5 2 6 2 7 5 6 7 2 9]

第一个数组的去重值：
[2 5 6 7 9]

去重数组对应原数组的索引下标数组：
[1 0 2 4 9]

我们可以看到每个和原数组下标对应的数值：
[2 5 6 7 9]

原数组对应去重数组的下标数组：
[2 5 6 7 9]
[1 0 2 0 3 1 2 3 0 4]

返回去重元素的重复数量：
[2 5 6 7 9]
[3 2 2 2 1]


## 计算

In [147]:
# Score table with 3 rows and 2 columns; the bare name on the last line lets
# the notebook render the array.
score = np.array([
    [80, 88],
    [82, 81],
    [75, 81],
])
score

array([[80, 88],
       [82, 81],
       [75, 81]])

In [148]:
# Largest value over all elements
result = np.max(score)
print(result)

88


In [150]:
# Largest value along axis 0, i.e. one maximum per column
result = np.max(score,axis=0)
print(result)

[82 88]


In [151]:
# Smallest value over all elements
result = np.min(score)
print(result)

75


In [152]:
# Smallest value along axis 1, i.e. one minimum per row
result = np.min(score,axis=1)
print(result)

[80 81 75]


In [153]:
# Element-wise maximum against the scalar 0: values below 0 are replaced by 0
result = np.maximum([-2, -1, 0, 1, 2], 0)
print(result)

[0 0 0 1 2]


In [154]:
# Element-wise minimum against the scalar 0: values above 0 are replaced by 0
result = np.minimum([-2, -1, 0, 1, 2], 0)
print(result)

[-2 -1  0  0  0]


In [155]:
# Element-wise maximum of two equal-length sequences, compared position by position
result = np.maximum([-2, -1, 0, 1, 2], [1,2,3,4,5]) 
print(result)

[1 2 3 4 5]


In [156]:
# Arithmetic mean over all elements
result = np.mean(score)
print(result)

81.16666666666667


In [157]:
# Arithmetic mean along axis 0, i.e. one mean per column
result = np.mean(score, axis=0)
print(result)

[79.         83.33333333]


In [161]:
t1 = np.array([[1, 2, 3],
               [4, 5, 6]])
print(t1)
# Running total down each column: every row is the sum of all rows up to it
print(t1.cumsum(axis=0))

[[1 2 3]
 [4 5 6]]
[[1 2 3]
 [5 7 9]]


```
[1 2 3] --------> |1 	 |2 	|3      |
[5 7 9] --------> |5=1+4 |7=2+5 |9=3+6  |
```

In [159]:
# Running total along axis 1: each entry is the sum of its row up to that column
print(t1.cumsum(1))

[[ 1  3  6]
 [ 4  9 15]]


```
[ 1  3  6] ------> |1   |3=2+1  |6=3+2+1    |
[ 4  9 15] ------> |4   |9=4+5  |15=4+5+6   |
```

In [168]:
# Row index of the smallest value in each column; on a tie the first
# occurrence is reported
result = np.argmin(score, axis=0)
print(result)
print(score)

[2 1]
[[80 88]
 [82 81]
 [75 81]]


In [170]:
# NOTE: this modifies `score` in place, so every later cell that reads
# `score` sees the value 64 at row 2, column 1.
score[2,1] = 64
# Recompute the per-column argmin on the modified array
result = np.argmin(score, axis=0)
print(result)

[2 2]


In [171]:
# Standard deviation along axis 0, i.e. one value per column
# (np.std uses ddof=0 unless told otherwise)
result = np.std(score, axis=0)
print(result)

[ 2.94392029 10.07747764]


In [174]:
# Peak-to-peak range (maximum minus minimum); axis=None uses all elements
result = np.ptp(score,axis=None) 
print(result)

24


## 数组的拼接

In [175]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

# Join along axis 0: the rows of b are placed below the rows of a
print(np.concatenate((a, b), axis=0))

[[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [176]:
# Join along axis 1: the columns of b are placed to the right of the columns of a
print(np.concatenate((a,b), axis=1))

[[1 2 5 6]
 [3 4 7 8]]


In [180]:
# Stack along a new leading axis: the result has one more dimension than the inputs
print(np.stack((a,b), axis=0))

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [179]:
# Stack along a new axis inserted at position 1: matching rows of a and b are paired
print(np.stack((a,b), axis=1))

[[[1 2]
  [5 6]]

 [[3 4]
  [7 8]]]


In [181]:
# Two 2 x 6 nested lists holding 0..11 and 12..23
v1 = [list(range(0, 6)), list(range(6, 12))]
v2 = [list(range(12, 18)), list(range(18, 24))]
# Vertical stack: the rows of v2 are placed below the rows of v1
result = np.vstack((v1, v2))
print(result)

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]


In [182]:
# Horizontal stack: the columns of v2 are placed to the right of the columns of v1
result = np.hstack((v1, v2))
print(result)

[[ 0  1  2  3  4  5 12 13 14 15 16 17]
 [ 6  7  8  9 10 11 18 19 20 21 22 23]]


In [183]:
arr = np.arange(9).reshape((3, 3))
print('将数组分成三个大小相等的子数组：')
# An integer second argument asks for that many equal parts along axis 0;
# the result is a list of arrays
b = np.split(arr, 3)
print(b)

将数组分成三个大小相等的子数组：
[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]


In [188]:
# 2 x 6 array of random whole-number floats: uniform samples scaled by 10 and
# rounded down. No seed is set, so the values differ from run to run.
harr = np.floor(10 * np.random.random((2,6)))
print(f'原array:\n{harr}')
# hsplit cuts column-wise into 3 equal pieces
print(f'\n水平分割后:\n{np.hsplit(harr, 3)}')

原array:
[[1. 1. 8. 2. 3. 9.]
 [3. 1. 6. 6. 5. 6.]]

水平分割后:
[array([[1., 1.],
       [3., 1.]]), array([[8., 2.],
       [6., 6.]]), array([[3., 9.],
       [5., 6.]])]


In [189]:
a = np.arange(16).reshape(4,4)
print(f'第一个数组：\n{a}')
# vsplit cuts row-wise into 2 equal pieces
print(f'\n垂直分割之后：\n{np.vsplit(a,2)}')

第一个数组：
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

垂直分割之后：
[array([[0, 1, 2, 3],
       [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])]


## `nan`和`inf`

In [190]:
# np.nan and np.inf are ordinary Python floats, as the printed types show
a = np.nan
b = np.inf
print(a, type(a))
print(b, type(b))

nan <class 'float'>
inf <class 'float'>


In [191]:
# 4 x 6 float array holding 0.0..23.0 (a float dtype is required to store NaN)
t = np.arange(24, dtype=float).reshape((4, 6))

In [192]:
# Number of elements of t that are not equal to zero
print(np.count_nonzero(t))

23


In [194]:
# NOTE: modifies `t` in place by writing NaN at row 3, column 4
t[3,4] = np.nan
# NaN compares unequal to everything, including np.nan itself, so this prints
# True; == / != against np.nan cannot be used to detect NaN
print(t[3,4] != np.nan)

True


In [195]:
# Display the current contents of t
print(t)

[[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8.  9. 10. 11.]
 [12. 13. 14. 15. 16. 17.]
 [18. 19. 20. 21. nan 23.]]


In [196]:
# t != t is True only where an element is NaN, so counting the True entries
# gives the number of NaN values
print(np.count_nonzero(t != t))

1


In [197]:
# Per-column sums; any column that contains NaN sums to NaN
print(np.sum(t,axis=0))

[36. 40. 44. 48. nan 56.]


In [198]:
# Exercise: handling NaN values inside an array
t = np.arange(24).reshape((4, 6)).astype(float)

# Replace part of the array with NaN: row 1, columns 3 onwards
t[1, 3:] = np.nan
print(t)

[[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8. nan nan nan]
 [12. 13. 14. 15. 16. 17.]
 [18. 19. 20. 21. 22. 23.]]


In [199]:
# Walk over every column and, where a column contains NaN, replace the NaN
# entries with the mean of that column's remaining values.

for col_idx in range(t.shape[1]):
    # Basic slicing returns a view, so writes to `column` land in `t`
    column = t[:, col_idx]
    nan_mask = np.isnan(column)

    # Only columns that actually contain NaN need patching
    if nan_mask.any():
        # Mean of the entries that are not NaN, written over the NaN entries
        column[nan_mask] = np.mean(column[~nan_mask])

print(t)

[[ 0.  1.  2.  3.  4.  5.]
 [ 6.  7.  8. 13. 14. 15.]
 [12. 13. 14. 15. 16. 17.]
 [18. 19. 20. 21. 22. 23.]]


In [206]:
# np.transpose on a 2-D array swaps rows and columns: (3, 4) becomes (4, 3)
a = np.arange(12).reshape(3,4)
print (f'原数组：\n{a}') 
print (f'\n对换数组：\n{np.transpose(a)}') 

原数组：
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

对换数组：
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [207]:
# The .T attribute gives the same result as np.transpose
a = np.arange(12).reshape(3,4)
print (f'\n原数组：\n{a}') 
print (f'\n转置数组：\n{a.T}') 


原数组：
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

转置数组：
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [208]:
# swapaxes exchanges two axes of an array; on a 2-D array, swapping axes 1 and
# 0 turns the (4, 6) grid into a (6, 4) one.
t1 = np.arange(24).reshape((4, 6))
re = t1.swapaxes(1, 0)
print(f'\n原数组：\n{t1}')
print(f'\n调用 swapaxes 函数后的数组：\n{re}')


原数组：
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]]

调用 swapaxes 函数后的数组：
[[ 0  6 12 18]
 [ 1  7 13 19]
 [ 2  8 14 20]
 [ 3  9 15 21]
 [ 4 10 16 22]
 [ 5 11 17 23]]


In [None]:
arr = np.arange(36).reshape(6,6)
re = arr.T