# Numpy 基础

---

导入库文件

In [1]:
import numpy as np

创建 array


In [2]:
# The dtype is inferred from the literals unless it is passed explicitly.
a = np.array([1, 2, 3])                  # integer literals
b = np.array([1., 2., 3.])               # float literals
c = np.array([1, 2, 3], dtype=complex)   # dtype forced to complex

for arr in (a, b, c):
    print(arr, arr.dtype)

# np.r_ joins slices and scalars into one 1-D array; np.c_ stacks them as a column.
print(np.r_[1:6, 6, 6])
print(np.c_[1:6])


[1 2 3] int64
[1. 2. 3.] float64
[1.+0.j 2.+0.j 3.+0.j] complex128
[1 2 3 4 5 6 6]
[[1]
 [2]
 [3]
 [4]
 [5]]


创建 n 维数组


In [3]:
# Equal-length rows become the rows of a 2-D array.
b = np.array([(1., 2., 3.), (4., 5., 6.)])
print(b)

# Counter-example: rows of unequal length cannot form a 2-D array.
# NumPy >= 1.24 raises ValueError for such ragged input unless dtype=object
# is requested explicitly; the result is a 1-D array holding the two tuples.
c = np.array([(1, 2), (3, 4, 5)], dtype=object)
print(c, c.shape)


[[1. 2. 3.]
 [4. 5. 6.]]
[(1, 2) (3, 4, 5)] (2,)


快速创建方法


In [4]:
# 3 rows x 5 columns filled with 0.0
np.zeros(shape=(3, 5))


array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [5]:
# A 2 x 3 x 4 block of ones stored as 16-bit integers
np.ones(shape=(2, 3, 4), dtype=np.int16)


array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int16)

In [6]:
# np.empty does not initialise the result: the values displayed are
# whatever happened to be in that memory already.
np.empty(shape=(2, 3))


array([[1., 2., 3.],
       [4., 5., 6.]])

生成数字序列


In [7]:
# Integers from 0 up to (but not including) 10
np.arange(0, 10)


array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
# From 10 up to (but not including) 30 in steps of 5
np.arange(10, 30, step=5)


array([10, 15, 20, 25])

In [9]:
# A fractional step works too, but the element count then depends on
# floating-point rounding; np.linspace gives an exact count instead.
np.arange(0, 2, step=0.3)


array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])

In [10]:
# 9 evenly spaced samples from 0 to 2, both ends included
np.linspace(0, 2, num=9)


array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

1 维转 n 维


In [11]:
# Ten elements laid out as 2 rows x 5 columns
np.arange(10).reshape((2, 5))


array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [12]:
# Counter-example: 2 * 3 != 10, so the element count does not match.
# The exception is the point of this cell, so catch it and show the message
# instead of leaving a cell that aborts "Restart & Run All".
try:
    np.arange(10).reshape(2, 3)
except ValueError as err:
    print(f"ValueError: {err}")


ValueError: cannot reshape array of size 10 into shape (2,3)

基本操作


In [13]:
# Arithmetic and comparison operators act element by element.
a = np.array([20, 30, 40, 50, 60])
b = np.arange(5)
c = np.subtract(a, b)

print(c)
print(np.square(b))      # element-wise power
print(np.sin(a) * 10)
print(np.less(a, 36))

[20 29 38 47 56]
[ 0  1  4  9 16]
[ 9.12945251 -9.88031624  7.4511316  -2.62374854 -3.04810621]
[ True  True False False False]


乘法操作


In [14]:
A = np.array([[1, 1], [0, 1]])
B = np.array([[2, 0], [3, 4]])

print(np.multiply(A, B))   # element-wise product
print(np.matmul(A, B))     # matrix product
print(np.dot(A, B))        # matrix product again, spelled with dot

[[2 0]
 [0 4]]
[[5 4]
 [3 4]]
[[5 4]
 [3 4]]


+= 和 *= 等类似操作会改变现有 array


In [15]:
# Augmented assignment writes into the existing array; no new array is created.
a = np.ones(shape=(2, 3))
print(a)
a *= 3           # a now holds 3.0 everywhere
print(a)

b = np.random.random(size=(2, 3))
print(b)
b += a           # b is updated in place with the sum
print(b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[3. 3. 3.]
 [3. 3. 3.]]
[[0.40065352 0.21453985 0.02348679]
 [0.83416897 0.41220112 0.69026197]]
[[3.40065352 3.21453985 3.02348679]
 [3.83416897 3.41220112 3.69026197]]


数据类型转换


In [16]:
# np.int was only an alias of the builtin int and was removed in NumPy 1.24,
# so the builtin is used directly.
a = np.ones((2, 3), dtype=int)
b = np.random.random((2, 3))
print(a.dtype, b.dtype)

# int -> float: the in-place result fits into the float array.
b += a
print(b)

# Counter-example, float -> int: an in-place add cannot store float results
# in an integer array. The exception is the demonstration, so catch it and
# print the message instead of aborting "Restart & Run All".
try:
    a += b
except TypeError as err:
    print(f"TypeError: {err}")


int64 float64
[[1.26033272 1.5985853  1.35579137]
 [1.64932339 1.351993   1.05025037]]


TypeError: Cannot cast ufunc add output from dtype('float64') to dtype('int64') with casting rule 'same_kind'

ndarray 的一元操作


In [17]:
# Without an axis argument the reductions collapse the whole array to one scalar.
a = np.random.random(size=(2, 3))
print(a)
for reduced in (a.sum(), a.min(), a.max()):
    print(reduced)

# With an axis argument the reduction runs along that axis only.
b = np.arange(12).reshape((3, 4))
print(b)
print(np.sum(b, axis=0))      # column totals
print(np.sum(b, axis=1))      # row totals
print(np.cumsum(b, axis=1))   # running total along each row

[[0.06107879 0.26127172 0.04995713]
 [0.57084005 0.09508052 0.14367703]]
1.1819052413854196
0.049957125513921063
0.5708400503213619
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[12 15 18 21]
[ 6 22 38]
[[ 0  1  3  6]
 [ 4  9 15 22]
 [ 8 17 27 38]]


通用方法,如: 

sin, cos, exp, all, any, apply_along_axis, argmax, argmin, argsort, average, bincount, ceil, clip, conj, corrcoef, cov, cross, cumprod, cumsum, diff, dot, floor, inner, inv, lexsort, max, maximum, mean, median, min, minimum, nonzero, outer, prod, re, round, sort, std, sum, trace, transpose, var, vdot, vectorize, where


In [18]:
# Universal functions are applied element by element.
B = np.arange(3)
C = np.array([2., -1., 3.])

print(B)
for ufunc in (np.exp, np.sqrt):
    print(ufunc(B))
print(B + C)

[0 1 2]
[1.         2.71828183 7.3890561 ]
[0.         1.         1.41421356]
[2. 0. 5.]


array 的取值,切片和迭代


In [19]:
# A 1-D array is indexed, sliced and iterated much like a Python list.
a = np.power(np.arange(10), 3)
print(a)
print(a[2])      # single element
print(a[1:6])    # positions 1..5
print(a[:-1])    # everything except the last element

for i in a:
    print(3 * i)

[  0   1   8  27  64 125 216 343 512 729]
8
[  1   8  27  64 125]
[  0   1   8  27  64 125 216 343 512]
0
3
24
81
192
375
648
1029
1536
2187


多维度 array 取值


In [20]:
def f(x, y):
    """Return ``10 * x + y``; np.fromfunction passes the row and column index grids."""
    return 10 * x + y

# np.int was removed in NumPy 1.24; the builtin int is what it aliased.
b = np.fromfunction(f, (5, 4), dtype=int)
print(b)
print(b[2, 3])     # single element: row 2, column 3
print(b[:5, 1])    # column 1 of the first five rows
print(b[:, 1])     # column 1 of every row
print(b[1:3, :])   # rows 1 and 2, all columns

[[ 0  1  2  3]
 [10 11 12 13]
 [20 21 22 23]
 [30 31 32 33]
 [40 41 42 43]]
23
[ 1 11 21 31 41]
[ 1 11 21 31 41]
[[10 11 12 13]
 [20 21 22 23]]


省略号 (`...`) 代表补齐完整索引所需的若干个冒号. 如: `x` 为 5 维数组, 则:
- `x[1, 2, ...]` = `x[1, 2, :, :, :]`
- `x[..., 3]` = `x[:, :, :, :, 3]`
- `x[4, ..., 5, :]` = `x[4, :, :, 5, :]`


In [21]:
# np.int was removed in NumPy 1.24; the builtin int is what it aliased.
c = np.fromfunction(lambda x, y, z: 10**x + y + 2*z, (2, 2, 3), dtype=int)
print(c)
print(c[1, ...])   # same as c[1, :, :]
print(c[..., 2])   # same as c[:, :, 2]

# Iterating over an array walks its first axis ...
for row in c:
    print('row: ', row)

# ... while .flat visits every single element.
for element in c.flat:
    print('element: ', element)

[[[ 1  3  5]
  [ 2  4  6]]

 [[10 12 14]
  [11 13 15]]]
[[10 12 14]
 [11 13 15]]
[[ 5  6]
 [14 15]]
row:  [[1 3 5]
 [2 4 6]]
row:  [[10 12 14]
 [11 13 15]]
element:  1
element:  3
element:  5
element:  2
element:  4
element:  6
element:  10
element:  12
element:  14
element:  11
element:  13
element:  15


维度控制


In [22]:
# Build the array with Fortran (column-major) memory order.
a = np.array([[0, 1, 2, 3, 4], 
              [5, 6, 7, 8, 9], 
              [10, 11, 12, 13, 14]], order='F')
print(a, a.shape)

# flatten() : always returns a copy
# flat      : a flat iterator over the array (not used in this cell)
# ravel()   : related function which returns an ndarray
print(a.flatten())
print(a.ravel())

b = a.reshape(5, 3)
print(b, b.shape)

c = a.T
print(c, c.shape)

# resize() depends on the array's memory order: for a Fortran-ordered array
# the result differs from what reshape() produced above.
# It also modifies the original array in place.
a.resize(5, 3)
print(a, a.shape)

d = np.arange(30)
print(d)
# -1 lets NumPy work out that dimension from the total size
d.shape = 2, -1, 3
print(d.shape)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]] (3, 5)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]] (5, 3)
[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]] (5, 3)
[[ 0 11  8]
 [ 5  2 13]
 [10  7  4]
 [ 1 12  9]
 [ 6  3 14]] (5, 3)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29]
(2, 5, 3)


array 的连接方法


In [23]:
a = np.array([1, 1, 1])
b = np.array([2, 2, 2])
print(np.vstack((a, b)))         # stack as rows
print(np.hstack((a, b)))         # join end to end
print(np.column_stack((a, b)))   # stack as columns
# np.row_stack was only an alias of np.vstack and is deprecated since
# NumPy 2.0, so vstack is called directly here.
print(np.vstack((a, b)))


[[1 1 1]
 [2 2 2]]
[1 1 1 2 2 2]
[[1 2]
 [1 2]
 [1 2]]
[[1 1 1]
 [2 2 2]]


array 的分离方法

In [24]:

a = np.arange(18).reshape((3, 6))
print(a)

# An integer asks for that many equal parts ...
print(np.hsplit(a, 3))
# ... while a sequence lists the column positions to cut before.
print(np.hsplit(a, [1, 3]))

# vsplit does the same along the rows; index 5 lies past the last row,
# so the final piece comes back empty.
print(np.vsplit(a, 3))
print(np.vsplit(a, [2, 5]))

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]
[array([[ 0,  1],
       [ 6,  7],
       [12, 13]]), array([[ 2,  3],
       [ 8,  9],
       [14, 15]]), array([[ 4,  5],
       [10, 11],
       [16, 17]])]
[array([[ 0],
       [ 6],
       [12]]), array([[ 1,  2],
       [ 7,  8],
       [13, 14]]), array([[ 3,  4,  5],
       [ 9, 10, 11],
       [15, 16, 17]])]
[array([[0, 1, 2, 3, 4, 5]]), array([[ 6,  7,  8,  9, 10, 11]]), array([[12, 13, 14, 15, 16, 17]])]
[array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]]), array([[12, 13, 14, 15, 16, 17]]), array([], shape=(0, 6), dtype=int64)]


array 的拷贝

1. 不拷贝

In [25]:
def f(x):
    """Print the identity of the object received as ``x``."""
    print(id(x))


# Plain assignment binds a second name to the very same array object.
a = np.arange(10)
b = a
print(b is a)

# Reshaping through one name is therefore visible through the other.
b.shape = (2, 5)
print(a.shape)

# Passing the array to a function hands over the same object as well.
print(id(a))
f(a)


True
(2, 5)
4665535264
4665535264


2. 浅拷贝


In [26]:
a = np.arange(30).reshape((3, 10))

# view() creates a new array object over the same data.
# Note: `a` is itself a view produced by reshape, so c.base is the array that
# arange allocated rather than `a` -- that is why the second value is False.
c = a.view()
print(c is a, c.base is a, c.flags.owndata)

# The view carries its own shape ...
c.shape = (5, 6)
print(a.shape)

# ... but writes through it land in the shared data.
c[1, 3] = 100
print(c)
print(a)

# A slice is a view as well: assigning through it changes `a`.
s = a[:, 1:3]
print(s)

s[...] = 10
print(a)

False False False
(3, 10)
[[  0   1   2   3   4   5]
 [  6   7   8 100  10  11]
 [ 12  13  14  15  16  17]
 [ 18  19  20  21  22  23]
 [ 24  25  26  27  28  29]]
[[  0   1   2   3   4   5   6   7   8 100]
 [ 10  11  12  13  14  15  16  17  18  19]
 [ 20  21  22  23  24  25  26  27  28  29]]
[[ 1  2]
 [11 12]
 [21 22]]
[[  0  10  10   3   4   5   6   7   8 100]
 [ 10  10  10  13  14  15  16  17  18  19]
 [ 20  10  10  23  24  25  26  27  28  29]]


3. 深拷贝


In [27]:
a = np.arange(30).reshape((3, 10))

# copy() duplicates the data, so the new array is independent of `a`.
d = np.copy(a)
print(d is a, d.base is a)

# Writing to the copy leaves the original untouched.
d[0, 0] = 100
print(d)
print(a)

False False
[[100   1   2   3   4   5   6   7   8   9]
 [ 10  11  12  13  14  15  16  17  18  19]
 [ 20  21  22  23  24  25  26  27  28  29]]
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]]



### 广播规则 (broadcasting)

让通用函数处理不同 shape 的 array 的一种手段.

从最后一个维度开始向前逐一比较 (缺少的维度视为 1), 每一对维度需要满足以下条件之一:
1. 相同,或者
2. 其中之一为 1

如:

```
    Image  (3d array): 256 x 256 x 3
    Scale  (1d array):             3
    Result (3d array): 256 x 256 x 3
    
    A      (4d array):  8 x 1 x 6 x 1
    B      (3d array):      7 x 1 x 5
    Result (4d array):  8 x 7 x 6 x 5
```


In [28]:
# a is (3, 1, 5) and b is (3, 5). b is treated as (1, 3, 5) and the
# length-1 axes are stretched, so the sum has shape (3, 3, 5).
a = np.ones(shape=(3, 1, 5))
b = np.arange(15).reshape((3, 5))
print(a)
print(b)
c = np.add(a, b)
print(c, c.shape)

[[[1. 1. 1. 1. 1.]]

 [[1. 1. 1. 1. 1.]]

 [[1. 1. 1. 1. 1.]]]
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[[ 1.  2.  3.  4.  5.]
  [ 6.  7.  8.  9. 10.]
  [11. 12. 13. 14. 15.]]

 [[ 1.  2.  3.  4.  5.]
  [ 6.  7.  8.  9. 10.]
  [11. 12. 13. 14. 15.]]

 [[ 1.  2.  3.  4.  5.]
  [ 6.  7.  8.  9. 10.]
  [11. 12. 13. 14. 15.]]] (3, 3, 5)


使用 array index


In [29]:
# An integer array used as an index picks the elements at those positions;
# the result takes the shape of the index array.
a = np.square(np.arange(12))
print(a)

i = np.array([1, 2, 3, 8, 5])     # 1-D index -> 1-D result
print(a[i])

j = np.array([[3, 5], [9, 7]])    # 2-D index -> 2-D result
print(a[j])

[  0   1   4   9  16  25  36  49  64  81 100 121]
[ 1  4  9 64 25]
[[ 9 25]
 [81 49]]


使用多维 array index


In [30]:
# A single index array on a 2-D array selects whole rows.
a = np.array([[0, 0, 0], [1, 1, 1], [0, 2, 0],
              [0, 0, 3], [0, 0, 4], [5, 0, 5]])
i = np.array([[0, 1, 2, 0], [0, 3, 5, 0]])
print(a[i])

a = np.arange(15).reshape((3, 5))
print(a)

# One index array per axis: i holds positions along the first axis (rows),
# j holds positions along the second axis (columns).
i = np.array([[0, 1], [1, 2]])
j = np.array([[2, 1], [3, 3]])

# i and j are paired element by element, so their shapes have to agree.
print(a[i, j])

[[[0 0 0]
  [1 1 1]
  [0 2 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 3]
  [5 0 5]
  [0 0 0]]]
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 2  6]
 [ 8 13]]


使用 boolean array index


In [31]:
a = np.arange(15).reshape((3, 5))

# A comparison yields a boolean array of the same shape.
b = np.greater(a, 5)
print(b)

# Indexing with it returns the selected elements as a 1-D array ...
print(a[b])

# ... and assigning through it overwrites exactly those elements.
a[b] = 0
print(a)


[[False False False False False]
 [False  True  True  True  True]
 [ True  True  True  True  True]]
[ 6  7  8  9 10 11 12 13 14]
[[0 1 2 3 4]
 [5 0 0 0 0]
 [0 0 0 0 0]]


[ix_](https://www.numpy.org/devdocs/reference/generated/numpy.ix_.html#numpy.ix_) 函数
快速构造 index


In [32]:
a = np.arange(10).reshape((2, 5))

# np.ix_ turns a list of rows and a list of columns into index arrays that
# together select every (row, column) combination.
rows, cols = [0, 1], [2, 3]
ix = np.ix_(rows, cols)
print(a, ix)
print(a[ix])


[[0 1 2 3 4]
 [5 6 7 8 9]] (array([[0],
       [1]]), array([[2, 3]]))
[[2 3]
 [7 8]]


线性代数

In [33]:
a = np.array([[1., 2.], [3., 4.]])
print(a)
print(a.T)                  # transpose
print(np.linalg.inv(a))     # inverse

u = np.eye(2)               # 2 x 2 identity matrix
print(u)
print(u.trace())            # sum of the diagonal

j = np.array([[0., -1.], [1., 0.]])
print(np.matmul(j, j))      # matrix product of j with itself

# Solve a @ x = b for x.
b = np.array([[5.], [7.]])
print(np.linalg.solve(a, b))

[[1. 2.]
 [3. 4.]]
[[1. 3.]
 [2. 4.]]
[[-2.   1. ]
 [ 1.5 -0.5]]
[[1. 0.]
 [0. 1.]]
2.0
[[-1.  0.]
 [ 0. -1.]]
[[-3.]
 [ 4.]]


### 其他

np.prod: array 对应 axis 的乘积

In [34]:
grid = [[1, 3], [2, 5]]
# axis=None multiplies every element; 0 works down the columns, 1 along the rows.
for axis in (None, 0, 1):
    print(np.prod(grid, axis=axis))

30
[ 2 15]
[ 3 10]


In [35]:
a = np.array([1, 9, 5, 7, 8])

# argsort returns the positions that would sort the array ...
order = a.argsort()
print(order)
# ... so indexing with them yields the sorted values.
print(a[order])

[0 2 3 4 1]
[1 5 7 8 9]


In [36]:
a = [1, 3, 5]
print(np.var(a))
print(np.mean(a))

# Variance spelled out: the mean of the squared deviations from the mean.
deviations = np.asarray(a) - np.mean(a)
print(np.mean(np.square(deviations)))

2.6666666666666665
3.0
2.6666666666666665


In [37]:
a = np.arange(36).reshape((6, 6))
print(a)

# Given a 2-D array, diag extracts the main diagonal ...
print(np.diag(a, k=0))
# ... given a 1-D array, it builds a square matrix with it on the diagonal.
print(np.diag(np.arange(5), k=0))

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]
 [30 31 32 33 34 35]]
[ 0  7 14 21 28 35]
[[0 0 0 0 0]
 [0 1 0 0 0]
 [0 0 2 0 0]
 [0 0 0 3 0]
 [0 0 0 0 4]]


In [38]:
# Roll a die 1000 times and count how often each face comes up
print(np.random.multinomial(1000, [1./6, 1./6, 1./6, 1./6, 1./6, 1./6]))
print(np.random.multinomial(1000, [1./ 6] * 6, size = 2))
# When the probabilities do not add up to 1, only the count of the last
# entry is affected (see the recorded output: 0 and 243 for the last face)
print(np.random.multinomial(1000, [1./6, 2./6, 1./6, 1./6, 1./6, 1./6]))
print(np.random.multinomial(1000, [1./6, .5/6, 1./6, 1./6, 1./6, 1./6]))

[188 159 174 150 178 151]
[[171 168 164 163 166 168]
 [184 180 155 183 143 155]]
[169 333 175 143 180   0]
[161  83 167 191 155 243]


In [39]:
import matplotlib.pyplot as plt
# Draw multi-dimensional samples from a normal distribution.
# The first argument is the mean vector, the second the covariance matrix
# (diagonal in this example, which is not a general requirement).
# Loosely, a multi-dimensional counterpart of np.random.randn.
a = np.random.multivariate_normal([30, -30], [[10, 0], [0, 100]], 5000)
print(a, a.shape)
# Transposing gives one row per coordinate, which unpacks into x and y
x, y = a.T
plt.plot(x, y, 'o')
plt.axis('equal')
plt.show()

[[ 31.84458484 -26.30632633]
 [ 31.21612215 -26.91127575]
 [ 30.08397113 -45.48702381]
 ...
 [ 27.53837578 -32.67774772]
 [ 29.35227333 -34.91950522]
 [ 33.58892415 -23.46559509]] (5000, 2)


<Figure size 640x480 with 1 Axes>

In [40]:
a = np.square(np.arange(10))
print(a)
# Reverse the array (equivalent to the slice a[::-1])
print(np.flip(a))

[ 0  1  4  9 16 25 36 49 64 81]
[81 64 49 36 25 16  9  4  1  0]


In [2]:
# 500 samples with 36 standard-normal features each
a = np.random.randn(500, 36)
print(a)

# Averaging the first 100 rows over axis 0 leaves one value per column.
first_hundred = a[:100]
print(first_hundred.mean(axis=0).shape)

[[ 2.14799752 -0.41820876 -1.47994605 ... -0.84206837 -1.86625846
  -1.4081478 ]
 [ 0.66222753  1.34409151 -0.98362023 ... -1.10363367 -1.88541754
   2.01587128]
 [ 0.26666903  0.2307335   1.57919177 ...  0.43872329  1.55609708
  -0.25898404]
 ...
 [-1.45421971 -1.89195246  0.2354702  ...  0.76210366 -0.61513514
   1.03357194]
 [-0.42594522  1.15614142  0.39275416 ... -0.05754799 -0.04360392
   0.41129031]
 [ 1.18292729 -1.38342697  0.25335201 ... -0.6154701   0.74358864
   1.28865376]]
(36,)


In [26]:
# With the same seed, uniform(-100, 100) and a rescaled rand() print the
# same numbers: rand() draws from [0, 1), which is stretched to [-100, 100).
np.random.seed(12345)
via_uniform = np.random.uniform(low=-100, high=100, size=(3, 5))
print(via_uniform)

np.random.seed(12345)
via_rand = np.random.rand(3, 5) * 200 - 100
print(via_rand)

[[ 85.92321856 -36.72488908 -63.21623766 -59.08794429  13.54500582]
 [ 19.1089406   92.90290395  30.63541937  49.78132751  30.71397417]
 [ 49.54296185  92.26134721 -98.32234041 -78.71112466 -40.25925725]]
[[ 85.92321856 -36.72488908 -63.21623766 -59.08794429  13.54500582]
 [ 19.1089406   92.90290395  30.63541937  49.78132751  30.71397417]
 [ 49.54296185  92.26134721 -98.32234041 -78.71112466 -40.25925725]]


In [2]:
# An integer argument n means "a shuffled copy of np.arange(n)".
print(np.random.permutation(10))

[4 9 7 5 3 1 2 8 0 6]


In [135]:
# Three elements, each equal to 6
print(np.full(shape=3, fill_value=6))

# shuffle reorders the array in place and returns nothing.
a = np.arange(10)
np.random.shuffle(a)
print(a)

[6 6 6]
[9 5 0 6 3 4 8 1 2 7]


In [2]:
# bincount reports how many times each integer 0..max occurs.
for values in (np.arange(10), [1, 3, 0, 1, 5, 3, 1, 5]):
    print(np.bincount(values))

[1 1 1 1 1 1 1 1 1 1]
[1 3 0 2 0 2]
