# NumPy 快速教程

## 参考资料

> 英文版：https://numpy.org/devdocs/user/quickstart.html  
> 中文版：https://www.numpy.org.cn/user/quickstart.html  


In [1]:
import numpy as np
print(np.__version__)

np.random.seed(0)   # seed NumPy's global random generator used by the random examples

1.19.1


## 基础知识

### 一个例子


In [2]:
a = np.arange(15).reshape(3, 5)
print(a)
print(a.shape)   # shape of the array: its size along each axis
print(a.ndim)   # number of axes (dimensions)
print(a.dtype.name)   # name of the element type
print(a.itemsize)   # size of each element in bytes
print(a.size)   # total number of elements
print(type(a))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
(3, 5)
2
int32
4
15
<class 'numpy.ndarray'>


### 数组创建

> 另见：array， zeros， zeros_like， ones， ones_like， empty， empty_like， arange， linspace， numpy.random.mtrand.RandomState.rand， numpy.random.mtrand.RandomState.randn， fromfunction， fromfile



In [3]:
# Build arrays from Python sequences
print(np.array([2,3,4]))
print(np.array([(1.5,2,3), (4,5,6)]))   # a sequence of sequences gives a 2-D array
print(np.array([[1,2], [3,4]], dtype=complex))   # the element type can be given explicitly

[2 3 4]
[[1.5 2.  3. ]
 [4.  5.  6. ]]
[[1.+0.j 2.+0.j]
 [3.+0.j 4.+0.j]]


In [4]:
print(np.zeros((3,4)))   # array filled with zeros
print(np.ones((2,3,4), dtype=np.int16))   # array filled with ones, with an explicit dtype
print(np.empty((2,3)))   # NOTE(review): contents look uninitialized (arbitrary values in the output) — do not rely on them

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]

 [[1 1 1 1]
  [1 1 1 1]
  [1 1 1 1]]]
[[1.39069238e-309 1.39069238e-309 1.39069238e-309]
 [1.39069238e-309 1.39069238e-309 1.39069238e-309]]


In [5]:
print(np.arange(0, 2, 0.3))   # values from 0 up to (not including) 2, in steps of 0.3
print(np.linspace(0, 2, 9))   # 9 evenly spaced values from 0 to 2, both ends included

[0.  0.3 0.6 0.9 1.2 1.5 1.8]
[0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.  ]


### 打印数组


In [6]:
print(np.arange(12).reshape(4,3))   # a 2-D array is printed one row per line

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [7]:
import sys
np.set_printoptions(threshold=sys.maxsize)   # force arrays to be printed in full

### 基本操作


In [8]:
# Arithmetic and comparison operators are applied element by element
a = np.array([20,30,40,50])
b = np.arange(4)
print(a-b)
print(b**2)   # power
print(np.sin(a))
print(a<35)   # comparison; the result is a boolean array

[20 29 38 47]
[0 1 4 9]
[ 0.91294525 -0.98803162  0.74511316 -0.26237485]
[ True  True False False]


In [9]:
A = np.array([[1,1],
              [0,1]])
B = np.array([[2,0],
              [3,4]])
print(A*B)   # element-wise product
print(A@B)   # matrix product
print(A.dot(B))   # matrix product (same result as A@B)

[[2 0]
 [0 4]]
[[5 4]
 [3 4]]
[[5 4]
 [3 4]]


In [11]:
# *= and += modify the array on the left-hand side in place
a = np.ones((2,3), dtype=int)
b = np.random.random((2,3))
a *= 3
print(a)
b += a
print(b)    # operating on arrays of different types upcasts to the more precise type
try:
    a += b    # error: the float result cannot be cast down into the integer array
except TypeError as err:    # NumPy's UFuncTypeError is a TypeError subclass
    # Show the error without aborting a top-to-bottom run of the notebook
    print(f"{type(err).__name__}: {err}")

[[3 3 3]
 [3 3 3]]
[[3.43758721 3.891773   3.96366276]
 [3.38344152 3.79172504 3.52889492]]


UFuncTypeError: Cannot cast ufunc 'add' output from dtype('float64') to dtype('int32') with casting rule 'same_kind'

In [12]:
a = np.ones(3, dtype=np.int32)
b = np.linspace(0, np.pi, 3)
c = a + b
d = np.exp(c*1j)    # operating on different types upcasts the result (here to complex)
print(d)

[ 0.54030231+0.84147098j -0.84147098+0.54030231j -0.54030231-0.84147098j]


In [13]:
a = np.random.random((2,3))
print(a)
print(a.sum())   # by default the operation covers the whole array
print(a.min(axis=0))   # axis selects the axis to operate along (here: minimum of each column)
print(a.max(axis=1))   # maximum of each row

[[0.56804456 0.92559664 0.07103606]
 [0.0871293  0.0202184  0.83261985]]
2.5046448002742845
[0.0871293  0.0202184  0.07103606]
[0.92559664 0.83261985]


### 通函数

> 另见：all， any， apply_along_axis， argmax， argmin， argsort， average， bincount， ceil， clip， conj， corrcoef， cov， cross， cumprod， cumsum， diff， dot， floor， inner， inv， lexsort， max， maximum， mean， median， min， minimum， nonzero， outer， prod， re， round， sort， std， sum， trace， transpose， var， vdot， vectorize， where


In [14]:
A = np.arange(6).reshape(2,3)
B = np.array([2.,-1.,4.])
print(A)
print(B)
print(np.exp(A))
print(np.sqrt(B))   # the negative entry gives nan
print(np.add(A, B))   # broadcasting: B is added to every row of A

[[0 1 2]
 [3 4 5]]
[ 2. -1.  4.]
[[  1.           2.71828183   7.3890561 ]
 [ 20.08553692  54.59815003 148.4131591 ]]
[1.41421356        nan 2.        ]
[[2. 0. 6.]
 [5. 3. 9.]]


### 索引/切片/迭代

> 另见：Indexing, Indexing (reference), newaxis, ndenumerate, indices


In [15]:
a = np.arange(10)**3
print(a)
print(a[3])
print(a[3:5])
a[:6:2] = 100   # assigns to indices 0, 2 and 4
print(a)
print(a[::-1])   # reversed order

[  0   1   8  27  64 125 216 343 512 729]
27
[27 64]
[100   1 100  27 100 125 216 343 512 729]
[729 512 343 216 125 100  27 100   1 100]


In [16]:
def func(x, y):
    """Combine the two arguments as ``10*x + y``."""
    tens = 10 * x
    return tens + y

a = np.fromfunction(func, (3,4), dtype=int)   # element [i, j] holds func(i, j)
print(a)
print(a[1,2])
print(a[2,1:])
print(a[-1])   # a[-1] == a[-1,:]
# Note: missing indices are treated as complete slices

[[ 0  1  2  3]
 [10 11 12 13]
 [20 21 22 23]]
12
[21 22 23]
[20 21 22 23]


In [17]:
a = np.array([[[  0,  1,  2],               # a 3D array (two stacked 2D arrays)
               [ 10, 12, 13]],
              [[100,101,102],
               [110,112,113]]])
print(a)
print(a.shape)
print(a[0,...])   # a[0,...] == a[0,:,:]
print(a[...,2])   # a[...,2] == a[:,:,2]
# Note: the omitted indices (...) are treated as complete slices

[[[  0   1   2]
  [ 10  12  13]]

 [[100 101 102]
  [110 112 113]]]
(2, 2, 3)
[[ 0  1  2]
 [10 12 13]]
[[  2  13]
 [102 113]]


In [18]:
a = np.arange(3)
print(a)
print(a[:, np.newaxis])   # add a new axis: the 1-D array becomes a single column

[0 1 2]
[[0]
 [1]
 [2]]


In [19]:
A = np.arange(6).reshape(2,3)
for item in A:   # iterating over a multidimensional array goes along the first axis
    print(item)

[0 1 2]
[3 4 5]


## 形状操纵

### 改变形状

> 另见：ndarray.shape， reshape， resize， ravel


In [20]:
# Flattening a multidimensional array
a = np.arange(12).reshape((3,4))
print(a.flat)   # an iterator over the elements
print(a.flatten())  # a 1-D ndarray (not a Python list)
print(a.ravel())    # a 1-D ndarray (not a Python list)
print(a.reshape((12)))  # a 1-D ndarray; note that (12) is just the integer 12

<numpy.flatiter object at 0x000001F890448AE0>
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [21]:
# None of the following operations changes the original array
a = np.arange(12).reshape((3,4))
print(a)
print(a.ravel())
print(a.reshape((2,6)))
print(a.T)   # transpose

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [22]:
# The following operation changes the original array itself
a = np.arange(12).reshape((3,4))
print(a)
a.resize((2,6))
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]


In [23]:
# ndarray.reshape()
a = np.arange(12).reshape((3,4))
print(a)
print(a.reshape((12)))
print(a.reshape((1,12)))    # a.reshape((12)) is 1-D, a.reshape((1,12)) is 2-D with one row
print(a.reshape((1,-1)))    # an axis given as -1 has its size computed automatically
print(a.reshape((-1,12)))
print(a.reshape((-1,2,3)))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1  2  3  4  5  6  7  8  9 10 11]]
[[ 0  1  2  3  4  5  6  7  8  9 10 11]]
[[ 0  1  2  3  4  5  6  7  8  9 10 11]]
[[[ 0  1  2]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 9 10 11]]]


In [24]:
a = np.arange(12).reshape((3,4))
print(a)
print(a.reshape((2,6), order='C'))
print(a.reshape((2,6), order='F'))
# The order argument selects the index order:
# the default 'C' is C-like, the rightmost axis changes fastest
# 'F' is Fortran-like, the leftmost axis changes fastest

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
[[ 0  8  5  2 10  7]
 [ 4  1  9  6  3 11]]


### 数组堆叠

> 另见：hstack, vstack, column_stack, concatenate, c_, r_


In [26]:
# np.vstack() and np.hstack() are suited to 1-D and 2-D arrays
a = np.arange(6).reshape((2,3))
b = np.floor(10*np.random.random((2,3)))
print(a)
print(b)
print(np.vstack((a,b)))   # vertical stack (along the first axis; sizes along the second axis must match)
print(np.hstack((a,b)))   # horizontal stack (along the second axis; sizes along the first axis must match)

[[0 1 2]
 [3 4 5]]
[[4. 7. 1.]
 [6. 1. 9.]]
[[0. 1. 2.]
 [3. 4. 5.]
 [4. 7. 1.]
 [6. 1. 9.]]
[[0. 1. 2. 4. 7. 1.]
 [3. 4. 5. 6. 1. 9.]]


In [27]:
a = np.arange(6).reshape((2,3))
b = np.floor(10*np.random.random((6))).reshape((2,3))
print(a)
print(b)
print(np.column_stack((a,b)))   # stacking 2-D arrays: sizes along the first axis must match

[[0 1 2]
 [3 4 5]]
[[5. 4. 2.]
 [7. 4. 5.]]
[[0. 1. 2. 5. 4. 2.]
 [3. 4. 5. 7. 4. 5.]]


In [37]:
# NOTE(review): np.row_stack is reportedly deprecated in NumPy 2.0 in favour of np.vstack — confirm before upgrading
a = np.arange(6).reshape((2,3))
b = np.floor(10*np.random.random((2,3)))
print(a)
print(b)
print(np.row_stack((a,b)))   # stacking 2-D arrays: sizes along the second axis must match
print(np.vstack((a,b)))   # stacking 2-D arrays: sizes along the second axis must match

[[0 1 2]
 [3 4 5]]
[[6. 0. 6.]
 [6. 2. 1.]]
[[0. 1. 2.]
 [3. 4. 5.]
 [6. 0. 6.]
 [6. 2. 1.]]
[[0. 1. 2.]
 [3. 4. 5.]
 [6. 0. 6.]
 [6. 2. 1.]]


In [38]:
print(np.column_stack is np.hstack)   # the two behave differently for 1-D input, see the next example
print(np.row_stack is np.vstack)   # checks whether row_stack is the very same function object as vstack

False
True


In [39]:
a = np.array([4.,2.])
b = np.array([3.,8.])
print(a)
print(b)
print(np.column_stack((a,b)))   # stacking 1-D arrays as columns: their sizes must match
print(np.hstack((a, b)))   # for 1-D arrays np.column_stack != np.hstack (hstack joins them end to end)
print(np.hstack((a[:,np.newaxis], b[:,np.newaxis])))   # but np.newaxis lets hstack produce the same result


[4. 2.]
[3. 8.]
[[4. 3.]
 [2. 8.]]
[4. 2. 3. 8.]
[[4. 3.]
 [2. 8.]]


In [42]:
print(np.r_[1:4,0,np.array([3,2,1])])   # the slice 1:4 is expanded to 1, 2, 3
# By default behaves similarly to np.vstack(), but an optional argument can select the axis to join along
print(np.c_[np.array([1,2,3]), np.array([4,5,6])])
# By default behaves similarly to np.hstack(), but an optional argument can select the axis to join along

[1 2 3 0 3 2 1]
[[1 4]
 [2 5]
 [3 6]]


In [43]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
print(np.concatenate((a, b), axis=0))   # b is appended as a new row
print(np.concatenate((a, b.T), axis=1))   # b.T is appended as a new column
# np.concatenate() lets the axis argument select the axis to join along

[[1 2]
 [3 4]
 [5 6]]
[[1 2 5]
 [3 4 6]]


### 数组拆分

> 另见：hsplit, vsplit, array_split


In [46]:
a = np.floor(10*np.random.random((2,12)))
print(a)
for e in np.hsplit(a,3):   # split into 3 equal parts
    print(e)
for e in np.hsplit(a,(1,5)):   # split at the given positions (before columns 1 and 5)
    print(e)

[[5. 0. 8. 0. 6. 2. 7. 9. 2. 5. 5. 5.]
 [2. 9. 4. 8. 6. 2. 8. 3. 8. 5. 8. 6.]]
[[5. 0. 8. 0.]
 [2. 9. 4. 8.]]
[[6. 2. 7. 9.]
 [6. 2. 8. 3.]]
[[2. 5. 5. 5.]
 [8. 5. 8. 6.]]
[[5.]
 [2.]]
[[0. 8. 0. 6.]
 [9. 4. 8. 6.]]
[[2. 7. 9. 2. 5. 5. 5.]
 [2. 8. 3. 8. 5. 8. 6.]]


## 拷贝与视图

### 完全不拷贝


In [60]:
# No copy: plain assignment gives a second name for the same np.ndarray object
a = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]])
b = a
print(b is a)

True


In [49]:
def func(x):
    """Return the argument unchanged."""
    return x

a = np.floor(10*np.random.random((2,3)))
print(id(a))
print(id(func(a)))   # same id: the array is passed by reference, the call makes no copy

2167128908144
2167128908144


### 视图或浅拷贝


In [67]:
# Shallow copy (view): shares the same data, but is a different np.ndarray object
a = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]])
b = a.view()
print(b is a)
print(b.base is a)   # the view's base is the original array
print(b.flags.owndata)   # the view does not own its data

False
True
False


In [68]:
c = b.reshape((2,6))
print(a.shape)   # the shape of a is unchanged
c[1, 3] = 999
print(a)   # the data of a has changed

(3, 4)
[[  0   1   2   3]
 [  4   5   6   7]
 [  8 999  10  11]]


In [69]:
# Slicing an array returns a view (shallow copy)
s = a[:, 1:3]
print(s.base is a)
s[:] = 11   # assigning through the view changes a
print(a)

True
[[ 0 11 11  3]
 [ 4 11 11  7]
 [ 8 11 11 11]]


### 深拷贝


In [72]:
# Deep copy: makes a complete copy of the array object and its data
a = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]])
b = a.copy()
print(b is a)
print(b.base is a)   # the copy is not based on a
print(b.flags.owndata)   # the copy owns its data

False
False
True


In [73]:
b[2, 3] = 999
print(a)   # the data of a is unchanged

c = a[:, 1:3].copy()
del a   # removes the name a; c was made with copy(), so it does not depend on a
print(c)   # c still exists

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 1  2]
 [ 5  6]
 [ 9 10]]
