In [1]:
import numpy as np

In [2]:
# 随机种子，固定随机数的输出结果，确保结果可复现
np.random.seed(0)

## 创建数组

In [3]:
np.array([1, 2, 3])

array([1, 2, 3])

`np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)`

In [4]:
np.linspace(1, 50)

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39.,
       40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.])

In [5]:
np.linspace(0, 6, 5)

array([0. , 1.5, 3. , 4.5, 6. ])

In [6]:
# [start, stop)
np.linspace(0, 6, 5, endpoint=False)

array([0. , 1.2, 2.4, 3.6, 4.8])

In [7]:
# 返回间距
np.linspace(0, 6, 5, retstep=True)

(array([0. , 1.5, 3. , 4.5, 6. ]), 1.5)

`np.arange([start=0, ]stop, [step=1, ]dtype=None)`

In [8]:
np.arange(3.0)

array([0., 1., 2.])

In [9]:
np.arange(3, 6)

array([3, 4, 5])

In [10]:
np.arange(0, 6, 2)

array([0, 2, 4])

`np.zeros(shape, dtype=np.float64)`

In [11]:
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

`np.zeros_like(a, dtype=None)`

In [12]:
a = np.arange(3)
np.zeros_like(a)

array([0, 0, 0])

In [13]:
a = np.arange(3.0)
np.zeros_like(a)

array([0., 0., 0.])

`np.ones(shape, dtype=np.float64)`

In [14]:
np.ones((2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

`np.ones_like(a, dtype=None)`

In [15]:
a = np.arange(3)
np.ones_like(a)

array([1, 1, 1])

In [16]:
a = np.arange(3.0)
np.ones_like(a)

array([1., 1., 1.])

`np.empty(shape, dtype=np.float64)`

In [17]:
np.empty((2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

`np.empty_like(prototype, dtype=None)`

In [18]:
a = np.arange(3)
np.empty_like(a)

array([                  0, 4607182418800017408, 4611686018427387904])

In [19]:
a = np.arange(3.0)
np.empty_like(a)

array([0.e+000, 5.e-324, 1.e-323])

`np.full(shape, fill_value, dtype=None)`

In [20]:
np.full((2, 3), 2)

array([[2, 2, 2],
       [2, 2, 2]])

In [21]:
np.full((2, 3), [1, 2, 3])

array([[1, 2, 3],
       [1, 2, 3]])

`np.full_like(a, fill_value, dtype=None)`

In [22]:
a = np.arange(3)
np.full_like(a, 3)

array([3, 3, 3])

In [23]:
a = np.arange(3)
np.full_like(a, 0.5)

array([0, 0, 0])

In [24]:
a = np.arange(3.0)
np.full_like(a, 0.5)

array([0.5, 0.5, 0.5])

`np.eye(N, M=None, k=0, dtype=np.float64)`

In [25]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [26]:
np.eye(3, 2)

array([[1., 0.],
       [0., 1.],
       [0., 0.]])

In [27]:
np.eye(3, k=1)

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

### 随机值填充

- `np.random.rand(d0, d1, ..., dn)`：返回服从[0,1)均匀分布的浮点数

In [28]:
np.random.rand(3)

array([0.5488135 , 0.71518937, 0.60276338])

- `np.random.uniform(low=0.0, high=1.0, size=None)`：返回服从[low,high)均匀分布的浮点数

In [29]:
low, high, size = 5, 15, (3,)
# low + (high - low) * np.random.rand(*size)
# or
np.random.uniform(low, high, size)

array([10.44883183,  9.23654799, 11.45894113])

- `np.random.randint(low, high=None, size=None, dtype=np.int64)`：返回服从[low,high)均匀分布的整数

In [30]:
# [0, low)
np.random.randint(3, size=3)

array([0, 2, 0])

In [31]:
# [low, high)
np.random.randint(3, 5, size=(2, 3))

array([[3, 3, 3],
       [4, 3, 4]])

In [32]:
# [low1, high), [low2, high) ...
np.random.randint([2, 4, 8], 9)

array([8, 4, 8])

In [33]:
# [low, high1), [low, high2) ...
np.random.randint(1, [2, 4, 8])

array([1, 2, 6])

In [34]:
# [low1, high1), [low2, high2) ...
np.random.randint([1, 3, 5], [2, 4, 8])

array([1, 3, 6])

In [35]:
# [low1, high1), [low2, high1) ...
# [low1, high2), [low2, high2) ...
np.random.randint([1, 3, 5], [[6], [8]])

array([[1, 4, 5],
       [5, 6, 5]])

- `np.random.randn(d0, d1, ..., dn)`：返回服从N(0,1)标准正态分布的浮点数

In [36]:
np.random.randn(3)

array([ 0.48431215,  0.57914048, -0.18158257])

- `np.random.normal(loc=0.0, scale=1.0, size=None)`：返回服从N(loc,scale²)正态分布的浮点数（`scale`为标准差）

In [37]:
mu, sigma, size = 1, 2, (3,)
# mu + sigma * np.random.randn(*size)
# or
np.random.normal(mu, sigma, size)

array([3.82040926, 0.25105662, 1.55039663])

- `np.random.choice(a, size=None, replace=True, p=None)`：从数组中随机选择

In [38]:
# np.random.randint(0, 5, 3)
# or
np.random.choice(5, 3)

array([0, 1, 1])

In [39]:
np.random.choice(list('abcde'), size=3)

array(['b', 'a', 'c'], dtype='<U1')

In [40]:
# With replacement: replace=True (the default, spelled out here) lets the same element be drawn more than once
np.random.choice(list('abcde'), size=3, replace=True)

array(['e', 'd', 'd'], dtype='<U1')

In [41]:
# Random permutation: drawing all 5 elements without replacement returns them in shuffled order
# np.random.permutation(list('abcde'))
# or
np.random.choice(list('abcde'), size=5, replace=False)

array(['b', 'e', 'a', 'd', 'c'], dtype='<U1')

In [42]:
# 指定权重
np.random.choice(list('abcde'), size=(2, 3), p=[0.1, 0.2, 0.4, 0.2, 0.1])

array([['b', 'c', 'b'],
       ['b', 'c', 'e']], dtype='<U1')

- `np.random.permutation(x)`：随机排列（返回打乱顺序后的副本；`x`为整数时打乱`np.arange(x)`）

In [43]:
np.random.permutation(3)

array([1, 0, 2])

In [44]:
np.random.permutation([1, 3, 5])

array([5, 3, 1])

## 数组操作

In [45]:
a = np.arange(6).reshape(2, 3)
a

array([[0, 1, 2],
       [3, 4, 5]])

In [46]:
b = np.arange(3, 9).reshape(2, 3)
b

array([[3, 4, 5],
       [6, 7, 8]])

转置

In [47]:
a.T

array([[0, 3],
       [1, 4],
       [2, 5]])

In [48]:
np.transpose(a)

array([[0, 3],
       [1, 4],
       [2, 5]])

### 合并

上下合并（行合并）：除`axis=0`以外，其他维度需相等。

In [49]:
np.r_[a, b]

array([[0, 1, 2],
       [3, 4, 5],
       [3, 4, 5],
       [6, 7, 8]])

`numpy.row_stack(tup)`（`np.vstack`的别名，自NumPy 2.0起已弃用，新代码请直接使用`np.vstack`）

In [50]:
np.row_stack((a, b))

array([[0, 1, 2],
       [3, 4, 5],
       [3, 4, 5],
       [6, 7, 8]])

左右合并（列合并）：除`axis=1`以外，其他维度需相等。

In [51]:
np.c_[a, b]

array([[0, 1, 2, 3, 4, 5],
       [3, 4, 5, 6, 7, 8]])

`numpy.column_stack(tup)`

In [52]:
np.column_stack((a, b))

array([[0, 1, 2, 3, 4, 5],
       [3, 4, 5, 6, 7, 8]])

### 维度变换

In [53]:
a = np.arange(8).reshape(2, 4)
a

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

- `ndarray.reshape(shape, order='C')`
- `np.reshape(a, newshape, order='C')`

In [54]:
# 按照行读取和填充
a.reshape((4, 2), order='C')

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [55]:
# 按照列读取和填充
a.reshape((4, 2), order='F')

array([[0, 2],
       [4, 6],
       [1, 3],
       [5, 7]])

In [56]:
# 自动计算某个维度的大小
a.reshape((4, -1))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [57]:
# 自动计算某个维度的大小
a.reshape((-1, 8))

array([[0, 1, 2, 3, 4, 5, 6, 7]])

In [58]:
# 降维，转为一维数组
a.reshape(-1)

array([0, 1, 2, 3, 4, 5, 6, 7])

### 切片与索引

In [59]:
a = np.arange(9).reshape(3, 3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [60]:
# 切片索引、列表索引
a[:-1, [0, 2]]

array([[0, 2],
       [3, 5]])

In [61]:
# 列表索引、布尔索引
a[np.ix_([1, 2], [True, False, True])]

array([[3, 5],
       [6, 8]])

In [62]:
# 一维数组布尔索引
a = a.reshape(-1)
a[a % 2 == 0]

array([0, 2, 4, 6, 8])

### 常用函数

`np.where(condition, [x, y])`：按条件筛选元素

当`condition`、`x`、`y`均为一维数组时，等价于

`[xv if c else yv for c, xv, yv in zip(condition, x, y)]`

In [63]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [64]:
# 一维数组
np.where(a % 2 == 0, a, 2 * a)

array([ 0,  2,  2,  6,  4, 10,  6, 14,  8, 18])

In [65]:
# 多维数组
np.where([[True, False],
          [False, True]],
         [[1, 2],
          [3, 4]],
         [[9, 8],
          [7, 6]])

array([[1, 8],
       [7, 4]])

In [66]:
x, y = np.ogrid[:3, :4]
x, y

(array([[0],
        [1],
        [2]]),
 array([[0, 1, 2, 3]]))

In [67]:
# 广播
np.where(x < y, x, 10 + y)

array([[10,  0,  0,  0],
       [10, 11,  1,  1],
       [10, 11, 12,  2]])

In [68]:
# 广播
a = np.array([[0, 1, 2],
              [0, 2, 4],
              [0, 3, 6]])
np.where(a < 4, a, -1)

array([[ 0,  1,  2],
       [ 0,  2, -1],
       [ 0,  3, -1]])

- `np.nonzero(a)`：返回非零值的索引
- `ndarray.nonzero()`

In [69]:
x = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]])
x

array([[3, 0, 0],
       [0, 4, 0],
       [5, 6, 0]])

In [70]:
np.nonzero(x)

(array([0, 1, 2, 2]), array([0, 1, 0, 1]))

In [71]:
np.transpose(np.nonzero(x))

array([[0, 0],
       [1, 1],
       [2, 0],
       [2, 1]])

In [72]:
x[np.nonzero(x)]

array([3, 4, 5, 6])

- `np.argmax(a, axis=None)`：返回最大值的索引
- `np.argmin(a, axis=None)`：返回最小值的索引

In [73]:
a = np.array([[2, 1, 3],
              [5, 0, 4]])
a

array([[2, 1, 3],
       [5, 0, 4]])

In [74]:
np.argmax(a)

3

In [75]:
np.argmin(a)

4

In [76]:
# 按列
np.argmax(a, axis=0)

array([1, 0, 1])

In [77]:
# 按行
np.argmax(a, axis=1)

array([2, 0])

In [78]:
# 按列
np.argmin(a, axis=0)

array([0, 1, 0])

In [79]:
# 按行
np.argmin(a, axis=1)

array([1, 1])

- `np.any(a, axis=None)`：存在非零
- `np.all(a, axis=None)`：全部非零

In [80]:
a = [[True, False],
     [True, True]]
np.any(a), np.all(a)

(True, False)

In [81]:
# 按列寻找
np.any(a, axis=0), np.all(a, axis=0)

(array([ True,  True]), array([ True, False]))

In [82]:
# 按行寻找
np.any(a, axis=1), np.all(a, axis=1)

(array([ True,  True]), array([False,  True]))

累加

- `np.cumsum(a, axis=None, dtype=None)`
- `ndarray.cumsum(axis=None, dtype=None)`

In [83]:
a = np.arange(6).reshape(2, 3)
a

array([[0, 1, 2],
       [3, 4, 5]])

In [84]:
np.cumsum(a)

array([ 0,  1,  3,  6, 10, 15])

In [85]:
# 按列
np.cumsum(a, axis=0)

array([[0, 1, 2],
       [3, 5, 7]])

In [86]:
# 按行
np.cumsum(a, axis=1)

array([[ 0,  1,  3],
       [ 3,  7, 12]])

累乘

- `np.cumprod(a, axis=None, dtype=None)`
- `ndarray.cumprod(axis=None, dtype=None)`

In [87]:
a = np.arange(1, 7).reshape(2, 3)
a

array([[1, 2, 3],
       [4, 5, 6]])

In [88]:
np.cumprod(a)

array([  1,   2,   6,  24, 120, 720])

In [89]:
# 按列
np.cumprod(a, axis=0)

array([[ 1,  2,  3],
       [ 4, 10, 18]])

In [90]:
# 按行
np.cumprod(a, axis=1)

array([[  1,   2,   6],
       [  4,  20, 120]])

差分

- `np.diff(a, n=1, axis=-1)`

In [91]:
x = np.array([[3, 1, 5, 2, 7],
              [1, 6, 3, 8, 4]])
x

array([[3, 1, 5, 2, 7],
       [1, 6, 3, 8, 4]])

In [92]:
# 一阶差分
np.diff(x)

array([[-2,  4, -3,  5],
       [ 5, -3,  5, -4]])

In [93]:
# 二阶差分
np.diff(x, n=2)

array([[ 6, -7,  8],
       [-8,  8, -9]])

In [94]:
# 按列
np.diff(x, axis=0)

array([[-2,  5, -2,  6, -3]])

In [95]:
# 时间戳
x = np.arange('2021-12-23', '2021-12-27', dtype=np.datetime64)
x

array(['2021-12-23', '2021-12-24', '2021-12-25', '2021-12-26'],
      dtype='datetime64[D]')

In [96]:
np.diff(x)

array([1, 1, 1], dtype='timedelta64[D]')

#### 统计函数

最大值

- `ndarray.max(axis=None)`
- `np.amax(a, axis=None)`
- `np.nanmax(a, axis=None)`

最小值

- `ndarray.min(axis=None)`
- `np.amin(a, axis=None)`
- `np.nanmin(a, axis=None)`

平均值

- `ndarray.mean(axis=None, dtype=None)`
- `np.mean(a, axis=None, dtype=None)`
- `np.nanmean(a, axis=None, dtype=None)`

中位数

- `numpy.median(a, axis=None)`
- `numpy.nanmedian(a, axis=None)`

标准差

- `ndarray.std(axis=None, dtype=None)`
- `np.std(a, axis=None, dtype=None)`
- `np.nanstd(a, axis=None, dtype=None)`

方差

- `ndarray.var(axis=None, dtype=None)`
- `np.var(a, axis=None, dtype=None)`
- `np.nanvar(a, axis=None, dtype=None)`

求和

- `ndarray.sum(axis=None, dtype=None)`
- `np.sum(a, axis=None, dtype=None)`
- `np.nansum(a, axis=None, dtype=None)`

分位数

- `np.quantile(a, q, axis=None)`
- `np.nanquantile(a, q, axis=None)`

相关系数矩阵

- `np.corrcoef(x, y=None, dtype=None)`

协方差矩阵

- `np.cov(m, y=None, dtype=None)`

In [97]:
a = np.array([[10, 7, 4],
              [3, 2, 1]])
a

array([[10,  7,  4],
       [ 3,  2,  1]])

In [98]:
# np.median(a)
np.quantile(a, 0.5)

3.5

In [99]:
# 按列
np.quantile(a, 0.5, axis=0)

array([6.5, 4.5, 2.5])

In [100]:
# 按行
np.quantile(a, 0.5, axis=1)

array([7., 2.])

### 广播机制

#### 标量和数组

In [101]:
a = np.arange(1, 5).reshape(2, 2)
a

array([[1, 2],
       [3, 4]])

In [102]:
a + 1

array([[2, 3],
       [4, 5]])

In [103]:
a * 2

array([[2, 4],
       [6, 8]])

In [104]:
1.0 / a

array([[1.        , 0.5       ],
       [0.33333333, 0.25      ]])

#### 维度不同的数组

In [105]:
a = np.arange(6).reshape(3, 2)
a

array([[0, 1],
       [2, 3],
       [4, 5]])

In [106]:
# (3, 2) * (1, 2), or equivalently (3, 2) * (2,)
# the (1, 2) / (2,) operand is broadcast to (3, 2)
# a * np.array([[2, 3]])
a * np.array([2, 3])

array([[ 0,  3],
       [ 4,  9],
       [ 8, 15]])

In [107]:
# (3, 2) * (3, 1)
# the (3, 1) column is broadcast to (3, 2)
a * np.array([[2],
              [3],
              [4]])

array([[ 0,  2],
       [ 6,  9],
       [16, 20]])

In [108]:
# (3, 2) * (1, 1), or equivalently (3, 2) * scalar
# the (1, 1) array / scalar is broadcast to (3, 2)
# a * np.array([[2]])
a * 2

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

向量内积

- `ndarray.dot(b)`
- `np.dot(a, b)`

向量/矩阵范数

- `np.linalg.norm(x, ord=None, axis=None, keepdims=False)`

矩阵乘法

- `x1 @ x2`
- `np.matmul(x1, x2)`

In [109]:
# 1.1
a = np.arange(9).reshape(3, 3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [110]:
b = np.arange(9)[::-1].reshape(3, 3, order='F')
b

array([[8, 5, 2],
       [7, 4, 1],
       [6, 3, 0]])

In [111]:
a @ b

array([[ 19,  10,   1],
       [ 82,  46,  10],
       [145,  82,  19]])

In [112]:
np.array([[i.dot(j) for j in b.T] for i in a])

array([[ 19,  10,   1],
       [ 82,  46,  10],
       [145,  82,  19]])

In [113]:
# 1.2
a = np.arange(1, 10).reshape(3, 3)
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [114]:
# Multiply every row of `a` by the sum of the reciprocals of that row.
# keepdims=True keeps the row sums as a column vector, so they broadcast
# against `a` for any number of rows instead of a hard-coded 3.
np.sum(1 / a, axis=1, keepdims=True) * a

array([[1.83333333, 3.66666667, 5.5       ],
       [2.46666667, 3.08333333, 3.7       ],
       [2.65277778, 3.03174603, 3.41071429]])

In [115]:
# 1.3
np.random.seed(0)
a = np.random.randint(10, 20, (8, 5))
a

array([[15, 10, 13, 13, 17],
       [19, 13, 15, 12, 14],
       [17, 16, 18, 18, 11],
       [16, 17, 17, 18, 11],
       [15, 19, 18, 19, 14],
       [13, 10, 13, 15, 10],
       [12, 13, 18, 11, 13],
       [13, 13, 17, 10, 11]])

In [116]:
# Row totals (kept as a column) times column totals (kept as a row),
# divided by the grand total of `a`.
b = a.sum(axis=1, keepdims=True) * a.sum(axis=0, keepdims=True) / a.sum()
b

array([[14.14211438, 13.08145581, 15.20277296, 13.67071057, 11.90294627],
       [15.18197574, 14.04332756, 16.32062392, 14.67590988, 12.77816291],
       [16.63778163, 15.38994801, 17.88561525, 16.08318891, 14.0034662 ],
       [16.42980936, 15.19757366, 17.66204506, 15.88214905, 13.82842288],
       [17.67764298, 16.35181976, 19.0034662 , 17.08838821, 14.87868284],
       [12.68630849, 11.73483536, 13.63778163, 12.26343154, 10.67764298],
       [13.93414211, 12.88908146, 14.97920277, 13.46967071, 11.72790295],
       [13.3102253 , 12.31195841, 14.3084922 , 12.86655113, 11.20277296]])

In [117]:
np.sum((a - b) ** 2 / b)

11.842696601945802

In [118]:
# 1.4
import time

# Re-seed so the three draws below are reproducible; they are drawn in the order B, U, Z.
np.random.seed(0)
m, n, p = 100, 80, 50
B = np.random.randint(0, 2, (m, p))  # m x p array of 0/1 values
U = np.random.randint(0, 2, (p, n))  # p x n array of 0/1 values
Z = np.random.randint(0, 2, (m, n))  # m x n array of 0/1 values

In [119]:
def solution(B=B, U=U, Z=Z):
    """Return the Z-weighted sum of squared distances between rows of B and columns of U.

    For every pair (i, j), the sum of squared differences between row
    ``B[i]`` and column ``U[:, j]`` is multiplied by ``Z[i][j]``. The
    products are collected in a list and summed at the end.

    Args:
        B: array with one row per index ``i``.
        U: 2-D array with one column per index ``j``; column length must
            match the row length of ``B``.
        Z: weights indexed as ``Z[i][j]``.

    Returns:
        The accumulated weighted sum.
    """
    # Take the loop bounds from the arguments rather than from the notebook
    # globals m and n, so the function also works for other array sizes.
    n_rows, n_cols = len(B), U.shape[1]
    L_res = []
    for i in range(n_rows):
        for j in range(n_cols):
            norm_value = ((B[i] - U[:, j]) ** 2).sum()
            L_res.append(norm_value * Z[i][j])
    return sum(L_res)


# Time a single call with time.perf_counter(), the monotonic high-resolution
# clock intended for measuring short durations (time.time() follows the
# wall clock and can jump if the system time is adjusted).
st = time.perf_counter()
ans = solution(B, U, Z)
ed = time.perf_counter()
ans, ed - st

(100566, 0.034029245376586914)

In [120]:
def solution(B=B, U=U, Z=Z):
    """Return the Z-weighted sum of squared distances between rows of B and columns of U.

    Running-total variant: for every pair (i, j), the sum of squared
    differences between row ``B[i]`` and column ``U[:, j]`` is multiplied
    by ``Z[i][j]`` and added straight to the accumulator, without building
    an intermediate list.

    Args:
        B: array with one row per index ``i``.
        U: 2-D array with one column per index ``j``; column length must
            match the row length of ``B``.
        Z: weights indexed as ``Z[i][j]``.

    Returns:
        The accumulated weighted sum.
    """
    # Take the loop bounds from the arguments rather than from the notebook
    # globals m and n, so the function also works for other array sizes.
    n_rows, n_cols = len(B), U.shape[1]
    ans = 0
    for i in range(n_rows):
        for j in range(n_cols):
            norm_value = ((B[i] - U[:, j]) ** 2).sum()
            ans += norm_value * Z[i][j]
    return ans


# Time a single call with time.perf_counter(), the monotonic high-resolution
# clock intended for measuring short durations (time.time() follows the
# wall clock and can jump if the system time is adjusted).
st = time.perf_counter()
ans = solution(B, U, Z)
ed = time.perf_counter()
ans, ed - st

(100566, 0.032205820083618164)

In [121]:
# 1.5
a = np.array([1, 2, 5, 6, 7])
a

array([1, 2, 5, 6, 7])

In [122]:
np.diff(a) != 1

array([False,  True, False, False])

In [123]:
np.r_[1, np.diff(a) != 1, 1]

array([1, 0, 1, 0, 0, 1])

In [124]:
np.nonzero(np.r_[1, np.diff(a) != 1, 1])

(array([0, 2, 5]),)

In [125]:
np.diff(np.nonzero(np.r_[1, np.diff(a) != 1, 1]))

array([[2, 3]])

In [126]:
np.max(np.diff(np.nonzero(np.r_[1, np.diff(a) != 1, 1])))

3