## 一、Numpy数据基础

### python list 支持多类型元素，以及对其索引操纵

In [2]:
list = [i for i in range(10)]
list

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [4]:
list[5] = "np"
list

[0, 1, 2, 3, 4, 'np', 6, 7, 8, 9]

### python array 支持指定类型的元素，以及对其索引操作，但不支持矩阵和向量的复杂操作

In [5]:
import array
arr = array.array("i", [i for i in range(10)])
arr

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
arr[5] = "np"
arr

TypeError: an integer is required (got type str)

### numpy array 支持隐式单类型的元素，操作类似于python list或array，支持复杂数学计算

In [7]:
import numpy as np

In [8]:
np.__version__

'1.18.1'

In [9]:
nparr = np.array([i for i in range(10)])
nparr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
nparr.dtype

dtype('int32')

In [11]:
nparr[5] = "np"
nparr

ValueError: invalid literal for int() with base 10: 'np'

In [13]:
nparr[5] = "5.0"
nparr

ValueError: invalid literal for int() with base 10: '5.0'

In [14]:
nparr_2 = np.array([1, 2, 3.0])
nparr_2

array([1., 2., 3.])

In [15]:
nparr_2.dtype

dtype('float64')

### 其他创建numpy.array的方法

In [20]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [21]:
np.zeros(10, dtype = int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [22]:
np.zeros((3, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [23]:
np.zeros(shape = (3, 5), dtype = int)

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [24]:
np.ones(shape = (2, 5), dtype = int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [25]:
np.twos((2,3))

AttributeError: module 'numpy' has no attribute 'twos'

In [26]:
np.full((2,3), 5)

array([[5, 5, 5],
       [5, 5, 5]])

In [27]:
np.full(shape=(3,3), fill_value=5)

array([[5, 5, 5],
       [5, 5, 5],
       [5, 5, 5]])

### numpy arange 等步长可以为浮点数（指定步长）

In [30]:
[i for i in range(0, 20, 2)]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [31]:
[i for i in range(0, 2, 0.2)]

TypeError: 'float' object cannot be interpreted as an integer

In [29]:
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [32]:
np.arange(0, 2, 0.2)

array([0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8])

In [34]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### numpy linspace 等步长（指定元素个数，包括end元素）

In [37]:
np.linspace(0, 20, 10)

array([ 0.        ,  2.22222222,  4.44444444,  6.66666667,  8.88888889,
       11.11111111, 13.33333333, 15.55555556, 17.77777778, 20.        ])

### numpy random 随机数（均匀随机、正态分布随机）

In [38]:
np.random.randint(0, 10)

9

In [39]:
np.random.randint(0, 10, 5)

array([6, 7, 3, 2, 5])

In [40]:
np.random.randint(0, 10, (2, 5))

array([[0, 0, 9, 3, 7],
       [6, 1, 3, 1, 0]])

In [41]:
np.random.randint(0, 10, size=5)

array([8, 0, 1, 0, 7])

In [42]:
np.random.seed(120)

In [43]:
np.random.randint(0, 10, size=(2, 6))

array([[7, 0, 8, 4, 1, 7],
       [1, 3, 8, 9, 4, 9]])

In [44]:
np.random.seed(120)
np.random.randint(0, 10, size=(2, 6))

array([[7, 0, 8, 4, 1, 7],
       [1, 3, 8, 9, 4, 9]])

In [45]:
np.random.random()

0.49566897200316296

In [46]:
np.random.random(10)

array([0.92660078, 0.46330812, 0.7320508 , 0.01618143, 0.45486764,
       0.40224437, 0.46035949, 0.34912461, 0.97318581, 0.06697183])

In [47]:
np.random.random(size=(3,5))

array([[0.10348286, 0.57613618, 0.17199133, 0.66366107, 0.91227517],
       [0.64580302, 0.65246499, 0.42369047, 0.76014779, 0.97187248],
       [0.65072503, 0.20215034, 0.84232856, 0.73046586, 0.97803165]])

In [48]:
np.random.normal()

-0.42872938858589205

In [49]:
np.random.normal(10, 100)

39.328192776448034

In [50]:
np.random.normal(0, 1, (3,5))

array([[ 4.28845625e-04,  5.77985354e-01,  5.26808892e-01,
        -4.75049026e-01,  1.39823669e+00],
       [ 4.13005690e-01, -4.85554009e-01, -2.50209107e-01,
         1.28110766e-01,  2.85424512e-01],
       [-1.64716688e+00,  6.73767517e-01,  2.36747949e-01,
         5.55745624e-01,  2.59102733e+00]])

### 查询模块或方法文档说明

In [51]:
np.random.normal?

In [52]:
help(np.random.normal)

Help on built-in function normal:

normal(...) method of numpy.random.mtrand.RandomState instance
    normal(loc=0.0, scale=1.0, size=None)
    
    Draw random samples from a normal (Gaussian) distribution.
    
    The probability density function of the normal distribution, first
    derived by De Moivre and 200 years later by both Gauss and Laplace
    independently [2]_, is often called the bell curve because of
    its characteristic shape (see the example below).
    
    The normal distributions occurs often in nature.  For example, it
    describes the commonly occurring distribution of samples influenced
    by a large number of tiny, random disturbances, each with its own
    unique distribution [2]_.
    
    .. note::
        New code should use the ``normal`` method of a ``default_rng()``
        instance instead; see `random-quick-start`.
    
    Parameters
    ----------
    loc : float or array_like of floats
        Mean ("centre") of the distribution.
    scale : fl

## 二、Numpy array基本操作

In [53]:
x = np.arange(10)

In [54]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [55]:
X = np.arange(15).reshape(3, 5)
X

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

### numpy array基本属性

#### ndim：维度

In [56]:
x.ndim

1

In [68]:
X.ndim

2

#### shape：维度元素数量

In [58]:
x.shape

(10,)

In [69]:
X.shape

(3, 5)

#### size：元素总数

In [60]:
x.size

10

In [61]:
X.size

15

### numpy array 数据访问

#### 索引

In [63]:
x[0]

0

In [64]:
x[-1]

9

In [65]:
X[0][0]

0

In [66]:
X[(0, 0)]

0

In [67]:
X[2, 2]

12

#### 切片

In [71]:
x[0:5]

array([0, 1, 2, 3, 4])

In [72]:
x[:5]

array([0, 1, 2, 3, 4])

In [73]:
x[5:]

array([5, 6, 7, 8, 9])

In [74]:
x[::2]

array([0, 2, 4, 6, 8])

In [75]:
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [76]:
X[:2, :3]

array([[0, 1, 2],
       [5, 6, 7]])

In [77]:
X[:2][:3]

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [78]:
X[:2, ::2]

array([[0, 2, 4],
       [5, 7, 9]])

In [79]:
X[::-1, ::-1]

array([[14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5],
       [ 4,  3,  2,  1,  0]])

In [80]:
X[0, :]

array([0, 1, 2, 3, 4])

In [81]:
X[0, :].ndim

1

In [82]:
X[:, 0]

array([ 0,  5, 10])

In [83]:
X[:, 0].ndim

1

#### 复制（numpy array 切片为引用（性能考虑），python array切片为直接复制）

In [84]:
subX = X[:2, :3]
subX

array([[0, 1, 2],
       [5, 6, 7]])

In [85]:
subX[0, 0] = 100
subX

array([[100,   1,   2],
       [  5,   6,   7]])

In [86]:
X

array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])

In [87]:
X[0, 0] = 0
subX

array([[0, 1, 2],
       [5, 6, 7]])

In [88]:
subX = X[:2, :3].copy()
subX

array([[0, 1, 2],
       [5, 6, 7]])

In [89]:
subX[0, 0] = 100
X

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

#### 维度调整（不改变原数据结构）

In [90]:
x.shape

(10,)

In [91]:
x.reshape(2, 5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [92]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [93]:
x.reshape(1, 10)

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [94]:
A = x.reshape(10, -1)

In [95]:
A

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [97]:
x.reshape(2, -1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [98]:
x.reshape(3, -1)

ValueError: cannot reshape array of size 10 into shape (3,newaxis)

### 合并操作

In [99]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])

In [100]:
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [101]:
z = np.array([123, 456, 789])

In [102]:
np.concatenate([x, y, z])

array([  1,   2,   3,   3,   2,   1, 123, 456, 789])

In [103]:
A = np.array([[1, 2, 3],
             [4, 5, 6]])

In [104]:
np.concatenate([A, A])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [105]:
# axis指定拼接维度，默认axis=0
np.concatenate([A, A], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [106]:
np.concatenate([A, z])

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [109]:
# 不同维度的数据进行拼接需要进行转换（保存拼接结果需要重新赋值）
np.concatenate([A, z.reshape(1, -1)])

array([[  1,   2,   3],
       [  4,   5,   6],
       [123, 456, 789]])

In [112]:
# 智能拼接，即使数据维度不同（v：竖直方向，h：水平方向）非法数据不行
np.vstack([A, z])

array([[  1,   2,   3],
       [  4,   5,   6],
       [123, 456, 789]])

In [111]:
np.hstack([A, A])

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

### 分割操作

In [118]:
x = np.array([1, 2, 4, 2, 8, 6, 2, 9])
x

array([1, 2, 4, 2, 8, 6, 2, 9])

In [123]:
# 切分下标为数组
x1, x2, x3 = np.split(x, [2, 5])

In [120]:
x1

array([1, 2])

In [121]:
x2

array([4, 2, 8])

In [122]:
x3

array([6, 2, 9])

In [124]:
A = np.arange(16).reshape((4, 4))
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [125]:
A1, A2 = np.split(A, [2])

In [126]:
A1

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [127]:
A2

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [128]:
# axis指定分割维度，默认axis=0
A1, A2 = np.split(A, [2], axis=1)

In [129]:
A1

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [130]:
A2

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

In [133]:
upper, lower = np.vsplit(A, [2])

In [134]:
upper

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [135]:
lower

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [136]:
left, right = np.hsplit(A, [2])

In [137]:
left

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [138]:
right

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

In [139]:
X, y = np.hsplit(A, [-1])

In [140]:
X

array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14]])

In [141]:
y

array([[ 3],
       [ 7],
       [11],
       [15]])

In [142]:
y[:, 0]

array([ 3,  7, 11, 15])

## 三、Numpy array基本运算

In [143]:
n = 10
L = [i for i in range(n)]

In [144]:
2 * L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [145]:
A = []
for e in L:
    A.append(2 * e)
A

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [146]:
n = 1000000
L = [i for i in range(n)]

In [147]:
%%time
for e in L:
    A.append(2 * e)

Wall time: 206 ms


In [148]:
%%time
A = [2 * e for e in L]

Wall time: 107 ms


In [150]:
%%time
A = np.array(2 * e for e in L)

Wall time: 19.9 ms


In [153]:
%%time
# 效果同每个元素均乘以2，而非连接两个相同数组
A = 2 * L

Wall time: 3.99 ms


In [154]:
A

array([      0,       2,       4, ..., 1999994, 1999996, 1999998])

### Universal Functions

In [160]:
# numpy支持对向量和矩阵的数学运算，并优化其性能
x = np.arange(1, 16).reshape((3, 5))
x

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [157]:
x + 1

array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16]])

In [158]:
x - 1

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [161]:
x * 2

array([[ 2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20],
       [22, 24, 26, 28, 30]])

In [162]:
# 浮点数除法
x / 2

array([[0.5, 1. , 1.5, 2. , 2.5],
       [3. , 3.5, 4. , 4.5, 5. ],
       [5.5, 6. , 6.5, 7. , 7.5]])

In [163]:
# 整数除法
x // 2

array([[0, 1, 1, 2, 2],
       [3, 3, 4, 4, 5],
       [5, 6, 6, 7, 7]], dtype=int32)

In [164]:
x ** 2

array([[  1,   4,   9,  16,  25],
       [ 36,  49,  64,  81, 100],
       [121, 144, 169, 196, 225]], dtype=int32)

In [165]:
x % 2

array([[1, 0, 1, 0, 1],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1]], dtype=int32)

In [166]:
1 / x

array([[1.        , 0.5       , 0.33333333, 0.25      , 0.2       ],
       [0.16666667, 0.14285714, 0.125     , 0.11111111, 0.1       ],
       [0.09090909, 0.08333333, 0.07692308, 0.07142857, 0.06666667]])

In [168]:
# 绝对值
np.abs(x)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [169]:
# 正弦
np.sin(x)

array([[ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111],
       [-0.99999021, -0.53657292,  0.42016704,  0.99060736,  0.65028784]])

In [170]:
np.cos(x)

array([[ 0.54030231, -0.41614684, -0.9899925 , -0.65364362,  0.28366219],
       [ 0.96017029,  0.75390225, -0.14550003, -0.91113026, -0.83907153],
       [ 0.0044257 ,  0.84385396,  0.90744678,  0.13673722, -0.75968791]])

In [171]:
np.tan(x)

array([[ 1.55740772e+00, -2.18503986e+00, -1.42546543e-01,
         1.15782128e+00, -3.38051501e+00],
       [-2.91006191e-01,  8.71447983e-01, -6.79971146e+00,
        -4.52315659e-01,  6.48360827e-01],
       [-2.25950846e+02, -6.35859929e-01,  4.63021133e-01,
         7.24460662e+00, -8.55993401e-01]])

In [173]:
np.arcsin(x)

  """Entry point for launching an IPython kernel.


array([[1.57079633,        nan,        nan,        nan,        nan],
       [       nan,        nan,        nan,        nan,        nan],
       [       nan,        nan,        nan,        nan,        nan]])

In [174]:
# e的x次方
np.exp(x)

array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
        1.48413159e+02],
       [4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03,
        2.20264658e+04],
       [5.98741417e+04, 1.62754791e+05, 4.42413392e+05, 1.20260428e+06,
        3.26901737e+06]])

In [175]:
np.power(3, x)

array([[       3,        9,       27,       81,      243],
       [     729,     2187,     6561,    19683,    59049],
       [  177147,   531441,  1594323,  4782969, 14348907]], dtype=int32)

In [176]:
3 ** x

array([[       3,        9,       27,       81,      243],
       [     729,     2187,     6561,    19683,    59049],
       [  177147,   531441,  1594323,  4782969, 14348907]], dtype=int32)

In [177]:
# 以e为底
np.log(x)

array([[0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791],
       [1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509],
       [2.39789527, 2.48490665, 2.56494936, 2.63905733, 2.7080502 ]])

In [178]:
np.log10(x)

array([[0.        , 0.30103   , 0.47712125, 0.60205999, 0.69897   ],
       [0.77815125, 0.84509804, 0.90308999, 0.95424251, 1.        ],
       [1.04139269, 1.07918125, 1.11394335, 1.14612804, 1.17609126]])

### 矩阵间的运算

In [182]:
A = np.arange(4).reshape(2, 2)
B = np.full((2, 2), 10)

In [183]:
A

array([[0, 1],
       [2, 3]])

In [184]:
B

array([[10, 10],
       [10, 10]])

In [180]:
A + B

array([[10, 11],
       [12, 13]])

In [181]:
A - B

array([[-10,  -9],
       [ -8,  -7]])

In [186]:
# 对应元素相乘，非实际矩阵乘法（numpy对所有运算符的定义都是对应元素做相应运算）
A * B

array([[ 0, 10],
       [20, 30]])

In [187]:
# 实际矩阵乘法
A.dot(B)

array([[10, 10],
       [50, 50]])

In [188]:
# 实际矩阵转置
A.T

array([[0, 2],
       [1, 3]])

### 矩阵和向量的运算

In [190]:
v = np.array([1, 2])
v

array([1, 2])

In [191]:
A

array([[0, 1],
       [2, 3]])

In [192]:
# 矩阵与向量的加法
v + A

array([[1, 3],
       [3, 5]])

In [195]:
np.vstack([v] * A.shape[0])

array([[1, 2],
       [1, 2]])

In [196]:
np.vstack([v] * A.shape[0]) + A

array([[1, 3],
       [3, 5]])

In [197]:
# 向量堆叠（行堆叠次数，列堆叠次数）
np.tile(v, (2, 1))

array([[1, 2],
       [1, 2]])

In [198]:
np.tile(v, (2, 1)) + A

array([[1, 3],
       [3, 5]])

In [199]:
# 矩阵与向量的乘法
v * A

array([[0, 2],
       [2, 6]])

In [200]:
v.dot(A)

array([4, 7])

In [201]:
A.dot(v)

array([2, 8])

### 矩阵的逆

In [202]:
A

array([[0, 1],
       [2, 3]])

In [203]:
# 逆矩阵
invA = np.linalg.inv(A)
invA

array([[-1.5,  0.5],
       [ 1. ,  0. ]])

In [204]:
A.dot(invA)

array([[1., 0.],
       [0., 1.]])

In [209]:
# 伪逆矩阵
X = np.arange(16).reshape((2, 8))
pinvX = np.linalg.pinv(X)
pinvX

array([[-1.35416667e-01,  5.20833333e-02],
       [-1.01190476e-01,  4.16666667e-02],
       [-6.69642857e-02,  3.12500000e-02],
       [-3.27380952e-02,  2.08333333e-02],
       [ 1.48809524e-03,  1.04166667e-02],
       [ 3.57142857e-02, -1.04083409e-17],
       [ 6.99404762e-02, -1.04166667e-02],
       [ 1.04166667e-01, -2.08333333e-02]])

In [210]:
X.dot(pinvX)

array([[ 1.00000000e+00, -2.49800181e-16],
       [ 0.00000000e+00,  1.00000000e+00]])

## 四、Numpy array聚合操作

In [2]:
import numpy as np
L = np.random.random(100)
L

array([0.66767819, 0.42587115, 0.80776773, 0.42585138, 0.76374408,
       0.27361095, 0.70612006, 0.4605381 , 0.25821582, 0.902259  ,
       0.35244201, 0.88504233, 0.60977493, 0.14291182, 0.37383098,
       0.95162156, 0.98855493, 0.8794706 , 0.87137044, 0.23995361,
       0.19638245, 0.81083538, 0.55803802, 0.30974459, 0.52388745,
       0.15450079, 0.79137756, 0.19237881, 0.30216719, 0.9641601 ,
       0.72104154, 0.42281416, 0.88948632, 0.00334803, 0.77016338,
       0.43267636, 0.25005218, 0.68557602, 0.3976198 , 0.01590434,
       0.8697432 , 0.65168977, 0.79049922, 0.37930574, 0.41142086,
       0.92483803, 0.04778082, 0.65778583, 0.3685023 , 0.32153862,
       0.12685269, 0.68861915, 0.01140144, 0.18847357, 0.01611672,
       0.94057253, 0.14257891, 0.1146853 , 0.68616238, 0.24959677,
       0.48585022, 0.61967248, 0.582173  , 0.25032801, 0.22024579,
       0.25641066, 0.2335059 , 0.24279814, 0.91133689, 0.74222779,
       0.09875769, 0.693675  , 0.47192906, 0.67600522, 0.52453

In [3]:
# python 自带求和函数
sum(L)

49.1654589935473

In [4]:
# numpy 的求和函数（性能优化）
np.sum(L)

49.16545899354728

In [5]:
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)

191 ms ± 7.42 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.25 ms ± 142 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [6]:
# 最小值
np.min(big_array)

7.432758468706169e-07

In [7]:
# 最大值
np.max(big_array)

0.9999991687107436

In [8]:
X = np.arange(16).reshape(4, -1)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [9]:
# 矩阵所有元素和
np.sum(X)

120

In [10]:
# 矩阵特定维度的和（x轴）
np.sum(X, axis=0)

array([24, 28, 32, 36])

In [11]:
# 矩阵特定维度的和（y轴）
np.sum(X, axis=1)

array([ 6, 22, 38, 54])

In [12]:
# 乘积
np.prod(X)

0

In [16]:
np.prod(X + 1)

2004189184

In [17]:
# 平均值
np.mean(X)

7.5

In [18]:
# 中位数
np.median(X)

7.5

In [19]:
# 指定百分位
np.percentile(big_array, q=50)

0.5000674240026745

In [20]:
np.median(big_array)

0.5000674240026745

In [21]:
np.percentile(big_array, q=100)

0.9999991687107436

In [23]:
np.max(big_array)

0.9999991687107436

In [24]:
np.percentile(big_array, q=0)

7.432758468706169e-07

In [25]:
np.min(big_array)

7.432758468706169e-07

In [26]:
for percent in [0, 25, 50, 75, 100]:
    print(np.percentile(big_array, q=percent))

7.432758468706169e-07
0.249707672129443
0.5000674240026745
0.7500947641295121
0.9999991687107436


In [27]:
# 方差
np.var(big_array)

0.08335698866369164

In [28]:
# 标准差
np.std(big_array)

0.2887161039216407

In [29]:
x = np.random.normal(0, 1, size = 1000000)

In [30]:
np.mean(x)

0.0010655887824180317

In [31]:
np.std(x)

0.9992915443623205

## 五、Numpy array索引操作

### arg索引操作

In [32]:
np.min(x)

-4.953588565934844

In [33]:
# arg操作返回对应函数求得元素所在的索引
np.argmin(x)

960415

In [34]:
x[960415]

-4.953588565934844

In [35]:
np.argmax(x)

824665

### 排序索引操作

In [36]:
x = np.arange(16)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [38]:
# shuffle乱序数组元素
np.random.shuffle(x)
x

array([ 4, 11,  3,  0, 13, 15,  6,  5,  7, 14,  2,  9, 12, 10,  1,  8])

In [39]:
# 此操作不改变数组本身，只是返回一个排序数组
np.sort(x)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [40]:
x

array([ 4, 11,  3,  0, 13, 15,  6,  5,  7, 14,  2,  9, 12, 10,  1,  8])

In [41]:
# 排序数组本身
x.sort()
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [42]:
X = np.random.randint(10, size=(4, 4))
X

array([[8, 3, 9, 5],
       [4, 7, 1, 9],
       [6, 8, 0, 1],
       [3, 9, 0, 9]])

In [43]:
np.sort(X)

array([[3, 5, 8, 9],
       [1, 4, 7, 9],
       [0, 1, 6, 8],
       [0, 3, 9, 9]])

In [44]:
# axis默认为1，同聚合操作，按y轴方向依次对x轴所有元素排序
np.sort(X, axis=1)

array([[3, 5, 8, 9],
       [1, 4, 7, 9],
       [0, 1, 6, 8],
       [0, 3, 9, 9]])

In [45]:
np.sort(X, axis=0)

array([[3, 3, 0, 1],
       [4, 7, 0, 5],
       [6, 8, 1, 9],
       [8, 9, 9, 9]])

In [48]:
np.random.shuffle(x)
x

array([15,  9,  8,  2,  5,  6, 11,  0, 12,  4,  3, 10, 14,  7,  1, 13])

In [49]:
# 对数组排序后返回元素原索引
np.argsort(x)

array([ 7, 14,  3, 10,  9,  4,  5, 13,  2,  1, 11,  6,  8, 15, 12,  0],
      dtype=int64)

In [50]:
# 按给定元素partition数组（tong快排的partition操作）
np.partition(x, 5)

array([ 1,  0,  2,  3,  4,  5,  6,  7, 12,  9,  8, 10, 11, 13, 14, 15])

In [51]:
np.argpartition(x, 5)

array([14,  7,  3, 10,  9,  4,  5, 13,  8,  1,  2, 11,  6, 15, 12,  0],
      dtype=int64)

In [52]:
X

array([[8, 3, 9, 5],
       [4, 7, 1, 9],
       [6, 8, 0, 1],
       [3, 9, 0, 9]])

In [53]:
np.argsort(X, axis=1)

array([[1, 3, 0, 2],
       [2, 0, 1, 3],
       [2, 3, 0, 1],
       [2, 0, 1, 3]], dtype=int64)

In [54]:
np.argpartition(X, 2, axis=1)

array([[1, 3, 0, 2],
       [2, 0, 1, 3],
       [2, 3, 0, 1],
       [2, 0, 1, 3]], dtype=int64)

### Fancy Indexing

#### 索引

In [59]:
x = np.arange(16)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [60]:
# 批量索引返回向量
index = [3, 5, 8]
x[index]

array([3, 5, 8])

In [62]:
# 批量索引返回矩阵
index = np.array([[0, 2],
                  [1, 3]])
x[index]

array([[0, 2],
       [1, 3]])

In [65]:
X = x.reshape(4, -1)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [67]:
# 联合索引
row = np.array([0, 1, 2])
col = np.array([1, 2, 3])
X[row, col]

array([ 1,  6, 11])

In [68]:
X[0, col]

array([1, 2, 3])

In [69]:
X[:2, col]

array([[1, 2, 3],
       [5, 6, 7]])

In [70]:
# 布尔索引
col = [True, False, True, True]
X[1:3, col]

array([[ 4,  6,  7],
       [ 8, 10, 11]])

#### 比较（universal）

In [71]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [72]:
x < 4

array([ True,  True,  True,  True, False, False, False, False, False,
       False, False, False, False, False, False, False])

In [73]:
x == 4

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False])

In [74]:
x > 4

array([False, False, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])

In [75]:
x != 4

array([ True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])

In [76]:
2 * x == 24 - 4 * x

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False])

In [77]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [78]:
X < 6

array([[ True,  True,  True,  True],
       [ True,  True, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [79]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [80]:
# 求和将true等于1，false等于0
np.sum(x < 4)

4

In [81]:
# 计算非0元素个数
np.count_nonzero(x < 4)

4

In [82]:
# 判断（或）存在元素为0
np.any(x == 0)

True

In [83]:
np.any(x < 0)

False

In [86]:
# 判断（与）所有元素为0
np.all(x == 0)

False

In [87]:
np.all(x >= 0)

True

In [88]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [89]:
# 操作也适用于矩阵
np.sum(X % 2 == 0)

8

In [90]:
np.sum(X%2 == 0, axis=1)

array([2, 2, 2, 2])

In [91]:
np.sum(X%2 == 0, axis=0)

array([4, 0, 4, 0])

In [93]:
np.all(X > 0, axis=1)

array([False,  True,  True,  True])

In [94]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [95]:
# 使用位运算符，而非条件运算符
np.sum((x > 3) & (x < 10))

6

In [96]:
np.sum((x > 3) && (x < 10))

SyntaxError: invalid syntax (<ipython-input-96-d834f65999a2>, line 1)

In [97]:
np.sum((x % 3 == 0) | (x > 10))

9

In [100]:
# 非运算
np.sum(~(x==0))

15

#### 比较 + 索引

In [101]:
x[x < 4]

array([0, 1, 2, 3])

In [106]:
x[x % 2 == 0]

array([ 0,  2,  4,  6,  8, 10, 12, 14])

In [108]:
X[X[:,3] % 3 == 0, :]

array([[ 0,  1,  2,  3],
       [12, 13, 14, 15]])

In [109]:
X[:,3] % 3 == 0

array([ True, False, False,  True])

#### Pandas对数据进行预处理后转为Numpy矩阵，再而进行机器学习