In [2]:
import numpy as np

## Python 默认 List

同一个 list 中的元素可以是不同的类型

In [3]:
# Build a plain Python list holding 0..9.
# NOTE(review): the name `list` rebinds the built-in `list` type for the rest of
# this kernel session; a name such as `py_list` would avoid the shadowing.
list = [i for i in range(10)]

In [4]:
list

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [5]:
list[5] = 'hello python'

In [6]:
list

[0, 1, 2, 3, 4, 'hello python', 6, 7, 8, 9]

## 类型不可变的 List

array 模块中的 array 在创建时通过类型码（如 'i'）固定元素类型，之后只能存放该类型的元素

In [7]:
import array

In [8]:
# Typed array of C ints (typecode 'i') holding 0..9, built straight from a range.
arraylist = array.array('i', range(10))

In [9]:
arraylist

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
# Demonstration: an 'i'-typed array rejects a str value with TypeError.
# The expected error is caught and printed so that "Restart & Run All"
# does not stop at this cell.
try:
    arraylist[5] = 'hello world'
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: an integer is required (got type str)

## Numpy Array

数据类型也是统一的

In [11]:
nparr = np.array([1, 2, 3])

In [12]:
nparr

array([1, 2, 3])

由于 nparr 的 dtype 为整型，赋值 3.99 时小数部分会被截断，实际存入的是整数 3

In [13]:
nparr[2] = 3.99

In [14]:
nparr

array([1, 2, 3])

In [15]:
nparr.dtype

dtype('int64')

In [16]:
# Demonstration: assigning a non-numeric string into an integer ndarray raises
# ValueError. The expected error is caught and printed so that
# "Restart & Run All" does not stop at this cell.
try:
    nparr[2] = 'hello numpy'
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: invalid literal for int() with base 10: 'hello numpy'

## 创建 nparray 的方式

- np.zeros
- np.ones
- np.full
- np.arange
- np.linspace

In [17]:
np.zeros(shape=(10,), dtype='int')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [18]:
np.ones((3, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [19]:
np.full((3, 5), 42, dtype='float')

array([[42., 42., 42., 42., 42.],
       [42., 42., 42., 42., 42.],
       [42., 42., 42., 42., 42.]])

numpy 中 arange 可以使用 __小数__ 步长

In [20]:
np.arange(0, 1, 0.2)

array([0. , 0.2, 0.4, 0.6, 0.8])

linspace 包含 __起始点__ 和 __终止点__

In [21]:
np.linspace(0, 20, 11)

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14., 16., 18., 20.])

## Numpy 中 随机数 的创建

In [22]:
np.random.randint(0, 10)

6

In [23]:
np.random.randint(0, 10, 10)

array([6, 5, 6, 6, 5, 3, 7, 3, 3, 8])

In [24]:
np.random.randint(0, 10, size=(2, 3))

array([[0, 2, 9],
       [3, 7, 0]])

In [25]:
np.random.randint(0, 1, 10)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [26]:
# Fix the global seed so the 3x2 integer draw below is reproducible.
np.random.seed(seed=42)
np.random.randint(low=0, high=10, size=(3, 2))

array([[6, 3],
       [7, 4],
       [6, 9]])

In [27]:
# Re-seeding with the same value (42) makes this draw repeat the same numbers.
np.random.seed(seed=42)
np.random.randint(low=0, high=10, size=(3, 2))

array([[6, 3],
       [7, 4],
       [6, 9]])

在 [0, 1) 区间上均匀分布的随机数

In [28]:
np.random.random()

0.15599452033620265

In [29]:
np.random.random(10)

array([0.05808361, 0.86617615, 0.60111501, 0.70807258, 0.02058449,
       0.96990985, 0.83244264, 0.21233911, 0.18182497, 0.18340451])

指定均值和标准差的正态分布随机数（注意：第二个参数是标准差，不是方差）

In [30]:
np.random.normal(10, 100)

34.19622715660341

In [31]:
np.random.normal(0, 1, (3, 5))

array([[-1.91328024, -1.72491783, -0.56228753, -1.01283112,  0.31424733],
       [-0.90802408, -1.4123037 ,  1.46564877, -0.2257763 ,  0.0675282 ],
       [-1.42474819, -0.54438272,  0.11092259, -1.15099358,  0.37569802]])

## Numpy  数组 与 子数组的关系

子数组只是原数组的一个视图（view），与原数组共享同一份数据，因此改变子数组会影响原数组。

而 Python 原生 list 的切片则会创建一个新的 list

In [32]:
X = np.arange(15).reshape(3, 5)

In [33]:
X

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [34]:
subX = X[:2, :3]

In [35]:
subX

array([[0, 1, 2],
       [5, 6, 7]])

In [36]:
subX[0, 0] = 100

In [37]:
X

array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])

In [38]:
subX_cp = X[:2, :3].copy()

In [39]:
subX_cp[0, 0] = 0

In [40]:
X

array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])

X[行处理, 列处理]

X[行处理]

行列分别进行切片处理

In [41]:
X[::-1, ::-1]

array([[ 14,  13,  12,  11,  10],
       [  9,   8,   7,   6,   5],
       [  4,   3,   2,   1, 100]])

reshape 中可以用 -1 让 numpy 自动推算该维度（行数或列数）的大小

In [42]:
np.arange(10).reshape(2, -1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

## Numpy 数据的合并

In [43]:
a = np.array([1, 2, 3])
A = np.arange(6).reshape(2, -1)

In [44]:
np.concatenate([a, a])

array([1, 2, 3, 1, 2, 3])

In [45]:
np.concatenate([A, A])

array([[0, 1, 2],
       [3, 4, 5],
       [0, 1, 2],
       [3, 4, 5]])

In [46]:
np.concatenate([A, A], axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5]])

In [47]:
# Demonstration: np.concatenate raises ValueError when its inputs do not all
# have the same number of dimensions. The expected error is caught and printed
# so that "Restart & Run All" does not stop at this cell.
try:
    np.concatenate([A, a])
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: all the input arrays must have same number of dimensions

In [48]:
np.vstack([A, a])

array([[0, 1, 2],
       [3, 4, 5],
       [1, 2, 3]])

In [49]:
np.hstack([A, A])

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5]])

## Numpy 数据的分割

In [50]:
b = np.arange(10)

In [51]:
b1, b2, b3 = np.split(b, [3, 7])

In [52]:
b1

array([0, 1, 2])

In [53]:
b2

array([3, 4, 5, 6])

In [54]:
b3

array([7, 8, 9])

In [55]:
B = np.arange(16).reshape(4, 4)

In [56]:
B

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [57]:
B1, B2 = np.split(B, [2])

In [58]:
B1

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [59]:
B2

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [60]:
B1, B2 = np.split(B, [2], axis=1)

In [61]:
B1

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

最后一列为标签（y），其余各列为特征（X）

In [62]:
X, y = np.hsplit(B, [-1])

In [63]:
X

array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14]])

In [64]:
# Flatten the single-column result of hsplit into a 1-D vector.
# ravel() leaves an already 1-D array unchanged, so this cell can be re-run
# safely (indexing with [:, 0] would raise IndexError on the second run).
y = y.ravel()

In [65]:
y

array([ 3,  7, 11, 15])

## Numpy 数据运算

- np.exp(X)
- np.power(3, X)
- np.abs(X)
- np.sin(X)
- np.log(X)
- np.log2(X)
- np.log10(X)

X 为矩阵，Y 为矩阵

- X.dot(Y)
- X.T

## 向量 与 矩阵 之间的运算

In [66]:
x = np.array([1, 2])
X = np.arange(4).reshape(2, -1)

In [67]:
x

array([1, 2])

In [68]:
X

array([[0, 1],
       [2, 3]])

In [69]:
X + x # broadcasting: the vector x is added to every row of the matrix X

array([[1, 3],
       [3, 5]])

In [70]:
x * X # element-wise product (not a dot product): x is multiplied into every row of X

array([[0, 2],
       [2, 6]])

In [71]:
np.tile(x, (2, 1)) # tile x: repeat it 2 times along the rows and 1 time along the columns

array([[1, 2],
       [1, 2]])

In [72]:
x.dot(X) # here x is treated as a row vector

array([4, 7])

In [73]:
X.dot(x) # here x is treated as a column vector

array([2, 8])

## 矩阵的逆

In [74]:
normal_X = np.arange(4).reshape(2, -1)

In [75]:
normal_X

array([[0, 1],
       [2, 3]])

In [76]:
inv_X = np.linalg.inv(normal_X) # linalg = linear algebra

In [77]:
inv_X

array([[-1.5,  0.5],
       [ 1. ,  0. ]])

In [78]:
inv_X.dot(normal_X)

array([[1., 0.],
       [0., 1.]])

In [79]:
normal_X.dot(inv_X)

array([[1., 0.],
       [0., 1.]])

In [80]:
innormal_X = np.arange(16).reshape(2, 8)

In [81]:
innormal_X

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [82]:
inv_iX = np.linalg.pinv(innormal_X)

In [83]:
inv_iX

array([[-1.35416667e-01,  5.20833333e-02],
       [-1.01190476e-01,  4.16666667e-02],
       [-6.69642857e-02,  3.12500000e-02],
       [-3.27380952e-02,  2.08333333e-02],
       [ 1.48809524e-03,  1.04166667e-02],
       [ 3.57142857e-02, -1.04083409e-17],
       [ 6.99404762e-02, -1.04166667e-02],
       [ 1.04166667e-01, -2.08333333e-02]])

In [84]:
innormal_X.dot(inv_iX)

array([[ 1.00000000e+00, -2.49800181e-16],
       [ 0.00000000e+00,  1.00000000e+00]])

## Numpy 中的聚合运算

In [85]:
# 4x4 matrix holding 0..15. -1 asks reshape to infer the column count;
# -1 is the only documented placeholder value for an inferred dimension.
A = np.arange(16).reshape(4, -1)

In [86]:
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [87]:
np.prod(A + 1)

20922789888000

In [88]:
np.prod(A, axis=1)

array([    0,   840,  7920, 32760])

In [89]:
np.mean(A, axis=0)

array([6., 7., 8., 9.])

In [90]:
np.percentile(A, q=0, axis=1)

array([ 0.,  4.,  8., 12.])

In [91]:
# Print the 0th, 25th, 50th, 75th and 100th percentiles over all entries of A.
for pct in (0, 25, 50, 75, 100):
    print(np.percentile(A, q=pct))

0.0
3.75
7.5
11.25
15.0


## FancyIndexing

In [92]:
# 4x4 matrix holding 0..15. -1 asks reshape to infer the column count;
# -1 is the only documented placeholder value for an inferred dimension.
X = np.arange(16).reshape(4, -1)

In [93]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [94]:
row = np.array([0, 1, 2])
col = np.array([1, 2, 3])

In [95]:
X[row, col]

array([ 1,  6, 11])

In [96]:
row = [0, 1, 2]
col = [1, 2, 3]

In [97]:
X[row, col]

array([ 1,  6, 11])

In [98]:
x = np.arange(10)

In [99]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [100]:
x[x<3]

array([0, 1, 2])

In [101]:
x[(x>1) & (x<3)] # the parentheses are required: & binds more tightly than the comparison operators

array([2])