# Numpy入门

主要介绍numpy工具库的使用

## 环境初始化

In [1]:
import numpy as np

### 一维数组

In [2]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [3]:
# numpy 数组都是n维数组
type(np.array([1,2,3,4]))

numpy.ndarray

In [4]:
np.ones(3)

array([1., 1., 1.])

In [5]:
np.zeros(3)

array([0., 0., 0.])

In [6]:
np.random.random(3)

array([0.17175294, 0.24745337, 0.22457954])

In [7]:
# 推荐写法
np.array([1,2,3,4])
# 不推荐
np.array((1,2,3,4))

array([1, 2, 3, 4])

In [8]:
# 错误写法
# np.array(1,2,3,4)

In [9]:
b = np.array([1,2,3,4.0])
b

array([1., 2., 3., 4.])

#### 创建0-5的数组（arange(6) 不包含6）

In [10]:
np.arange(6)

array([0, 1, 2, 3, 4, 5])

创建-2到1间隔为0.5的数组, 前闭后开

In [11]:
np.arange(-2,1,0.5)

array([-2. , -1.5, -1. , -0.5,  0. ,  0.5])

等分空间为若干份

在0-2的区间内等间隔取5个点（即把区间分成4等份），包含端点2

In [12]:
np.linspace(0,2,5)

array([0. , 0.5, 1. , 1.5, 2. ])

### 二维数组

In [13]:
np.zeros([3, 3])

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [14]:
a = np.ones([2, 3])
a

array([[1., 1., 1.],
       [1., 1., 1.]])

In [15]:
a.shape

(2, 3)

In [16]:
np.ones([3, 3, 2])

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [17]:
# Identity matrix (not a general diagonal matrix): ones on the main diagonal, zeros elsewhere
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [18]:
# Diagonal matrix: np.diag builds a 2-D array with the given 1-D values on the main diagonal
a = np.array([1, 2, 3])
d = np.diag(a)
d

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

### Numpy数组的属性

In [19]:
d.ndim

2

In [20]:
d.shape

(3, 3)

In [21]:
d.size

9

In [22]:
d.dtype

dtype('int32')

In [23]:
np.linspace(0, 2, 5).dtype

dtype('float64')

In [24]:
b = np.array(['a', 1, 2])
b.dtype

dtype('<U1')

### 数据类型转换

In [25]:
a = np.array([1.1, 2.2, 3.3], dtype=np.float64)
a, a.dtype

(array([1.1, 2.2, 3.3]), dtype('float64'))

In [26]:
# 将a的数值类型从float64转为int，并查看类型
a.astype(int), a.astype(int).dtype 

(array([1, 2, 3]), dtype('int32'))

## Numpy数组运算

### Numpy数组的四则运算

In [27]:
a = np.array([1, 2])
b = np.ones(2)
a + b

array([2., 3.])

In [28]:
a - b

array([0., 1.])

In [29]:
a * a

array([1, 4])

In [30]:
a / a

array([1., 1.])

In [31]:
np.array([1,2,3,2])

array([1, 2, 3, 2])

In [32]:
np.array([1,2,3,2]) * 1.0

array([1., 2., 3., 2.])

In [33]:
a = np.array([1,2,3,2])
np.sin(a)

array([0.84147098, 0.90929743, 0.14112001, 0.90929743])

In [34]:
np.sqrt(a)

array([1.        , 1.41421356, 1.73205081, 1.41421356])

In [35]:
a < 3

array([ True,  True, False,  True])

### Numpy数组的索引

In [36]:
data = np.array([1,2,3,4])

In [37]:
data[0:2]

array([1, 2])

In [38]:
data[1:]

array([2, 3, 4])

In [39]:
data[0:-1]

array([1, 2, 3])

### Numpy数组的元素操作

In [40]:
data.min()

1

In [41]:
data.max()

4

In [42]:
data.sum()

10

In [43]:
data.mean()

2.5

## Numpy多维数组的运算

### Numpy高维数组的切片

In [44]:
data = np.array([
    [1, 2],
    [3, 4],
    [5, 6]
])

In [45]:
data[0, 1]

2

In [46]:
data[1:]

array([[3, 4],
       [5, 6]])

In [47]:
data[1:3]

array([[3, 4],
       [5, 6]])

In [48]:
data[:2, 0:1]

array([[1],
       [3]])

### 矩阵的转置

- 行变列，列变行

In [48]:
a = np.array([
    [1,1],    
    [2,4]
])
a

array([[1, 1],
       [2, 4]])

In [49]:
a.T

array([[1, 2],
       [1, 4]])

### Numpy数据的变形

In [116]:
# Reshape demo: only the value of the cell's last expression is displayed
c = np.array([1,2,3,4,5,6])
c.reshape(2,3)  # 2x3 result is discarded; reshape does not change c itself
c.reshape(3,2)  # 3x2 result is likewise discarded and not displayed
a = c.reshape(3,-1)  # -1 lets NumPy infer that dimension (6 / 3 = 2)
a  # bare expression in the middle of a cell produces no output
a.ravel() # flatten back to one dimension

array([1, 2, 3, 4, 5, 6])

### Numpy高维数组

In [127]:
a = np.ones(shape=(4,3,2))
print(a)
print(a.ndim)
print(a.shape)

[[[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]]
3
(4, 3, 2)


In [124]:
# 行数， 列数， 深度
np.zeros([4, 5, 3])

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [126]:
np.random.random((4,3,2))

array([[[0.36217621, 0.70768662],
        [0.74674622, 0.69109292],
        [0.68918041, 0.37360012]],

       [[0.6681348 , 0.33984866],
        [0.57279387, 0.32580716],
        [0.44514505, 0.06152893]],

       [[0.24267542, 0.97160261],
        [0.2305842 , 0.69147751],
        [0.65047686, 0.72393914]],

       [[0.47508861, 0.59666377],
        [0.06696942, 0.07256214],
        [0.19897603, 0.151861  ]]])

### Numpy数据分析

In [50]:
a = np.array([
    [0, 1],
    [2, 4],
    [0, 8]
])

In [51]:
np.sum(a) # 全部元素求和

15

In [52]:
np.sum(a, axis = 1) # axis==1 表示按行求和

array([1, 6, 8])

In [53]:
np.sum(a, axis = 0) # axis==0 表示按列求和

array([ 2, 13])

In [54]:
np.max(a, axis = 1)

array([1, 4, 8])

In [55]:
np.max(a, axis = 0)

array([2, 8])

In [56]:
np.mean(a)   # 均值

2.5

In [57]:
np.median(a) # 排在中间的1个或2个值的均值

1.5

In [58]:
# 方差和标准差
a = np.array([1, 2, 3, 4, 5])
print(np.var(a))
print(np.std(a))

2.0
1.4142135623730951


## Numpy的随机数

In [102]:
# 生成 [0, 1) 的随机数
np.random.rand()

0.20137871104307314

In [103]:
np.random.rand(3)

array([0.51403506, 0.08722937, 0.48358553])

In [60]:
# 生成 [0, 1) 随机数矩阵，2, 4为形状 
np.random.rand(2, 4)

array([[0.29030601, 0.88186723, 0.69936786, 0.25123737],
       [0.35109743, 0.83459441, 0.43355748, 0.79085719]])

In [61]:
# Draw random integers from [2, 6) (6 excluded); the last argument is how many to draw
np.random.randint(2, 6, 10)

array([4, 3, 5, 5, 2, 4, 2, 5, 4, 3])

In [62]:
# 生成正态分布的随机数， 均值在0附近
np.random.randn(2, 3)

array([[ 0.53237997, -0.17162405,  0.42243229],
       [-0.28461071,  1.04146273,  0.19673282]])

In [63]:
# Draw n random floats from [0, 1). Produces the same kind of values as rand;
# rand is the Matlab-style convenience wrapper around random_sample
np.random.random_sample(3)

array([0.77092301, 0.28050888, 0.84211781])

In [64]:
# 2x4 array of random floats from [0, 1). The shape is passed as a single tuple or list,
# whereas rand takes the dimensions as separate integer arguments (Matlab style)
np.random.random_sample([2, 4])

array([[0.39674868, 0.6686296 , 0.29868981, 0.26796298],
       [0.99759442, 0.62924975, 0.99274104, 0.01207741]])

### 正态分布相关API

- 创建符合正态分布的随机数

In [65]:
sheep = np.random.normal(85, 10, size=(3, 4))
print(sheep)
print(np.mean(sheep))
print(np.std(sheep))

[[81.23424964 79.66525149 74.46208153 73.71122901]
 [87.64865597 64.86674809 85.6079742  85.2831761 ]
 [70.33980463 61.3331168  69.26209623 77.39880696]]
75.90109922171033
8.067217718970642


- 创建期望为0的标准正态分布

In [66]:
sample = np.random.randn(1000, 1000)
# 计算均值
sample.mean()

-0.0004980219326807859

In [67]:
# 计算标准差
sample.std()

0.9994473933181361

### 数据打散

In [68]:
a = np.arange(30)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [69]:
# Shuffle the data: permutation returns a shuffled copy and leaves the array itself unchanged
np.random.permutation(a)

array([25, 29, 15, 13,  7, 23, 27,  0, 19, 12, 21, 16, 17, 24,  1, 10,  2,
       20,  9,  8,  4, 26, 14,  6, 28, 11, 22, 18,  5,  3])

In [70]:
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [71]:
# shuffle reorders the array in place and returns None, so this cell shows no output
np.random.shuffle(a)

In [72]:
a

array([18, 26, 13, 27,  2,  9,  6, 22, 12, 29, 28, 19, 10, 14, 25, 11,  1,
        4,  7,  0, 16, 24,  5, 23,  8, 21, 17, 20,  3, 15])

In [73]:
np.random.seed(3)
np.random.randn(2, 2)

array([[ 1.78862847,  0.43650985],
       [ 0.09649747, -1.8634927 ]])

In [74]:
np.random.seed(3)
np.random.randn(2, 2)

array([[ 1.78862847,  0.43650985],
       [ 0.09649747, -1.8634927 ]])

In [75]:
np.random.randn(2, 2)

array([[-0.2773882 , -0.35475898],
       [-0.08274148, -0.62700068]])

## Numpy的逻辑运算

In [76]:
a = np.array([True, False, False, True, False])

In [77]:
# 判断是否全是True，np.all(a)
a.all()

False

In [78]:
# 判断是否至少有一个True，np.any(a)
a.any()

True

- 把普通的Numpy数组转成真假值数组

In [79]:
a = np.random.rand(3, 3)
a

array([[0.02987621, 0.45683322, 0.64914405],
       [0.27848728, 0.6762549 , 0.59086282],
       [0.02398188, 0.55885409, 0.25925245]])

In [80]:
b = a < 0.5
b

array([[ True,  True, False],
       [ True, False, False],
       [ True, False,  True]])

## Numpy的排序

In [81]:
a = np.random.randn(10)
a

array([-1.62915743, -0.63893553, -0.56080987,  1.81898259,  0.15657257,
       -1.20327619, -0.7040594 ,  1.01922044, -0.53463016, -1.32688896])

In [82]:
# 排序，会修改数据本身
a.sort()

In [83]:
a

array([-1.62915743, -1.32688896, -1.20327619, -0.7040594 , -0.63893553,
       -0.56080987, -0.53463016,  0.15657257,  1.01922044,  1.81898259])

In [84]:
# argsort returns the indices that would sort the array; the array itself is not modified
a = np.random.randn(10)
print(a)
a.argsort()

[ 0.66805241  0.28587781 -0.37067527  1.05045112  0.2613125  -0.69459727
 -2.02469194  0.84595316 -0.47455326 -1.39576555]


array([6, 9, 5, 8, 2, 4, 1, 0, 7, 3], dtype=int64)

In [85]:
a[a.argsort()]

array([-2.02469194, -1.39576555, -0.69459727, -0.47455326, -0.37067527,
        0.2613125 ,  0.28587781,  0.66805241,  0.84595316,  1.05045112])

In [86]:
a[[0, 1, 2, 4]]

array([ 0.66805241,  0.28587781, -0.37067527,  0.2613125 ])

In [87]:
a

array([ 0.66805241,  0.28587781, -0.37067527,  1.05045112,  0.2613125 ,
       -0.69459727, -2.02469194,  0.84595316, -0.47455326, -1.39576555])

In [88]:
# 按指定方式排序
b = np.random.randn(3, 3)
b

array([[-1.73278446, -0.23322423, -0.36848667],
       [-1.8131661 ,  0.25453554,  1.06394904],
       [-0.98961218, -0.96505109, -1.42170273]])

In [89]:
b.sort(axis = 1) # in-place sort within each row; the default axis=-1 (last axis) is the same axis for a 2-D array
b

array([[-1.73278446, -0.36848667, -0.23322423],
       [-1.8131661 ,  0.25453554,  1.06394904],
       [-1.42170273, -0.98961218, -0.96505109]])

In [90]:
help(a.sort)

Help on built-in function sort:

sort(...) method of numpy.ndarray instance
    a.sort(axis=-1, kind=None, order=None)
    
    Sort an array in-place. Refer to `numpy.sort` for full documentation.
    
    Parameters
    ----------
    axis : int, optional
        Axis along which to sort. Default is -1, which means sort along the
        last axis.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
        Sorting algorithm. The default is 'quicksort'. Note that both 'stable'
        and 'mergesort' use timsort under the covers and, in general, the
        actual implementation will vary with datatype. The 'mergesort' option
        is retained for backwards compatibility.
    
        .. versionchanged:: 1.15.0.
           The 'stable' option was added.
    
    order : str or list of str, optional
        When `a` is an array with fields defined, this argument specifies
        which fields to compare first, second, etc.  A single field can
        be specified a

## 行向量和列向量

In [91]:
a = np.array([1, 2, 3])
a

array([1, 2, 3])

In [104]:
a.T

array([1, 2, 3])

In [105]:
a.transpose()

array([1, 2, 3])

- 对于一维数组(向量)，行向量和列向量在Numpy表达时候没区别
- 对于二维数组(矩阵)，行向量和列向量有区别

In [93]:
b = np.array([[1], [2], [3]])
b

array([[1],
       [2],
       [3]])

In [94]:
b.T

array([[1, 2, 3]])

In [95]:
b.T.T

array([[1],
       [2],
       [3]])

- 把一维数组升级为二维数组

In [106]:
np.array([1, 2, 3]).reshape(3, 1)

array([[1],
       [2],
       [3]])

In [114]:
# 行向量生成技巧
np.r_[-2, -1, 3, 1:4]

array([-2, -1,  3,  1,  2,  3])

In [115]:
# 其他创建列向量方式
np.c_[np.array([-2, -1, 3,2,3,4,3])] 

array([[-2],
       [-1],
       [ 3],
       [ 2],
       [ 3],
       [ 4],
       [ 3]])