# Numpy入门
主要介绍Numpy工具使用

## 环境初始化

In [1]:
import numpy as np
np.__version__

'1.19.2'

### 创建一维数组

In [2]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [3]:
# 所有的numpy数组都是n维数组ndarray
type(np.array([1, 2, 3, 4]))

numpy.ndarray

In [4]:
np.ones(3)

array([1., 1., 1.])

In [5]:
np.zeros(3)

array([0., 0., 0.])

In [6]:
np.random.random(3)

array([0.86131698, 0.19825871, 0.47940626])

In [7]:
# 不推荐如下写法
np.array((1, 2, 3, 4))

array([1, 2, 3, 4])

In [8]:
a = np.array([1, 2, 3, 4.2])
a

array([1. , 2. , 3. , 4.2])

- 创建 [0, 6) 的整数数组（左闭右开，不包含 6）

In [9]:
np.arange(6)

array([0, 1, 2, 3, 4, 5])

In [10]:
np.arange(1, 7, 2)

array([1, 3, 5])

- 创建 [-2, 1) 区间、步长为 0.5 的数组（左闭右开）

In [11]:
np.arange(-2, 1, 0.5)

array([-2. , -1.5, -1. , -0.5,  0. ,  0.5])

- 在 [0, 2] 区间内生成 5 个等间距的点（即把区间 4 等分），包含终点 2

In [12]:
np.linspace(0, 2, 5, endpoint=True)

array([0. , 0.5, 1. , 1.5, 2. ])

### 二维数组

In [13]:
np.zeros([3, 3])

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [14]:
a = np.ones([2, 3])
a

array([[1., 1., 1.],
       [1., 1., 1.]])

In [15]:
a.shape

(2, 3)

In [16]:
np.ones([2, 3, 3])

array([[[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]])

In [17]:
# 创建单位矩阵
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [18]:
# 创建对角矩阵（以一维数组作为主对角线上的元素）
a = np.array([1, 2, 3])
d = np.diag(a)
d

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

## Numpy数组的属性

In [19]:
d.ndim # 维度

2

In [20]:
d.shape # 形状

(3, 3)

In [21]:
d.size # 个数

9

In [22]:
d.dtype

dtype('int64')

In [23]:
np.linspace(0, 2, 5).dtype

dtype('float64')

In [24]:
b = np.array(['a', 1, 2])
b.dtype

dtype('<U1')

#### 数据类型转换

In [25]:
a = np.array([1.1, 2.2, 3.3], dtype=np.float64)
a, a.dtype

(array([1.1, 2.2, 3.3]), dtype('float64'))

In [26]:
a.astype(int), a.astype(int).dtype

(array([1, 2, 3]), dtype('int64'))

## Numpy数组的运算

In [27]:
a = np.array([1, 2])
b = np.ones(2)
a + b

array([2., 3.])

In [28]:
a - b

array([0., 1.])

In [29]:
a * a

array([1, 4])

In [30]:
a / a

array([1., 1.])

In [31]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [32]:
np.array([1, 2, 3, 4]) * 2.0

array([2., 4., 6., 8.])

In [33]:
a = np.array([1, 2, 3, 2])
np.sin(a)

array([0.84147098, 0.90929743, 0.14112001, 0.90929743])

In [34]:
np.sqrt(a)

array([1.        , 1.41421356, 1.73205081, 1.41421356])

In [35]:
a <= 2

array([ True,  True, False,  True])

### Numpy数组的索引

In [36]:
data = np.array([1, 2, 3, 4])

In [37]:
data[0:2]

array([1, 2])

In [38]:
data[2:]

array([3, 4])

In [39]:
data[0:-1]

array([1, 2, 3])

### Numpy数组的元素操作

In [40]:
data.min()

1

In [41]:
data.max()

4

In [42]:
data.sum()

10

In [43]:
data.mean()

2.5

## Numpy的多维数组

In [44]:
a = np.array([
    [0, 1], 
    [2, 3]
]) 
# 等同于 a = np.array([[0, 1], [2, 3]])
a

array([[0, 1],
       [2, 3]])

In [45]:
b = np.ones([2, 2])
c = np.ones(2)
b, c

(array([[1., 1.],
        [1., 1.]]),
 array([1., 1.]))

In [46]:
a + b

array([[1., 2.],
       [3., 4.]])

In [47]:
a + c

array([[1., 2.],
       [3., 4.]])

### 高维数组切片

In [48]:
data = np.array([
    [1, 2],
    [3, 4],
    [5, 6]
])

In [49]:
data[0, 1]

2

In [50]:
data[1:]

array([[3, 4],
       [5, 6]])

In [51]:
# 取出前两行中索引为1的列；列索引写成切片 1: （而不是 1）可以保持二维，避免降维
data[:2, 1:]

array([[2],
       [4]])

In [52]:
data.T

array([[1, 3, 5],
       [2, 4, 6]])

### 矩阵的转置

In [53]:
# 行变列，列变行
a = np.array([
    [1, 1],
    [2, 4],
    [5, 6]
])
a

array([[1, 1],
       [2, 4],
       [5, 6]])

In [54]:
a.T

array([[1, 2, 5],
       [1, 4, 6]])

In [55]:
a.transpose()

array([[1, 2, 5],
       [1, 4, 6]])

### Numpy数据变形

In [56]:
c = np.arange(1, 7)
c

array([1, 2, 3, 4, 5, 6])

In [57]:
c.reshape(2, 3)

array([[1, 2, 3],
       [4, 5, 6]])

In [58]:
c.reshape(3, 2)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [59]:
a = c.reshape(3, -1)
a

array([[1, 2],
       [3, 4],
       [5, 6]])

In [60]:
# 把高维数组降到一维
a.ravel()

array([1, 2, 3, 4, 5, 6])

### Numpy高维数组

In [61]:
a = np.ones((4, 3, 2))
a

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [62]:
a.ndim

3

In [63]:
a.shape

(4, 3, 2)

In [64]:
# 创建了4行5列的彩色图片
np.zeros((4, 5, 3))

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [65]:
np.random.random((4, 5, 3))

array([[[0.09058305, 0.14861944, 0.89456829],
        [0.10942031, 0.52681743, 0.65216738],
        [0.7448196 , 0.22328208, 0.70775102],
        [0.81643685, 0.25104201, 0.95725955],
        [0.22114219, 0.94309229, 0.71795839]],

       [[0.80703417, 0.43829074, 0.35938764],
        [0.80823219, 0.3490648 , 0.83835669],
        [0.86319507, 0.63032217, 0.89513368],
        [0.57375202, 0.87891621, 0.66461473],
        [0.8538216 , 0.62194354, 0.00473802]],

       [[0.86195106, 0.91686961, 0.26813465],
        [0.56453698, 0.28416556, 0.03610566],
        [0.08642045, 0.59169659, 0.86482465],
        [0.08185352, 0.41890202, 0.62099612],
        [0.12613469, 0.44452809, 0.73947494]],

       [[0.50032715, 0.72792845, 0.07225237],
        [0.14074176, 0.35749582, 0.40538908],
        [0.25411846, 0.56719627, 0.15536945],
        [0.39653565, 0.93926115, 0.06624256],
        [0.85608473, 0.44292711, 0.85605254]]])

## Numpy的数据分析

In [66]:
a = np.array([
    [0, 1],
    [2, 4],
    [0, 8]
])

In [67]:
# 计算所有数字和
np.sum(a)

15

In [68]:
np.sum(a, axis = 1) # 表示按行求和

array([1, 6, 8])

In [69]:
np.sum(a, axis = 0) # 表示按列求和

array([ 2, 13])

In [70]:
np.max(a)

8

In [71]:
np.max(a, axis = 1) # 求每一行最大值

array([1, 4, 8])

In [72]:
np.min(a)

0

In [73]:
np.mean(a) # 均值

2.5

In [74]:
np.median(a) # 中位数：排序后为 0 0 1 2 4 8，取中间 1 个值，或中间 2 个值的均值

1.5

In [75]:
b = np.array([1,2,3,4,5]) #  方差：sum(每个数与均值的差值的平方) / 数据个数
print("均值：", np.mean(b))
print(b - np.mean(b))
print((b - np.mean(b)) ** 2)
print(np.sum((b - np.mean(b)) ** 2) / b.size)


均值： 3.0
[-2. -1.  0.  1.  2.]
[4. 1. 0. 1. 4.]
2.0


In [76]:
print("方差：", np.var(b))

方差： 2.0


In [77]:
print("标准差：", np.std(b))

标准差： 1.4142135623730951


## Numpy随机数

In [78]:
# 生成 [0, 1) 的随机数；random()、random_sample() 也可以生成同样范围的随机数
np.random.rand()

0.590994730759833

In [79]:
np.random.rand(3)

array([0.65622113, 0.71998682, 0.52379867])

In [80]:
# 生成[0,1) 随机数矩阵，2,4为形状
np.random.rand(2, 4)

array([[0.84495794, 0.93540035, 0.05277708, 0.19518067],
       [0.8079058 , 0.35911598, 0.65678336, 0.87461131]])

In [81]:
# 生成 [2, 6) 的随机整数, 10个
np.random.randint(2, 6, 10)

array([5, 2, 2, 5, 3, 4, 2, 5, 3, 2])

In [82]:
# 生成正态分布的随机数，均值是0，标准差1
np.random.randn()

-1.1742143036581325

In [83]:
np.random.randn(2, 3)

array([[1.21151139, 0.74377745, 0.11642964],
       [0.25321186, 0.57132578, 1.32798724]])

In [84]:
np.random.random(size=[2, 4])

array([[0.81491343, 0.52139651, 0.69659544, 0.06803486],
       [0.84768698, 0.00462999, 0.61277329, 0.03527221]])

In [85]:

np.random.random_sample([2, 4])

array([[0.16834721, 0.19150335, 0.70293273, 0.8636557 ],
       [0.57582014, 0.67476294, 0.19304997, 0.18250802]])

In [91]:
# 设置随机数种子
np.random.seed(seed=2)
np.random.rand(3, 2)

array([[0.4359949 , 0.02592623],
       [0.54966248, 0.43532239],
       [0.4203678 , 0.33033482]])

## Numpy的逻辑运算

In [92]:
a = np.array([True, False, False, True, False])

In [94]:
# 判断是否全是True，等价于 np.all(a)
a.all()

False

In [96]:
# 判断是否至少有一个True，等价于 np.any(a)
a.any()

True

- 把普通Numpy数组转成真假值数组

In [100]:
a = np.random.rand(3, 3)
a

array([[0.48306984, 0.50523672, 0.38689265],
       [0.79363745, 0.58000418, 0.1622986 ],
       [0.70075235, 0.96455108, 0.50000836]])

In [106]:
b = a < 0.6
b

array([[ True,  True,  True],
       [False,  True,  True],
       [False, False,  True]])

In [107]:
np.any(a < 0.6) # 判断数组中是否包含 <0.6 的值

True

## Numpy的排序

In [108]:
a = np.random.rand(10)
a

array([0.88952006, 0.34161365, 0.56714413, 0.42754596, 0.43674726,
       0.77655918, 0.53560417, 0.95374223, 0.54420816, 0.08209492])

In [109]:
a.sort() # 排序，不会返回数据，会修改数据本身

In [112]:
# 下面用 argsort 进行排序，不修改数据本身；先重新生成一组数据
a = np.random.rand(10)
a

array([0.76495986, 0.16922545, 0.29302323, 0.52406688, 0.35662428,
       0.04567897, 0.98315345, 0.44135492, 0.50400044, 0.32354132])

In [114]:
a.argsort() # 返回把数据从小到大排序后各元素在原数组中的索引，不修改数据本身

array([5, 1, 2, 9, 4, 7, 8, 3, 0, 6])

In [120]:
# index = np.array([1, 2, 1])
index = a.argsort()
a[index]

array([0.04567897, 0.16922545, 0.29302323, 0.32354132, 0.35662428,
       0.44135492, 0.50400044, 0.52406688, 0.76495986, 0.98315345])

In [122]:
a

array([0.76495986, 0.16922545, 0.29302323, 0.52406688, 0.35662428,
       0.04567897, 0.98315345, 0.44135492, 0.50400044, 0.32354132])

In [129]:
# 按照行或列排序
b = np.random.rand(3, 3)
b

array([[0.31325853, 0.04179771, 0.73839976],
       [0.65751239, 0.21463575, 0.41675344],
       [0.64384193, 0.66148133, 0.17047713]])

In [131]:
b.sort(axis = 0) # axis 指定排序轴：axis=0 对每一列内部排序；默认 axis=-1，对每一行内部排序 (会修改数据本身)
b

array([[0.31325853, 0.04179771, 0.17047713],
       [0.64384193, 0.21463575, 0.41675344],
       [0.65751239, 0.66148133, 0.73839976]])

In [134]:
# help(b.sort)

## 行向量和列向量

In [136]:
a = np.array([1, 2, 3])
a

array([1, 2, 3])

In [138]:
a.T

array([1, 2, 3])

In [140]:
a.transpose()

array([1, 2, 3])

In [142]:
b = np.arange(6).reshape(2, -1)
b

array([[0, 1, 2],
       [3, 4, 5]])

In [144]:
b.T

array([[0, 3],
       [1, 4],
       [2, 5]])

- 对于一维数组（向量），行向量和列向量在Numpy中表达没有区别
- 对于二维数组（矩阵），行向量和列向量是有区别的

In [145]:
# 三行一列
c = np.array([
    [1],
    [2],
    [3],
])

In [147]:
# 一行三列
c.T

array([[1, 2, 3]])

In [148]:
c.T.T

array([[1],
       [2],
       [3]])

- 如果需要区分行向量和列向量，可以把一维数组升级为一个二维数组

In [150]:
np.array([1, 2, 3]).reshape(3, -1)

array([[1],
       [2],
       [3]])

In [156]:
# 行向量生成技巧 row
np.r_[1, -2, 2:6, -4]

array([ 1, -2,  2,  3,  4,  5, -4])

In [163]:
# 列向量生成技巧1 column
np.c_[1, 2, 3].T

array([[1],
       [2],
       [3]])

In [162]:
# 列向量生成技巧2 column
np.c_[np.array([1, 2, 3])]

array([[1],
       [2],
       [3]])