# numpy

> https://mofanpy.com/tutorials/data-manipulation/numpy/

## numpy 基础

In [1]:
import numpy as np


In [11]:
# Build a one-dimensional ndarray from a Python list; the notebook echoes its repr.
np.array([1, 2, 3])


array([1, 2, 3])

In [12]:
# A plain Python list is indexed from zero.
my_list = [1, 2, 3]
print(my_list[0])


1


In [13]:
# An ndarray built from the same values is read with the same index syntax.
my_array = np.array([1, 2, 3])
print(my_array[0])


1


In [14]:
# Item assignment works on both containers; only the printed form differs
# (the list prints with commas, the ndarray without).
my_list[0] = -1
print(my_list)
my_array[0] = -1
print(my_array)


[-1, 2, 3]
[-1  2  3]


## np.array 与 list 速度比较


In [15]:
import time

# Benchmark: add 1 to each of 100 numbers, 10000 times over, first with a
# pure-Python element loop and then with a vectorized in-place ndarray add.
# perf_counter() is monotonic and high-resolution, so it is the right clock
# for short intervals; time() can jump if the system clock is adjusted.
t0 = time.perf_counter()
# python list
l = list(range(100))
for _ in range(10000):
    # Index loop kept on purpose: this is the naive baseline being measured.
    for i in range(len(l)):
        l[i] += 1

t1 = time.perf_counter()
# numpy array
a = np.array(l)
for _ in range(10000):
    a += 1
# Stop the clock before printing so console I/O is not billed to NumPy.
t2 = time.perf_counter()

print("python list spend {:.3f}".format(t1-t0))
print("numpy list spend {:.3f}".format(t2-t1))


python list spend 0.101
numpy list spend 0.010


## 创建数据

```python
np.array()
array.ndim # 维度
```


In [16]:
# Two identical groups of four rows each, stacked into one 3-D array.
# The 1.5 / 1.2 entries make NumPy store every element as a float.
typical_row = [5, 10, 12, 6]
fractional_row = [1.5, 10, 1.2, 6]
group = [typical_row, fractional_row, typical_row, typical_row]
cars = np.array([group, group])
print("数据:", cars, "\n维度", cars.ndim)


数据: [[[ 5.  10.  12.   6. ]
  [ 1.5 10.   1.2  6. ]
  [ 5.  10.  12.   6. ]
  [ 5.  10.  12.   6. ]]

 [[ 5.  10.  12.   6. ]
  [ 1.5 10.   1.2  6. ]
  [ 5.  10.  12.   6. ]
  [ 5.  10.  12.   6. ]]] 
维度 3


## 添加数据

```python
np.concatenate()
np.expand_dims() #扩展维度
```


In [17]:
# Joining two 1-D arrays with concatenate yields one longer 1-D array.
car1 = np.array([4, 12, 2, 23])
car2 = np.array([2, 23, 2, 23])
cars = np.concatenate([car2, car1])
print(cars)
# Give each car a leading axis of length 1 so they can be stacked as rows.
# expand_dims(..., 0) and indexing with np.newaxis are equivalent here.
car1 = np.expand_dims(car1, 0)
car2 = car2[np.newaxis, :]
print("car1加维度后 ", car1, car1.ndim)
# Report car2's own dimensionality on the car2 line.
print("car2加维度后 ", car2, car2.ndim)
# With both arrays now 2-D, the default axis=0 stacks them as two rows.
all_cars = np.concatenate([car1, car2])
print("扩展后 ", all_cars)

[ 2 23  2 23  4 12  2 23]
car1加维度后  [[ 4 12  2 23]] 2
car2加维度后  [[ 2 23  2 23]] 2
扩展后  [[ 4 12  2 23]
 [ 2 23  2 23]]


## 合并数据

```python
np.concatenate()
np.vstack()
np.hstack()
```


In [18]:
# axis=0 appends the second copy below the first (more rows).
stacked_on_rows = np.concatenate([all_cars, all_cars], axis=0)
print("第一维度叠加: \n", stacked_on_rows)
# axis=1 appends the second copy to the right (more columns).
stacked_on_cols = np.concatenate([all_cars, all_cars], axis=1)
print("第二维度叠加: \n", stacked_on_cols)

第一维度叠加: 
 [[ 4 12  2 23]
 [ 2 23  2 23]
 [ 4 12  2 23]
 [ 2 23  2 23]]
第二维度叠加: 
 [[ 4 12  2 23  4 12  2 23]
 [ 2 23  2 23  2 23  2 23]]


In [19]:
# Two 2x2 matrices used to contrast vertical and horizontal stacking.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
pair = [a, b]
# vstack places b under a; hstack places b to the right of a.
print("竖直合并\n", np.vstack(pair))
print("水平合并\n", np.hstack(pair))

竖直合并
 [[1 2]
 [3 4]
 [5 6]
 [7 8]]
水平合并
 [[1 2 5 6]
 [3 4 7 8]]


## Size

```python
array.size
array.shape
```


In [20]:
# A 2 x 4 x 4 array used to inspect the element count and the per-axis lengths.
cars = np.array(
    [
        [[5, 10, 12, 6], [1.5, 10, 1.2, 6], [5, 10, 12, 6], [5, 10, 12, 6]],
        [[5, 10, 12, 6], [1.5, 10, 1.2, 6], [5, 10, 12, 6], [5, 10, 12, 6]],
    ]
)
# size is the total number of elements; shape is the tuple of axis lengths.
dims = cars.shape
print("总数:", cars.size)
print("第一维度:", dims[0])
print("所有维度:", dims)

总数: 32
第一维度: 2
所有维度: (2, 4, 4)


## 数据获取

### 单个获取

```python
array[1]
array[1,2,3]
array[1][1]
```

In [21]:
a = np.array([1,2,3])
# A single integer index returns one element.
print("a[0]",a[0])
# A list of indices returns a new array; indices may repeat and come in any order.
print("a[[0,1]]", a[[0,1]])
print("a[[1,1,0]]", a[[1,1,0]])

a[0] 1
a[[0,1]] [1 2]
a[[1,1,0]] [2 2 1]


### 切片

```python
array[:3]
array[2:4, 1:3]
```

In [22]:
# Slices are half-open: start is included, stop is excluded.
print("a[0:2]:", a[0:2])
# Omitting stop runs to the end of the array.
print("a[1:]:", a[1:])
# A negative start counts from the end, so this takes the last two elements.
print("a[-2:]:", a[-2:])

a[0:2]: [1 2]
a[1:]: [2 3]
a[-2:]: [2 3]


In [23]:
b = np.array([
    [1,2,3,4],
    [5,6,7,8],
    [9,10,11,12],
])
# A single slice applies to the first axis only: the first two rows.
print("b[:2] = ",b[:2])
# Comma-separated slices apply per axis: first two rows, first three columns.
print("b[:2,:3] = ",b[:2,:3])
# Rows 1 and 2, last two columns.
print("b[1:3,-2:] = ",b[1:3,-2:])

b[:2] =  [[1 2 3 4]
 [5 6 7 8]]
b[:2,:3] =  [[1 2 3]
 [5 6 7]]
b[1:3,-2:] =  [[ 7  8]
 [11 12]]


### 筛选

```python
array[array<0]
np.where(array < 0, x, y)
```

In [29]:
# A boolean mask selects the matching elements as a flat 1-D array.
print(b[b>7])
# np.where(cond, x, y) keeps the shape: x where cond is true, y elsewhere.
print(np.where(b>7, -1,b))
# x and y may both be scalars ...
print(np.where(b>7, -1,2))
# ... or both be arrays / array expressions.
print(np.where(b>7, b, -b-1))

[ 8  9 10 11 12]
[[ 1  2  3  4]
 [ 5  6  7 -1]
 [-1 -1 -1 -1]]
[[ 2  2  2  2]
 [ 2  2  2 -1]
 [-1 -1 -1 -1]]
[[-2 -3 -4 -5]
 [-6 -7 -8  8]
 [ 9 10 11 12]]


## 运算

### 加减乘除

- `+` `-` `*` `/`
- np.dot()

In [2]:
a = np.array([1,2,3,4])
# A scalar operand is applied to every element.
a + 3

array([4, 5, 6, 7])

In [3]:
# Element-wise subtraction of a scalar.
a - 3

array([-2, -1,  0,  1])

In [4]:
# Element-wise multiplication by a scalar.
a * 2

array([2, 4, 6, 8])

In [5]:
# True division yields a float array even though a holds integers.
a / 2

array([0.5, 1. , 1.5, 2. ])

In [6]:
# Matrix product of two 2x2 matrices, written two equivalent ways.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

# Method form on the left operand.
product_via_method = a.dot(b)
print(product_via_method)
# Module-level function form; same result.
product_via_function = np.dot(a, b)
print(product_via_function)


[[19 22]
 [43 50]]
[[19 22]
 [43 50]]


### 数据统计分析

- np.max() 最大值 np.min() 最小值 np.sum() 累加 np.prod() 累乘 np.count_nonzero() 计数
- np.std() 标准差 np.mean() 平均 np.median() 中位值

### 特殊运算符号

- np.argmax() np.argmin()
- np.ceil() np.floor() np.clip()

## 改变数据形态

### 改变形态

- array[np.newaxis, :] 增加维度
- array.reshape() 
- array.ravel(), array.flatten()
- array.transpose() 矩阵转置

In [10]:
a = np.array([1,2,3,4,5,6])
# Insert a new leading axis: shape (6,) becomes (1, 6).
a_2d = a[np.newaxis, :] 
print(a_2d)
print(a.shape, a_2d.shape)
# Indexing with None and np.expand_dims(axis=1) both add a trailing axis,
# giving shape (6, 1).
a_none = a[:, None]
a_expand = np.expand_dims(a, axis=1)
print(a_none)
print(a_expand)
print(a_none.shape, a_expand.shape)
# squeeze drops length-1 axes: every such axis by default, or only the one named.
a_squeeze = np.squeeze(a_expand)
a_squeeze_axis = a_expand.squeeze(axis=1)
print(a_squeeze)
print(a_squeeze.shape)
print(a_squeeze_axis.shape)

[[1 2 3 4 5 6]]
(6,) (1, 6)
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
(6, 1) (6, 1)
[1 2 3 4 5 6]
(6,)
(6,)


### 合并

- np.column_stack(), np.row_stack()（np.row_stack 是 np.vstack 的别名，自 NumPy 2.0 起已弃用）
- np.vstack(), np.hstack(), np.stack()
- np.concatenate()

In [3]:
feature_a = np.array([1,2,3,4,5,6])
feature_b = np.array([11,22,33,44,55,66])
# column_stack turns each 1-D input into one column of the 2-D result.
c_stack = np.column_stack([feature_a, feature_b])
c_stack

array([[ 1, 11],
       [ 2, 22],
       [ 3, 33],
       [ 4, 44],
       [ 5, 55],
       [ 6, 66]])

In [4]:
sample_a = np.array([0, 1.1])
sample_b = np.array([1, 2.2])
# Stack each 1-D sample as one row of a 2-D array. np.vstack is used because
# np.row_stack is only an alias of it and is deprecated as of NumPy 2.0.
c_stack = np.vstack([sample_a, sample_b])
print(c_stack)

[[0.  1.1]
 [1.  2.2]]


In [5]:
# With inputs already shaped (6, 1), hstack joins them side by side as columns.
feature_a = np.array([1,2,3,4,5,6])[:, None]
feature_b = np.array([11,22,33,44,55,66])[:, None]
c_stack = np.hstack([feature_a, feature_b])
print(c_stack)

# With inputs already shaped (1, 2), vstack joins them as rows.
sample_a = np.array([0, 1.1])[None, :]
sample_b = np.array([1, 2.2])[None, :]
c_stack = np.vstack([sample_a, sample_b])
print(c_stack)


[[ 1 11]
 [ 2 22]
 [ 3 33]
 [ 4 44]
 [ 5 55]
 [ 6 66]]
[[0.  1.1]
 [1.  2.2]]


In [6]:
# concatenate joins along whichever axis is named: 0 adds rows, 1 adds columns.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

for join_axis in (0, 1):
    print(np.concatenate([a, b], axis=join_axis))


[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]


### 拆解

np.vsplit(), np.hsplit(), np.split()

In [7]:
# vsplit cuts along the first axis (rows). An integer asks for that many
# equal parts; a list gives the row indices at which to cut.
a = np.array(
[[ 1, 11, 2, 22],
 [ 3, 33, 4, 44],
 [ 5, 55, 6, 66],
 [ 7, 77, 8, 88]]
)
print(np.vsplit(a, indices_or_sections=2))  # split into two equal parts
print(np.vsplit(a, indices_or_sections=[2,3]))  # split rows into [:2], [2:3], [3:]


[array([[ 1, 11,  2, 22],
       [ 3, 33,  4, 44]]), array([[ 5, 55,  6, 66],
       [ 7, 77,  8, 88]])]
[array([[ 1, 11,  2, 22],
       [ 3, 33,  4, 44]]), array([[ 5, 55,  6, 66]]), array([[ 7, 77,  8, 88]])]


In [8]:
# split is the general form: the axis argument chooses the direction of the cut.
a = np.array(
[[ 1, 11, 2, 22],
 [ 3, 33, 4, 44],
 [ 5, 55, 6, 66],
 [ 7, 77, 8, 88]]
)
print(np.split(a, indices_or_sections=2, axis=0))  # split into two equal parts
print(np.split(a, indices_or_sections=[2,3], axis=1))  # along the second axis, split into [:2], [2:3], [3:]


[array([[ 1, 11,  2, 22],
       [ 3, 33,  4, 44]]), array([[ 5, 55,  6, 66],
       [ 7, 77,  8, 88]])]
[array([[ 1, 11],
       [ 3, 33],
       [ 5, 55],
       [ 7, 77]]), array([[2],
       [4],
       [6],
       [8]]), array([[22],
       [44],
       [66],
       [88]])]


## 读取保存数据

### 加载常用数据格式

- np.loadtxt(), np.fromstring()

### 保存数据

- np.savetxt()
- np.save(), np.savez(), np.savez_compressed()

## 标准数据生成

### 创建统一数据

- np.zeros(), np.ones(), np.full()
- np.zeros_like(), np.ones_like(), np.full_like()

### 创建规则数据

- np.arange(), np.linspace()

### 快速创建再添加值

- np.empty(), np.empty_like()

## 随机数和随机操作

### 多种随机数生成

- np.random.rand(), np.random.random()
- np.random.randn(), np.random.randint()

### 给你施加随机

- np.random.choice()
- np.random.shuffle(), np.random.permutation()

### 随机分布

- np.random.normal(), np.random.uniform()

### 随机种子的重要性

- np.random.seed()