# 数据操作

## 数组的迭代

### for循环遍历

In [1]:
import numpy as np

In [2]:
# 1-D array: a plain for loop yields each scalar element in turn
arr1d = np.arange(12)
for value in arr1d:
    print(value)

0
1
2
3
4
5
6
7
8
9
10
11


In [3]:
# 2-D array: a plain for loop yields one row (a 1-D array) per step
arr2d = arr1d.reshape((4, 3))
for row in arr2d:
    print(row)

[0 1 2]
[3 4 5]
[6 7 8]
[ 9 10 11]


In [4]:
# 3-D array: a plain for loop yields one 2-D sub-array per step
arr3d = arr1d.reshape((2, 2, 3))
for matrix in arr3d:
    print(matrix)

[[0 1 2]
 [3 4 5]]
[[ 6  7  8]
 [ 9 10 11]]


### nditer逐个访问元素

In [5]:
# nditer visits every scalar element one at a time, whatever the number of dimensions
for elem in np.nditer(arr3d):
    print(elem)

0
1
2
3
4
5
6
7
8
9
10
11


In [6]:
# Iterate while reading the elements as a different dtype.
# op_dtypes: the dtype the iterator should present each element as.
# flags=['buffered']: the source array itself is left unchanged, so the
# converted values have to be staged in an internal buffer.
for elem in np.nditer(arr3d, flags=['buffered'], op_dtypes=float):
    print(elem)

0.0
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0


### ndenumerate枚举元素并返回索引

In [7]:
# ndenumerate yields an (index tuple, value) pair for every element
for position, item in np.ndenumerate(arr2d):
    print(position, item)

(0, 0) 0
(0, 1) 1
(0, 2) 2
(1, 0) 3
(1, 1) 4
(1, 2) 5
(2, 0) 6
(2, 1) 7
(2, 2) 8
(3, 0) 9
(3, 1) 10
(3, 2) 11


## 数组的连接

### concatenate连接数组

(arr1,arr2,...arrn):n个需要连接的数组<br>
axis：需要连接的轴[ [ [] ] ],由外向内依次是0,1,...m个维度<br>
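假设沿第x个轴连接，连接后的shape为：$d_0 \times d_1 \times \dots \times d_{x-1} \times \sum_{k=1}^{n} d_x^{(k)} \times d_{x+1} \times \dots \times d_m$，其中 $d_x^{(k)}$ 是第k个数组在轴x上的长度，其余各轴的长度必须相同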

In [8]:
# Concatenate along the default axis 0: the row counts add up (3 + 2),
# while both arrays share the same number of columns (3).
arr1 = np.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]])
arr2 = np.array([[10, 20, 30],
                 [40, 50, 60]])
arr_12 = np.concatenate((arr1, arr2))  # result shape is 5*3
print(arr_12, arr_12.shape)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 20 30]
 [40 50 60]] (5, 3)


In [9]:
# Joining along axis 1: both arrays have 2 rows, the column counts add up (5 + 3)
arr1 = np.array([[1, 2, 3, 4, 5], [4, 5, 6, 7, 8]])
arr2 = np.array([[10, 20, 30], [40, 50, 60]])
arr_12 = np.concatenate([arr1, arr2], axis=1)
print(arr_12, arr_12.shape)

[[ 1  2  3  4  5 10 20 30]
 [ 4  5  6  7  8 40 50 60]] (2, 8)


### stack连接数组(额外增加一个维度)

(arr1,arr2,...arrn):n个需要连接的数组<br>
axis：需要连接(增加)的轴[ [ [] ] ],由外向内依次是0,1,...m个维度<br>
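假设n个shape相同（均为 $d_0 \times d_1 \times \dots \times d_m$）的数组在位置x新增一个轴，连接后的shape为：$d_0 \times \dots \times d_{x-1} \times n \times d_x \times \dots \times d_m$（新增轴的长度等于数组个数n，其余各维不变，总维数加1）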

In [10]:
arr1 = np.array([[1, 2, 3, 4], [4, 5, 6, 7]])
arr2 = np.array([[10, 20, 30, 40], [40, 50, 60, 70]])
# Default axis=0: the new axis comes first -> shape (2, 2, 4)
arr_12 = np.stack([arr1, arr2])
print(arr_12, arr_12.shape)
# axis=2 is allowed as well: the new axis comes last -> shape (2, 4, 2)
arr_12 = np.stack([arr1, arr2], axis=2)
print(arr_12, arr_12.shape)

[[[ 1  2  3  4]
  [ 4  5  6  7]]

 [[10 20 30 40]
  [40 50 60 70]]] (2, 2, 4)
[[[ 1 10]
  [ 2 20]
  [ 3 30]
  [ 4 40]]

 [[ 4 40]
  [ 5 50]
  [ 6 60]
  [ 7 70]]] (2, 4, 2)


### 水平方向(hstack，按列拼接)、垂直方向(vstack，按行拼接)、深度方向(dstack)堆叠

In [11]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[10, 20, 30], [40, 50, 60]])
# hstack joins side by side, vstack joins top to bottom,
# dstack pairs the elements up along a new third axis.
a1 = np.hstack([arr1, arr2])
a2 = np.vstack([arr1, arr2])
a3 = np.dstack([arr1, arr2])
print(a1, a1.shape)
print(a2, a2.shape)
print(a3, a3.shape)

[[ 1  2  3 10 20 30]
 [ 4  5  6 40 50 60]] (2, 6)
[[ 1  2  3]
 [ 4  5  6]
 [10 20 30]
 [40 50 60]] (4, 3)
[[[ 1 10]
  [ 2 20]
  [ 3 30]]

 [[ 4 40]
  [ 5 50]
  [ 6 60]]] (2, 3, 2)


## 数组拆分

### array_split拆分数组
axis：指定分隔轴，默认0

In [12]:
arr_6 = np.arange(1, 7)
# 6 elements divide evenly into 3 parts, so array_split and split give the same result
print(np.array_split(arr_6, 3))
print(np.split(arr_6, 3))

[array([1, 2]), array([3, 4]), array([5, 6])]
[array([1, 2]), array([3, 4]), array([5, 6])]


In [13]:
# 6 elements do not divide evenly into 4 parts: array_split still works
# and simply returns parts of unequal length.
print(np.array_split(arr_6, 4))
# print(np.split(arr_6, 4))  # raises an error: split requires equal-sized parts

[array([1, 2]), array([3, 4]), array([5]), array([6])]


In [14]:
arr34 = np.arange(1, 13).reshape((3, 4))
# print(arr34.base)  # [ 1  2  3  4  5  6  7  8  9 10 11 12]
# Default axis 0 cuts the array into groups of rows
print(np.array_split(arr34, 3))
# axis=1 cuts the array into groups of columns
print(np.array_split(arr34, 4, axis=1))

[array([[1, 2, 3, 4]]), array([[5, 6, 7, 8]]), array([[ 9, 10, 11, 12]])]
[array([[1],
       [5],
       [9]]), array([[ 2],
       [ 6],
       [10]]), array([[ 3],
       [ 7],
       [11]]), array([[ 4],
       [ 8],
       [12]])]


In [15]:
# hsplit cuts the array into groups of columns
print(np.hsplit(arr34, 4))

[array([[1],
       [5],
       [9]]), array([[ 2],
       [ 6],
       [10]]), array([[ 3],
       [ 7],
       [11]]), array([[ 4],
       [ 8],
       [12]])]


## 数组搜索

### where返回value的索引
where(表达式)：返回满足条件的元素的索引，结果是按维度分组的元组<br>
argwhere(表达式)：返回同样的索引，结果是二维数组，每一行对应一个满足条件的元素

In [45]:
arr_7 = np.array([1, 2, 3, 4, 3, 2, 1])
# where(condition) returns a tuple holding one index array per dimension
print(np.where(arr_7 == 3))
print(np.where(arr_7 == 1))
# argwhere returns the matching indices as the rows of a 2-D array
print(np.argwhere(arr_7 == 3))

(array([2, 4], dtype=int64),)
(array([0, 6], dtype=int64),)
[[2]
 [4]]


In [17]:
# Locate the even numbers (the result holds their indices, not the values)
print(np.where(arr_7 % 2 == 0))

(array([1, 3, 5], dtype=int64),)


### searchsorted在已排序数组中查找元素插入位置
待插值：一个数字或一个列表<br>
side:'left'（默认）或'right'，从哪边寻找<br>
从左边查找：寻找i满足a[i-1]<插入值<=a[i]<br>
从右边查找：寻找i满足a[i-1]<=插入值< a[i]<br>

In [18]:
arr = np.array([1, 3, 5, 7, 9, 11])
# Default side='left': the slot just before the existing 9
print(np.searchsorted(arr, 9))
# side='right': the slot just after the existing 9
print(np.searchsorted(arr, 9, side='right'))
# A list of values is looked up in a single call
print(np.searchsorted(arr, [0, 5, 10]))

4
5
[0 2 5]


## 数组排序
np.sort():不改变原数组顺序,且生成新的数据（base=None）<br>
axis:默认-1，可指定排序维度<br>
kind：排序算法，有四个选项:quicksort（默认）,mergesort,heapsort,stable


In [32]:
# Fix the seed so that the random draw below is repeatable
np.random.seed(20)
# np.random.seed(None)
arr = np.random.randint(1, 20, size=10)
# np.sort hands back a sorted copy: arr keeps its order and the copy has no base
arr_sorted = np.sort(arr)
print(arr)
print(arr_sorted, arr_sorted.base)

[ 4 16 10 12  8  3  1  9 17  7]
[ 1  3  4  7  8  9 10 12 16 17] None


In [36]:
# 2-D array: by default only the last axis is sorted (each row on its own)
arr = np.random.randint(1, 20, size=12).reshape((3, 4))
print(arr)
print(np.sort(arr))
# axis=0 sorts each column instead
print(np.sort(arr, axis=0))

[[11  4 19  7]
 [10 15  5 12]
 [ 9  7 16 19]]
[[ 4  7 11 19]
 [ 5 10 12 15]
 [ 7  9 16 19]]
[[ 9  4  5  7]
 [10  7 16 12]
 [11 15 19 19]]


## 数组过滤

In [38]:
arr = np.random.randint(1, 20, size=10)
# Boolean mask: True wherever the element is greater than 10
arr_fliter = arr > 10
print(arr)
print(arr_fliter)
# Indexing with the mask keeps only the elements marked True
print(arr[arr_fliter])

[ 9 17 17 11  7 10  1 13 15 18]
[False  True  True  True False False False  True  True  True]
[17 17 11 13 15 18]


## squeeze数组降维

In [44]:
# ndmin=5 pads the shape with leading axes of length 1
arr = np.array([1, 2, 3], ndmin=5)
print(arr.shape)
# squeeze removes every axis of length 1
print(np.squeeze(arr).shape)

(1, 1, 1, 1, 3)
(3,)
