## `NumPy`函数包使用

使用`Python`的`list`对象来创建一个一维的`ndarray`对象

In [1]:
import numpy as np

# Build a one-dimensional ndarray from a plain Python list that mixes
# integers with a float.
data = [1, 2.32, 5, 14]
arr = np.array(data)

# Show the array contents first, then the Python type of the object.
print(arr, type(arr), sep="\n")

[ 1.    2.32  5.   14.  ]
<class 'numpy.ndarray'>


In [3]:
# A nested list (one inner list per row) becomes a two-dimensional ndarray.
data = [
    [1, 2, 3, 4],
    [5, 6, 7.2, 8],
]
arr = np.array(data)

# Print, one per line: the array, its number of dimensions, its shape,
# its element dtype and its Python type.
print(arr, arr.ndim, arr.shape, arr.dtype, type(arr), sep="\n")

[[1.  2.  3.  4. ]
 [5.  6.  7.2 8. ]]
2
(2, 4)
float64
<class 'numpy.ndarray'>


其中`ndim`是数组的维数：
`arr.ndim == len(arr.shape)`

初始化时指定数据类型，或是对已有的`ndarray`数组进行数据类型的显式转换

In [4]:
# The element type can be fixed at construction time through `dtype` ...
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([2, 4, 5], dtype=np.int32)
# ... or an existing array can be converted explicitly with astype().
arr3 = arr2.astype(np.float64)

# One array per line: float64, int32, then the int32 data converted to float64.
print(arr1, arr2, arr3, sep="\n")

[1. 2. 3.]
[2 4 5]
[2. 4. 5.]


**注意:** 调用`astype`会创建一个新的数组，即原始数据的一个深拷贝。

**维度转换**

In [6]:
# The integers 0..23 laid out as 6 rows by 4 columns.
a = np.arange(24).reshape(6, 4)
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [8]:
# Flatten the array into a one-dimensional array and print it.
flat = a.flatten()
print(flat)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


In [9]:
# Rearrange the same elements into 3 rows by 8 columns.
print(a.reshape(3, 8))

[[ 0  1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14 15]
 [16 17 18 19 20 21 22 23]]


In [11]:
# Matrix transpose: print the original array, then its transpose.
print(a, a.transpose(), sep="\n")

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
[[ 0  4  8 12 16 20]
 [ 1  5  9 13 17 21]
 [ 2  6 10 14 18 22]
 [ 3  7 11 15 19 23]]


**高维数组索引**

In [12]:
# A 2 x 2 x 3 array: two blocks, each holding two rows of three values.
arr3d = np.array(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[7, 8, 9], [10, 11, 12]],
    ]
)

# Index progressively deeper: the whole array, the first block,
# the first row of the second block, and finally a single element.
print(arr3d, arr3d[0], arr3d[1, 0], arr3d[0, 0, 1], sep="\n")

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
[[1 2 3]
 [4 5 6]]
[7 8 9]
2


**高维数组切片**

In [13]:
arr2d = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)

# Print the full array, then the slice made of the first two rows
# restricted to every column from index 1 onwards.
print(arr2d, arr2d[:2, 1:], sep="\n")

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[2 3]
 [5 6]]


*花式索引：按（行, 列）索引对选取指定元素*

In [15]:
# The integers 0..31 laid out as 8 rows by 4 columns.
arr = np.arange(32).reshape((8, 4))

# Two index lists of equal length are paired element by element, so this
# picks the entries at (1, 0), (5, 3), (7, 1) and (2, 2).
row_idx, col_idx = [1, 5, 7, 2], [0, 3, 1, 2]
print(arr, arr[row_idx, col_idx], sep="\n")

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]
[ 4 23 29 10]


## 数组的元素级别运算

In [16]:
arr1 = np.array([1, 3, 5, 4, 5])
arr2 = np.array([4, 6, 1, 3, 4])

# Each function below works element by element; results are printed one per line.
print(
    np.sqrt(arr1),            # square root of every element of arr1
    np.square(arr2),          # every element of arr2 squared
    np.multiply(arr1, arr2),  # pairwise product arr1 * arr2
    np.subtract(arr1, arr2),  # pairwise difference arr1 - arr2
    sep="\n",
)

[1.         1.73205081 2.23606798 2.         2.23606798]
[16 36  1  9 16]
[ 4 18  5 12 20]
[-3 -3  4  1  1]


## 条件逻辑的数组运算:`np.where`

In [17]:
# A 4 x 4 array of random samples; the values differ on every run.
arr = np.random.randn(4, 4)

# np.where(condition, x, y) takes x where the condition holds and y elsewhere.
print(
    arr,
    np.where(arr > 0, 2, -2),   # 2 for positive entries, -2 for all others
    np.where(arr > 0, 2, arr),  # 2 for positive entries, original value otherwise
    sep="\n",
)

[[-2.53447497 -1.27498284 -0.5701695  -0.89540426]
 [-1.13578093 -1.14674653 -0.96796503 -0.10439865]
 [ 1.30379635  1.37969483  2.30692316 -0.38518423]
 [ 0.34855281 -0.47228758  1.23899577 -0.43812587]]
[[-2 -2 -2 -2]
 [-2 -2 -2 -2]
 [ 2  2  2 -2]
 [ 2 -2  2 -2]]
[[-2.53447497 -1.27498284 -0.5701695  -0.89540426]
 [-1.13578093 -1.14674653 -0.96796503 -0.10439865]
 [ 2.          2.          2.         -0.38518423]
 [ 2.         -0.47228758  2.         -0.43812587]]


## 各轴向上的统计运算
对一个数组内的值进行**求和、均值、方差、标准差、累积、累和以及最值求解**

在求均值时，如果不指定参数，则是求取全部值的平均值，如果指定关键字参数`axis=0`，则是沿着纵向求均值，`axis=1`，沿着横向求均值

In [22]:
arr = np.array(
    [
        [0, 1, 2],
        [3, 4, 5],
        [6, 7, 8],
    ]
)

# Mean of all elements, then the mean along axis 0 (one value per column)
# and along axis 1 (one value per row).
print(arr.mean(), arr.mean(axis=0), arr.mean(axis=1), sep="\n")

# Sum, standard deviation and variance over all elements.
print(arr.sum(), arr.std(), arr.var(), sep="\n")

# Smallest and largest value, printed on one line.
print(arr.min(), arr.max())

# Positions of the smallest and largest value, printed on one line.
print(arr.argmin(), arr.argmax())

4.0
[3. 4. 5.]
[1. 4. 7.]
36
2.581988897471611
6.666666666666667
0 8
0 8


多维数组就地排序，沿着某一个轴向进行排序

In [29]:
# Two independent arrays with the same contents, so each sort below
# starts from the same unsorted data.
arr1 = np.array(
    [
        [2, 3, 1],
        [8, 6, 7],
        [4, 9, 3],
    ]
)
arr2 = arr1.copy()

print(arr1)

# ndarray.sort() reorders the array in place.
# axis=0 sorts the values within each column.
arr1.sort(axis=0)
print(arr1)

# axis=1 sorts the values within each row.
arr2.sort(axis=1)
print(arr2)

[[2 3 1]
 [8 6 7]
 [4 9 3]]
[[2 3 1]
 [4 6 3]
 [8 9 7]]
[[1 2 3]
 [6 7 8]
 [3 4 9]]


### CSV数据的读取

In [31]:
# Read columns 1 and 2 of the comma-separated file; unpack=True hands back
# one array per selected column, bound here to `c` and `v`.
c, v = np.loadtxt(
    'datafile.csv',
    delimiter=',',
    usecols=(1, 2),
    unpack=True,
)
print(c, v, sep="\n")

[178.02 178.65 178.44 179.97 181.72 179.98 176.94 175.03 176.67 176.82
 176.21 175.   178.12 178.39 178.97 175.5  172.5  171.07 171.85 172.43
 172.99 167.37 164.34 162.71 156.41 155.15 159.54 163.03 156.49 160.5
 167.78 167.43 166.97 167.96 171.51 171.11 174.22 177.04 177.   178.46
 179.26 179.1  176.19 177.09 175.28 174.29 174.33 174.35 175.   173.03
 172.23 172.26 169.23 171.08 170.6  170.57 175.01 175.01 174.35 174.54
 176.42]
[38313330. 22676520. 29334630. 31464170. 32191070. 32130360. 24518850.
 31686450. 23273160. 27825140. 38426060. 48706170. 37568080. 38885510.
 37353670. 33772050. 30953760. 37378070. 33690660. 40113790. 50908540.
 40382890. 32483310. 60774900. 70583530. 54145930. 51467440. 68171940.
 72215320. 85957050. 44453230. 32234520. 45635470. 50565420. 39075250.
 41438280. 51368540. 32395870. 27052000. 31306390. 31087330. 34260230.
 29512410. 25302200. 18653380. 23751690. 21532200. 20523870. 23589930.
 22342650. 29461040. 25400540. 25938760. 16412270. 21477380. 33113340

In [33]:
# 算术平均值
mean_c = np.mean(c)
print(mean_c)
# 最值，最值之差
print(np.max(c), np.min(c))
print(np.ptp(c))
# 中位数
print(np.median(c))
# 方差
print(np.var(c))
# 根据方差定义求方差（方差指的是各个数据与所有数据算数平均数的离差平方和的均值）
print(np.mean((c - c.mean())**2))

172.61491803278687
181.72 155.15
26.569999999999993
174.35
37.598552862133815
37.598552862133815
