# Numpy学习笔记
- 数据结构
- 关键：数据类型和尺寸
- 数据索引和切片
- 通用函数调用（一元、二元或多元）
- 数据筛选（np.where）
- 数据统计和排序，集合计算
- 数组文件输入输出
- 线性代数计算

## 数据结构
NumPy的核心是多维数组对象ndarray，关键点在于数组内部元素的数据类型一致，便于大规模、统一地进行运算！

In [9]:
import numpy as np
from random import *

In [45]:
a = np.random.randn(2, 10)
a

array([[-0.81309217,  0.41135035, -0.65550612, -0.74730307, -0.80436756,
        -0.07117057,  1.05878718, -0.77568302, -0.06530494,  0.08762856],
       [-0.25332392,  0.0870552 , -0.01378358,  0.59182185,  0.79189442,
         0.58800379, -0.32533814, -0.94121369, -1.3479782 , -0.72438471]])

## 关键：数据类型和尺寸
类型：dtype, 转换类型astype  
尺寸：shape, 改变尺寸reshape和resize  


In [47]:
a.dtype

dtype('float64')

In [48]:
a.shape

(2, 10)

In [50]:
# Convert the element dtype (float values are truncated toward zero).
# The np.int alias was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin int is the drop-in replacement and maps to the default integer dtype.
a.astype(int)

array([[ 0,  0,  0,  0,  0,  0,  1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0, -1,  0]])

## 数据索引和切片
普通索引：a[1:5]  
切片索引：a[:5, 2:]  
花式索引：传入整数列表或整数数组，一次按指定顺序取出多行；对多个维度同时传入列表时，取出对应位置的单个元素  
转置和轴对换

In [51]:
# 普通索引
a[1:5]

array([[-0.25332392,  0.0870552 , -0.01378358,  0.59182185,  0.79189442,
         0.58800379, -0.32533814, -0.94121369, -1.3479782 , -0.72438471]])

In [52]:
# 切片索引
a[:5, 2:]

array([[-0.65550612, -0.74730307, -0.80436756, -0.07117057,  1.05878718,
        -0.77568302, -0.06530494,  0.08762856],
       [-0.01378358,  0.59182185,  0.79189442,  0.58800379, -0.32533814,
        -0.94121369, -1.3479782 , -0.72438471]])

In [66]:
# Fancy indexing: a flat list of row numbers selects those rows in the
# given order (row 1 first, then row 0), producing a (2, 10) copy.
# A nested list such as [[1, 0]] is avoided on purpose: since NumPy 1.23 it
# is treated as a 2-D index array and would return shape (1, 2, 10) instead.
b = [1, 0]
a[b]

array([[-0.25332392,  0.0870552 , -0.01378358,  0.59182185,  0.79189442,
         0.58800379, -0.32533814, -0.94121369, -1.3479782 , -0.72438471],
       [-0.81309217,  0.41135035, -0.65550612, -0.74730307, -0.80436756,
        -0.07117057,  1.05878718, -0.77568302, -0.06530494,  0.08762856]])

In [68]:
# 转置和轴对换
c = a.T
c

array([[-0.81309217, -0.25332392],
       [ 0.41135035,  0.0870552 ],
       [-0.65550612, -0.01378358],
       [-0.74730307,  0.59182185],
       [-0.80436756,  0.79189442],
       [-0.07117057,  0.58800379],
       [ 1.05878718, -0.32533814],
       [-0.77568302, -0.94121369],
       [-0.06530494, -1.3479782 ],
       [ 0.08762856, -0.72438471]])

In [77]:
d = np.arange(20).reshape(2, 2, 5)
d

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]]])

In [80]:
# swapaxes(0, 1) and transpose((1, 0, 2)) are equivalent here: both exchange
# the first two axes of d and leave the last axis in place.
print(d.swapaxes(0, 1))
d.transpose((1, 0, 2))

[[[ 0  1  2  3  4]
  [10 11 12 13 14]]

 [[ 5  6  7  8  9]
  [15 16 17 18 19]]]


array([[[ 0,  1,  2,  3,  4],
        [10, 11, 12, 13, 14]],

       [[ 5,  6,  7,  8,  9],
        [15, 16, 17, 18, 19]]])

## 通用函数调用（一元、二元或多元）
一元即对单个数值进行运算，如绝对值abs，开方sqrt，exp等  
二元即对两个数值进行运算，如加减乘除等  
多元或者统计方法：mean， sum， std， var， min， max等等

In [83]:
# 一元
np.sqrt(d)

array([[[0.        , 1.        , 1.41421356, 1.73205081, 2.        ],
        [2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ]],

       [[3.16227766, 3.31662479, 3.46410162, 3.60555128, 3.74165739],
        [3.87298335, 4.        , 4.12310563, 4.24264069, 4.35889894]]])

In [87]:
# 二元
e = d.reshape(2, 10)
np.add(a, e)

array([[-0.81309217,  1.41135035,  1.34449388,  2.25269693,  3.19563244,
         4.92882943,  7.05878718,  6.22431698,  7.93469506,  9.08762856],
       [ 9.74667608, 11.0870552 , 11.98621642, 13.59182185, 14.79189442,
        15.58800379, 15.67466186, 16.05878631, 16.6520218 , 18.27561529]])

In [90]:
# 多元
print('平均值',e.mean())
print('最大值', e.max())
print('标准差', np.std(a))

平均值 9.5
最大值 19
标准差 0.6389321853375332


## 数据筛选（np.where）
np.where(condition, x, y), 条件成立为x， 条件不成立为y  
可多个np.where嵌套，形成if-else条件语句  

In [91]:
e

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

In [93]:
np.where(e > 5, 6, np.where(e > 3, 8, np.where(e > 0, 10, 666)))

array([[666,  10,  10,  10,   8,   8,   6,   6,   6,   6],
       [  6,   6,   6,   6,   6,   6,   6,   6,   6,   6]])

## 排序，集合计算
排序一般用到sort()  
多维数组排序，加入轴号即可  
集合计算常用： unique, intersect1d, union1d等等

In [109]:
y = np.random.randn(10)
y

array([ 0.92879435,  0.23517274,  0.560154  , -0.55331837, -1.70368585,
        0.6493964 , -1.37855567,  0.50438693, -0.34634076, -0.80172691])

In [107]:
# Sort the array in place: ndarray.sort() reorders x itself and returns None.
# NOTE(review): x is not defined in any visible cell (this cell ran as
# In [107], before y was created in In [109]) — presumably a 10-element
# random array from a cell that was not kept; confirm before re-running.
x.sort()
x

array([-1.60990801, -1.37147495,  0.06527773,  0.10375607,  0.1285965 ,
        0.17947983,  0.22818647,  0.58991732,  0.80670465,  0.9456178 ])

In [110]:
# 创建副本排序
print(np.sort(y))
y

[-1.70368585 -1.37855567 -0.80172691 -0.55331837 -0.34634076  0.23517274
  0.50438693  0.560154    0.6493964   0.92879435]


array([ 0.92879435,  0.23517274,  0.560154  , -0.55331837, -1.70368585,
        0.6493964 , -1.37855567,  0.50438693, -0.34634076, -0.80172691])

## 数组文件输入输出
数组保存 np.save(filename, array), 数组读取 np.load(filename)  
读取文本 np.loadtxt(filename, delimiter=' '), 文本保存 np.savetxt  
保存多个数组 np.savez(filename.npz, a=array1, b=array2), 读取 np.load(filename.npz) 后，用保存时的关键字名作为键取出对应数组，如 m['a']、m['b']

In [112]:
np.save('666', x)

In [113]:
x

array([-1.60990801, -1.37147495,  0.06527773,  0.10375607,  0.1285965 ,
        0.17947983,  0.22818647,  0.58991732,  0.80670465,  0.9456178 ])

In [114]:
y

array([ 0.92879435,  0.23517274,  0.560154  , -0.55331837, -1.70368585,
        0.6493964 , -1.37855567,  0.50438693, -0.34634076, -0.80172691])

In [115]:
np.savez('123.npz', a=x, b=y)

In [119]:
m = np.load('123.npz')
m['b']

array([ 0.92879435,  0.23517274,  0.560154  , -0.55331837, -1.70368585,
        0.6493964 , -1.37855567,  0.50438693, -0.34634076, -0.80172691])

## 线性代数计算
numpy.linalg用于线性代数计算，如矩阵的逆inv、QR分解qr；对角线元素和trace、矩阵乘法dot则是numpy顶层函数（np.trace、np.dot）  

In [120]:
from numpy.linalg import inv, qr

In [122]:
X = np.random.randn(5, 5)
X

array([[-1.86726622, -0.45487866, -0.59261704,  0.20257722, -0.60797967],
       [-0.6317677 ,  0.42804284,  0.4899873 ,  0.80405747,  0.48441675],
       [-0.46662343, -1.7263396 , -0.29946582, -1.37191524, -0.45878488],
       [ 1.76961191, -0.61739434,  0.3326875 ,  0.83550296,  1.09464776],
       [ 1.7364809 , -0.10263833, -0.18980842, -1.50428487, -1.47998725]])

In [123]:
inv(X)

array([[-0.00334188, -0.21365295, -0.15857308,  0.2680044 ,  0.17882321],
       [-0.41952499, -0.50740429, -0.40252597, -0.49748552, -0.23691491],
       [-0.23348383,  2.36717472,  0.58918766,  0.0457201 ,  0.72189103],
       [ 0.85074728,  0.55753935, -0.33163908,  0.62133079,  0.39536402],
       [-0.80959672, -1.08577479,  0.10338085, -0.28844233, -0.94387399]])

In [124]:
q, r = qr(X)
r

array([[ 3.20163134,  0.03552206,  0.37352285, -0.43094321,  0.12819494],
       [ 0.        , -1.93928653, -0.41102644, -1.17274319, -0.38542367],
       [ 0.        ,  0.        , -0.7205612 , -1.28676781, -1.62905547],
       [ 0.        ,  0.        ,  0.        , -1.52112639, -1.01482661],
       [ 0.        ,  0.        ,  0.        ,  0.        , -0.59558232]])