# NumPy 

In [48]:
import numpy as np

## 文件的输入输出

In [79]:
# 文件的输入输出
arr = np.arange(10)
np.save('output/some_array', arr)
np.load('output/some_array.npy')

np.savez('output/array_archive.npz', a=arr, b=np.arange(20))
arch = np.load('output/array_archive.npz')
arch['b']

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

## 生成array对象

In [1]:
# list 转array
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [3]:
# 设置数据类型
data2 = [[1,2,3, 4], [5, 6, 7,8]]
arr2 = np.array(data2, dtype=np.int32)
arr2.dtype

dtype('int32')

In [5]:
# 基本属性
print(arr1.ndim,arr1.shape,arr1.dtype)

1 (5,) float64


In [7]:
# 生成连续的range值
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [8]:
# 将输入转为array
np.asarray([1,2])

array([1, 2])

In [18]:
# 生成二维的array
arr2d = np.array([[1,2,3],[4,5,6], [7,8,9]])
print(arr2d)
arr2d[0,2]

[[1 2 3]
 [4 5 6]
 [7 8 9]]


3

## reshape:数组重塑

In [53]:
# 改变arr 的形状
arr = np.arange(32).reshape((8,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [54]:
#－1表示维度大小由数据本身推断
arr = np.arange(12)
arr.reshape((3,4)).reshape((6,2))
arr.reshape((4,-1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

## ravel:数组展开

In [57]:
# 相反操作：扁平化和散开
arr = np.arange(15).reshape((5,3))
print(arr)
arr.ravel()

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [59]:
#列优先
arr = np.arange(12).reshape((3,4))
print(arr)
arr.ravel('F')

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

## concatenate&split:数组合并与拆分

In [61]:
#数组的合并
arr1 = np.array([[1,2,3], [4,5,6]])
arr2 = np.array([[7,8,9], [10,11,12]])
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [68]:
# 数组的拆分
# split(ary, indices_or_sections, axis=0) 
# 把一个数组从左到右按顺序切分 
# 参数： 
# ary:要切分的数组 
# indices_or_sections:如果是一个整数，就用该数平均切分，如果是一个数组，为沿轴切分的位置 
# axis：沿着哪个维度进行切向，默认为0，横向切分

np.random.seed(9)
arr = np.random.randn(5,2)
print(arr,'\n ##########')
f,s,t = np.split(arr, [1,3])
print(f,'\n ##########')
print(s,'\n ##########')
print(t,'\n ##########')

[[ 1.10855471e-03 -2.89544069e-01]
 [-1.11606630e+00 -1.28827567e-02]
 [-3.78361464e-01 -4.81135363e-01]
 [-1.51733118e+00 -4.90871981e-01]
 [-2.40680579e-01 -6.47947460e-01]] 
 ##########
[[ 0.00110855 -0.28954407]] 
 ##########
[[-1.1160663  -0.01288276]
 [-0.37836146 -0.48113536]] 
 ##########
[[-1.51733118 -0.49087198]
 [-0.24068058 -0.64794746]] 
 ##########


## repeat&tile:元素的重复扩展

In [77]:
# repeat 以行/列为单位进行指定倍数扩展
arr = np.arange(3)
print(arr.repeat(3))
print(arr.repeat([2,3,4]))

[0 0 0 1 1 1 2 2 2]
[0 0 1 1 1 2 2 2 2]


In [76]:
# 二维array的扩展
np.random.seed(34)
arr = np.random.randn(2,2)
print(arr,'\n ##########')
print(arr.repeat(2, axis=0),'\n ##########')
print(arr.repeat([2,3], axis=0),'\n ##########')
print(arr.repeat([2,3], axis=1),'\n ##########')

[[ 0.2438351  -0.74731818]
 [-1.56117699 -0.46425312]] 
 ##########
[[ 0.2438351  -0.74731818]
 [ 0.2438351  -0.74731818]
 [-1.56117699 -0.46425312]
 [-1.56117699 -0.46425312]] 
 ##########
[[ 0.2438351  -0.74731818]
 [ 0.2438351  -0.74731818]
 [-1.56117699 -0.46425312]
 [-1.56117699 -0.46425312]
 [-1.56117699 -0.46425312]] 
 ##########
[[ 0.2438351   0.2438351  -0.74731818 -0.74731818 -0.74731818]
 [-1.56117699 -1.56117699 -0.46425312 -0.46425312 -0.46425312]] 
 ##########


In [78]:
# tile 以array整体为单位进行指定倍数扩展
print(arr,'\n ##########')
print(np.tile(arr,2),'\n ##########')
print(np.tile(arr,(2,1)),'\n ##########')
print(np.tile(arr,(3,2)),'\n ##########')

[0 1 2] 
 ##########
[0 1 2 0 1 2] 
 ##########
[[0 1 2]
 [0 1 2]] 
 ##########
[[0 1 2 0 1 2]
 [0 1 2 0 1 2]
 [0 1 2 0 1 2]] 
 ##########


## 生成特定矩阵

In [9]:
# 生成全是1的矩阵
np.ones_like([[1,2], [3,4]])

# 生成单位矩阵
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

## astype:改变数据类型

In [10]:
arr3 = np.array(['2', ' 534'], dtype=np.string_)
arr3.astype(np.int32)

array([  2, 534])

In [11]:
arr2.astype(np.float64)

array([[1., 2., 3., 4.],
       [5., 6., 7., 8.]])

In [12]:
arr1.astype(arr2.dtype)

array([6, 7, 8, 0, 1])

## array之间的运算

In [13]:
arr2 * arr2

array([[ 1,  4,  9, 16],
       [25, 36, 49, 64]])

In [14]:
arr2*2

array([[ 2,  4,  6,  8],
       [10, 12, 14, 16]])

## 切片与索引

In [24]:
import numpy as np
from numpy.random import randn

In [37]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7,4)
data[(names == 'Bob') | (names == 'Joe')]

array([[-1.21907528,  1.2705255 , -0.54212363,  0.80974703],
       [-1.76553628, -1.80329136, -0.90210562, -0.74221657],
       [ 2.28587489,  0.30085764, -0.95233408, -1.54211855],
       [ 0.79052931, -1.37344705,  1.51744052, -0.18722192],
       [-0.25895972,  0.30646545, -0.03690562, -0.88666348]])

In [18]:
#利用整数数组进行索引
arr[np.ix_([1,5,7,2],[0,3,1,2])]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [23]:
# 将数组各元素的小数和整数部分以两个独立数组形式返回
arr = randn(7)*5
print(arr)
np.modf(arr)

[ 0.62653967  8.36582088 -4.735825    1.11698705  3.08388594 -3.18673245
 -0.17518684]


(array([ 0.62653967,  0.36582088, -0.735825  ,  0.11698705,  0.08388594,
        -0.18673245, -0.17518684]), array([ 0.,  8., -4.,  1.,  3., -3., -0.]))

In [70]:
# bool索引
bools = np.array([1,0,1,0,0])
print(bools.any(), bools.all())

True False


In [26]:
# array比较
np.array([1,2,3])<np.array([4,5,6])

array([ True,  True,  True])

## np.where:筛选

In [29]:
# 满足条件(condition)，输出x，不满足输出y
xarr = np.array([i/10.0 for i in range(11,16)])
yarr = np.array([i/10.0 for i in range(21,26)])
cond = np.array([1, 0, 1, 1, 0])
result = np.where(cond, xarr, yarr)
print(xarr)
print(yarr)
print(result)

# np.where(condition)只有条件 (condition)，没有x和y，则输出满足条件 (即非0) 元素的坐标
a = np.array([2,4,6,8,10])
print(np.where(a > 5))

[1.1 1.2 1.3 1.4 1.5]
[2.1 2.2 2.3 2.4 2.5]
[1.1 2.2 1.3 1.4 2.5]
(array([2, 3, 4], dtype=int64),)


In [31]:
arr = randn(4,4)
arr

array([[ 1.18029014,  0.71319241,  2.01895214,  1.37924678],
       [ 0.12525128, -1.81354103,  0.38562474, -0.16819841],
       [ 1.47071074, -0.26982163, -1.11628426,  0.35210247],
       [ 0.48588561, -1.32807385, -1.33038652,  0.5009419 ]])

In [33]:
# 赋固定数字
np.where(arr>0, 2, arr)

array([[ 2.        ,  2.        ,  2.        ,  2.        ],
       [ 2.        , -1.81354103,  2.        , -0.16819841],
       [ 2.        , -0.26982163, -1.11628426,  2.        ],
       [ 2.        , -1.32807385, -1.33038652,  2.        ]])

In [30]:
# 可以嵌套使用
cond1 = np.array([1, 0, 1])
cond2 = np.array([0, 0,1])
np.where(cond1 & cond2, 0, np.where(cond1, 1, np.where(cond2, 2,3)))

array([1, 3, 0])

## array的各种统计

In [35]:
# 给出array的各种统计值
arr = np.array([[1,1,1,1], [3,3,3,3]])
print(arr.mean(axis=1), np.mean(arr,axis=0), arr.var(), arr.max(), arr.min(), arr.argmin(), arr.cumsum(), arr.cumprod())
print((randn(100)>0).sum())

[1. 3.] [2. 2. 2. 2.] 1.0 3 1 0 [ 1  2  3  4  7 10 13 16] [ 1  1  1  1  3  9 27 81]
57


## sort排序

In [40]:
# 排序
arr = randn(5,3)
arr

array([[-0.09470479, -0.46708705,  0.3939774 ],
       [-0.52239632, -0.2943496 , -0.50798053],
       [ 0.15226808, -0.09832291, -0.92650488],
       [-1.3404149 , -0.68069766,  1.15292967],
       [ 0.94921177, -0.56955642,  0.09715116]])

In [42]:
# axis：数组排序时的基准，axis=0 按列排列；axis=1 按行排列
arr1 = np.sort(arr, axis=0)
print(arr1)
print('排序之后的结果是\n',arr)

[[-1.3404149  -0.68069766 -0.92650488]
 [-0.52239632 -0.56955642 -0.50798053]
 [-0.09470479 -0.46708705  0.09715116]
 [ 0.15226808 -0.2943496   0.3939774 ]
 [ 0.94921177 -0.09832291  1.15292967]]
排序之后的结果是
 [[-0.09470479 -0.46708705  0.3939774 ]
 [-0.52239632 -0.2943496  -0.50798053]
 [ 0.15226808 -0.09832291 -0.92650488]
 [-1.3404149  -0.68069766  1.15292967]
 [ 0.94921177 -0.56955642  0.09715116]]


In [43]:
# sort除了可以看做np的函数，也可以看成array的属性之一
arr.sort(axis=0)
arr

array([[-1.3404149 , -0.68069766, -0.92650488],
       [-0.52239632, -0.56955642, -0.50798053],
       [-0.09470479, -0.46708705,  0.09715116],
       [ 0.15226808, -0.2943496 ,  0.3939774 ],
       [ 0.94921177, -0.09832291,  1.15292967]])

## numpy的交并补

子集：[np.in1d](http://doc.codingdict.com/NumPy_v111/reference/generated/numpy.in1d.html)

In [45]:
values = np.array([6,0,0,3,2,5,6])
np.in1d(values, [2,3,6])

array([ True, False, False,  True,  True, False,  True])

In [46]:
# 交集
np.intersect1d(values, [2,3,6])

array([2, 3, 6])

## 生成各种概率分布

In [49]:
samples = np.random.normal(size=(4,4))
samples

array([[ 0.72839955, -0.17624434,  0.52071077,  0.66522577],
       [ 0.90677079, -0.84590989, -0.76505035, -1.16936409],
       [-0.67207943, -0.90062313, -0.95062222, -0.12348399],
       [ 1.52615952,  1.40666646,  1.61997447,  0.01507119]])

In [50]:
import random
np.random.seed(0)
print(np.random.rand(4))
np.random.seed(0)
print(np.random.rand(3))
print(np.random.permutation(np.arange(10)))
arr = np.array([12,3,4])
np.random.shuffle(arr)
print(arr)
print(np.random.randint(0,2, size=[2,2]))
print(np.random.randn(3,4))
print(np.random.binomial(0.4,0.6, size=[2,3]))
print(np.random.normal(size=(3,4)))
print(np.random.beta(3,4, size=[2,2]))

[0.5488135  0.71518937 0.60276338 0.54488318]
[0.5488135  0.71518937 0.60276338]
[8 6 4 0 2 5 9 1 7 3]
[ 3  4 12]
[[0 1]
 [0 1]]
[[ 2.54520078  1.08081191  0.48431215  0.57914048]
 [-0.18158257  1.41020463 -0.37447169  0.27519832]
 [-0.96075461  0.37692697  0.03343893  0.68056724]]
[[0 0 0]
 [0 0 0]]
[[ 1.46274045  1.53502913  0.56644004  0.14926509]
 [-1.078278    1.39547227  1.78748405 -0.56951726]
 [ 0.17538653 -0.46250554 -1.0858006   0.63973599]]
[[0.4677088  0.46166706]
 [0.62560303 0.38300476]]
