# numpy

In [2]:
import numpy as np

In [2]:
data = np.random.randn(2,3)  # random 2x3 ndarray; an ndarray is an n-dimensional array whose elements all share one type

In [6]:
data.shape  # size of each dimension

(2, 3)

In [7]:
data.dtype  # data type of the elements

dtype('float64')

# 创建ndarray

In [8]:
np.array([1,2,4.5])  # create a 1-D array

array([1. , 2. , 4.5])

In [9]:
np.array([[1,2,4.5],[4,5,7]])  # create a multi-dimensional (here 2-D) array from nested lists

array([[1. , 2. , 4.5],
       [4. , 5. , 7. ]])

In [10]:
np.array([[1,2,4.5],[4,5,7]]).ndim  # number of dimensions: 1 for a 1-D array, 2 for 2-D, 3 for 3-D

2

In [11]:
np.zeros(5)  # 1-D array of all zeros

array([0., 0., 0., 0., 0.])

In [14]:
np.ones((2,3))  # 2x3 two-dimensional array of ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [15]:
np.arange(15)  # integers 0 through 14

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

![image.png](attachment:image.png)

# ndarray的数据类型

In [16]:
np.array([1,2,3], dtype = np.float64)  # specify the data type explicitly

array([1., 2., 3.])

In [3]:
arr = np.array([1.5,3,5])  # mixed int/float input is stored with a single float dtype
arr.dtype

dtype('float64')

In [29]:
# astype converts the data type and always creates a new array;
# float -> int32 drops the fractional part (1.5 becomes 1)
intarr = arr.astype(np.int32)
intarr  # an assignment alone displays nothing, so show the converted array explicitly

array([1, 3, 5])

In [26]:
arr.astype(intarr.dtype)  # cast using the dtype of another array

array([1, 3, 5])

In [52]:
np.issubdtype(intarr.dtype,np.integer)  # is this dtype a sub-type of np.integer?

True

In [53]:
np.float64.mro()  # list all parent classes (method resolution order)

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

![image.png](attachment:image.png)

# 数组的运算

In [30]:
arr * arr  # arithmetic between equal-sized arrays is applied element by element

array([ 2.25,  9.  , 25.  ])

In [31]:
arr + 5  # the scalar is added to every element

array([ 6.5,  8. , 10. ])

不同大小的数组间运算叫做广播

# 索引和切片

一维

In [60]:
arr = np.arange(10)  # 1-D array 0..9 used for the indexing examples

In [61]:
arr[5]  # single element at index 5

5

In [62]:
arr[0:5]  # slice: indices 0 up to (not including) 5

array([0, 1, 2, 3, 4])

In [63]:
arr[5:8] = 12  # assigning to a slice modifies the original data in place; no copy is made

In [64]:
arr  # show the array after the slice assignment

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [65]:
arr[5:8].copy()  # explicit copy of the slice

array([12, 12, 12])

二维

In [69]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])  # 3x3 array for the 2-D indexing examples
arr2d[1]  # a single index on a 2-D array selects a whole row

array([4, 5, 6])

In [73]:
arr2d[1,0]  # equivalent to arr2d[1][0]

4

切片索引

In [74]:
arr2d[:2]  # first two rows

array([[1, 2, 3],
       [4, 5, 6]])

In [75]:
arr2d[:2,1:]  # the comma separates the row slice from the column slice

array([[2, 3],
       [5, 6]])

布尔型索引

花式索引

# 数组转置和轴对换

In [77]:
arr2d.T  # transpose: rows become columns

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [83]:
arr=np.arange(16).reshape((2,2,4))  # 3-D array of shape (2, 2, 4) for the axis-swapping examples
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [81]:
arr.transpose(2,1,0)  # reorder the axes as (2, 1, 0), i.e. swap the first and third axes

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

In [82]:
arr.swapaxes(1,2)  # swap axis 1 and axis 2 (the second and third axes); returns a view, no data is copied

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

# 通用函数

In [84]:
np.sqrt(arr)  # unary ufunc: element-wise square root

array([[[0.        , 1.        , 1.41421356, 1.73205081],
        [2.        , 2.23606798, 2.44948974, 2.64575131]],

       [[2.82842712, 3.        , 3.16227766, 3.31662479],
        [3.46410162, 3.60555128, 3.74165739, 3.87298335]]])

一元

![image.png](attachment:image.png)

![image.png](attachment:image.png)

二元

![image.png](attachment:image.png)

# 利用数组进行数据分析

用数组表达式代替循环，称为矢量化

In [86]:
points = np.arange(-5,5,0.01)  # evenly spaced values from -5 to 4.99 in steps of 0.01
xs,xy = np.meshgrid(points,points)  # two 2-D coordinate grids: xs repeats points along every row, xy (the y grid) repeats points down every column
xs  # not displayed: by default only the last expression of a cell is shown
xy

array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ...,
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])

# 条件逻辑

In [108]:
arr = np.random.randn(4,4)
np.where(arr > 0,2,arr)  # where the condition holds take 2, otherwise keep the value from arr

array([[-1.50053729,  2.        , -1.47637728, -0.6084715 ],
       [-2.65680023, -0.56719153, -1.13180608, -0.49652489],
       [ 2.        , -0.89429088, -0.04488735, -0.96377758],
       [-0.97215273,  2.        ,  2.        ,  2.        ]])

In [110]:
np.where(arr>1,2,np.where(arr>0,0,arr))  # nested where: values > 1 become 2, remaining values > 0 become 0, everything else is kept

array([[-1.50053729,  2.        , -1.47637728, -0.6084715 ],
       [-2.65680023, -0.56719153, -1.13180608, -0.49652489],
       [ 0.        , -0.89429088, -0.04488735, -0.96377758],
       [-0.97215273,  2.        ,  0.        ,  0.        ]])

# 数学和统计方法

In [92]:
np.mean(points)  # mean over all elements

-0.005000000000106411

In [94]:
# Meaning of `axis` for mean/sum on an m x n array:
#   axis omitted -> aggregate all m*n values, returns a scalar
#   axis=0       -> collapse the rows: one value per column (n values)
#   axis=1       -> collapse the columns: one value per row (m values)
# `points` is 1-D, so axis=0 is its only axis and the result is a scalar.
points.mean(axis = 0)


-0.005000000000106411

In [95]:
points.sum(0)  # sum along axis 0

-5.000000000106411

![image.png](attachment:image.png)

![image.png](attachment:image.png)

# 布尔型

In [96]:
(points > 0 ).sum()  # count the True values (True counts as 1 when summed)

499

In [97]:
bools = np.array([False,True])
bools.any()  # is at least one value True?

True

In [98]:
bools.all()  # are all values True?

False

# 排序

In [102]:
arr = np.random.randn(2,3)
arr.sort()  # sorts in place along the last axis (each row here); returns None
arr

array([[-0.85239126, -0.29487536,  0.5506097 ],
       [ 0.18877074,  0.84253576,  1.25151915]])

In [104]:
arr.sort(1)  # sort in place along axis 1
arr

array([[-0.85239126, -0.29487536,  0.5506097 ],
       [ 0.18877074,  0.84253576,  1.25151915]])

# 唯一化及其他集合逻辑

In [105]:
values = np.array([6,7,88,0,9,2])
# Element-wise membership test: True where an element of `values` is in [2, 3, 6].
# np.isin supersedes np.in1d, which is deprecated as of NumPy 2.0.
np.isin(values,[2,3,6])

array([ True, False, False, False, False,  True])

![image.png](attachment:image.png)

# 文件输入输出

二进制文件

In [None]:
np.save('arr',arr)  # save in binary form to a .npy file

In [None]:
np.load('arr.npy')  # read the array back

In [None]:
np.savez('arr_zip',a = arr,b = arr)  # store arrays under the keys a and b in one .npz archive (not compressed; np.savez_compressed compresses)

In [None]:
zipp = np.load('arr_zip.npz')  # loading a .npz gives a dict-like object keyed by the names used when saving
zipp['a']  # read array a

文本文件

In [None]:
arr = np.loadtxt('array_ex.txt',delimiter=',')  # read a text file, specifying the delimiter

In [34]:
np.savetxt('arr_txt',arr)  # write arr as text to the file 'arr_txt'

# 线性代数

In [35]:
# Import only what is used: a star import hides where names come from and can
# shadow existing ones. `inv` is the numpy.linalg matrix-inverse function.
from numpy.linalg import inv

x = np.array([[1,2,3],[4,5,6]])    # shape (2, 3)
y = np.array([[6,7],[5,9],[8,9]])  # shape (3, 2)

In [36]:
np.dot(x,y)  # matrix product; same as x.dot(y)

array([[ 40,  52],
       [ 97, 127]])

In [44]:
x = np.random.randn(5,5)  # random 5x5 matrix (rebinds x)

In [42]:
x.T  # transpose

array([[-1.3718686 ,  0.47556945,  1.0890696 , -0.48392779],
       [ 0.40911239,  1.79235075,  1.6873749 ,  0.18360049],
       [-0.09725822, -1.40156046,  0.29935752, -0.40319722],
       [-0.32677956, -1.38020189,  1.095762  ,  0.89531169],
       [-2.64818056, -1.40496319,  0.13804596,  0.21860927]])

![image.png](attachment:image.png)

In [45]:
inv(x)  # inverse of the square matrix x (inv is imported from numpy.linalg)

array([[ 0.25568819,  1.97105644,  0.07860812,  0.54246606, -1.55091603],
       [ 0.18110728,  1.06690561, -0.05051308,  0.00535406, -0.69672316],
       [ 0.17925022, -0.36019843, -0.41452314,  0.22779345,  0.32903839],
       [ 0.07930939,  0.12071223,  0.3199276 ,  0.09499014,  0.21934526],
       [ 0.43554989,  0.40287425,  0.08331289,  0.0795148 , -0.5688384 ]])

# 随机数生成

In [46]:
np.random.normal(size=5)  # 5 samples from the normal distribution (default mean 0, std 1)

array([-0.34931357,  0.05717264, -0.96359   , -0.57770643,  0.05837501])

In [48]:
np.random.normal(size=(5,5))  # a tuple for size gives a multi-dimensional array of samples

array([[ 1.40267742, -1.34617112,  1.42508862,  2.74190767,  0.45811026],
       [ 1.24806449, -0.22752903,  2.03561586,  0.08744519, -2.04398689],
       [ 1.68107431,  0.95947326,  1.64305769,  0.61776704,  0.7162573 ],
       [ 1.3803436 ,  0.0036644 , -0.25766931,  0.12400773,  0.56461252],
       [-0.01488229, -0.41622692,  0.60063134,  0.37649601, -2.13056634]])

![image.png](attachment:image.png)

![image.png](attachment:image.png)

# 数组重塑

In [56]:
arr = np.arange(8)
arr.reshape((4,-1))  # rearrange the 1-D array into 4 rows x 2 columns; -1 means that dimension is inferred from the data

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [57]:
other_arr = np.ones((2,4))
arr.reshape(other_arr.shape)  # reshape to match the shape of another array

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [58]:
arr.ravel()  # flatten to 1-D; does not copy the data unless it has to

array([0, 1, 2, 3, 4, 5, 6, 7])

In [4]:
arr.flatten()  # flatten to 1-D; always returns a copy of the data

array([1.5, 3. , 5. ])

# 数组的合并拆分

In [5]:
# two 2x3 arrays used in the concatenation examples
arr1 = np.array([[1,2,3],[4,5,6]])
arr2 = np.array([[7,8,9],[10,11,12]])

In [6]:
np.concatenate([arr1,arr2],axis = 0)  # join a sequence of arrays along the given axis (axis 0: rows are appended)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [8]:
np.concatenate([arr1,arr2],axis = 1)  # axis 1: columns are appended side by side

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [10]:
from numpy.random import randn  # later cells call randn unqualified and rely on this import
arr = randn(5,2)  # random 5x2 array

In [11]:
first,second,third = np.split(arr,[1,3])  # split along axis 0 at indices 1 and 3 -> rows [0:1], [1:3] and [3:]

In [12]:
first  # rows [0:1] of the split

array([[ 2.04500547, -0.36190004]])

In [13]:
second  # rows [1:3] of the split

array([[ 0.57397815, -1.13497057],
       [ 0.18109315, -0.65121849]])

![image.png](attachment:image.png)

# 重复操作

In [19]:
arr = np.arange(3)  # [0, 1, 2]

In [20]:
arr.repeat(3)  # each of 0, 1, 2 is repeated three times

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [21]:
arr.repeat([2,3,4])  # 0 is repeated 2 times, 1 is repeated 3 times, 2 is repeated 4 times

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [22]:
arr = randn(2,2)
arr.repeat(2,axis = 0)  # repeat every row twice

array([[ 0.43505483, -1.23283496],
       [ 0.43505483, -1.23283496],
       [ 0.27870024, -2.84816555],
       [ 0.27870024, -2.84816555]])

In [23]:
np.tile(arr,2)  # tile the whole array twice along the horizontal (last) axis

array([[ 0.43505483, -1.23283496,  0.43505483, -1.23283496],
       [ 0.27870024, -2.84816555,  0.27870024, -2.84816555]])

In [25]:
np.tile(arr,(2,1))  # tile 2 times vertically and 1 time horizontally

array([[ 0.43505483, -1.23283496],
       [ 0.27870024, -2.84816555],
       [ 0.43505483, -1.23283496],
       [ 0.27870024, -2.84816555]])

# 花式索引的等价函数

In [40]:
# Multiples of 100 from 0 to 900, so each value shows which position it came from.
arr = np.arange(0, 1000, 100)
inds = [7, 1, 2, 6]
np.take(arr, inds)  # fetch the elements at positions 7, 1, 2 and 6

array([700, 100, 200, 600])

In [41]:
arr.put(inds,[40,41,42,43])  # in place: set positions 7, 1, 2, 6 of arr to 40, 41, 42, 43 respectively
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [42]:
inds = [2,0,2,1]
arr = randn(2,4)
arr.take(inds,axis=1)  # pick columns 2, 0, 2, 1 (indices may repeat)

array([[-0.00595775, -0.32191533, -0.00595775, -1.64747609],
       [-1.72936097,  0.65744465, -1.72936097,  2.17535572]])

# 广播

In [43]:
# Demean every column by subtracting the column means
arr = randn(4,3)
arr.mean(0)  # column means, shape (3,); not displayed because it is not the cell's last expression
demeaned = arr - arr.mean(0)  # the (3,) means are broadcast across the 4 rows
demeaned

array([[ 1.50955267,  0.38404593,  0.28258395],
       [ 1.31284836, -0.37263528,  1.00482661],
       [-0.87542568,  0.70670245, -0.70119715],
       [-1.94697536, -0.71811309, -0.58621342]])

In [47]:
# Broadcasting over rows: the row means must be a (4, 1) column to line up with the (4, 3) array
row_means = arr.mean(1).reshape((4,1))
row_means

array([[ 0.74479265],
       [ 0.66774503],
       [-0.270575  ],
       [-1.06436882]])

In [49]:
demeaned = arr -row_means.reshape((4,1))  # subtract each row's mean; the reshape is a no-op if row_means is already (4, 1)
demeaned

array([[ 0.8178301 ,  0.13787746, -0.95570756],
       [ 0.69817341, -0.54175613, -0.15641728],
       [-0.55178061,  1.47590163, -0.92412102],
       [-0.82953646,  0.84487991, -0.01534345]])

# ufunc实例方法

In [29]:
arr = np.arange(10)
np.add.reduce(arr)  # reduce takes an array and aggregates its values by repeatedly applying the binary operation (here: the sum)
# np.logical_and.reduce is equivalent to the all() method; for example, reducing arr[:, :-1] < arr[:, 1:] along axis 1 checks whether each row of a 2-D array is sorted

45

In [30]:
arr = np.arange(15).reshape((3,5))
np.add.accumulate(arr,axis=1)  # running totals along each row; the result has the same shape as the input

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]], dtype=int32)

In [31]:
arr = np.arange(3).repeat([1,2,2])  # [0, 1, 1, 2, 2]
np.multiply.outer(arr,np.arange(5))  # outer product (not a cross product): every element of arr times every element of arange(5)

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [32]:
np.subtract.outer(randn(3,4),randn(5))  # the result's shape is the two input shapes joined: (3, 4) + (5,) -> (3, 4, 5)

array([[[-1.14384505, -1.01183593, -1.56660792,  0.93154361,
         -1.02004963],
        [-0.09011289,  0.04189623, -0.51287576,  1.98527577,
          0.03368253],
        [-0.34729873, -0.21528961, -0.7700616 ,  1.72808993,
         -0.22350331],
        [-0.35366215, -0.22165303, -0.77642502,  1.72172651,
         -0.22986673]],

       [[-0.42525636, -0.29324724, -0.84801923,  1.6501323 ,
         -0.30146094],
        [-0.28457772, -0.1525686 , -0.70734059,  1.79081094,
         -0.1607823 ],
        [ 0.05386126,  0.18587038, -0.36890161,  2.12924992,
          0.17765668],
        [-0.28335732, -0.15134819, -0.70612019,  1.79203134,
         -0.1595619 ]],

       [[-0.76502826, -0.63301914, -1.18779114,  1.3103604 ,
         -0.64123284],
        [ 0.38484915,  0.51685827, -0.03791372,  2.46023781,
          0.50864457],
        [-1.01936987, -0.88736075, -1.44213274,  1.05601879,
         -0.89557445],
        [ 1.84098505,  1.97299417,  1.41822217,  3.91637371,
          1

In [33]:
arr = np.multiply.outer(np.arange(4),np.arange(5))  # 4x5 multiplication table
np.add.reduceat(arr,[0,2,4], axis=1)  # local reduce (sum) over the column slices arr[:, 0:2], arr[:, 2:4] and arr[:, 4:]

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]], dtype=int32)

![image.png](attachment:image.png)

In [34]:
def add_elements(x, y):
    """Return the sum of ``x`` and ``y``."""
    total = x + y
    return total


# np.frompyfunc builds a custom ufunc from a Python function plus two integers:
# the number of inputs (2) and the number of outputs (1).
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

# 结构化数组

结构化数组的各个元素可以被看成C语言中的结构体

In [36]:
dtype = [('x',np.float64),('y',np.int32)]  # list of (field name, field type) pairs
sarr = np.array([(1.5,6),(np.pi,-2)],dtype = dtype)  # each tuple becomes one record with fields x and y
sarr

array([(1.5       ,  6), (3.14159265, -2)],
      dtype=[('x', '<f8'), ('y', '<i4')])

In [37]:
sarr[0]  # an integer index returns one whole record

(1.5, 6)

In [38]:
sarr['x']  # a field name returns that field for every record

array([1.5       , 3.14159265])

In [39]:
dtype = [('x',[('a','f8'),('b','f4')]),('y',np.int32)]  # nested dtype: field x is itself a record with fields a and b
data = np.array([((1,2),5),((3,4),6)],dtype = dtype)
data['x']  # selecting x returns a structured array with fields a and b

array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

# 排序

In [27]:
arr = randn(3,5)
arr[:,0].sort()  # sort the first column in place: arr[:, 0] is a view, so arr itself is modified
arr

array([[-0.20209591,  0.3596223 ,  0.82835374,  0.30492033,  0.78372684],
       [ 0.26231561, -0.45221047, -0.68202043, -0.85952871,  0.55364928],
       [ 0.84227388,  1.33874839, -1.30524543, -0.50302878,  0.68416499]])

In [28]:
np.sort(arr)  # returns a sorted copy (along the last axis); arr itself is left unchanged

array([[-0.20209591,  0.30492033,  0.3596223 ,  0.78372684,  0.82835374],
       [-0.85952871, -0.68202043, -0.45221047,  0.26231561,  0.55364928],
       [-1.30524543, -0.50302878,  0.68416499,  0.84227388,  1.33874839]])

# Matrix类

![image.png](attachment:image.png)

In [None]:
# Define the operands here so the cell runs on a fresh kernel (X was never assigned).
X = np.array([[2., 1.], [1., 3.]])
y = X[:, :1]  # first column, kept 2-D with shape (2, 1)
np.dot(y.T,np.dot(X,y))  # quadratic form y^T X y with plain ndarrays -> 1x1 array

In [None]:
# `*` means matrix multiplication only for np.matrix objects; on plain ndarrays it is
# element-wise, so the operands are built as matrices in this cell.
Xm = np.matrix([[2., 1.], [1., 3.]])
ym = Xm[:, :1]  # slicing a matrix keeps it 2-D: shape (2, 1)
ym.T*Xm*ym  # quadratic form y^T X y -> 1x1 matrix

In [None]:
# .I (the inverse) exists only on np.matrix, not on ndarray, so build a matrix here.
Xm = np.matrix([[2., 1.], [1., 3.]])
Xm.I*Xm  # inverse times the matrix -> identity (up to floating-point error)