In [1]:
import numpy as np

In [2]:
# The NumPy dtype hierarchy: build one unsigned-integer and one float array.
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)

In [3]:
# dtypes have abstract parent classes, such as np.integer and np.floating;
# np.issubdtype tests whether a dtype belongs to one of them.
np.issubdtype(ints.dtype, np.integer)

True

In [6]:
# Same check for the float array against the np.floating parent class.
np.issubdtype(floats.dtype, np.floating)

True

In [10]:
# List every parent class of np.float64 (its method resolution order).
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [11]:
# Reshaping arrays: start from a flat array of eight integers.
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [17]:
# Reshape to 2x4 first, then reshape that result to 4x2.
arr.reshape(2, 4).reshape(4, 2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [19]:
# -1 means the size of that dimension is inferred from the data itself.
arr.reshape(4, -1)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [20]:
# Flattening: turning a multi-dimensional array into a one-dimensional one.
# Build a 5x3 array to flatten in the following cells.
arr2 = np.arange(15).reshape(5, 3)
arr2

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [22]:
# ravel() does not produce a copy of the source data (when it can avoid one).
arr2.ravel()
# flatten() returns a copy of the data; only this last expression is displayed.
arr2.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [25]:
# C order versus Fortran order: this relates to the order in which data is stored.
arr2.ravel()
# Row-major (C) order, requested explicitly.
arr2.ravel(order="C")

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [24]:
# Column-major (Fortran) order.
arr2.ravel(order="F")

array([ 0,  3,  6,  9, 12,  1,  4,  7, 10, 13,  2,  5,  8, 11, 14])

In [27]:
# Merging and recombining data: two 2x3 arrays to join together.
arr3 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr4 = np.array([
    [7, 8, 9],
    [10, 11, 12],
])

In [28]:
# Join along axis 0: the rows of arr4 are appended below those of arr3.
np.concatenate((arr3, arr4), axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [29]:
# Join along axis 1: the columns of arr4 are appended to the right of arr3.
np.concatenate((arr3, arr4), axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [30]:
# NumPy provides simple shortcuts for the common kinds of concatenation.
# vstack stacks the arrays vertically.
np.vstack([arr3, arr4])

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [31]:
# hstack stacks the arrays horizontally.
np.hstack([arr3, arr4])

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [32]:
# randn is imported here and reused by later cells in this notebook.
from numpy.random import randn

# A 5x2 array of random values; no seed is set in the visible cells,
# so the numbers change on every run.
arr5 = randn(5, 2)

In [33]:
# Display the 5x2 random array created in the previous cell.
arr5

array([[ 1.35863263, -0.96127544],
       [ 0.09216231,  1.65104472],
       [-0.1317638 ,  0.40638075],
       [ 0.3388668 ,  0.31790493],
       [ 0.47044151,  0.69776066]])

In [42]:
# Split along the first axis at row indices 1 and 3, giving three pieces:
# rows [0:1], rows [1:3] and rows [3:].
first, second, third = np.split(arr5, [1, 3])

In [43]:
# Print the three pieces of the split, one after another.
print(first, second, third, sep="\n")

[[ 1.35863263 -0.96127544]]
[[ 0.09216231  1.65104472]
 [-0.1317638   0.40638075]]
[[ 0.3388668   0.31790493]
 [ 0.47044151  0.69776066]]


In [44]:
# Repeating elements: tile and repeat.
arr6 = np.arange(3)
# Repeat every element twice.
arr6.repeat(2)

array([0, 0, 1, 1, 2, 2])

In [45]:
# Each element can be given its own repeat count.
arr6.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [46]:
# A 2x2 array of random values, used in the repeat/tile examples that follow.
arr7 = randn(2, 2)
arr7

array([[-0.20261023,  1.01461797],
       [ 0.37792664, -0.06491642]])

In [48]:
# Repeat along axis 0: every row is duplicated, giving a 4x2 result.
arr7.repeat(2, axis=0)

array([[-0.20261023,  1.01461797],
       [-0.20261023,  1.01461797],
       [ 0.37792664, -0.06491642],
       [ 0.37792664, -0.06491642]])

In [49]:
# Repeat along axis 1: every column is duplicated, giving a 2x4 result.
arr7.repeat(2, axis=1)

array([[-0.20261023, -0.20261023,  1.01461797,  1.01461797],
       [ 0.37792664,  0.37792664, -0.06491642, -0.06491642]])

In [50]:
# Per-row repeat counts along axis 0: first row twice, second row three times.
arr7.repeat([2, 3], axis=0)

array([[-0.20261023,  1.01461797],
       [-0.20261023,  1.01461797],
       [ 0.37792664, -0.06491642],
       [ 0.37792664, -0.06491642],
       [ 0.37792664, -0.06491642]])

In [51]:
# tile stacks copies of the array along an axis, like laying tiles.
# Unlike repeat, it repeats the array as a whole rather than element by element.
np.tile(arr7, 2)

array([[-0.20261023,  1.01461797, -0.20261023,  1.01461797],
       [ 0.37792664, -0.06491642,  0.37792664, -0.06491642]])

In [52]:
# The second argument can be a tuple: here two copies down the rows, one across the columns.
np.tile(arr7, (2, 1))

array([[-0.20261023,  1.01461797],
       [ 0.37792664, -0.06491642],
       [-0.20261023,  1.01461797],
       [ 0.37792664, -0.06491642]])

In [53]:
# Function equivalents of fancy indexing: take and put.
arr = 100 * np.arange(10)
inds = [7, 1, 2, 6]

In [54]:
# Fancy indexing: select the elements at the positions listed in inds.
arr[inds]

array([700, 100, 200, 600])

In [55]:
arr.take(inds) # fetch the data at the given indices

array([700, 100, 200, 600])

In [56]:
# Set the data at the given indices; put modifies arr in place.
arr.put(inds, 42)
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [57]:
# Broadcasting: how arithmetic is carried out between arrays of different shapes.
# Here the scalar 4 is applied to every element.
arr = np.arange(5)
arr * 4

array([ 0,  4,  8, 12, 16])

In [58]:
# A 4x3 array of random values for the column-demeaning example.
arr = randn(4, 3)

In [59]:
# Display the current contents of arr.
arr

array([[-0.32443932,  1.1336235 , -0.61277699],
       [-1.62105563, -1.28072677, -0.75790843],
       [ 1.56912107, -0.11752484, -0.62659309],
       [-1.27348759,  1.76910226,  0.15660397]])

In [61]:
# Mean taken along axis 0, i.e. one value per column.
arr.mean(axis=0)

array([-0.41246537,  0.37611854, -0.46016863])

In [65]:
# Broadcasting: the vector of column means is subtracted from every row of arr.
demeaned = arr - arr.mean(axis=0)
demeaned

array([[ 0.08802605,  0.75750496, -0.15260836],
       [-1.20859026, -1.65684531, -0.29773979],
       [ 1.98158644, -0.49364338, -0.16642445],
       [-0.86102222,  1.39298372,  0.61677261]])

In [64]:
# Sanity check: the column means of the demeaned array.
demeaned.mean(axis=0)

array([  0.00000000e+00,   0.00000000e+00,   2.77555756e-17])

In [None]:
# 广播的原则：