# 生成ndarray

### array方法

1. array接受任意 [序列型对象]，生成一个新的、包含所传递数据的NumPy数组


In [6]:
import numpy as np

In [7]:
data_1 = [6, 3.4, 9, 5, 7]

In [8]:
arr = np.array(data_1)

In [9]:
arr

array([6. , 3.4, 9. , 5. , 7. ])

2. 嵌套序列（例如等长度的列表）将会被转换为 <b>多维数组</b>

In [10]:
data_2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [11]:
arr = np.array(data_2)

In [12]:
arr

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

* ndim： 数组的维度数（轴的个数）
* shape：数组形状

In [13]:
arr.ndim

2

In [14]:
arr.shape

(2, 4)

# ndarray的数据类型

* ndarray的数据类型存储在dtype属性[元数据]当中
* 除非显式的指定，否则numpy会自动推断生成数组的数据类型

In [15]:
arr.dtype

dtype('int32')

In [16]:
arr1 = np.array([1, 2, 3], dtype='float')

In [17]:
arr2 = np.array([1, 2, 3], dtype='int')

In [18]:
arr1.dtype

dtype('float64')

In [19]:
arr2.dtype

dtype('int32')

* 使用astype方法替换数组的数据类型
* astype方法会先复制数组，并将副本转换成目标数据类型，再返回转换后的新数组
    * astype方法不会修改原数组

In [20]:
arr = np.array([1, 2, 3, 4, 5])

In [21]:
arr.dtype

dtype('int32')

In [22]:
float_arr = arr.astype(np.float64)

In [23]:
float_arr.dtype

dtype('float64')

* 浮点数转换整数类型是将小数点后的数据忽略
* float -> int -> float 将会丢失数据

In [24]:
arr = np.array([3.1, 4.32, 5.33])

In [25]:
float_arr = arr.astype(np.int32)

In [26]:
float_arr

array([3, 4, 5])

In [27]:
int_arr = float_arr.astype(np.float64)

In [28]:
int_arr

array([3., 4., 5.])

* 可以从其他ndarray数组中获取dtype属性，做类型转换

In [29]:
int_arr = np.arange(10)

In [30]:
calibers = np.array([.22, .39, .13, .567], dtype=np.float64)

In [31]:
int_arr.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

# NumPy数组算术

In [32]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

In [33]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [34]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [35]:
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [36]:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

* 数组之间的比较会产生布尔值

In [37]:
arr2 = np.array([[1., 0.22, 2.3], [7.2, 5.4, 1.4]])

In [38]:
arr2 > arr

array([[False, False, False],
       [ True,  True, False]])

# 基础索引与切片

In [39]:
arr = np.arange(10)

In [40]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [41]:
arr[5]

5

In [42]:
arr[5:8]

array([5, 6, 7])

In [43]:
arr[5: 8] = 12

In [44]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

* numpy切片返回的是数组的视图
* 任何对视图的修改，都会反映到原数组上

In [45]:
arr_slice = arr[5: 8]

In [46]:
arr_slice

array([12, 12, 12])

In [47]:
arr_slice[1] = 12345

In [48]:
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

* 使用copy方法获取视图的拷贝

In [49]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [53]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [50]:
arr2d[2]

array([7, 8, 9])

In [51]:
arr2d[0][2]

3

In [52]:
arr2d[0, 2]

3

In [54]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [55]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [57]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [59]:
old_values = arr3d[0].copy()

In [60]:
arr3d[0] = 42

In [61]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [62]:
arr3d[0] = old_values

In [63]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [64]:
arr3d[1, 0]

array([7, 8, 9])

In [65]:
arr3d[0, 1]

array([4, 5, 6])

* 数组的切片索引

In [66]:
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [67]:
arr[1: 6]

array([ 1,  2,  3,  4, 12])

In [68]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [69]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [70]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [71]:
arr2d[1, :2]

array([4, 5])

In [72]:
arr2d[:2, 2]

array([3, 6])

In [73]:
arr2d[:2, 1:] = 0

In [74]:
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

* 布尔索引

In [75]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [76]:
data = np.random.randn(7, 4)

In [77]:
data

array([[-0.0937235 ,  1.31799233,  1.34732102, -0.11028228],
       [-0.10158959, -0.49464566, -1.34454876, -1.08573752],
       [ 0.30141971,  0.95021548,  1.0065367 ,  1.55530229],
       [-1.04598614, -2.02119487,  1.27361311,  0.91942274],
       [ 0.49897635, -0.01285819,  1.01087831, -0.95450928],
       [-0.77709759,  0.32461758, -1.24237629, -0.19049386],
       [-0.3408991 ,  1.77429795,  0.66135014, -0.25484669]])

In [78]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [79]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [80]:
data[names == 'Bob']

array([[-0.0937235 ,  1.31799233,  1.34732102, -0.11028228],
       [-1.04598614, -2.02119487,  1.27361311,  0.91942274]])

布尔值数组的长度必须和数组轴索引的长度一致


布尔值数组的长度不正确时，旧版本NumPy的布尔索引不会报错；较新版本的NumPy则会抛出IndexError，因此应始终保证两者长度一致

In [84]:
data[names == 'Bob', : 2]

array([[-0.0937235 ,  1.31799233],
       [-1.04598614, -2.02119487]])

In [85]:
data[~(names != 'Bob')]

array([[-0.0937235 ,  1.31799233,  1.34732102, -0.11028228],
       [-1.04598614, -2.02119487,  1.27361311,  0.91942274]])

In [86]:
cond = names == 'Bob'

In [87]:
data[~cond]

array([[-0.10158959, -0.49464566, -1.34454876, -1.08573752],
       [ 0.30141971,  0.95021548,  1.0065367 ,  1.55530229],
       [ 0.49897635, -0.01285819,  1.01087831, -0.95450928],
       [-0.77709759,  0.32461758, -1.24237629, -0.19049386],
       [-0.3408991 ,  1.77429795,  0.66135014, -0.25484669]])

In [88]:
mask = (names == 'Bob') | (names == 'Will')

In [89]:
mask

array([ True, False,  True,  True,  True, False, False])

In [90]:
data[mask]

array([[-0.0937235 ,  1.31799233,  1.34732102, -0.11028228],
       [ 0.30141971,  0.95021548,  1.0065367 ,  1.55530229],
       [-1.04598614, -2.02119487,  1.27361311,  0.91942274],
       [ 0.49897635, -0.01285819,  1.01087831, -0.95450928]])

In [91]:
data

array([[-0.0937235 ,  1.31799233,  1.34732102, -0.11028228],
       [-0.10158959, -0.49464566, -1.34454876, -1.08573752],
       [ 0.30141971,  0.95021548,  1.0065367 ,  1.55530229],
       [-1.04598614, -2.02119487,  1.27361311,  0.91942274],
       [ 0.49897635, -0.01285819,  1.01087831, -0.95450928],
       [-0.77709759,  0.32461758, -1.24237629, -0.19049386],
       [-0.3408991 ,  1.77429795,  0.66135014, -0.25484669]])

需要注意：Python 中的布尔运算关键字 and 和 or 对布尔值数组无效，需要用 & 和 | 代替

In [92]:
data[data > 0] = 0

In [93]:
data

array([[-0.0937235 ,  0.        ,  0.        , -0.11028228],
       [-0.10158959, -0.49464566, -1.34454876, -1.08573752],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-1.04598614, -2.02119487,  0.        ,  0.        ],
       [ 0.        , -0.01285819,  0.        , -0.95450928],
       [-0.77709759,  0.        , -1.24237629, -0.19049386],
       [-0.3408991 ,  0.        ,  0.        , -0.25484669]])

In [94]:
data[names != 'Joe'] = 7

In [95]:
data

array([[ 7.        ,  7.        ,  7.        ,  7.        ],
       [-0.10158959, -0.49464566, -1.34454876, -1.08573752],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [-0.77709759,  0.        , -1.24237629, -0.19049386],
       [-0.3408991 ,  0.        ,  0.        , -0.25484669]])

# 神奇索引

神奇索引是numpy中的术语， 用于描述使用整数数组进行数据索引

In [96]:
arr = np.empty((8, 4))

In [97]:
for i in range(8):
    arr[i] = i

In [98]:
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [99]:
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [100]:
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [101]:
arr = np.arange(32).reshape((8, 4))

In [102]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [103]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

# 数组转置和换轴

转置是数组的一种特殊的重组形式

可以返回底层数据的视图而不需要复制任何内容

数组拥有transpose方法，也有特殊的T属性

In [107]:
arr = np.arange(32).reshape((8, 4))

In [108]:
arr @ arr.T

array([[  14,   38,   62,   86,  110,  134,  158,  182],
       [  38,  126,  214,  302,  390,  478,  566,  654],
       [  62,  214,  366,  518,  670,  822,  974, 1126],
       [  86,  302,  518,  734,  950, 1166, 1382, 1598],
       [ 110,  390,  670,  950, 1230, 1510, 1790, 2070],
       [ 134,  478,  822, 1166, 1510, 1854, 2198, 2542],
       [ 158,  566,  974, 1382, 1790, 2198, 2606, 3014],
       [ 182,  654, 1126, 1598, 2070, 2542, 3014, 3486]])

In [109]:
arr.transpose(1)

ValueError: axes don't match array

In [111]:
arr.T

array([[ 0,  4,  8, 12, 16, 20, 24, 28],
       [ 1,  5,  9, 13, 17, 21, 25, 29],
       [ 2,  6, 10, 14, 18, 22, 26, 30],
       [ 3,  7, 11, 15, 19, 23, 27, 31]])

In [112]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [113]:
arr = np.random.randn(6, 3)

In [114]:
arr

array([[-3.41767584e-02, -2.99219626e-01,  5.45555861e-01],
       [ 3.73957039e-01,  3.21428036e-02,  1.05984884e+00],
       [-2.04302536e+00, -1.36496800e+00,  1.73195270e+00],
       [-1.15145543e+00, -1.79252881e+00, -6.78871141e-01],
       [ 6.86309871e-04, -1.61327776e+00, -7.18108537e-02],
       [-5.28428281e-01,  3.15476200e-01,  2.50694536e+00]])

对于二维数组，np.dot() 等价于 @ 运算

In [117]:
np.dot(arr.T, arr)

array([[ 5.92005108,  4.7071139 , -3.70383094],
       [ 4.7071139 ,  7.86905309, -0.36960598],
       [-3.70383094, -0.36960598, 11.17136878]])

In [118]:
arr.T @ arr

array([[ 5.92005108,  4.7071139 , -3.70383094],
       [ 4.7071139 ,  7.86905309, -0.36960598],
       [-3.70383094, -0.36960598, 11.17136878]])

transpose 换轴

In [119]:
arr = np.arange(16).reshape((2, 2, 4))

In [123]:
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [121]:
arr.transpose((1, 0, 2))

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

对于更高维度的数组、transpose方法可以接受包含轴编号的元组、用于置换轴

swapaxes方法接收一对轴编号作为参数，并交换这两个轴，用于重组数据

* swapaxes方法返回的也是数据的视图、而没有对数据进行复制