## 2.1 认识NumPy数组对象

In [2]:
import numpy as np                     # import the NumPy package

In [3]:
data = np.arange(12).reshape(3, 4)  # create an array with 3 rows and 4 columns

In [4]:
data 

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [5]:
type(data)

numpy.ndarray

In [6]:
data.ndim         # number of array dimensions; the result 2 means a two-dimensional array

2

In [7]:
data.shape        # size along each dimension; (3, 4) means 3 rows and 4 columns

(3, 4)

In [8]:
data.size         # total number of elements; 12 here (3 * 4)

12

In [9]:
data.dtype  # element type of the array; the recorded output is dtype('int32') (NumPy's default integer width depends on the platform)

dtype('int32')

## 2.2 创建NumPy数组

In [10]:
import numpy as np

In [11]:
data1 = np.array([1, 2, 3])                   # create a one-dimensional array

In [12]:
data1

array([1, 2, 3])

In [13]:
data2 = np.array([[1, 2, 3], [4, 5, 6]])   # create a two-dimensional array

In [14]:
data2

array([[1, 2, 3],
       [4, 5, 6]])

In [15]:
np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [16]:
np.ones((3, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [17]:
np.empty((5, 2))  # allocate a (5, 2) array without initialising it; the values displayed are arbitrary

array([[2.26658445e-317, 2.32210854e-322],
       [0.00000000e+000, 0.00000000e+000],
       [0.00000000e+000, 1.16095484e-028],
       [7.49232601e+247, 2.09601532e-110],
       [2.91237123e+257, 4.71294503e+257]])

In [18]:
np.arange(1, 20, 5)  # start at 1, advance in steps of 5, stop before 20

array([ 1,  6, 11, 16])

In [19]:
np.array([1, 2, 3, 4], float)  # the second argument is the dtype, so the integers are stored as floats

array([1., 2., 3., 4.])

In [20]:
np.ones((2, 3), dtype='float64')

array([[1., 1., 1.],
       [1., 1., 1.]])

## 2.3 ndarray对象的数据类型

### 2.3.1 查看数据类型

In [21]:
data_one = np.array([[1, 2, 3], [4, 5, 6]])

In [22]:
data_one.dtype.name

'int32'

### 2.3.2 转换数据类型

In [23]:
data = np.array([[1, 2, 3], [4, 5, 6]]) 

In [24]:
data.dtype

dtype('int32')

In [25]:
float_data = data.astype(np.float64) # convert the element type to float64

In [26]:
float_data.dtype

dtype('float64')

In [27]:
float_data = np.array([1.2, 2.3, 3.5])

In [28]:
float_data

array([1.2, 2.3, 3.5])

In [29]:
int_data = float_data.astype(np.int64) # convert to int64; the fractional part is dropped (3.5 becomes 3)

In [30]:
int_data

array([1, 2, 3], dtype=int64)

In [31]:
str_data = np.array(['1', '2', '3'])

In [32]:
int_data = str_data.astype(np.int64)  # parse the numeric strings into int64 values

In [33]:
int_data

array([1, 2, 3], dtype=int64)

## 2.4 数组运算

### 2.4.1 矢量化运算

In [34]:
import numpy as np

In [35]:
data1 = np.array([[1, 2, 3], [4, 5, 6]])

In [36]:
data2 = np.array([[1, 2, 3], [4, 5, 6]])

In [37]:
data1 + data2        # element-wise addition of the two arrays

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [38]:
data1 * data2        # element-wise multiplication of the two arrays

array([[ 1,  4,  9],
       [16, 25, 36]])

In [39]:
data1 - data2        # element-wise subtraction of the two arrays

array([[0, 0, 0],
       [0, 0, 0]])

In [40]:
data1 / data2       # element-wise division; the result is a float array

array([[1., 1., 1.],
       [1., 1., 1.]])

### 2.4.2 数组广播

In [41]:
import numpy as np

In [42]:
arr1 = np.array([[0], [1], [2], [3]])

In [43]:
arr1.shape

(4, 1)

In [44]:
arr2 = np.array([1, 2, 3])

In [45]:
arr2.shape

(3,)

In [46]:
arr1 + arr2

array([[1, 2, 3],
       [2, 3, 4],
       [3, 4, 5],
       [4, 5, 6]])

### 2.4.3 数组与标量间的运算

In [47]:
import numpy as np

In [48]:
data1 = np.array([[1, 2, 3], [4, 5, 6]])

In [49]:
data2 = 10

In [50]:
data1 + data2      # add the scalar 10 to every element

array([[11, 12, 13],
       [14, 15, 16]])

In [51]:
data1 * data2       # multiply every element by the scalar 10

array([[10, 20, 30],
       [40, 50, 60]])

In [52]:
data1 - data2        # subtract the scalar 10 from every element

array([[-9, -8, -7],
       [-6, -5, -4]])

In [53]:
data1 / data2       # divide every element by the scalar 10; the result is a float array

array([[0.1, 0.2, 0.3],
       [0.4, 0.5, 0.6]])

## 2.5 ndarray的索引和切片

### 2.5.1 整数索引和切片的基本使用

In [54]:
import numpy as np

In [55]:
arr = np.arange(8)    # create a one-dimensional array

In [56]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [57]:
arr[5]                  # element at index 5

5

In [58]:
arr[3:5]                # elements at indices 3 and 4 (the stop index 5 is excluded)

array([3, 4])

In [59]:
arr[1:6:2]              # every second element from index 1 up to, but not including, index 6

array([1, 3, 5])

In [60]:
import numpy as np

In [61]:
arr2d = np.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]]) # create a two-dimensional array

In [62]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [63]:
arr2d[1]            # row at index 1

array([4, 5, 6])

In [64]:
arr2d[0, 1]        # element at row 0, column 1

2

In [65]:
arr2d[:2]  # first two rows

array([[1, 2, 3],
       [4, 5, 6]])

In [66]:
arr2d[0:2, 0:2]  # rows 0-1 restricted to columns 0-1

array([[1, 2],
       [4, 5]])

In [67]:
arr2d[1, :2]  # first two columns of row 1

array([4, 5])

### 2.5.2 花式（数组）索引的基本使用

In [68]:
import numpy as np

In [69]:
# Build a 4x4 float array whose row i holds i, i+1, i+2, i+3.
# A (4, 1) column of row offsets is broadcast against a (4,) row of column
# offsets, so every cell is filled in one expression. This avoids allocating
# an uninitialised np.empty buffer and overwriting it row by row in a
# Python-level loop.
demo_arr = np.arange(4.0)[:, np.newaxis] + np.arange(4.0)

In [70]:
demo_arr

array([[0., 1., 2., 3.],
       [1., 2., 3., 4.],
       [2., 3., 4., 5.],
       [3., 4., 5., 6.]])

In [71]:
demo_arr[[0, 2]]        # rows at indices 0 and 2

array([[0., 1., 2., 3.],
       [2., 3., 4., 5.]])

In [72]:
demo_arr[[1, 3], [1, 2]]     # elements at positions (1, 1) and (3, 2)

array([2., 5.])

### 2.5.3 布尔型索引的基本使用

In [73]:
# Array holding the student names
student_name = np.array(['Tom', 'Lily', 'Jack', 'Rose'])

In [74]:
student_name

array(['Tom', 'Lily', 'Jack', 'Rose'], dtype='<U4')

In [75]:
# Array holding the student scores, one row per student
student_score = np.array([[79, 88, 80], [89, 90, 92], [83, 78, 85], [78, 76, 80]])

In [76]:
student_score

array([[79, 88, 80],
       [89, 90, 92],
       [83, 78, 85],
       [78, 76, 80]])

In [77]:
# Comparing student_name with the string 'Jack' produces a boolean array
student_name == 'Jack'

array([False, False,  True, False])

In [78]:
# Use the boolean array as an index into the score array student_score;
# the rows returned are the ones where the mask is True
student_score[student_name=='Jack']

array([[83, 78, 85]])

In [79]:
student_score[student_name=='Jack', :1]  # the row selected by the mask, restricted to its first column

array([[83]])

## 2.6 数组的转置和轴对换

In [80]:
arr = np.arange(12).reshape(3, 4)

In [81]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [82]:
arr.T      # transpose the array via the T attribute

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [83]:
arr = np.arange(16).reshape((2, 2, 4)) 

In [84]:
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [85]:
arr.transpose(1, 2, 0)   # reorder the axes with transpose(): the new axes are the old axes 1, 2, 0

array([[[ 0,  8],
        [ 1,  9],
        [ 2, 10],
        [ 3, 11]],

       [[ 4, 12],
        [ 5, 13],
        [ 6, 14],
        [ 7, 15]]])

In [86]:
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [87]:
arr.swapaxes(1, 0)    # exchange axes 1 and 0 with the swapaxes() method

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

## 2.7 NumPy通用函数

In [88]:
arr = np.array([4, 9, 16])

In [89]:
np.sqrt(arr)

array([2., 3., 4.])

In [90]:
np.abs(arr)

array([ 4,  9, 16])

In [91]:
np.square(arr)

array([ 16,  81, 256], dtype=int32)

In [92]:
x = np.array([12, 9, 13, 15])

In [93]:
y = np.array([11, 10, 4, 8])

In [94]:
np.add(x, y)      # element-wise sum of the two arrays

array([23, 19, 17, 23])

In [95]:
np.multiply(x, y) # element-wise product of the two arrays

array([132,  90,  52, 120])

In [96]:
np.maximum(x, y)  # element-wise maximum of the two arrays

array([12, 10, 13, 15])

In [97]:
np.greater(x, y)  # element-wise comparison: True where x > y

array([ True, False,  True,  True])

## 2.8 利用NumPy数组进行数据处理

### 2.8.1 将条件逻辑转为数组运算

In [98]:
arr_x = np.array([1, 5, 7])

In [99]:
arr_y = np.array([2, 6, 8])

In [100]:
arr_con = np.array([True, False, True])

In [101]:
result = np.where(arr_con, arr_x, arr_y)  # take from arr_x where arr_con is True, otherwise from arr_y

In [102]:
result

array([1, 6, 7])

### 2.8.2 数组统计运算

In [103]:
arr = np.arange(10)     

In [104]:
arr.sum()      # sum of all elements

45

In [105]:
arr.mean()     # arithmetic mean of all elements

4.5

In [106]:
arr.min()      # minimum value

0

In [107]:
arr.max()       # maximum value

9

In [108]:
arr.argmin()   # index of the minimum value

0

In [109]:
arr.argmax()   # index of the maximum value

9

In [110]:
arr.cumsum()   # cumulative (running) sum of the elements

array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45], dtype=int32)

In [111]:
arr.cumprod()  # cumulative (running) product; all zeros here because the first element of arr is 0

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

### 2.8.3 数组排序

In [112]:
arr = np.array([[6, 2, 7], [3, 6, 2], [4, 3, 2]])

In [113]:
arr

array([[6, 2, 7],
       [3, 6, 2],
       [4, 3, 2]])

In [114]:
arr.sort()  # sorts arr in place; with no argument each row is sorted independently

In [115]:
arr

array([[2, 6, 7],
       [2, 3, 6],
       [2, 3, 4]])

In [116]:
arr = np.array([[6, 2, 7], [3, 6, 2], [4, 3, 2]])

In [117]:
arr

array([[6, 2, 7],
       [3, 6, 2],
       [4, 3, 2]])

In [118]:
arr.sort(0)       # sort in place along axis 0, i.e. within each column

In [119]:
arr

array([[3, 2, 2],
       [4, 3, 2],
       [6, 6, 7]])

### 2.8.4 检索数组元素

In [120]:
arr = np.array([[1, -2, -7], [-3, 6, 2], [-4, 3, 2]])

In [121]:
arr

array([[ 1, -2, -7],
       [-3,  6,  2],
       [-4,  3,  2]])

In [122]:
np.any(arr > 0)      # True if at least one element of arr is greater than 0

True

In [123]:
np.all(arr > 0)      # True only if every element of arr is greater than 0

False

### 2.8.5 唯一化及其他集合逻辑

In [124]:
arr = np.array([12, 11, 34, 23, 12, 8, 11])

In [125]:
np.unique(arr)  # distinct values of arr, returned in sorted order

array([ 8, 11, 12, 23, 34])

In [126]:
# Membership test: True where the element of arr is one of 11 or 12.
# np.isin replaces np.in1d, which is deprecated in NumPy 2.0; for a
# one-dimensional arr the two return the same boolean array.
np.isin(arr, [11, 12])

array([ True,  True, False, False,  True, False,  True])

## 2.9 线性代数模块

In [127]:
arr_x = np.array([[1, 2, 3], [4, 5, 6]])

In [128]:
arr_y = np.array([[1, 2], [3, 4], [5, 6]])

In [129]:
arr_x.dot(arr_y)   # matrix product; equivalent to np.dot(arr_x, arr_y)

array([[22, 28],
       [49, 64]])

## 2.10 随机数模块

In [130]:
import numpy as np

In [131]:
np.random.rand(3, 3)     # random two-dimensional array of shape (3, 3)

array([[0.88271332, 0.89141968, 0.63803249],
       [0.68241233, 0.61433215, 0.4493645 ],
       [0.69786392, 0.07956108, 0.33203826]])

In [132]:
np.random.rand(2, 3, 3) # random three-dimensional array of shape (2, 3, 3)

array([[[0.22607381, 0.95736878, 0.47247963],
        [0.08801577, 0.99162113, 0.57570546],
        [0.8455419 , 0.40145295, 0.19392599]],

       [[0.96464639, 0.10871251, 0.34226665],
        [0.65920344, 0.74579427, 0.02163981],
        [0.90476319, 0.62201272, 0.20389291]]])

In [133]:
import numpy as np

In [134]:
np.random.seed(0)   # seed the random number generator

In [135]:
np.random.rand(5)   # random float array with 5 elements

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ])

In [136]:
np.random.seed(0)  # same seed as before, so the next draw repeats the earlier seeded values

In [137]:
np.random.rand(5)

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ])

In [138]:
np.random.seed()  # reseed without a fixed value; the next draw no longer matches the seeded output

In [139]:
np.random.rand(5)

array([0.6531853 , 0.37799153, 0.30382486, 0.83366978, 0.58776806])

## 2.11 案例—酒鬼漫步

In [140]:
# Import the NumPy package
import numpy as np
steps = 2000
# One coin flip per step: each draw is either 0 or 1
draws = np.random.randint(low=0, high=2, size=steps)
# A draw of 1 becomes a step of +1,
# a draw of 0 becomes a step of -1
direction_steps = np.where(draws > 0, 1, -1)
# Position after every step is the running total of the individual steps
distance = np.cumsum(direction_steps)

In [141]:
# max() gives the furthest position reached in the forward (positive) direction
distance.max()

-1

In [142]:
# min() gives the furthest position reached in the backward (negative) direction
distance.min()

-59

In [143]:
# Convert 15 metres into a number of steps (15 / 0.5)
steps = 15 / 0.5
# Mask of the positions that are at least `steps` away from the origin.
reached = np.abs(distance) >= steps
# argmax() returns the index of the first True, but it also returns 0 for an
# all-False mask, which would read as "reached at step 0". Check any() first
# and report -1 when the walk never gets that far.
reached.argmax() if reached.any() else -1

275