# Numpy Basics

## Numpy 简介
- Numpy 是 Python 语言的一个 library
- Numpy 主要支持矩阵操作和运算
- Numpy 非常高效，核心代码由 C 语言写成
- pandas 是基于 Numpy 构建的一个 library
- 现在比较流行的机器学习框架（Tensorflow/PyTorch等）语法都与 Numpy 比较接近

## Index
- 数据简介和数据的构造（ndarray）
- 数组的取值和赋值
- 数学运算
- broadcasting
- 逻辑运算
- 数据高级操作
- 文件输入输出
- Numpy 写一个 Softmax

In [1]:
import numpy as np

## 数组的初始化

### 使用 list 初始化数组

In [2]:
# 调用np.array 从 list 初始化一个数据
l = [1, 2, 3]
type(l)

list

In [3]:
a = np.array([1, 2, 3])
type(a)

numpy.ndarray

In [4]:
a

array([1, 2, 3])

In [5]:
a[2]

3

In [6]:
a[0] = 5
a

array([5, 2, 3])

In [7]:
# 二维数组
b = np.array([[1, 2, 3], [2, 3, 4]])
b

array([[1, 2, 3],
       [2, 3, 4]])

In [8]:
type(b)

numpy.ndarray

In [9]:
# 数组的维度
b.shape

(2, 3)

In [10]:
b[0, 2]

3

### 内置构建数组的函数

In [11]:
# 初始化一个以 0 填充的数组
a = np.zeros((2, 3))
a

array([[0., 0., 0.],
       [0., 0., 0.]])

In [12]:
# 初始化一个以 1 填充的数组
b = np.ones((1, 2))
b

array([[1., 1.]])

In [13]:
# 初始化一个以自定义数（比如 8 ）填充的数组
c = np.full((2, 2), 8)
c

array([[8, 8],
       [8, 8]])

In [14]:
# 3x3 matrix with ones on the first superdiagonal: k=1 shifts the line of
# ones one position above the main diagonal (k=0 would give the identity
# matrix).
d = np.eye(3, k=1)
d

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

In [15]:
# 随机数矩阵
e = np.random.random((3,2))
e

array([[0.72632882, 0.25040302],
       [0.29407011, 0.0981733 ],
       [0.98051318, 0.74730246]])

In [16]:
# 没有被初始化值的矩阵
f = np.empty((2, 3, 4))
f

array([[[3.10503618e+231, 3.10503618e+231, 1.03753786e-322,
         0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
         0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
         0.00000000e+000]],

       [[0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
         2.28093341e+243],
        [3.10503618e+231, 3.10503618e+231, 6.42285340e-323,
         0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
         0.00000000e+000]]])

In [17]:
g = np.arange(15)
g

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [18]:
g.shape

(15,)

In [19]:
g.dtype

dtype('int64')

### 指定数据类型的数组

In [20]:
# 不指定则自动匹配
int_arr = np.array([1,2,3,4,5])
'%s %s' % (int_arr, int_arr.dtype)

'[1 2 3 4 5] int64'

In [21]:
float_arr = np.array([1, 2, 3, 4, 5], dtype=np.float64)
'%s %s' % (float_arr, float_arr.dtype)

'[1. 2. 3. 4. 5.] float64'

### 使用 astype 复制数组并转换数据类型

In [22]:
float_arr = int_arr.astype(np.float64)
'%s %s' % (float_arr, float_arr.dtype)

'[1. 2. 3. 4. 5.] float64'

In [23]:
# astype with an integer dtype discards the fractional part of each float
# (3.5 -> 3 and -2.3 -> -2) and returns a new array.
float_arr = np.array((3.5, 2.3, 3.4, -2.3))
int_arr = float_arr.astype('int64')
'%s(%s) => %s(%s)' % (
    float_arr, float_arr.dtype,
    int_arr, int_arr.dtype,
)

'[ 3.5  2.3  3.4 -2.3](float64) => [ 3  2  3 -2](int64)'

In [24]:
# Build an array of byte strings. The last element ('asdfa') is not a valid
# number, so converting this array to float with astype raises an exception.
# np.bytes_ is used because the np.string_ alias was removed in NumPy 2.0.
str_arr = np.array(['1.2', '2.4', '5.6', 'asdfa'], dtype=np.bytes_)
str_arr

array([b'1.2', b'2.4', b'5.6', b'asdfa'], dtype='|S5')

In [25]:
# float_arr = str_arr.astype(dtype=np.float64)
# float_arr

In [26]:
# astype also accepts the dtype object of another array, so one array can
# be cast to match another array's element type.
float_arr = np.array((2.3, 4.5, 6.5))
int_arr = np.arange(10)
int_arr.astype(float_arr.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

## 数组的取值和赋值

In [27]:
a = np.array([[1,2,3,4],[5,6,7,8],[9,10,12,11]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 12, 11]])

### 切片 - 对原数组的引用

In [28]:
# Slice a multi-dimensional array along several axes at once:
# rows 0-1 and columns 2-3.
b = a[0:2,2:4]
b

array([[3, 4],
       [7, 8]])

In [29]:
# 对切片出来的数组赋值, b 只是对a 切片区的引用，copy() 函数可以实现复制
b[0, 0] = 9999
b, a

(array([[9999,    4],
        [   7,    8]]), array([[   1,    2, 9999,    4],
        [   5,    6,    7,    8],
        [   9,   10,   12,   11]]))

In [30]:
# An integer index selects the row at index 1 and drops that axis,
# so the result is 1-D with shape (4,).
row_1 = a[1, :]
row_1, row_1.shape

(array([5, 6, 7, 8]), (4,))

In [31]:
# A slice (1:2) selects the row at index 1 but keeps the axis,
# so the result is still 2-D with shape (1, 4).
row_2 = a[1:2, :]
row_2, row_2.shape

(array([[5, 6, 7, 8]]), (1, 4))

In [32]:
# Indexing with a list of row indices also keeps the axis: [0, 2] picks
# rows 0 and 2, giving shape (2, 4). Likewise a[[1], :] would have the
# same shape as a[1:2, :].
row_3 = a[[0, 2], :]
row_3, row_3.shape

(array([[   1,    2, 9999,    4],
        [   9,   10,   12,   11]]), (2, 4))

In [33]:
# 在第二个维度（列）切片，结果还是行，只有一个维度
col_1 = a[:, 1]
col_1, col_1.shape

(array([ 2,  6, 10]), (3,))

In [34]:
# 对每一行的每一列切片
col_2 = a[:, 1:2]
col_2, col_2.shape

(array([[ 2],
        [ 6],
        [10]]), (3, 1))

### 通过下标 - 重建新数组

In [35]:
a

array([[   1,    2, 9999,    4],
       [   5,    6,    7,    8],
       [   9,   10,   12,   11]])

In [36]:
b = a[[0, 1, 2, 2], [2, 1, 2, 3]]
b, b.shape

(array([9999,    6,   12,   11]), (4,))

In [37]:
# 等同于这样：
c = np.array([a[0,2], a[1,1], a[2,2], a[2,3]])
c, c.shape

(array([9999,    6,   12,   11]), (4,))

In [38]:
# Pick one element per column: row indices come from i and column indices
# are 0..3, i.e. elements (0, 0), (2, 1), (1, 2), (0, 3).
i = np.array([0, 2, 1, 0])
# np.arange already returns an ndarray, so no np.array() wrapper is needed.
a[i, np.arange(4)]

array([ 1, 10,  7,  4])

In [39]:
# Add 10 in place to the elements selected by the (i, column) index pairs.
# np.arange already returns an ndarray, so the np.array() wrapper is dropped.
a[i, np.arange(4)] += 10
a

array([[  11,    2, 9999,   14],
       [   5,    6,   17,    8],
       [   9,   20,   12,   11]])

### 通过条件

In [40]:
bool_index = a>2
bool_index, bool_index.shape

(array([[ True, False,  True,  True],
        [ True,  True,  True,  True],
        [ True,  True,  True,  True]]), (3, 4))

In [41]:
a[bool_index]

array([  11, 9999,   14,    5,    6,   17,    8,    9,   20,   12,   11])

## 数据类型

In [42]:
# 通过 dtype 看 numpy 的数组中元素的数据类型
x = np.array([2, 1])
y = np.array([1.0, 2.0])
z = np.array([1, 2], dtype=np.float64)
x.dtype, y.dtype, z.dtype

(dtype('int64'), dtype('float64'), dtype('float64'))

## 数学运算

In [43]:
x = np.array([[1,2],[3,4]])
y = np.array([[5,6],[7,8]])
x, y

(array([[1, 2],
        [3, 4]]), array([[5, 6],
        [7, 8]]))

### 逐元素相加

In [44]:
x + y

array([[ 6,  8],
       [10, 12]])

In [45]:
# 与上等同
np.add(x, y)

array([[ 6,  8],
       [10, 12]])

### 逐元素相减

In [46]:
x - y

array([[-4, -4],
       [-4, -4]])

In [47]:
# 与上等同
np.subtract(x, y)

array([[-4, -4],
       [-4, -4]])

### 逐元素相乘

In [48]:
x * y

array([[ 5, 12],
       [21, 32]])

In [49]:
# 与上等同
np.multiply(x, y)

array([[ 5, 12],
       [21, 32]])

### 逐元素相除

In [50]:
x/y

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

In [51]:
# 与上等同
np.divide(x, y)

array([[0.2       , 0.33333333],
       [0.42857143, 0.5       ]])

### 逐元素求平方根

In [52]:
np.sqrt(x)

array([[1.        , 1.41421356],
       [1.73205081, 2.        ]])

### 向量内积

In [53]:
v = np.array([9, 10])
w = np.array([10, 11])

In [54]:
v.dot(w)

200

In [55]:
# 与上等同
np.dot(v, w)

200

### 矩阵乘法

In [56]:
x

array([[1, 2],
       [3, 4]])

In [57]:
v.dot(x)

array([39, 58])

In [58]:
x.dot(v)

array([29, 67])

In [59]:
x.dot(y)

array([[19, 22],
       [43, 50]])

In [60]:
y.dot(x)

array([[23, 34],
       [31, 46]])

### 矩阵转置

In [61]:
# 一维数组的转置是它自己
v.T

array([ 9, 10])

In [62]:
# 二维数组的转置
x.T

array([[1, 3],
       [2, 4]])

### 点积

In [63]:
arr = np.random.random((6, 3))
arr

array([[0.95801962, 0.71207858, 0.02549979],
       [0.26201689, 0.35137539, 0.78078507],
       [0.35862835, 0.69337681, 0.07969498],
       [0.01624003, 0.33134177, 0.78040369],
       [0.84462217, 0.19032069, 0.7791057 ],
       [0.31202472, 0.64722625, 0.61003117]])

In [64]:
arr.T.dot(arr)

array([[1.9260785 , 1.39099679, 1.11865759],
       [1.39099679, 1.67620314, 1.14945363],
       [1.11865759, 1.14945363, 2.2048005 ]])

In [65]:
arr.dot(arr.T)

array([[1.42550774, 0.52113406, 0.83934398, 0.27139978, 0.96455492,
        0.77535743],
       [0.52113406, 0.80174283, 0.39982688, 0.73000806, 0.89649337,
        0.78547835],
       [0.83934398, 0.39982688, 0.61573699, 0.29776309, 0.49696022,
        0.60928901],
       [0.27139978, 0.73000806, 0.29776309, 0.71908103, 0.68479485,
        0.69559097],
       [0.96455492, 0.89649337, 0.49696022, 0.68479485, 1.35661427,
        0.86200231],
       [0.77535743, 0.78547835, 0.60928901, 0.69559097, 0.86200231,
        0.88839928]])

### 高维转置

In [66]:
arr = np.arange(16).reshape(2, 2, 4)
arr, arr.shape

(array([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7]],
 
        [[ 8,  9, 10, 11],
         [12, 13, 14, 15]]]), (2, 2, 4))

In [67]:
arr.transpose(0, 1, 2)

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [68]:
# Swap axes 1 and 2 (axis 0 stays in place).
arr.transpose(0, 2, 1)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [69]:
# 同上
arr.swapaxes(1, 2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [70]:
arr.transpose(2, 1, 0)

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

### matmul

In [71]:
x = np.arange(24).reshape(2,3,4)
y = np.arange(8).reshape(4,2)
z = np.matmul(x, y)
z, z.shape

(array([[[ 28,  34],
         [ 76,  98],
         [124, 162]],
 
        [[172, 226],
         [220, 290],
         [268, 354]]]), (2, 3, 2))

In [72]:
z = x.dot(y)
z, z.shape

(array([[[ 28,  34],
         [ 76,  98],
         [124, 162]],
 
        [[172, 226],
         [220, 290],
         [268, 354]]]), (2, 3, 2))

In [73]:
x = np.arange(24).reshape(2,3,4)
y = np.arange(16).reshape(2,4,2)
z = np.matmul(x, y)
z, z.shape

(array([[[  28,   34],
         [  76,   98],
         [ 124,  162]],
 
        [[ 604,  658],
         [ 780,  850],
         [ 956, 1042]]]), (2, 3, 2))

In [74]:
z = np.dot(x,y)
z, z.shape

(array([[[[  28,   34],
          [  76,   82]],
 
         [[  76,   98],
          [ 252,  274]],
 
         [[ 124,  162],
          [ 428,  466]]],
 
 
        [[[ 172,  226],
          [ 604,  658]],
 
         [[ 220,  290],
          [ 780,  850]],
 
         [[ 268,  354],
          [ 956, 1042]]]]), (2, 3, 2, 2))

### sum

In [75]:
x

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [76]:
np.sum(x)

276

In [77]:
x.sum()

276

In [78]:
np.sum(x, axis=0).sum(axis=0)

array([60, 66, 72, 78])

### cumsum

In [79]:
x.cumsum(axis=0)

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 14, 16, 18],
        [20, 22, 24, 26],
        [28, 30, 32, 34]]])

In [80]:
x.cumsum(axis=1)

array([[[ 0,  1,  2,  3],
        [ 4,  6,  8, 10],
        [12, 15, 18, 21]],

       [[12, 13, 14, 15],
        [28, 30, 32, 34],
        [48, 51, 54, 57]]])

### cumprod

In [81]:
x.cumprod(axis=0)

array([[[  0,   1,   2,   3],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11]],

       [[  0,  13,  28,  45],
        [ 64,  85, 108, 133],
        [160, 189, 220, 253]]])

In [82]:
x.cumprod(axis=1)

array([[[   0,    1,    2,    3],
        [   0,    5,   12,   21],
        [   0,   45,  120,  231]],

       [[  12,   13,   14,   15],
        [ 192,  221,  252,  285],
        [3840, 4641, 5544, 6555]]])

### mean

In [83]:
np.mean(x)

11.5

In [84]:
np.mean(x, axis=0)

array([[ 6.,  7.,  8.,  9.],
       [10., 11., 12., 13.],
       [14., 15., 16., 17.]])

In [85]:
np.mean(x, axis=1)

array([[ 4.,  5.,  6.,  7.],
       [16., 17., 18., 19.]])

In [86]:
x.mean(axis=2)

array([[ 1.5,  5.5,  9.5],
       [13.5, 17.5, 21.5]])

### sort

In [87]:
arr = np.random.random(8)*10
arr

array([7.19424122, 8.0811902 , 7.50321711, 2.13892441, 1.73852506,
       0.92492306, 0.54623947, 4.00142102])

In [88]:
arr.sort()
arr

array([0.54623947, 0.92492306, 1.73852506, 2.13892441, 4.00142102,
       7.19424122, 7.50321711, 8.0811902 ])

In [89]:
arr = np.random.random((3, 5))*10
arr

array([[5.90712012, 3.28090437, 6.88097913, 5.53413803, 3.98160366],
       [3.05963423, 4.14573272, 2.39868458, 0.78922131, 1.42740628],
       [2.77745502, 8.77015843, 2.06692661, 1.78726758, 3.2371118 ]])

In [90]:
arr.sort(0)
arr

array([[2.77745502, 3.28090437, 2.06692661, 0.78922131, 1.42740628],
       [3.05963423, 4.14573272, 2.39868458, 1.78726758, 3.2371118 ],
       [5.90712012, 8.77015843, 6.88097913, 5.53413803, 3.98160366]])

## Broadcasting

In [91]:
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
x, v

(array([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]]), array([1, 0, 1]))

In [92]:
# Add the vector v to every row of x with an explicit Python loop.
y = np.empty_like(x)
# Iterate over however many rows x has instead of hard-coding 4.
for i in range(x.shape[0]):
    y[i, :] = x[i, :] + v

print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [93]:
# 与上等效
x + v

array([[ 2,  2,  4],
       [ 5,  5,  7],
       [ 8,  8, 10],
       [11, 11, 13]])

当操作两个 array 时，numpy 会从最后一个维度开始逐个比较它们的 shape；当对应的两个维度相等，或其中一个为 1（或缺失）时，两个 array 是兼容的，可以输出 broadcasting 结果。例如：<br>
```python
A      (3d array): 256 x 256 x 3
B      (1d array):             3
Result (3d array): 256 x 256 x 3

A      (4d array):  8 x 1 x 6 x 1
B      (3d array):      7 x 1 x 5
Result (4d array):  8 x 7 x 6 x 5

A      (2d array):  5 x 4
B      (1d array):      1
Result (2d array):  5 x 4

A      (3d array):  15 x 3 x 5
B      (3d array):  15 x 1 x 5
Result (3d array):  15 x 3 x 5
```

In [94]:
v = np.array([1,2,3])
w = np.array([4,5])
v = v.reshape(3, 1)
v

array([[1],
       [2],
       [3]])

In [95]:
v + w

array([[5, 6],
       [6, 7],
       [7, 8]])

In [96]:
x = np.array([[1,2,3], [4,5,6]])
v = np.array([1,2,3])
x + v

array([[2, 4, 6],
       [5, 7, 9]])

In [97]:
x = np.array([[1,2,3], [4,5,6]]) # 2x3的
w = np.array([4,5]) # 2
(x.T + w).T

array([[ 5,  6,  7],
       [ 9, 10, 11]])

In [98]:
# 与上等效
x + np.reshape(w, (2,1))

array([[ 5,  6,  7],
       [ 9, 10, 11]])

<center>Broadcasting 运算示意图</center>

![](http://www.astroml.org/_images/fig_broadcast_visual_1.png)

## 逻辑运算

### where

In [99]:
x_arr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
y_arr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
# where 当前位为true时取第一个值，为false 时取第二个值
print(np.where(cond, x_arr, y_arr))

[1.1 2.2 1.3 1.4 2.5]


In [100]:
arr = np.random.randn(4,4)
print(arr)

[[-0.14442511 -1.21488914 -0.33802963 -0.82759908]
 [-0.69133238  0.6186164  -0.16077464  0.18697184]
 [ 0.0173474   0.96103841  3.09606767 -0.84923695]
 [-0.00729641  2.03204212 -0.77438562  0.839679  ]]


In [101]:
arr > 0

array([[False, False, False, False],
       [False,  True, False,  True],
       [ True,  True,  True, False],
       [False,  True, False,  True]])

In [102]:
# np.where broadcasts the scalars 1 and -1 to the shape of the condition
# array.
print(np.where(arr > 0, 1,-1))

[[-1 -1 -1 -1]
 [-1  1 -1  1]
 [ 1  1  1 -1]
 [-1  1 -1  1]]


In [103]:
print(np.where(arr > 0, 1,arr))

[[-0.14442511 -1.21488914 -0.33802963 -0.82759908]
 [-0.69133238  1.         -0.16077464  1.        ]
 [ 1.          1.          1.         -0.84923695]
 [-0.00729641  1.         -0.77438562  1.        ]]


### reshape

In [104]:
arr = np.arange(8)
arr.shape

(8,)

In [105]:
arr.reshape(2,4)

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [106]:
arr.reshape(2,2,2)

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [107]:
arr = np.arange(15)
arr.reshape(5,3).shape

(5, 3)

In [108]:
# 如果我们在某一个维度上写上-1，numpy会帮我们自动推导出正确的维度
arr.reshape(5,-1).shape

(5, 3)

In [109]:
# 从其他的ndarray中获取shape信息然后reshape
other_arr = np.ones((3,5))
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

### ravel

In [110]:
# 高维数组可以用ravel来拉平
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## 操作两个二维数组

### 连接数组

In [111]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
print(arr1, "\n\n", arr2)

[[1 2 3]
 [4 5 6]] 

 [[ 7  8  9]
 [10 11 12]]


In [112]:
# axis = 0 表示第0维（行）
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [113]:
# axis = 1 表示第1维（列）
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [114]:
np.vstack((arr1, arr2)) # vertical

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [115]:
np.hstack((arr1, arr2)) # horizontal

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

### 拆分数组

In [116]:
arr = np.random.rand(5,5)
# 从0维 1 和 3 的位置截断
first, second, third = np.split(arr, [1,3], axis=0)
print(first, '\n\n', second, '\n\n', third)

[[0.16687906 0.52208094 0.69125    0.82477479 0.61625149]] 

 [[0.40952986 0.62394514 0.30230371 0.6587523  0.59678755]
 [0.3317204  0.47083863 0.22163046 0.27572834 0.13176832]] 

 [[0.01706571 0.72201728 0.28635626 0.04210895 0.45258635]
 [0.37904652 0.27609052 0.3484632  0.12566026 0.16391014]]


In [117]:
# 从1维 1 和 3 的位置截断
first, second, third = np.split(arr, [1,3], axis=1)
print(first, '\n\n', second, '\n\n', third)

[[0.16687906]
 [0.40952986]
 [0.3317204 ]
 [0.01706571]
 [0.37904652]] 

 [[0.52208094 0.69125   ]
 [0.62394514 0.30230371]
 [0.47083863 0.22163046]
 [0.72201728 0.28635626]
 [0.27609052 0.3484632 ]] 

 [[0.82477479 0.61625149]
 [0.6587523  0.59678755]
 [0.27572834 0.13176832]
 [0.04210895 0.45258635]
 [0.12566026 0.16391014]]


### 堆叠辅助

In [118]:
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = np.random.randn(3, 2)
print(arr1, '\n\n', arr2)

[[0 1]
 [2 3]
 [4 5]] 

 [[ 0.49032919 -0.61524917]
 [-0.34589037 -1.15004596]
 [-0.09358016 -0.65558148]]


In [119]:
# r_用于按行堆叠
arr_2 = np.r_[arr1, arr2]
arr_2

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [ 0.49032919, -0.61524917],
       [-0.34589037, -1.15004596],
       [-0.09358016, -0.65558148]])

In [120]:
arr = np.arange(6)
arr = arr.reshape(6,1)+arr
arr1, arr2, arr3 = np.split(arr, [1,3], axis=1)
arr1, arr2, arr3

(array([[0],
        [1],
        [2],
        [3],
        [4],
        [5]]), array([[1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
        [5, 6],
        [6, 7]]), array([[ 3,  4,  5],
        [ 4,  5,  6],
        [ 5,  6,  7],
        [ 6,  7,  8],
        [ 7,  8,  9],
        [ 8,  9, 10]]))

In [121]:
# c_用于按列堆叠, 堆叠1维数组时可以自动转置
np.c_[arr_2, arr1.reshape(6,)], np.c_[arr_2, arr2], np.c_[arr_2, arr3]

(array([[ 0.        ,  1.        ,  0.        ],
        [ 2.        ,  3.        ,  1.        ],
        [ 4.        ,  5.        ,  2.        ],
        [ 0.49032919, -0.61524917,  3.        ],
        [-0.34589037, -1.15004596,  4.        ],
        [-0.09358016, -0.65558148,  5.        ]]),
 array([[ 0.        ,  1.        ,  1.        ,  2.        ],
        [ 2.        ,  3.        ,  2.        ,  3.        ],
        [ 4.        ,  5.        ,  3.        ,  4.        ],
        [ 0.49032919, -0.61524917,  4.        ,  5.        ],
        [-0.34589037, -1.15004596,  5.        ,  6.        ],
        [-0.09358016, -0.65558148,  6.        ,  7.        ]]),
 array([[ 0.        ,  1.        ,  3.        ,  4.        ,  5.        ],
        [ 2.        ,  3.        ,  4.        ,  5.        ,  6.        ],
        [ 4.        ,  5.        ,  5.        ,  6.        ,  7.        ],
        [ 0.49032919, -0.61524917,  6.        ,  7.        ,  8.        ],
        [-0.34589037, -1.15004596,  7.        ,  8.        ,  9.        ],
        [-0.09358016, -0.65558148,  8.        ,  9.        , 10.        ]]))

In [122]:
# 切片直接转为数组
np.c_[1:6, -5:0]

array([[ 1, -5],
       [ 2, -4],
       [ 3, -3],
       [ 4, -2],
       [ 5, -1]])

In [123]:
# 使用repeat来重复
arr = np.arange(3)
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [124]:
# 每个元素指定重复次数
arr.repeat([2,3,5])

array([0, 0, 1, 1, 1, 2, 2, 2, 2, 2])

In [125]:
# 指定axis来重复
arr = np.random.rand(2,2)
arr.repeat(2, axis=0)

array([[0.12396663, 0.90796795],
       [0.12396663, 0.90796795],
       [0.65358205, 0.43683345],
       [0.65358205, 0.43683345]])

In [126]:
arr.repeat(2, axis=1)

array([[0.12396663, 0.12396663, 0.90796795, 0.90796795],
       [0.65358205, 0.65358205, 0.43683345, 0.43683345]])

In [127]:
# Tile 整块重复
print(arr)
print()
print(np.tile(arr, 2))

[[0.12396663 0.90796795]
 [0.65358205 0.43683345]]

[[0.12396663 0.90796795 0.12396663 0.90796795]
 [0.65358205 0.43683345 0.65358205 0.43683345]]


In [128]:
# (2, 3) 表示按行重复两遍，按列重复三遍
print(np.tile(arr, (2,3)))

[[0.12396663 0.90796795 0.12396663 0.90796795 0.12396663 0.90796795]
 [0.65358205 0.43683345 0.65358205 0.43683345 0.65358205 0.43683345]
 [0.12396663 0.90796795 0.12396663 0.90796795 0.12396663 0.90796795]
 [0.65358205 0.43683345 0.65358205 0.43683345 0.65358205 0.43683345]]


## 文件输入输出

In [129]:
# 读取csv文件作为数组
arr = np.loadtxt('data/pf_gd.txt', delimiter=',')
arr

array([[0.26338619, 0.26338619, 0.43161792, 0.43161792],
       [0.22527752, 0.22527752, 0.41270022, 0.41270022]])

In [130]:
# 保存单个数组
arr = np.arange(50).reshape(2,5,5)
np.save('data/some_array', arr)

In [131]:
arr2 = np.load('data/some_array.npy')
print(arr2)

[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]
  [10 11 12 13 14]
  [15 16 17 18 19]
  [20 21 22 23 24]]

 [[25 26 27 28 29]
  [30 31 32 33 34]
  [35 36 37 38 39]
  [40 41 42 43 44]
  [45 46 47 48 49]]]


In [132]:
# 保存多个数组
arr3 = np.arange(15).reshape(3,5)
np.savez("data/array_archive.npz", arr=arr, b=arr2, c=arr3)

In [133]:
arch = np.load('data/array_archive.npz')
arch['arr']

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

In [134]:
arch['b']

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]],

       [[25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49]]])

In [135]:
arch['c']

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

## softmax

In [136]:
m = np.random.rand(10,10) * 10 + 1000
print(m)

[[1001.36511264 1003.29084206 1007.39155062 1000.88635923 1008.22059507
  1001.94707327 1009.58469554 1005.15100649 1006.10503779 1000.24799105]
 [1000.01186585 1008.56163165 1003.59373026 1001.66684591 1001.30216238
  1000.96204105 1008.33976134 1009.93283148 1004.15530141 1003.48315771]
 [1008.15344191 1006.09020122 1009.74359865 1007.33818639 1008.80604834
  1004.55583212 1007.18401003 1008.82069605 1006.24843839 1004.92107389]
 [1001.09103691 1004.33058818 1006.23404469 1009.31071922 1004.93250684
  1002.01671236 1003.43103922 1007.57940504 1000.92249583 1005.11394214]
 [1007.25155075 1001.92826594 1004.37282853 1005.91962807 1000.63376098
  1006.74924425 1007.0662769  1004.02849198 1002.21830508 1008.49227317]
 [1006.70695793 1007.47999245 1000.74338089 1003.10749156 1002.1471034
  1005.59232514 1004.3687914  1003.58321178 1009.15155923 1007.51170082]
 [1005.37437922 1004.70439848 1006.71162828 1002.94373401 1007.74221064
  1009.99305889 1008.19559141 1002.66615439 1005.37775482 1

In [137]:
# Maximum of each row, kept as a column vector.
# keepdims=True preserves the reduced axis, giving shape (rows, 1) without
# hard-coding the row count of 10.
m_row_max = m.max(axis=1, keepdims=True)
print(m_row_max, m_row_max.shape)

[[1009.58469554]
 [1009.93283148]
 [1009.74359865]
 [1009.31071922]
 [1008.49227317]
 [1009.15155923]
 [1009.99305889]
 [1009.2537422 ]
 [1009.09004985]
 [1009.86159767]] (10, 1)


In [138]:
m = m - m_row_max
print(m)

[[-8.2195829  -6.29385348 -2.19314493 -8.69833631 -1.36410047 -7.63762227
   0.         -4.43368905 -3.47965775 -9.33670449]
 [-9.92096563 -1.37119982 -6.33910121 -8.26598557 -8.63066909 -8.97079042
  -1.59307013  0.         -5.77753007 -6.44967376]
 [-1.59015673 -3.65339742  0.         -2.40541225 -0.93755031 -5.18776653
  -2.55958862 -0.9229026  -3.49516026 -4.82252476]
 [-8.21968231 -4.98013104 -3.07667453  0.         -4.37821238 -7.29400686
  -5.87968    -1.73131418 -8.38822339 -4.19677708]
 [-1.24072242 -6.56400723 -4.11944464 -2.57264511 -7.8585122  -1.74302893
  -1.42599627 -4.46378119 -6.27396809  0.        ]
 [-2.4446013  -1.67156678 -8.40817834 -6.04406767 -7.00445583 -3.55923409
  -4.78276783 -5.56834745  0.         -1.63985841]
 [-4.61867967 -5.28866041 -3.28143061 -7.04932488 -2.25084826  0.
  -1.79746748 -7.32690451 -4.61530407 -1.43376652]
 [-8.34336801 -8.09025703 -4.04066683 -5.38668974 -2.34272394 -2.3906858
   0.         -5.84343161 -3.60936898 -6.28016879]
 [-4.1470

In [139]:
# e 的 m 次方
m_exp = np.exp(m)
print(m_exp, m_exp.shape)

[[2.69327378e-04 1.84762641e-03 1.11565332e-01 1.66863189e-04
  2.55610502e-01 4.81973090e-04 1.00000000e+00 1.18706174e-02
  3.08179566e-02 8.81293892e-05]
 [4.91336890e-05 2.53802259e-01 1.76588868e-03 2.57115394e-04
  1.78545142e-04 1.27067715e-04 2.03300493e-01 1.00000000e+00
  3.09635375e-03 1.58103788e-03]
 [2.03893652e-01 2.59029757e-02 1.00000000e+00 9.02282909e-02
  3.91585924e-01 5.58446562e-03 7.73365489e-02 3.97363976e-01
  3.03438851e-02 8.04644611e-03]
 [2.69300606e-04 6.87316182e-03 4.61123473e-02 1.00000000e+00
  1.25477693e-02 6.79599530e-04 2.79567974e-03 1.77051579e-01
  2.27531153e-04 1.50439844e-02]
 [2.89175236e-01 1.41022328e-03 1.62535384e-02 7.63333682e-02
  3.86448403e-04 1.74989567e-01 2.40268970e-01 1.15187264e-02
  1.88473493e-03 1.00000000e+00]
 [8.67607193e-02 1.87952355e-01 2.23035781e-04 2.37189120e-03
  9.07827810e-04 2.84606147e-02 8.37279238e-03 3.81678264e-03
  1.00000000e+00 1.94007509e-01]
 [9.86581361e-03 5.04851866e-03 3.75744638e-02 8.67994760e

In [140]:
# Sum of each row of m_exp, kept as a column vector.
# keepdims=True preserves the reduced axis, giving shape (rows, 1) without
# hard-coding the row count of 10.
m_exp_row_sum = m_exp.sum(axis=1, keepdims=True)
print(m_exp_row_sum, m_exp_row_sum.shape)

[[1.41271833]
 [1.46415789]
 [2.23028617]
 [1.26160095]
 [1.81222081]
 [1.51287353]
 [1.57335073]
 [1.24218068]
 [1.09984232]
 [2.13898732]] (10, 1)


In [141]:
m_softmax = m_exp / m_exp_row_sum
print(m_softmax)

[[1.90644783e-04 1.30785195e-03 7.89720990e-02 1.18114974e-04
  1.80935220e-01 3.41167154e-04 7.07855190e-01 8.40267816e-03
  2.18146505e-02 6.23828456e-05]
 [3.35576437e-05 1.73343504e-01 1.20607804e-03 1.75606330e-04
  1.21943912e-04 8.67855274e-05 1.38851481e-01 6.82986448e-01
  2.11476765e-03 1.07982745e-03]
 [9.14203994e-02 1.16141938e-02 4.48372956e-01 4.04559255e-02
  1.75576538e-01 2.50392336e-03 3.46756170e-02 1.78167261e-01
  1.36053775e-02 3.60780882e-03]
 [2.13459418e-04 5.44796816e-03 3.65506598e-02 7.92643663e-01
  9.94590978e-03 5.38680261e-04 2.21597783e-03 1.40338812e-01
  1.80351126e-04 1.19245189e-02]
 [1.59569537e-01 7.78174090e-04 8.96885099e-03 4.21214500e-02
  2.13245759e-04 9.65608415e-02 1.32582613e-01 6.35613844e-03
  1.04001395e-03 5.51809136e-01]
 [5.73482963e-02 1.24235339e-01 1.47425265e-04 1.56780535e-03
  6.00068540e-04 1.88122895e-02 5.53436373e-03 2.52286960e-03
  6.60993785e-01 1.28237758e-01]
 [6.27057493e-03 3.20876876e-03 2.38818105e-02 5.51685486e

In [142]:
print(m_softmax.sum(axis=1))

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


## [numpy指南](https://numpy.org/doc/stable/reference/)