# Numpy笔记(三)

## 1.4 numpy数组的计算

In [1]:
from __future__ import print_function

import warnings

import numpy as np
from numpy import random
# Silence every Python warning for the rest of the session. This also hides
# NumPy RuntimeWarnings such as the divide-by-zero raised by np.log on a 0 entry.
warnings.filterwarnings('ignore')

### 1.4.1 简单的算术操作
这里的运算是numpy数组对应元素之间的运算(逐元素运算), 所以被操作的两个数组的形状(shape)需要相同.

In [2]:
# Five random integers drawn from [0, 10).
a = random.randint(10, size=5)
a

array([1, 1, 7, 4, 8])

In [3]:
# Five random integers in [1, 5]: randint(5) yields 0..4 and the +1 shifts it.
# Presumably the shift keeps b non-zero so the division and modulo cells
# below never divide by zero.
b = random.randint(5, size=5) + 1
b

array([1, 1, 5, 3, 4])

In [4]:
# Addition (element-wise)
print(a + b)

[ 2  2 12  7 12]


In [5]:
# Subtraction (element-wise)
print(a - b)

[0 0 2 1 4]


In [6]:
# Multiplication (element-wise)
print(a * b)

[ 1  1 35 12 32]


In [7]:
# Division (element-wise).
# NOTE(review): the recorded output is integer-valued, i.e. Python 2 floor
# division on integer arrays. Under Python 3 `/` is true division and returns
# floats; use `a // b` if floor division is what should be demonstrated.
print(a / b)

[1 1 1 1 2]


In [8]:
# Remainder / modulo (element-wise)
print(a % b)

[0 0 2 1 0]


In [9]:
# Power (element-wise): each element of a raised to the matching element of b
print(a ** b)

[    1     1 16807    64  4096]


### 1.4.2 Broadcasting

在1.4.1中我们做运算时, 两个数组的形状是相同的. 若两个数组的形状不同, numpy会尝试通过广播(broadcasting)把它们扩展为相同的形状后再进行运算: 从最后一个维度开始逐一比较, 只有当两个维度相等或其中一个为1时才能广播, 否则会抛出`ValueError`.

In [10]:
# Example 1: a two-dimensional 1x3 array in which every entry is 1.
a = np.full(shape=(1, 3), fill_value=1, dtype=int)
a

array([[1, 1, 1]])

In [11]:
# A one-dimensional array of length 3 in which every entry is 2.
b = np.full(shape=(3,), fill_value=2, dtype=int)
b

array([2, 2, 2])

In [12]:
# [[1,1,1]] + [2,2,2]   (shape (1, 3) + shape (3,))
a + b
# Equivalent to: [[1,1,1]] + [[2,2,2]]   (b is treated as shape (1, 3))

array([[3, 3, 3]])

In [13]:
# Example 2: the integers 0..5 arranged as a 2x3 array.
c = np.arange(6).reshape(2, 3)
c

array([[0, 1, 2],
       [3, 4, 5]])

In [14]:
# Column vector [[0], [1]]: add a trailing axis so the shape becomes (2, 1).
d = np.arange(2)[:, np.newaxis]
d

array([[0],
       [1]])

In [15]:
# [[0,1,2],[3,4,5]] + [[0],[1]]   (shape (2, 3) + shape (2, 1))
c + d
# Equivalent to: [[0,1,2],[3,4,5]] + [[0,0,0],[1,1,1]]   (d is stretched along axis 1)

array([[0, 1, 2],
       [4, 5, 6]])

In [16]:
# Example 3: row vector [[0, 1, 2]]; a leading axis gives it shape (1, 3).
f = np.arange(3)[np.newaxis, :]
f

array([[0, 1, 2]])

In [17]:
# [[0,1,2],[3,4,5]] + [[0,1,2]]   (shape (2, 3) + shape (1, 3))
c + f
# Equivalent to: [[0,1,2],[3,4,5]] + [[0,1,2],[0,1,2]]   (f is stretched along axis 0)

array([[0, 2, 4],
       [3, 5, 7]])

In [18]:
# Example 4: a one-element, one-dimensional array.
g = np.array([1])
g

array([1])

In [19]:
# [[0,1,2],[3,4,5]] + [1]   (shape (2, 3) + shape (1,))
c + g
# Equivalent to: [[0,1,2],[3,4,5]] + [[1,1,1],[1,1,1]]   (g is stretched along both axes)

array([[1, 2, 3],
       [4, 5, 6]])

更多关于Broadcasting的内容请参考[Numpy官方文档](https://numpy.org/doc/stable/user/basics.broadcasting.html).

### 1.4.3 数理统计

In [20]:
# 3x2 array of random integers in [0, 10); the statistics cells below all use it.
a = random.randint(10, size=(3,2))
print(a)

[[1 0]
 [6 3]
 [5 6]]


#### 最小值和最大值

In [21]:
# Smallest value over all elements of a.
print(a.min())

0


In [22]:
# Largest value over all elements of a.
print(a.max())

6


#### 求和

In [23]:
# Sum of all elements of a.
a.sum()

21

#### 累乘 (所有元素的乘积)

In [24]:
# Product of all elements of a (0 here because a contains a zero).
a.prod()

0

#### 均值

In [25]:
# Arithmetic mean of all elements of a.
a.mean()

3.5

#### 方差

In [26]:
# Variance of all elements; NumPy's default is the population variance (ddof=0).
a.var()

5.583333333333333

#### 标准差

In [27]:
# Standard deviation of all elements (square root of the ddof=0 variance).
a.std()

2.3629078131263039

以上所有结果都是按照所有元素来进行计算的, 我们可以使用参数来按列或行进行计算. 以均值为例:

In [28]:
# Mean along axis 0: one value per column.
a.mean(axis=0)

array([ 4.,  3.])

In [29]:
# Mean along axis 1: one value per row.
a.mean(axis=1)

array([ 0.5,  4.5,  5.5])

### 1.4.4 Universal functions

In [30]:
# Redisplay a before applying the universal functions to it.
a

array([[1, 0],
       [6, 3],
       [5, 6]])

In [31]:
# Element-wise square.
np.square(a)

array([[ 1,  0],
       [36,  9],
       [25, 36]])

In [32]:
# Element-wise absolute value (a has no negative entries, so the result equals a).
np.abs(a)

array([[1, 0],
       [6, 3],
       [5, 6]])

In [33]:
# Element-wise square root; the integer input produces a float result.
np.sqrt(a)

array([[ 1.        ,  0.        ],
       [ 2.44948974,  1.73205081],
       [ 2.23606798,  2.44948974]])

In [34]:
# Element-wise exponential, e ** a.
np.exp(a)

array([[   2.71828183,    1.        ],
       [ 403.42879349,   20.08553692],
       [ 148.4131591 ,  403.42879349]])

In [35]:
# Element-wise natural logarithm. The 0 entry yields -inf; the accompanying
# divide-by-zero warning is not shown because warnings were silenced at the top.
np.log(a)

array([[ 0.        ,        -inf],
       [ 1.79175947,  1.09861229],
       [ 1.60943791,  1.79175947]])

In [36]:
# Element-wise sine; the values of a are interpreted as radians.
np.sin(a)

array([[ 0.84147098,  0.        ],
       [-0.2794155 ,  0.14112001],
       [-0.95892427, -0.2794155 ]])

In [37]:
# Element-wise cosine; the values of a are interpreted as radians.
np.cos(a)

array([[ 0.54030231,  1.        ],
       [ 0.96017029, -0.9899925 ],
       [ 0.28366219,  0.96017029]])

In [38]:
# Second 3x2 array of random integers in [0, 10) for the binary ufunc examples.
b = random.randint(10, size=(3,2))
b

array([[6, 2],
       [2, 3],
       [1, 3]])

In [39]:
# Equivalent to a + b
np.add(a, b)

array([[7, 2],
       [8, 6],
       [6, 9]])

In [40]:
# Equivalent to a > b
np.greater(a, b)

array([[False, False],
       [ True, False],
       [ True,  True]], dtype=bool)

In [41]:
# Element-wise maximum: for each position, the larger of a and b.
np.maximum(a, b)

array([[6, 2],
       [6, 3],
       [5, 6]])

In [42]:
# Five random floats in [0, 10): rand() samples [0, 1) and the result is
# scaled by 10. Uses the `random` alias imported at the top of the notebook,
# consistent with every other cell.
vec = random.rand(5) * 10
vec

array([ 3.73052733,  2.61543596,  2.60834969,  9.11232738,  6.48641192])

In [43]:
# Split every element into its integral and fractional parts.
# np.modf returns the pair (fractional, integral), so unpack it once instead
# of evaluating it twice and indexing the result.
decimal, integer = np.modf(vec)
# print() is used as a function so the cell runs on Python 3 as well; on
# Python 2 this relies on the print_function future import in the first cell.
print("整数部分: ", integer)
print("小数部分: ", decimal)

整数部分:  [ 3.  2.  2.  9.  6.]
小数部分:  [ 0.73052733  0.61543596  0.60834969  0.11232738  0.48641192]


#### where

In [44]:
# Ten samples from the standard normal distribution (mixed signs).
vec = random.randn(10)
vec

array([ 1.22329474, -0.07009466, -1.67605479,  0.24450657, -0.37638539,
       -0.07331108,  1.60514781,  1.25554266,  0.51684416, -0.14841898])

In [45]:
# Like the ternary operator `cond ? x : y`, applied element-wise:
# keep the values where vec > 0 and use 0 everywhere else.
np.where(vec > 0, vec, 0)

array([ 1.22329474,  0.        ,  0.        ,  0.24450657,  0.        ,
        0.        ,  1.60514781,  1.25554266,  0.51684416,  0.        ])

#### unique

去除重复元素, 并返回排序后的结果

In [46]:
# Ten random integers in [0, 5): only five distinct values are possible,
# so the array is guaranteed to contain duplicates.
vec = random.randint(5, size=(10,))
vec

array([0, 1, 0, 1, 1, 0, 4, 4, 2, 4])

In [47]:
# Sorted array of the distinct values that occur in vec.
np.unique(vec)

array([0, 1, 2, 4])

#### in1d
判断一个数组中的每个元素是否出现在另一个数组中

In [48]:
# a: fixed values to look up; b: ten random integers in [0, 10) to search in.
a = np.array([1,2,3,7,8,9])
b = random.randint(10, size=(10,))
print(a)
print(b)

[1 2 3 7 8 9]
[5 6 3 8 2 8 1 9 9 4]


In [49]:
# Element-wise membership test: True where an element of a also occurs in b.
# Written as a print() call so the cell runs on both Python 2 and Python 3.
# NOTE(review): newer NumPy releases recommend np.isin over np.in1d.
print(np.in1d(a, b))

[ True  True  True False  True  True]


#### tile

In [50]:
# 2x3 matrix used to demonstrate np.tile.
mat = np.array([[1,2,3], [4,5,6]])
mat

array([[1, 2, 3],
       [4, 5, 6]])

In [51]:
# An integer reps repeats along the last axis: two copies of mat side by side.
np.tile(mat, 2)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [52]:
# reps=(2, 3): two copies along axis 0 (stacked vertically) and three along
# axis 1 (side by side), giving a 4x9 result.
np.tile(mat, (2,3))

array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6],
       [1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6]])

### 1.4.5 其他

#### cumsum

In [53]:
# 3x3 matrix of random integers in [0, 10) for the cumulative examples.
mat = random.randint(10, size=(3,3))
mat

array([[2, 2, 2],
       [7, 9, 7],
       [3, 6, 2]])

In [54]:
# Cumulative sum along axis 0: a running total down each column
# (every row is added to the rows above it).
mat.cumsum(0)

array([[ 2,  2,  2],
       [ 9, 11,  9],
       [12, 17, 11]])

In [55]:
# Cumulative sum along axis 1: a running total from left to right within each row.
mat.cumsum(1)

array([[ 2,  4,  6],
       [ 7, 16, 23],
       [ 3,  9, 11]])

In [56]:
# Cumulative product along axis 0: a running product down each column.
mat.cumprod(0)

array([[  2,   2,   2],
       [ 14,  18,  14],
       [ 42, 108,  28]])

In [57]:
# Cumulative product along axis 1: a running product from left to right within each row.
mat.cumprod(1)

array([[  2,   4,   8],
       [  7,  63, 441],
       [  3,  18,  36]])