# Numpy实用示例

## 1. 导入numpy包，并查看相关版本

In [1]:
import numpy as np
print(np.__version__)

1.15.2


## 2. 如何创建1维向量？

In [3]:
arr = np.arange(10)
print(arr)

[0 1 2 3 4 5 6 7 8 9]


## 3. 如何创建一个布尔矩阵？

In [7]:
arr = np.full([3,3], True, dtype=bool)
print(arr)

[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]


## 4. 如何从1维整数向量查找奇数？

In [10]:
# Select the odd entries of 0..9 through a boolean mask.
arr = np.arange(10)
is_odd = (arr % 2 == 1)
print(arr)
print(arr[is_odd])

[0 1 2 3 4 5 6 7 8 9]
[1 3 5 7 9]


## 5. 如何将array中的奇数都替换为-1？

In [11]:
# Overwrite every odd entry of 0..9 with -1; this modifies `arr` in place.
arr = np.arange(10)
odd_mask = (arr % 2 == 1)
arr[odd_mask] = -1
print(arr)

[ 0 -1  2 -1  4 -1  6 -1  8 -1]


## 6. 如何在不改变原array的前提下，将奇数都替换为-1？

In [14]:
# np.where builds a new array, so `arr` keeps its original values.
arr = np.arange(10)
odd_mask = (arr % 2 == 1)
out = np.where(odd_mask, -1, arr)
for shown in (arr, out):
    print(shown)

[0 1 2 3 4 5 6 7 8 9]
[ 0 -1  2 -1  4 -1  6 -1  8 -1]


## 7. 如何reshape array?

In [15]:
arr = np.arange(10)
print(arr.reshape([2, 5]))

[[0 1 2 3 4]
 [5 6 7 8 9]]


## 8. 如何将两个array垂直叠加？

In [20]:
# Two 2x5 inputs: 0..9 reshaped, and a block of ones.
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)

# Three equivalent ways of stacking row-wise, printed as label + result.
vertical_results = (
    ('method 1: vstack', np.vstack([a, b])),
    ('method 2: concatenate', np.concatenate([a, b], axis=0)),
    ('method 3: r_', np.r_[a, b]),
)
for label, stacked in vertical_results:
    print(label)
    print(stacked)

method 1: vstack
[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]
method 2: concatenate
[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]
method 3: r_
[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]


## 9. 如何将两个array水平叠加？

In [21]:
# Two 2x5 inputs: 0..9 reshaped, and a block of ones.
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)

# Three equivalent ways of stacking column-wise, printed as label + result.
horizontal_results = (
    ('method 1: hstack', np.hstack([a, b])),
    ('method 2: concatenate', np.concatenate([a, b], axis=1)),
    ('method 3: c_', np.c_[a, b]),
)
for label, stacked in horizontal_results:
    print(label)
    print(stacked)

method 1: hstack
[[0 1 2 3 4 1 1 1 1 1]
 [5 6 7 8 9 1 1 1 1 1]]
method 2: concatenate
[[0 1 2 3 4 1 1 1 1 1]
 [5 6 7 8 9 1 1 1 1 1]]
method 3: c_
[[0 1 2 3 4 1 1 1 1 1]
 [5 6 7 8 9 1 1 1 1 1]]


## 10. 如何创建非硬编码的自定义序列？

如array: ```[1, 2, 3]```, 期望输出：```[1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]```

In [23]:
arr = np.arange(1, 4)
print(arr)

[1 2 3]


In [28]:
print(np.hstack([np.repeat(arr, 3), np.tile(arr, 3)]))

[1 1 1 2 2 2 3 3 3 1 2 3 1 2 3 1 2 3]


In [29]:
print(np.r_[np.repeat(arr, 3), np.tile(arr, 3)])

[1 1 1 2 2 2 3 3 3 1 2 3 1 2 3 1 2 3]


## 11. 如何从两个array获得共同items？

In [30]:
a = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
b = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])

In [33]:
print(np.intersect1d(a, b))

[2 4]


## 12. 如何从array a移除与array b的共同项？

In [2]:
a = np.array([1,2,3,4,5])
b = np.array([4, 5,6,7,8,9])

In [3]:
print(np.setdiff1d(a, b))

[1 2 3]


## 13. 如何获得两个array中对应位置元素相同的位置索引？

In [16]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

In [17]:
print(np.where(a == b))

(array([1, 3, 5, 7], dtype=int64),)


## 14. 如何从一个array获得给定范围数字的项？

In [18]:
a = np.arange(15)

In [19]:
# 期望获得5 <= items <= 10
# 方法1
print(a[(a >=5) & (a <= 10)])
# 方法2
index = np.where((a >=5) & (a <= 10))
print(a[index])
# 方法3
index = np.logical_and(a >= 5, a <= 10)
print(a[index])

[ 5  6  7  8  9 10]
[ 5  6  7  8  9 10]
[ 5  6  7  8  9 10]


## 15. 如何使得作用于标量的函数，可以用于numpy的array对象？

In [20]:
def maxx(x, y):
    """Return the larger of two scalars; ``x`` is returned when they compare equal."""
    return x if x >= y else y

maxx(1, 5)

5

In [21]:
pair_max = np.vectorize(maxx, otypes=[float])

a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])

pair_max(a, b)

array([6., 7., 9., 8., 9., 7., 5.])

## 16. 如何对换矩阵中的两列？

In [23]:
arr = np.arange(9).reshape(3, 3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [27]:
arr[:, [1, 0, 2]]

array([[1, 0, 2],
       [4, 3, 5],
       [7, 6, 8]])

## 17. 如何对换矩阵中的两行？

In [28]:
arr[[1, 0 ,2], :]

array([[3, 4, 5],
       [0, 1, 2],
       [6, 7, 8]])

## 18. 如何翻转矩阵中的行？

In [35]:
arr[::-1]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

In [36]:
# 或者
arr[::-1, :]

array([[6, 7, 8],
       [3, 4, 5],
       [0, 1, 2]])

## 19. 如何翻转矩阵中的列？

In [38]:
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [39]:
arr[:, ::-1]

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

## 20. 如何生成5行3列、取值范围在5~10之间的浮点数矩阵？

In [42]:
# Method 1: a random integer part plus a random fractional part
shape = (5, 3)
whole_part = np.random.randint(low=5, high=10, size=shape)
fraction = np.random.random(size=shape)
randarr = whole_part + fraction
randarr

array([[8.85273906, 8.88710481, 8.58032651],
       [7.41230335, 9.59575969, 6.66151892],
       [7.35201763, 7.77097071, 8.5926452 ],
       [8.61299784, 9.66859426, 7.48451589],
       [6.98250118, 7.40061701, 7.53711975]])

In [44]:
# 方法2
randarr = np.random.uniform(low=5, high=10, size=(5, 3))
randarr

array([[8.63296618, 9.81611242, 6.15759595],
       [5.7275462 , 9.2340188 , 9.0545074 ],
       [9.30250548, 9.9572826 , 8.973923  ],
       [5.8018489 , 5.3234115 , 7.25532269],
       [7.30637663, 8.07212602, 9.36331091]])

## 21. 如何设置浮点型矩阵小数精度为3位？

In [45]:
np.set_printoptions(precision=3)
randarr

array([[8.633, 9.816, 6.158],
       [5.728, 9.234, 9.055],
       [9.303, 9.957, 8.974],
       [5.802, 5.323, 7.255],
       [7.306, 8.072, 9.363]])

## 22. 如何使用小数形式替代科学计数法？

In [46]:
# Create the random array
np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
rand_arr

array([[5.434e-04, 2.784e-04, 4.245e-04],
       [8.448e-04, 4.719e-06, 1.216e-04],
       [6.707e-04, 8.259e-04, 1.367e-04]])

In [54]:
# Reset printoptions to default
np.set_printoptions(suppress=False)
np.random.seed(100)
rand_arr = np.random.random([3,3])/1e3
rand_arr

array([[5.434e-04, 2.784e-04, 4.245e-04],
       [8.448e-04, 4.719e-06, 1.216e-04],
       [6.707e-04, 8.259e-04, 1.367e-04]])

In [55]:
np.set_printoptions(suppress=True, precision=6)
rand_arr

array([[0.000543, 0.000278, 0.000425],
       [0.000845, 0.000005, 0.000122],
       [0.000671, 0.000826, 0.000137]])

## 23. 如何限制array显示的元素个数？

In [56]:
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [57]:
np.set_printoptions(threshold=6)
a

array([ 0,  1,  2, ..., 12, 13, 14])

## 24. 如何完整显示array的所有元素？

In [58]:
np.set_printoptions(threshold=6)
a

array([ 0,  1,  2, ..., 12, 13, 14])

In [59]:
import sys

# Newer NumPy releases reject a NaN threshold with ValueError;
# sys.maxsize turns summarization off so the whole array is printed.
np.set_printoptions(threshold=sys.maxsize)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## 25. 如何导入既含有数字又含有字符串的数据集？

In [60]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
iris[:3]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)

## 26. 如何从1维元组向量获得特定列？（此例为鸢尾花名称）

In [75]:
# Input:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None, encoding='utf-8')
print(iris_1d[:2])
print(iris_1d.shape)

# Solution:
species = np.array([row[4] for row in iris_1d])
species[:5]

[(5.1, 3.5, 1.4, 0.2, 'Iris-setosa') (4.9, 3. , 1.4, 0.2, 'Iris-setosa')]
(150,)


array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa'], dtype='<U15')

## 27. 如何将1维元组向量转为2维矩阵？

In [85]:
# Method 1: Convert each row to a list and get the first 4 items
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None, encoding='utf-8')
# 将索引为0~3的列转为矩阵
iris_2dfrom1dtuple = np.array([row.tolist()[:4] for row in iris_1d])
print(len(iris_2dfrom1dtuple))
print(iris_2dfrom1dtuple[:4])

print('直接作为矩阵导入')
# Alt Method 2: Import only the first 4 columns from source url
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3], encoding='utf-8')
print(iris_2d[:4])

150
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]]
直接作为矩阵导入
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]]


## 28. 如何计算均值，中位数以及标准差？

In [87]:
print(iris[:4])

[[b'5.1' b'3.5' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.9' b'3.0' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.7' b'3.2' b'1.3' b'0.2' b'Iris-setosa']
 [b'4.6' b'3.1' b'1.5' b'0.2' b'Iris-setosa']]


In [99]:
# 直接通过已经初始化的矩阵获取
sepallength = iris[:, 0].astype(float)
print(sepallength[:10])

# 重新获取数据，且只获取需要的列
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
print(sepallength[:10])

[5.1 4.9 4.7 4.6 5.  5.4 4.6 5.  4.4 4.9]
[5.1 4.9 4.7 4.6 5.  5.4 4.6 5.  4.4 4.9]


In [106]:
mean, med, sd = np.mean(sepallength), np.median(sepallength), np.std(sepallength)
print(mean, med, sd)

5.843333333333334 5.8 0.8253012917851409


## 29. 如何归一化矩阵中的数据？

归一化: (X - min) / (max - min)

In [115]:
np.set_printoptions(threshold=10)
Smax, Smin = sepallength.max(), sepallength.min()
print('最大值: {0}'.format(Smax))
print('最小值: {0}'.format(Smin))
print('原数据: ')
print(sepallength)
# 方法1
S = (sepallength - Smin)/(Smax - Smin)
print('归一化后数据: ')
print(S)

最大值: 7.9
最小值: 4.3
原数据: 
[5.1 4.9 4.7 ... 6.5 6.2 5.9]
归一化后数据: 
[0.222 0.167 0.111 ... 0.611 0.528 0.444]


In [116]:
# Method 2: np.ptp ("peak to peak") is max - min, the same denominator as method 1.
# The function form is used because the ndarray.ptp() method was removed in NumPy 2.0.
S = (sepallength - Smin) / np.ptp(sepallength)
print(S)

[0.222 0.167 0.111 ... 0.611 0.528 0.444]


In [117]:
import sys

# Newer NumPy releases reject a NaN threshold with ValueError;
# sys.maxsize turns summarization off so the whole array is printed.
np.set_printoptions(threshold=sys.maxsize)

## 30. 如何计算softmax的值？

Softmax 在机器学习和深度学习中有着非常广泛的应用。尤其在处理多分类（C > 2）问题，分类器最后的输出单元需要Softmax 函数进行数值处理。

<img src='./image/softmaxf.jpg' />

下图通过softmax进行3分类：

<img src='./image/softmax.jpg' />

实际应用中，使用 Softmax 需要注意数值溢出的问题。因为有指数运算，如果 V 中的数值很大，经过指数运算后很容易溢出。所以，需要先对 V 进行数值处理：将 V 中的每个元素减去 V 中的最大值。

In [126]:
def softmax(x):
    """Compute softmax along axis 0 of ``x``.

    A 1-D input is treated as one set of scores. For a 2-D input each
    column is normalised independently, so every column sums to 1.

    The maximum is subtracted before exponentiating so that large scores
    cannot overflow ``np.exp``. The shift is taken along the same axis as
    the normalising sum: shifting by the global maximum instead would let a
    column of comparatively small scores underflow to all zeros and yield
    0/0 = NaN.

    Based on
    https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python

    Args:
        x: array-like of scores.

    Returns:
        Array of softmax values with the same shape as ``x``.
    """
    x = np.asarray(x)
    # keepdims keeps the per-column maxima broadcastable against x.
    e_x = np.exp(x - np.max(x, axis=0, keepdims=True))
    return e_x / e_x.sum(axis=0)

In [127]:
print('原数据: ')
print(sepallength)
print('softmax数据: ')
print(softmax(sepallength))

原数据: 
[5.1 4.9 4.7 4.6 5.  5.4 4.6 5.  4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.  5.  5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.
 5.5 4.9 4.4 5.1 5.  4.5 4.4 5.  5.1 4.8 5.1 4.6 5.3 5.  7.  6.4 6.9 5.5
 6.5 5.7 6.3 4.9 6.6 5.2 5.  5.9 6.  6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
 6.3 6.1 6.4 6.6 6.8 6.7 6.  5.7 5.5 5.5 5.8 6.  5.4 6.  6.7 6.3 5.6 5.5
 5.5 6.1 5.8 5.  5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.  6.9 5.6 7.7 6.3 6.7 7.2
 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6.  6.9 6.7 6.9 5.8 6.8
 6.7 6.7 6.3 6.5 6.2 5.9]
softmax数据: 
[0.002 0.002 0.001 0.001 0.002 0.003 0.001 0.002 0.001 0.002 0.003 0.002
 0.002 0.001 0.004 0.004 0.003 0.002 0.004 0.002 0.003 0.002 0.001 0.002
 0.002 0.002 0.002 0.002 0.002 0.001 0.002 0.003 0.002 0.003 0.002 0.002
 0.003 0.002 0.001 0.002 0.002 0.001 0.001 0.002 0.002 0.002 0.002 0.001
 0.003 0.002 0.015 0.008 0.013 0.003 0.009 0.004 0.007 0.002 0.01  0.002
 0.002 0

## 31. 如何计算array中的百分位数(Percentile)的值？

Percentile：百分位数是统计中使用的度量。第 q 百分位数是这样一个值：数据中约有 q% 的观察值小于或等于它。

In [128]:
print(sepallength)

[5.1 4.9 4.7 4.6 5.  5.4 4.6 5.  4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.  5.  5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.
 5.5 4.9 4.4 5.1 5.  4.5 4.4 5.  5.1 4.8 5.1 4.6 5.3 5.  7.  6.4 6.9 5.5
 6.5 5.7 6.3 4.9 6.6 5.2 5.  5.9 6.  6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
 6.3 6.1 6.4 6.6 6.8 6.7 6.  5.7 5.5 5.5 5.8 6.  5.4 6.  6.7 6.3 5.6 5.5
 5.5 6.1 5.8 5.  5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.  6.9 5.6 7.7 6.3 6.7 7.2
 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6.  6.9 6.7 6.9 5.8 6.8
 6.7 6.7 6.3 6.5 6.2 5.9]


In [134]:
print(np.percentile(sepallength, q=[5, 95]))

[4.6   7.255]
