### NumPy 可以让Python计算更高效

1. List元素是分散储存，NumPy数组存储在一个均匀连续的内存块中，节省了计算资源。
2. 避免隐式拷贝：要让数组 x 的数值变为原来的 2 倍，应使用原地运算 `x *= 2`，而不是 `y = x * 2`（后者会额外创建一个新数组）。

## ndarray ( N-dimensional Array)

In [5]:
import numpy as np
# A 1-D array and a 3x3 2-D array built from (nested) Python lists.
a = np.array([1, 2, 3])
b = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
# Overwrite the centre element, addressed as (row, column).
b[1, 1] = 10

# Show both arrays, then their shapes and the element type of `a`.
print(a, b, a.shape, b.shape, a.dtype, sep='\n')

[1 2 3]
[[ 1  2  3]
 [ 4 10  6]
 [ 7  8  9]]
(3,)
(3, 3)
int64


In [16]:
# Structured dtype: each record holds a name, an age and three subject scores.
persontype = np.dtype([
    ('name', 'U32'),
    ('age', 'i'),
    ('Chinese', 'i'),
    ('Math', 'i'),
    ('English', 'f'),
])
peoples = np.array(
    [
        ("ZhangFei", 32, 75, 100, 90),
        ("GuanYu", 24, 85, 96, 88.5),
        ("ZhaoYun", 28, 85, 92, 96.5),
        ("HuangZhong", 29, 65, 85, 100),
    ],
    dtype=persontype,
)

# Indexing a structured array by field name yields that column for all records.
ages = peoples['age']
chineses = peoples['Chinese']
maths = peoples['Math']
englishs = peoples['English']

# Print the records, then the mean age and the mean Math score.
print(peoples, ages.mean(), maths.mean(), sep='\n')

[('ZhangFei', 32, 75, 100,  90. ) ('GuanYu', 24, 85,  96,  88.5)
 ('ZhaoYun', 28, 85,  92,  96.5) ('HuangZhong', 29, 65,  85, 100. )]
28.25
93.25


In [11]:
'''
data type: (https://numpy.org/doc/stable/reference/arrays.dtypes.html)
'?'	boolean
'b'	(signed) byte
'B'	unsigned byte
'i'	(signed) integer
'u'	unsigned integer
'f'	floating-point
'c'	complex-floating point
'm'	timedelta
'M'	datetime
'O'	(Python) objects

'U'	character string
'V'	raw data (void)

'''

array([('ZhangFei', 32, 75, 100,  90. ), ('GuanYu', 24, 85,  96,  88.5),
       ('ZhaoYun', 28, 85,  92,  96.5),
       ('HuangZhong', 29, 65,  85, 100. )],
      dtype=[('name', '<U32'), ('age', '<i4'), ('Chinese', '<i4'), ('Math', '<i4'), ('English', '<f4')])

## ufunc(universal function)

In [18]:
# Evenly spaced sequences

# np.arange mirrors range(): start, stop, step -- the stop value is excluded.
x1 = np.arange(1, 11, step=2)

# np.linspace ("linear space") takes start, stop and the number of samples;
# the stop value is included.
x2 = np.linspace(1, 9, num=5)

print(x1, x2, sep='\n')

[1 3 5 7 9]
[1. 3. 5. 7. 9.]


运算

In [21]:
# Element-wise arithmetic ufuncs applied to x1 and x2, one result per line.
print(
    np.add(x1, x2),
    np.subtract(x1, x2),
    np.multiply(x1, x2),
    np.divide(x1, x2),
    np.power(x1, x2),
    np.remainder(x1, x2),  # np.mod is the same operation
    sep='\n',
)

[ 2.  6. 10. 14. 18.]
[0. 0. 0. 0. 0.]
[ 1.  9. 25. 49. 81.]
[1. 1. 1. 1. 1.]
[1.00000000e+00 2.70000000e+01 3.12500000e+03 8.23543000e+05
 3.87420489e+08]
[0. 0. 0. 0. 0.]


max min

In [27]:
b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(np.amin(b))          # 1       -> minimum of the whole array
print(np.amin(b, axis=0))  # [1 2 3] -> minimum of each column
print(np.amin(b, axis=1))  # [1 4 7] -> minimum of each row
print(np.amax(b))          # 9       -> maximum of the whole array
print(np.amax(b, axis=0))  # [7 8 9] -> maximum of each column
print(np.amax(b, axis=1))  # [3 6 9] -> maximum of each row
# NOTE: axis=0 reduces down the rows, giving one result per column;
#       axis=1 reduces across the columns, giving one result per row.

1
[1 2 3]
[1 4 7]
9
[7 8 9]
[3 6 9]


difference between max and min

In [28]:
# np.ptp ("peak to peak") is the maximum minus the minimum.
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(
    np.ptp(a),          # 8       -> over the whole array
    np.ptp(a, axis=0),  # [6 6 6] -> per column
    np.ptp(a, axis=1),  # [2 2 2] -> per row
    sep='\n',
)

8
[6 6 6]
[2 2 2]


percentile

In [31]:
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
# The percentile argument is given on a 0-100 scale (not 0-1); 50 is the median.
print(
    np.percentile(a, q=50),          # 5.0        -> over the whole array
    np.percentile(a, q=50, axis=0),  # [4. 5. 6.] -> per column
    np.percentile(a, q=50, axis=1),  # [2. 5. 8.] -> per row
    sep='\n',
)

5.0
[4. 5. 6.]
[2. 5. 8.]


np.mean
np.median
np.average(array, weights = wts)
np.std
np.var

排序

In [37]:
a = np.array([
    [4, 3, 2],
    [2, 4, 1],
])
# Signature noted for reference: sort(a, axis=-1, kind='quicksort', order=None)
# `kind` may be 'quicksort', 'mergesort' or 'heapsort'
# (quick sort, merge sort and heap sort respectively).
print(
    a,
    np.sort(a),             # default axis: sorts within each row
    np.sort(a, axis=None),  # flattens the array, then sorts
    np.sort(a, axis=0),     # sorts within each column
    np.sort(a, axis=1),     # sorts within each row
    sep='\n',
)

[[4 3 2]
 [2 4 1]]
[[2 3 4]
 [1 2 4]]
[1 2 2 3 4 4]
[[2 3 1]
 [4 4 2]]
[[2 3 4]
 [1 2 4]]


作业
练习题：统计全班的成绩

假设一个团队里有5名学员，成绩如下表所示。你可以用NumPy统计下这些人在语文、英语、数学中的平均成绩、最小成绩、最大成绩、方差、标准差。然后把这些人的总成绩排序，得出名次进行成绩输出。

In [48]:
# Structured dtype for the exercise: a name plus three integer subject scores.
persontype = np.dtype([
    ('name', 'U32'),
    ('Chinese', 'i'),
    ('Math', 'i'),
    ('English', 'i'),
])
peoples = np.array(
    [
        ("ZhangFei", 66, 65, 30),
        ("GuanYu", 95, 85, 98),
        ("ZhaoYun", 93, 92, 96),
        ("HuangZhong", 90, 88, 77),
        ('DianWei', 80, 90, 90),
    ],
    dtype=persontype,
)

# One score column per subject, selected by field name.
chinese = peoples['Chinese']
math = peoples['Math']
english = peoples['English']

def info(sub, lst):
    """Print one table row of summary statistics for a subject.

    The row is written to stdout as
    ``sub | min | max | mean | std | var`` and matches the header
    ``Subject | min | max | mean | std | var``.

    Args:
        sub: Label printed in the first column (e.g. the subject name).
        lst: Numeric scores, as a NumPy array or any array-like sequence.

    Raises:
        ValueError: If ``lst`` is empty (there is no minimum or maximum).
    """
    # Use NumPy reductions throughout so that every statistic is computed over
    # the same set of elements, whatever the dimensionality of the input.
    scores = np.asarray(lst)
    print(
        sub,
        np.min(scores),
        np.max(scores),
        np.mean(scores),
        np.std(scores),  # NumPy default: population standard deviation
        np.var(scores),  # NumPy default: population variance
        sep=' | ',
    )


# Header row, followed by one statistics row per subject.
print('Subject | min | max | mean | std | var')
for subject, scores in (
    ('Chinese', chinese),
    ('Math', math),
    ('English', english),
):
    info(subject, scores)



Subject | min | max | mean | std | var
Chinese | 66 | 95 | 84.8 | 10.721940122944169 | 114.96000000000001
Math | 65 | 92 | 84.0 | 9.777525249264253 | 95.6
English | 30 | 98 | 78.2 | 25.19047439013406 | 634.56


In [None]:
# Record layout extended with a 'Total' field that holds the summed scores.
persontype = np.dtype({
    'names': ['name', 'Chinese', 'Math', 'English', 'Total'],
    'formats': ['U32', 'i', 'i', 'i', 'i']
})
# The array must exist with the new dtype before 'Total' can be assigned, and
# every record needs one value per field, so 'Total' starts as a 0 placeholder.
peoples = np.array(
    [
        ("ZhangFei", 66, 65, 30, 0),
        ("GuanYu", 95, 85, 98, 0),
        ("ZhaoYun", 93, 92, 96, 0),
        ("HuangZhong", 90, 88, 77, 0),
        ('DianWei', 80, 90, 90, 0),
    ],
    dtype=persontype,
)
# Fill in each person's total as the element-wise sum of the three subjects.
peoples['Total'] = peoples['Chinese'] + peoples['Math'] + peoples['English']

# np.sort with order='Total' returns the records in ascending total score.
print(np.sort(peoples, order='Total'))