# Learning Numpy

## 1. Install Numpy

In [None]:
# pip3 install numpy
# conda install numpy

In [2]:
import numpy as np
print(np.__version__) # show which NumPy release this notebook was run with

1.23.5


## 2. Creating arrays

### 2.1 np.array()

In [None]:
# numpy.array(object, dtype = None, copy = True, order = 'K', subok = False, ndmin = 0)

In [None]:
# With no dtype given, np.array() infers the element type from the input values.
arr1 = np.array([1, 2, 3, 4, 5, 6])
arr1

In [None]:
# An explicit dtype overrides inference: the integers are stored as float64.
arr2 = np.array([1, 2, 3, 4, 5, 6], dtype = np.float64)
arr2

In [None]:
arr2_int = arr2.astype(np.int64) # astype() returns a new array converted to the requested dtype; arr2 itself is not modified

In [None]:
# The source array of the astype() call above, displayed for comparison.
arr2

In [None]:
# The int64 result of the astype() call above.
arr2_int

In [None]:
# Compare the element types of the original and the converted array.
print(arr2.dtype, arr2_int.dtype)

In [None]:
# Rearrange the six elements of arr2_int into 2 rows x 3 columns.
arr3 = np.reshape(arr2_int, (2, 3))
arr3

In [None]:
# Convert the integer array to complex128.
arr3_complex = arr3.astype(np.complex128)
arr3_complex

In [None]:
# Convert every element to a NumPy unicode string.
arr3_str = arr3.astype(np.str_)
arr3_str

In [None]:
# dtype = np.float64, np.int64, np.int32
#         complex, np.complex128, str, np.str_

### 2.2 Built-in methods to generate arrays

In [None]:
# np.arange()
# empty, empty_like, full, full_like
# zeros, zeros_like, ones, ones_like

In [None]:
# numpy.empty(shape, dtype = float, order = 'C') 
# order accepts "C" (row-major) or "F" (column-major): the order in which the elements are laid out in memory

In [None]:
empty_arr1 = np.empty((2,3), dtype=int)
empty_arr1 # the values are arbitrary because np.empty() does not initialize the memory

In [3]:
# np.full() creates an array of the given shape with every element set to one value.
full_arr1 = np.full(shape=(2, 4), fill_value=101)
full_arr1

array([[101, 101, 101, 101],
       [101, 101, 101, 101]])

In [None]:
# Signature: numpy.zeros(shape, dtype=float, order='C')
# A 2 x 3 block of float64 zeros stored in row-major (C) order.
zeros_arr1 = np.zeros(shape=(2, 3), dtype=float, order='C')
zeros_arr1

In [None]:
# A plain integer shape gives a 1-D array; note that (5) is just the int 5,
# not a one-element tuple.
zeros_arr2 = np.zeros(5, dtype=int)
zeros_arr2

In [None]:
# Template array: the integers 2..7 arranged as 3 rows x 2 columns.
arr5 = np.reshape(np.arange(2, 8), (3, 2))
# zeros_like() copies the template's shape and dtype and fills it with zeros.
arr5_zeros_like = np.zeros_like(arr5)
arr5_zeros_like

In [None]:
ones_arr1 = np.ones((2,3), dtype=float) # dtype=int would give integer ones instead
ones_arr1

In [None]:
# The integers 2..7 (the stop value 8 is excluded) arranged as 3 rows x 2 columns.
arr6 = np.arange(2,8).reshape(3,2)
arr6

In [None]:
# Same shape as arr6, filled with ones; the explicit dtype overrides arr6's own dtype.
arr6_ones_like = np.ones_like(arr6, dtype=float)
arr6_ones_like

In [11]:
# np.eye(n) builds the n x n identity matrix.
arr7 = np.eye(5)
print(arr7, '\n')

# A negative k moves the ones below the main diagonal (here by two rows).
arr8 = np.eye(5, k=-2)
print(arr8, '\n')

# Applied to a 2-D array, np.diag() extracts its main diagonal.
print('Values on diagonal:', np.diag(arr7))

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]] 

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]] 

Values on diagonal: [1. 1. 1. 1. 1.]


In [12]:
# Applied to a 1-D sequence, np.diag() builds a square matrix with those
# values on the main diagonal and zeros elsewhere.
arr9 = np.diag(np.arange(1, 6))
arr9

array([[1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0],
       [0, 0, 0, 0, 5]])

### 2.3 random 方法

In [291]:
# np.random.rand(d0, d1, ...) # samples from the uniform distribution over [0, 1)
np.random.rand(2,3)

array([[0.24221534, 0.16995561, 0.38685986],
       [0.84590563, 0.08110924, 0.7569834 ]])

In [292]:
# np.random.randn(d0, d1, ...) # samples from the standard normal distribution (mean = 0, var = 1)
np.random.randn(2,4)

array([[-0.30028295,  0.72563905, -0.56309107,  0.07433637],
       [-2.0149985 , -1.79276979,  0.49782617,  1.31438442]])

In [296]:
# np.random.randint(low, high [, size]): random integers from [low, high), i.e. high is excluded
print(np.random.randint(1, 10)) # a single integer from 1..9
print(np.random.randint(-3, 6, 9)) # nine integers from -3..5

5
[-2  0  2 -1  3 -1  3  0  3]


In [15]:
# np.random.choice(iterable_or_int, size, replace=True, p=weights)
print(np.random.choice(range(3), 10, replace=True, p=[0.1, 0.8, 0.1])) # weighted draw: 1 is picked with probability 0.8
print(np.random.choice(3, 10)) # an int n means "draw from np.arange(n)"
print(np.random.choice([1,2,3], 10)) # draw uniformly from the listed values

[0 0 1 1 1 1 0 1 1 1]
[1 0 1 2 2 0 0 1 1 1]
[3 3 1 3 1 3 3 1 2 2]


In [16]:
# np.random.shuffle(arr) randomly reorders the elements
arr = np.arange(10)
print(arr)
# shuffle() works in place: arr itself is modified and nothing is returned
np.random.shuffle(arr)
print(arr)

# permutation() shuffles like np.random.shuffle() but returns a shuffled copy
shuffled_copy_label = 'A permutation of the array: '
print(shuffled_copy_label, np.random.permutation(arr))
del shuffled_copy_label

[0 1 2 3 4 5 6 7 8 9]
[9 4 0 8 5 2 3 7 1 6]
A permutation of the array:  [8 5 3 0 2 7 9 6 4 1]


### 2.4 Create array from other iterables

In [298]:
# np.linspace(start, end, num)
# np.logspace(start, end, num) # base is 10
#    np.logspace(1.0, 2.0, num=10)  
#    np.logspace(0, 9, 10, base=2)

# np.asarray
# diff between np.array and np.asarray: https://www.runoob.com/numpy/numpy-array-from-existing-data.html

# np.frombuffer, np.fromiter

In [5]:
# np.repeat(iterable, reps, axis=None): repeat each element `reps` times
arr = [[0, 1, 2], [3, 4, 5]]
print(np.repeat(arr, 3)) # axis=None: the input is flattened first
print(np.repeat(arr, 3, axis=0)) # each row is repeated three times
print(np.repeat(arr, 3, axis=1)) # each column is repeated three times

[0 0 0 1 1 1 2 2 2 3 3 3 4 4 4 5 5 5]
[[0 1 2]
 [0 1 2]
 [0 1 2]
 [3 4 5]
 [3 4 5]
 [3 4 5]]
[[0 0 0 1 1 1 2 2 2]
 [3 3 3 4 4 4 5 5 5]]


In [6]:
# np.tile(): repeat the whole array n times
arr = [0, 1, 2]
print(np.tile(arr, 3)) # three copies laid end to end
print(np.tile(arr, (2,2))) # a 2 x 2 grid of copies -> shape (2, 6)

[0 1 2 0 1 2 0 1 2]
[[0 1 2 0 1 2]
 [0 1 2 0 1 2]]


## 3. Array properties

In [14]:
# np.info(arr) prints a summary of the array's class, shape, strides and dtype
# Attributes covered in this section: arr.reshape, arr.ndim, arr.shape, arr.size, arr.dtype

arr2 = np.arange(2,37) # 35 integers: 2..36

arr2_1d = arr2 # a second name for the same array object, not a copy
arr2_2d = arr2.reshape(5,7) # the same 35 elements arranged as 5 x 7
print(arr2_1d, '\n')
print(arr2_2d, '\n')
# np.info() writes its report itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
np.info(arr2_2d)

[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36] 

[[ 2  3  4  5  6  7  8]
 [ 9 10 11 12 13 14 15]
 [16 17 18 19 20 21 22]
 [23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36]] 

class:  ndarray
shape:  (5, 7)
strides:  (56, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x12c70cd10
byteorder:  little
byteswap:  False
type: int64
None


In [None]:
# arr2_1d and arr2_2d are both reshaped views of arr2 and refer to the same
# memory, so a write through either name is visible through all of them.
arr2_1d[0] = 100
print(arr2_1d, '\n')
print(arr2_2d)

# arr2_2d.shape = (7,5) # assigning to .shape changes the size of each dimension

# Restore the original value, this time writing through the 2-D view.
arr2_2d[0, 0] = 2
print(arr2_1d, '\n')
print(arr2_2d)

In [None]:
print(arr2_1d.ndim, arr2_2d.ndim) # number of dimensions (axes) of each array

In [None]:
print(arr2_1d.shape, arr2_2d.shape) # shape: tuple with the length of each dimension

In [None]:
print(arr2_1d.size, arr2_2d.size) # total number of elements

In [None]:
print(arr2_1d.dtype, arr2_2d.dtype) # element data type

In [None]:
print(arr2_1d.real, '\n\n', arr2_2d.imag) # real and imaginary parts; the imaginary part of an integer array is all zeros

## 4. Slicing and indexing

In [None]:
# The integers 0..11; note that this rebinds the name arr7 used in section 2.2.
arr7 = np.arange(12)
arr7

In [None]:
ss = slice(2,9,2) # slice(start, stop, step): a reusable slice object
print(arr7[ss]) # [2 4 6 8]
arr7_part = arr7[2:9:2] # the same selection written with slice syntax
arr7_part # array([2, 4, 6, 8])

In [None]:
# The integers 2..16 as 3 rows x 5 columns; note that this rebinds the name arr8 used in section 2.2.
arr8 = np.arange(2,17).reshape(3,5)
arr8

In [None]:
# An Ellipsis (...) stands for "all remaining axes".
print(arr8[..., 1], '\n')  # elements of the 2nd column
print(arr8[0, ...], '\n')  # elements of the 1st row
print(arr8[..., 2:])       # the 3rd column and everything after it

In [None]:
# Advanced indexing -- integer array indexing
# One array is used to index another: each element of an index array is an index along one dimension of the target array
rows = np.array([[0,1], [2,2]])
cols = np.array([[4,3], [4,3]])
# The selected (row, col) pairs are (0, 4), (1, 3), (2, 4) and (2, 3)
arr8_part = arr8[rows, cols]
arr8_part

In [None]:
arr8[1:, 2:4] # rows from index 1 onward, columns 2 and 3

In [None]:
# Advanced indexing -- boolean indexing
# A boolean array is used to select elements of the target array
arr8

In [None]:
arr8[arr8 > 9] # 1-D array of the elements greater than 9

In [None]:
arr8[arr8 % 3 == 0] # 1-D array of the elements divisible by 3

In [None]:
arr8[[1,0,2,0,-1]] # a list inside the brackets selects whole rows: rows 1, 0, 2, 0 and the last row, in that order

## 5. NumPy 迭代数组

In [None]:
# The integers 2..13 arranged as 3 rows x 4 columns.
farr = np.arange(2, 14).reshape(3,4)
farr

In [None]:
farr.T # transpose

In [None]:
# With no order argument np.nditer uses order='K' (follow the memory layout), not 'C';
# for farr as built by arange(...).reshape(3,4) that coincides with row-major (C) order.
for x in np.nditer(farr):
    print(x, end=' ')

In [None]:
# order='F' visits the elements column by column (Fortran / column-major order).
for x in np.nditer(farr, order='F'):
    print(x, end=' ')

In [None]:
# Numpy 数组操作
# np.reshape, 
# arr.flatten(), arr.flatten(order='F')
# np.transpose(arr) = arr.T

## 5. Broadcasting

In [None]:
# 如两数组a和b形状相同，即a.shape==b.shape，那a*b是a与b数组对应位相乘

In [None]:
# 对不同形状(shape)的数组进行的是broadcast形式计算

In [None]:
# Two arrays of identical shape: arithmetic is applied element by element.
brr = np.array([1, 2, 3, 4])
crr = np.array([10, 20, 30, 40])
drr = np.add(brr, crr)       # element-wise sum
err = np.multiply(brr, crr)  # element-wise product
print(drr, '\n', err)

In [None]:
# When two arrays have different shapes, NumPy applies broadcasting automatically

brr = np.array([
    [ 0,  0,  0],
    [10, 10, 10],
    [20, 20, 20],
    [30, 30, 30],
])
crr = np.array([0, 4, 7])

# crr has shape (3,) and is added to each of the four rows of brr
brr + crr

In [None]:
brr*crr # broadcasting applies to multiplication as well: each row of brr is multiplied element-wise by crr

## 6. Numpy operations

### 6.1 NumPy bitwise operations

In [None]:
# 操作	函数/运算符	描述
# 按位与	numpy.bitwise_and(x1, x2)	对数组的每个元素执行逐位与操作。
# 按位或	numpy.bitwise_or(x1, x2)	对数组的每个元素执行逐位或操作。
# 按位异或	numpy.bitwise_xor(x1, x2)	对数组的每个元素执行逐位异或操作。
# 按位取反	numpy.invert(x)	对数组的每个元素执行逐位取反（按位非）。
# 左移	numpy.left_shift(x1, x2)	将数组的每个元素左移指定的位数。
# 右移	numpy.right_shift(x1, x2)	将数组的每个元素右移指定的位数。

### 6.2 NumPy math functions

In [None]:
# np.sin(arr*np.pi/180), np.cos(), np.tan(), np.arcsin(), ...
# np.exp(arr), np.log(arr), ...
# np.around(arr), np.floor(), np.ceil(), 

In [None]:
# np.add(arr1, arr2)
# np.subtract(), multiply(), np.divide()
# np.reciprocal(arr)
# np.power(arr, 3)

# np.mod(arr, brr) # same shape
# np.remainder(arr, brr)

## 7. NumPy 统计函数

In [None]:
# np.amin(arr), np.amax(arr, axis=None)

# 3 x 3 sample matrix used by the statistics and sorting examples below.
arb = np.array([[3,7,5],[8,4,3],[2,4,9]])
arb

In [None]:
np.amin(arb) # global minimum over all elements (2 for arb)

In [None]:
np.amin(arb, axis=0) # minimum of each column

In [None]:
np.amin(arb, 0) # axis passed positionally: minimum of each column

In [None]:
np.amax(arb, 1) # maximum of each row

In [None]:
# numpy.ptp() returns the range of the values in the array (maximum - minimum)
np.ptp(arb) # min=2 and max=9

In [None]:
np.ptp(arb, 0) # range (max - min) of each column

In [None]:
# numpy.median() computes the median of the elements of the array
np.median(arb)

In [None]:
# numpy.mean() returns the arithmetic mean of the elements of the array
np.mean(arb)

In [None]:
# np.std(): the standard deviation measures how far the data spread around the mean; it is the square root of the variance.
np.std(arb)

## 8. NumPy 排序、条件筛选函数

In [None]:
# numpy.sort() returns a sorted copy of the input array
print(arb, '\n')
np.sort(arb) # default axis=-1: each row is sorted independently

In [None]:
np.sort(arb, 0) # axis=0: each column is sorted independently

In [None]:
# kind='quicksort': fastest, worst case O(n^2), not stable
# kind='mergesort': medium speed, worst case O(n*log(n)), stable
# kind='heapsort': slowest, worst case O(n*log(n)), not stable

In [None]:
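# np.argsort() returns the indices that would sort the array in ascending order
# np.lexsort() sorts using several key sequences
# np.msort(arr) (deprecated since NumPy 1.24; use np.sort(arr, axis=0)), np.sort_complex(arr), np.partition(arr, kth)
# np.argmax() and np.argmin() return the indices of the maximum and minimum elements along the given axis
# np.nonzero() returns the indices of the non-zero elements of the input array
# np.where() returns the indices of the elements of the input array that satisfy the given condition
# np.extract() pulls elements out of an array according to a condition and returns the elements that satisfy it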

## 10. Matplotlib

In [None]:
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline

xarr = np.arange(1, 10, 0.1)
yarr = np.sin(xarr)

plt.plot(xarr, yarr, 'ro-')

plt.title("Matplotlib demo")
plt.xlabel("X axis")
plt.ylabel("Y axis")