# NumPy Crash Course

## Introduction

In [1]:
# pip3 install numpy

In [2]:
import numpy as np

## np.array()

In [3]:
l = [1, 2, 3, 4, 5]

In [4]:
npa = np.array(l)

In [5]:
npa

array([1, 2, 3, 4, 5])

In [7]:
type(l)

list

In [8]:
type(npa)

numpy.ndarray

In [9]:
# Shape of the array (its size along each dimension)
npa.shape

(5,)

In [10]:
l1 = [1, 4, 9, 16]

In [11]:
l1

[1, 4, 9, 16]

In [12]:
np.array(l + l1)

array([ 1,  2,  3,  4,  5,  1,  4,  9, 16])

如果是list， + [20] 则会在list的末尾添加20这个元素。

In [13]:
l + [20]

[1, 2, 3, 4, 5, 20]

但是numpy会采用广播机制，将array上的每一个元素都+20：

In [14]:
npa + 20

array([21, 22, 23, 24, 25])

In [15]:
l_2d = [[1, 2], [3, 4], [5, 6]]

In [16]:
l_2d

[[1, 2], [3, 4], [5, 6]]

In [17]:
l_2d[0]

[1, 2]

In [18]:
l_2d[0][1]

2

In [19]:
# Convert the nested list into a 2-D array
npa_2d = np.array(l_2d)
npa_2d

array([[1, 2],
       [3, 4],
       [5, 6]])

In [20]:
npa_2d[0]

array([1, 2])

In [21]:
npa_2d[0][0]

1

In [22]:
npa_2d.shape

(3, 2)

In [23]:
npa_2d_float = np.array(l_2d, dtype = "float")

npa_2d_float

array([[1., 2.],
       [3., 4.],
       [5., 6.]])

In [24]:
npa_2d_float.astype("int")  # astype(dtype) returns a converted copy; npa_2d_float itself is not modified

array([[1, 2],
       [3, 4],
       [5, 6]])

## np.nan 和 np.inf

np.nan 表示“非数字”（Not a Number），它与任何值（包括它自身）比较都不相等；np.inf 则表示无穷。

In [25]:
np.nan == np.nan

False

In [27]:
None == None

True

In [26]:
np.inf == np.inf

True

In [28]:
npa_2d_float

array([[1., 2.],
       [3., 4.],
       [5., 6.]])

In [29]:
# Rebind npa_2d_float to an integer copy; despite its name, the array holds ints from here on.
npa_2d_float = npa_2d_float.astype("int")

In [30]:
npa_2d_float

array([[1, 2],
       [3, 4],
       [5, 6]])

In [31]:
npa_2d

array([[1, 2],
       [3, 4],
       [5, 6]])

In [33]:
npa_2d[0][0]

1

In [34]:
# npa_2d has an integer dtype, so it cannot store NaN. The failure is the point
# of this cell: catch it and print the message so "Run All" can continue.
try:
    npa_2d[0, 0] = np.nan
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: cannot convert float NaN to integer

这个错误是由于 npa_2d 是整数类型的数组，赋值时会尝试将浮点数 NaN（Not a Number）转换为整数导致的。

In [35]:
type(np.nan)

float

In [36]:
# Infinity is a float as well, so an integer array cannot store it either.
# The error is the demonstration: catch it and print the message instead of
# aborting a full notebook run.
try:
    npa_2d[0, 0] = np.inf
except OverflowError as exc:
    print(f"OverflowError: {exc}")

OverflowError: cannot convert float infinity to integer

In [37]:
type(np.inf)

float

In [39]:
np.isnan(npa_2d_float)

array([[False, False],
       [False, False],
       [False, False]])

In [41]:
npa_2d_float.astype("float")

array([[1., 2.],
       [3., 4.],
       [5., 6.]])

In [42]:
# The astype("float") call in the previous cell was not assigned back, so
# npa_2d_float still has an integer dtype and rejects NaN. Catch the expected
# error and print it so the notebook keeps running.
try:
    npa_2d_float[0, 0] = np.nan
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: cannot convert float NaN to integer

In [43]:
# Build the original integer array again so this cell starts from a known state.
npa_2d = np.array([[1, 2],
                   [3, 4],
                   [5, 6]])

# astype(float) returns a new float copy of the data. An integer array cannot
# hold NaN, so the converted copy must be kept under its own name.
npa_2d_float = npa_2d.astype(float)

# Write NaN with a single multi-axis index rather than chained [0][0].
npa_2d_float[0, 0] = np.nan

print(npa_2d_float)

[[nan  2.]
 [ 3.  4.]
 [ 5.  6.]]


In [44]:
np.isnan(npa_2d_float)

array([[ True, False],
       [False, False],
       [False, False]])

In [45]:
np.isinf(npa_2d_float)

array([[False, False],
       [False, False],
       [False, False]])

## Statistical Operations

In [59]:
# mean, var, std, median

In [50]:
npa_2d = np.array([[0., 2.],
         [3., 0.],
         [3., 4.]])

In [51]:
npa_2d

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [52]:
npa_2d.mean()

2.0

In [54]:
npa_2d.var()  # variance

2.3333333333333335

In [55]:
npa_2d.std()  # standard deviation

1.5275252316519468

In [57]:
npa_2d.max()  # maximum value

4.0

In [58]:
npa_2d.min()  # minimum value

0.0

In [63]:
# Median: ndarray has no .median() method, so this call fails (np.median(arr)
# is the working form). Catch the expected error and print it so a full run
# of the notebook is not interrupted.
try:
    npa_2d.median()
except AttributeError as exc:
    print(f"AttributeError: {exc}")

AttributeError: 'numpy.ndarray' object has no attribute 'median'

In [64]:
np.median(npa_2d)

2.5

## Shape，Reshape，Ravel 和 Flatten

In [67]:
npa_2d

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [68]:
npa_2d.shape

(3, 2)

In [69]:
npa_2d.reshape(2, 3)

array([[0., 2., 3.],
       [0., 3., 4.]])

In [70]:
# reshape can fail: the array has 6 elements, and shape (1, 3) only has room
# for 3. Catch the expected error and print it so the notebook keeps running.
try:
    npa_2d.reshape(1, 3)
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: cannot reshape array of size 6 into shape (1,3)

In [71]:
npa_2d

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [72]:
npa_2d.reshape(1, 6)

array([[0., 2., 3., 0., 3., 4.]])

In [73]:
npa_2d.reshape(1, 6).shape

(1, 6)

In [74]:
npa_2d.reshape(1, 6).ravel().shape  # ravel() turns it into a one-dimensional array

(6,)

In [75]:
npa_2d

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [77]:
npa_2d.flatten()  # flatten the 2-D array directly into 1-D

array([0., 2., 3., 0., 3., 4.])

## Sequence，Repetitions和Random Numbers

In [78]:
np.arange(1, 5)

array([1, 2, 3, 4])

In [79]:
np.arange(1, 5, dtype = "float")

array([1., 2., 3., 4.])

In [81]:
np.arange(1, 20, 3)

array([ 1,  4,  7, 10, 13, 16, 19])

In [82]:
# linspace returns n evenly spaced numbers from start to stop (both endpoints included)
np.linspace(1, 20, 3)

array([ 1. , 10.5, 20. ])

In [83]:
np.linspace(1, 20, 4)

array([ 1.        ,  7.33333333, 13.66666667, 20.        ])

In [85]:
np.linspace(1, 20, 5).astype("int")

array([ 1,  5, 10, 15, 20])

In [86]:
np.logspace(1, 50, 10)

array([1.00000000e+01, 2.78255940e+06, 7.74263683e+11, 2.15443469e+17,
       5.99484250e+22, 1.66810054e+28, 4.64158883e+33, 1.29154967e+39,
       3.59381366e+44, 1.00000000e+50])

In [87]:
np.zeros([1, 5])

array([[0., 0., 0., 0., 0.]])

In [88]:
np.ones([1, 5])

array([[1., 1., 1., 1., 1.]])

In [90]:
# tile repeats the whole sequence: 1, 2, 3, 1, 2, 3, ...
np.tile([1, 2, 3], 3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [91]:
# repeat repeats each element in turn: 1, 1, 1, 2, 2, 2, ...
np.repeat([1, 2, 3], 3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [92]:
# Random array of the requested shape
np.random.rand(3, 3) # generates an array of shape 3 x 3

array([[0.70035167, 0.06006385, 0.50655267],
       [0.36528735, 0.68866206, 0.94890702],
       [0.13852133, 0.63630133, 0.65045395]])

In [94]:
# Alternatively, draw random integers
np.random.randint(0, 100, [3, 3])  # 3 x 3 array of integers from 0 up to, but not including, 100

array([[79, 89, 22],
       [17, 77, 59],
       [29,  3, 71]])

In [95]:
npa_2d

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [96]:
np.unique(npa_2d)

array([0., 2., 3., 4.])

## Where() 函数

In [97]:
arr = np.array([8, 94, 8, 56, 1, 3, 4, 5, 7])

In [98]:
arr

array([ 8, 94,  8, 56,  1,  3,  4,  5,  7])

In [99]:
np.where(arr > 15)

(array([1, 3], dtype=int64),)

np.where(条件) 返回一个元组（每个维度对应一个数组），其中给出所有满足条件的元素的 index。

In [100]:
index = np.where(arr > 15)

In [101]:
index

(array([1, 3], dtype=int64),)

In [102]:
arr[index]

array([94, 56])

In [104]:
index = np.where(arr == 8)
arr[index]

array([8, 8])

In [107]:
# Without where()
arr > 15  # broadcasting again: the comparison is applied element-wise, giving a boolean mask

array([False,  True, False,  True, False, False, False, False, False])

In [108]:
arr[arr > 15]

array([94, 56])

In [110]:
arr.max()

94

In [111]:
arr.argmax()

1

In [112]:
arr.argmin()

4

In [113]:
arr[arr.argmin()]

1

## File Read and Write

In [115]:
npa_2d

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [117]:
np.savetxt("npa_2d.csv", npa_2d, delimiter = ",")  # writes the file npa_2d.csv to the current working directory

In [118]:
# The file was written with "," as delimiter. loadtxt splits on whitespace
# unless told otherwise, so each comma-separated row is read as one
# unparseable token. Catch the expected error and print it so the notebook
# keeps running.
try:
    np.loadtxt("npa_2d.csv")
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: could not convert string '0.000000000000000000e+00,2.000000000000000000e+00' to float64 at row 0, column 1.

In [119]:
np.loadtxt("npa_2d.csv", delimiter = ",")

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [120]:
# Another way to store the array: NumPy's binary .npy format
np.save("data.npy", npa_2d)

In [121]:
# Load it back
np.load("data.npy")

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

## Concatenate and Sorting

In [122]:
npa_2d

array([[0., 2.],
       [3., 0.],
       [3., 4.]])

In [123]:
np.concatenate([npa_2d, npa_2d], axis = 0)

array([[0., 2.],
       [3., 0.],
       [3., 4.],
       [0., 2.],
       [3., 0.],
       [3., 4.]])

In [124]:
# npa_2d is 2-D while npa is 1-D; concatenate needs every input to have the
# same number of dimensions. Catch the expected error and print it so the
# notebook keeps running.
try:
    np.concatenate([npa_2d, npa], axis = 1)
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

可以发现，参与拼接的数组必须具有相同的维数（并且除拼接轴以外，其余各维的大小也必须一致）。

## Working With Dates

In [125]:
# The day is not zero-padded ("12-1" instead of "12-01"), so parsing fails.
# The failure is the demonstration: catch it and print the message so the
# notebook keeps running.
try:
    d = np.datetime64('2020-12-1 23:34:23')
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: Error parsing datetime string "2020-12-1 23:34:23" at position 8

In [126]:
d = np.datetime64("2020-12-01 23:34:23")

In [127]:
d

numpy.datetime64('2020-12-01T23:34:23')

In [128]:
d + 10  # adds 10 to the smallest unit of d (seconds here, as the output shows)

numpy.datetime64('2020-12-01T23:34:33')

In [129]:
oneday = np.timedelta64(1, "D")

In [130]:
oneday

numpy.timedelta64(1,'D')

In [131]:
d + oneday

numpy.datetime64('2020-12-02T23:34:23')