In [1]:
import numpy as np

# NumPy的ndarray:一种多维数组对象

## 创建ndarray

In [2]:
data1 = [6,7.5,8,0,1]

arr1 = np.array(data1)

arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [3]:
data2 = [[1,2,3,4],[5,6,7,8]]

arr2 = np.array(data2)

arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [4]:
arr2.ndim

2

In [5]:
arr2.shape

(2, 4)

In [6]:
arr2.dtype

dtype('int32')

In [7]:
arr1.dtype

dtype('float64')

In [8]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [9]:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [10]:
np.empty((2,3,2))    #返回的不一定全0数组，而是一些未初始化的垃圾值

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [11]:
np.arange(1,10,2)

array([1, 3, 5, 7, 9])

In [12]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [13]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [14]:
np.linspace(0,1,9)

array([0.   , 0.125, 0.25 , 0.375, 0.5  , 0.625, 0.75 , 0.875, 1.   ])

## ndarray的数据类型

In [15]:
arr1 = np.array([1,2,3],dtype=np.float64)

arr2 = np.array([1,2,3],dtype=np.int32)

In [16]:
arr1.dtype

dtype('float64')

In [17]:
arr2.dtype

dtype('int32')

In [18]:
arr = np.array([1,2,3,4,5])

arr.dtype

dtype('int32')

In [19]:
float_arr = arr.astype(np.float64)

float_arr.dtype

dtype('float64')

In [20]:
arr = np.array([3.7,-1.2,2.6,0.5,12.8,10.1])

arr

array([ 3.7, -1.2,  2.6,  0.5, 12.8, 10.1])

In [21]:
arr.astype(np.int32)

array([ 3, -1,  2,  0, 12, 10])

## 数组和标量之间的运算

In [22]:
arr = np.array([[1.,2,3],[4,5,6]])

arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [23]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [24]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [25]:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [26]:
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

## 基本的索引与切片

In [27]:
arr = np.arange(10)

arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [28]:
arr[5]

5

In [29]:
arr[5:8]

array([5, 6, 7])

In [30]:
arr[5:8] = 12

arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [35]:
arr = np.arange(10)

arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [36]:
#视图
arr_view = arr[5:8]

arr_view[1] = 99

arr

array([ 0,  1,  2,  3,  4,  5, 99,  7,  8,  9])

In [38]:
arr = np.arange(10)

arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [39]:
#副本
arr_copy = arr[5:8].copy()

arr_copy[1] = 99

arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
arr_copy

array([ 5, 99,  7])

In [41]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])

arr2d[2]

array([7, 8, 9])

In [42]:
arr2d[0][2]

3

In [43]:
arr2d[0,2]

3

In [44]:
arr3d = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])

arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [45]:
arr3d.shape

(2, 2, 3)

In [46]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [47]:
arr3d[1,0]

array([7, 8, 9])

In [48]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [49]:
arr[1:6]

array([1, 2, 3, 4, 5])

In [50]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [51]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [52]:
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

## 布尔型索引

In [57]:
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])

data = np.random.randn(7,4)

data

array([[-0.54944519,  0.92602467, -0.46599517, -0.89173482],
       [ 0.34681786, -0.2683059 ,  0.75128226,  1.85602353],
       [ 0.33468605,  1.37769067, -0.64593435, -0.60772167],
       [ 0.73229416,  0.25579332,  0.73139384, -0.29225846],
       [-0.13140623, -0.31660335, -0.13937012,  0.40380991],
       [ 0.28721827,  0.57437958, -0.76008619, -1.19775937],
       [-1.13252428, -0.46288239, -1.89356923, -2.71333673]])

In [58]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [59]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [60]:
data[names=='Bob']

array([[-0.54944519,  0.92602467, -0.46599517, -0.89173482],
       [ 0.73229416,  0.25579332,  0.73139384, -0.29225846]])

## 花式索引

In [2]:
import numpy as np

arr = np.zeros((8,4))

for i in range(8):
    arr[i] = i
    
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [3]:
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [4]:
arr = np.arange(32).reshape((8,4))

arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [5]:
arr[[1,5,7,2],[0,3,1,2]]

array([ 4, 23, 29, 10])

In [6]:
arr[np.ix_([1,5,7,2],[0,3,1,2])]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

## 数组转置和轴边换

In [7]:
arr = np.arange(15).reshape((3,5))

arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [8]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

## 通用函数

In [10]:
arr = np.arange(10)

arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [12]:
np.square(arr)

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)

In [13]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [14]:
arr = np.random.randn(8)

arr

array([ 1.14326231,  0.77764652,  0.2789127 ,  0.17958528, -3.64914518,
       -0.36656244, -0.09762475, -1.49760018])

In [15]:
np.sign(arr)

array([ 1.,  1.,  1.,  1., -1., -1., -1., -1.])

In [16]:
np.ceil(arr)

array([ 2.,  1.,  1.,  1., -3., -0., -0., -1.])

In [17]:
np.floor(arr)

array([ 1.,  0.,  0.,  0., -4., -1., -1., -2.])

In [18]:
np.sin(arr)

array([ 0.90999098,  0.70160435,  0.27531053,  0.17862154,  0.48603977,
       -0.35840837, -0.09746975, -0.99732236])

## 条件逻辑

In [20]:
arr = np.random.randn(4,4)

arr

array([[-1.88506447,  0.75110186,  0.85423086,  0.39844424],
       [ 0.24524321, -2.1245616 ,  0.19443252, -1.28011579],
       [-0.08003314, -0.0333045 ,  1.29075359, -2.64283392],
       [-0.19192672, -0.80804954,  0.91362289, -0.17807779]])

In [21]:
np.where(arr>0,2,-2)

array([[-2,  2,  2,  2],
       [ 2, -2,  2, -2],
       [-2, -2,  2, -2],
       [-2, -2,  2, -2]])

In [22]:
np.where(arr>0,0,arr)

array([[-1.88506447,  0.        ,  0.        ,  0.        ],
       [ 0.        , -2.1245616 ,  0.        , -1.28011579],
       [-0.08003314, -0.0333045 ,  0.        , -2.64283392],
       [-0.19192672, -0.80804954,  0.        , -0.17807779]])

## 数学和统计方法

In [23]:
arr = np.random.randn(5,4)

arr

array([[-1.16703348, -0.81471763, -0.3615287 ,  0.89271847],
       [ 0.08365138, -1.14115543,  0.87414794,  2.30358626],
       [ 0.44128078,  1.76516092,  0.45824755,  0.50351536],
       [ 0.70059531, -0.17171075, -0.6058067 , -0.15560726],
       [-0.00769996,  1.6006821 , -0.96649663, -0.26254335]])

In [24]:
arr.mean()   #算术平均数

0.19846430907469964

In [25]:
np.sum(arr)  #求和

3.9692861814939926

In [26]:
np.mean(arr,axis=1)

array([-0.36264033,  0.53005754,  0.79205115, -0.05813235,  0.09098554])

In [27]:
arr.sum(axis=0)

array([ 0.05079403,  1.23825922, -0.60143653,  3.28166947])

In [28]:
arr = np.arange(9).reshape((3,3))

arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [29]:
np.cumsum(arr,axis=0)  #累计和

array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]], dtype=int32)

In [30]:
np.cumprod(arr,axis=1)  #累计积

array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]], dtype=int32)

In [31]:
np.std(arr)  #标准差

2.581988897471611

In [32]:
np.var(arr)  #方差

6.666666666666667

In [33]:
np.min(arr)   #最小值

0

In [34]:
np.max(arr)  #最大值

8

In [35]:
np.argmin(arr)  #最小值索引

0

In [36]:
np.argmax(arr)  #最大值索引

8

## 用于布尔型数组的方法

In [37]:
arr = np.random.randn(100)

(arr>0).sum()

52

In [38]:
bools = np.array([False,False,True,False])

bools.any()

True

In [39]:
bools.all()

False

## 排序

In [40]:
arr = np.random.randn(8)

arr

array([-1.36241715e+00, -6.99881325e-02,  1.85041492e+00,  3.31770531e-01,
       -1.60428812e+00, -7.05882625e-01, -8.80851487e-01,  1.50973131e-03])

In [41]:
arr.sort()

In [42]:
arr

array([-1.60428812e+00, -1.36241715e+00, -8.80851487e-01, -7.05882625e-01,
       -6.99881325e-02,  1.50973131e-03,  3.31770531e-01,  1.85041492e+00])

In [43]:
arr = np.random.randn(5,3)

arr

array([[-2.48855416, -0.28854795,  0.8857749 ],
       [ 1.19405828, -0.04136204, -1.538307  ],
       [ 0.90318801, -1.40802753,  1.10737706],
       [-0.10807838, -0.30087844,  1.67187417],
       [ 0.34360207, -0.23377582, -0.95304037]])

In [44]:
arr.sort(1)

In [45]:
arr

array([[-2.48855416, -0.28854795,  0.8857749 ],
       [-1.538307  , -0.04136204,  1.19405828],
       [-1.40802753,  0.90318801,  1.10737706],
       [-0.30087844, -0.10807838,  1.67187417],
       [-0.95304037, -0.23377582,  0.34360207]])

In [46]:
arr.sort(0)

In [47]:
arr

array([[-2.48855416, -0.28854795,  0.34360207],
       [-1.538307  , -0.23377582,  0.8857749 ],
       [-1.40802753, -0.10807838,  1.10737706],
       [-0.95304037, -0.04136204,  1.19405828],
       [-0.30087844,  0.90318801,  1.67187417]])

## 线性代数

In [49]:
x = np.arange(1,7).reshape((2,3))

x

array([[1, 2, 3],
       [4, 5, 6]])

In [50]:
y = np.array([[6,23],[-1,7],[8,9]])

y

array([[ 6, 23],
       [-1,  7],
       [ 8,  9]])

In [51]:
np.dot(x,y)

array([[ 28,  64],
       [ 67, 181]])

In [52]:
arr = np.arange(5)

arr

array([0, 1, 2, 3, 4])

In [53]:
np.diag(arr)

array([[0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 2, 0, 0],
       [0, 0, 0, 3, 0],
       [0, 0, 0, 0, 4]])

In [54]:
arr = np.diag([1,2,3])

arr

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [55]:
np.trace(arr)

6

In [56]:
X = np.random.randn(5,5)

mat = X.T.dot(X)

mat

array([[ 0.96211999, -0.19508039, -0.43499113,  0.24904465,  1.14224015],
       [-0.19508039,  1.16652504, -0.14988385, -1.23221106, -0.69328259],
       [-0.43499113, -0.14988385,  1.17686409, -0.60835454, -1.30662655],
       [ 0.24904465, -1.23221106, -0.60835454,  2.77743208,  0.60112021],
       [ 1.14224015, -0.69328259, -1.30662655,  0.60112021,  3.30207955]])

In [58]:
np.linalg.inv(mat)

array([[  16.75288964, -101.68596862,  -97.19884161,  -55.90893981,
         -55.42795856],
       [-101.68596862,  690.0437618 ,  659.14627676,  379.15253406,
         371.85260968],
       [ -97.19884161,  659.14627676,  631.27841632,  362.40516322,
         355.83489491],
       [ -55.90893981,  379.15253406,  362.40516322,  208.7370076 ,
         204.34803317],
       [ -55.42795856,  371.85260968,  355.83489491,  204.34803317,
         201.15098955]])

In [59]:
mat.dot(np.linalg.inv(mat))

array([[ 1.00000000e+00, -6.04925741e-14,  5.69241765e-14,
        -1.14540235e-13,  4.02987114e-14],
       [ 2.17681785e-14,  1.00000000e+00,  3.59347896e-14,
        -4.98384329e-14, -1.14433217e-13],
       [ 3.57916118e-15,  1.08520907e-13,  1.00000000e+00,
         3.36688513e-14,  6.97328032e-15],
       [ 1.61972207e-14,  4.40946587e-14, -2.75393277e-13,
         1.00000000e+00, -2.03274465e-14],
       [ 1.56041853e-15, -8.63153831e-14, -8.17226659e-14,
         4.13244079e-14,  1.00000000e+00]])

In [61]:
q,r = np.linalg.qr(mat)

In [62]:
q

array([[-0.60611659, -0.22265935, -0.52147154, -0.54985423, -0.09366561],
       [ 0.12289679, -0.655497  ,  0.30175519, -0.26325441,  0.62837964],
       [ 0.27403582,  0.24868571, -0.69187893,  0.15095469,  0.60131191],
       [-0.15689321,  0.67704303,  0.36987706, -0.51082513,  0.34532   ],
       [-0.71958874,  0.02268571,  0.14664834,  0.58705009,  0.33991744]])

In [63]:
r

array([[-1.58735135e+00,  9.12733995e-01,  1.60341845e+00,
        -1.33741538e+00, -3.60604631e+00],
       [ 0.00000000e+00, -1.60847869e+00,  4.62485763e-02,
         2.49504734e+00,  3.57069142e-01],
       [ 0.00000000e+00,  0.00000000e+00, -1.04927121e+00,
         1.03467361e+00,  8.05765076e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -9.70281647e-01,  9.88621516e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  1.68986212e-03]])

In [64]:
s,v,d = np.linalg.svd(mat)

In [65]:
s

array([[-0.27940117, -0.1966631 , -0.117063  , -0.9278606 , -0.09289551],
       [ 0.27863285, -0.36636828,  0.60951609, -0.14609516,  0.62871644],
       [ 0.33758258,  0.18423339, -0.69108484, -0.11371416,  0.60130432],
       [-0.46653352,  0.75565161,  0.28921058, -0.09076952,  0.34562299],
       [-0.71601899, -0.47132432, -0.23139874,  0.3107425 ,  0.33921073]])

In [66]:
v

array([5.02529033e+00, 2.78677946e+00, 1.05245981e+00, 5.19917950e-01,
       5.73219736e-04])

In [67]:
d

array([[-0.27940117,  0.27863285,  0.33758258, -0.46653352, -0.71601899],
       [-0.1966631 , -0.36636828,  0.18423339,  0.75565161, -0.47132432],
       [-0.117063  ,  0.60951609, -0.69108484,  0.28921058, -0.23139874],
       [-0.9278606 , -0.14609516, -0.11371416, -0.09076952,  0.3107425 ],
       [-0.09289551,  0.62871644,  0.60130432,  0.34562299,  0.33921073]])

In [68]:
A = np.array([[3,1],[1,2]])

b = np.array([[9],[8]])

x = np.linalg.solve(A,b)

In [69]:
x

array([[2.],
       [3.]])

In [70]:
w,v = np.linalg.eig(A)  #特征值 与 特征向量

In [71]:
w

array([3.61803399, 1.38196601])

In [72]:
v

array([[ 0.85065081, -0.52573111],
       [ 0.52573111,  0.85065081]])