In [1]:
import pandas as pd
import numpy as np

1. 利用列表推导式写矩阵乘法

In [3]:
M1 = np.random.rand(2,3)
M2 = np.random.rand(3,4)
res = np.empty((M1.shape[0],M2.shape[1]))
for i in range(M1.shape[0]):
    for j in range(M2.shape[1]):
        item = 0
        for k in range(M1.shape[1]):
            item += M1[i][k] * M2[k][j]
        res[i][j] = item
(np.abs((M1@M2 - res) < 1e-15)).all() # 排除数值误差

True

In [14]:
M1 = np.random.rand(2,3)
M2 = np.random.rand(3,4)
res = [[sum([M1[i][k] * M2[k][j] for k in range(M1.shape[1])]) for j in range(M2.shape[1])] for i in range(M1.shape[0])]
(np.abs((M1@M2 - res) < 1e-15)).all()

True

2. 更新矩阵

In [46]:
a = np.array(([1,2,3],[4,5,6],[7,8,9]))
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [47]:
a_2 = 1/a**2
a_2

array([[1.        , 0.25      , 0.11111111],
       [0.0625    , 0.04      , 0.02777778],
       [0.02040816, 0.015625  , 0.01234568]])

In [48]:
a_t = a_2.T
a_t

array([[1.        , 0.0625    , 0.02040816],
       [0.25      , 0.04      , 0.015625  ],
       [0.11111111, 0.02777778, 0.01234568]])

In [58]:
a_ = (a@a_t)[:,0]
a_

array([ 1.83333333,  5.91666667, 10.        ])

In [64]:
b = np.array([a[i,j]*a_[i] for j in range(a.shape[1]) for i in range(a_.shape[0])]).reshape(3,3, order='C')
b.T


array([[ 1.83333333,  3.66666667,  5.5       ],
       [23.66666667, 29.58333333, 35.5       ],
       [70.        , 80.        , 90.        ]])

3. 卡方统计量

In [37]:
np.random.seed(0)
A = np.random.randint(10, 20, (8, 5))
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [46]:
a_sum = np.sum(a)
a_sum

21

In [50]:
b=np.array([(np.sum(a[i:i+1,:])*np.sum(a[:,j:j+1]))/a_sum for i in range(a.shape[0]) for j in range(a.shape[1])]).reshape([2,3])
b

array([[1.42857143, 2.        , 2.57142857],
       [3.57142857, 5.        , 6.42857143]])

In [56]:
b[0,1]

2.0

In [61]:
sum([(a[i,j]-b[i,j])**2/b[i,j] for i in range(a.shape[0]) for j in range(a.shape[1])])

0.2799999999999999

4. 改进矩阵计算的性能

In [62]:
b = np.array([1,2,3,4,5,6]).reshape([2,3])
b

array([[1, 2, 3],
       [4, 5, 6]])

In [63]:
u = np.array([1,2,3,4,5,6,7,8,9]).reshape([3,3])
u

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [64]:
z = np.array([10,11,12,13,14,15]).reshape([2,3])
z

array([[10, 11, 12],
       [13, 14, 15]])

In [86]:
np.random.seed(0)
m, n, p = 100, 80, 50
B = np.random.randint(0, 2, (m, p))
U = np.random.randint(0, 2, (p, n))
Z = np.random.randint(0, 2, (m, n))
def solution(B=B, U=U, Z=Z):
    L_res = []
    for i in range(m):
        for j in range(n):
            norm_value = ((B[i]-U[:,j])**2).sum()
            L_res.append(norm_value*Z[i][j])
    return sum(L_res)
solution(B, U, Z)

100566

In [87]:
b = B
u = U
z = Z

In [88]:
sum([np.sum((b[i:i+1,:]-u[:,j:j+1].T)**2)*z[i,j] for i in range(b.shape[0]) for j in range(u.shape[1])])

100566

5. 连续整数的最大长度

In [136]:
def len_int(np_array):
    # 对数组进行一阶差分
    a_diff = np.diff(np_array)
    print(a_diff)
    # 对一阶差分的结果继续进行一阶差分
    a_diff2 = np.diff(a_diff)
    if a_diff2[-1] == 0:
        a_diff2 = np.append(a_diff2,1)
    print(a_diff2)
    # 找到非零的索引
    a_nonzero = list(a_diff2.nonzero())[0]
    print(a_nonzero)
    # 再对非零索引数组进行差分得到连续整数的最大长度
    len_ = np.max(np.diff(a_nonzero))+1
    return print(len_)

In [137]:
a = np.array([1,2,5,6,7])
print(a)
len_int(a)

[1 2 5 6 7]
[1 3 1 1]
[ 2 -2  0  1]
[0 1 3]
3


In [138]:
b = np.array([3,2,1,2,3,4,6])
len_int(b)

[-1 -1  1  1  1  2]
[0 2 0 0 1]
[1 4]
4
