## 行和列的数据压缩实践

In [1]:
import numpy as np

# 7x5 example matrix: rows 0-2 are non-zero only in the last two columns,
# rows 3-6 are non-zero only in the first three columns.
A = [
    [0, 0, 0, 2, 2],
    [0, 0, 0, 3, 3],
    [0, 0, 0, 1, 1],
    [1, 1, 1, 0, 0],
    [2, 2, 2, 0, 0],
    [5, 5, 5, 0, 0],
    [1, 1, 1, 0, 0],
]

# Full SVD (defaults spelled out): U is 7x7, VT is 5x5, sigma holds the
# five singular values.
U, sigma, VT = np.linalg.svd(A, full_matrices=True, compute_uv=True)
print(sigma)

[9.64365076e+00 5.29150262e+00 8.10743804e-16 8.49555901e-17
 7.85046229e-17]


In [2]:
# Row compression: take the first two columns of U (transposed into rows)
# and apply them to A, reducing its 7 rows to 2.
UT_2x7 = U[:, :2].T
print(UT_2x7.dot(A))

[[-5.56776436e+00 -5.56776436e+00 -5.56776436e+00  2.48307945e-16
   2.48307945e-16]
 [-7.63278329e-17 -7.63278329e-17 -7.63278329e-17 -3.74165739e+00
  -3.74165739e+00]]


In [4]:
# Column compression: project every row of A onto the first two rows of VT,
# reducing its 5 columns to 2.
VT_2x5 = VT[:2, :]
# np.mat was removed in NumPy 2.0; transposing a plain ndarray gives the same
# product without depending on the legacy matrix class.
print(np.dot(VT_2x5, np.asarray(A).T).T)

[[ 0.00000000e+00 -2.82842712e+00]
 [ 0.00000000e+00 -4.24264069e+00]
 [ 0.00000000e+00 -1.41421356e+00]
 [-1.73205081e+00  0.00000000e+00]
 [-3.46410162e+00  0.00000000e+00]
 [-8.66025404e+00  9.86076132e-32]
 [-1.73205081e+00  0.00000000e+00]]


## 利用数据压缩进行矩阵近似

In [9]:
# Rank-1 terms sigma_i * u_i * v_i^T for the two leading singular values.
# np.outer replaces the former np.mat column-by-row product (np.mat was
# removed in NumPy 2.0); A_1 and A_2 are therefore plain 7x5 ndarrays.
A_1 = sigma[0] * np.outer(U[:, 0], VT[0, :])
A_2 = sigma[1] * np.outer(U[:, 1], VT[1, :])
# The remaining singular values are ~0, so the sum of these two terms
# reproduces A up to floating-point round-off.
print(A_1 + A_2)

[[ 6.13544436e-18 -1.03995782e-15 -1.03995782e-15  2.00000000e+00
   2.00000000e+00]
 [ 1.04609326e-15 -5.23046632e-16 -5.23046632e-16  3.00000000e+00
   3.00000000e+00]
 [ 3.48697754e-16 -1.74348877e-16 -1.74348877e-16  1.00000000e+00
   1.00000000e+00]
 [ 1.00000000e+00  1.00000000e+00  1.00000000e+00  7.69936548e-17
   7.69936548e-17]
 [ 2.00000000e+00  2.00000000e+00  2.00000000e+00 -9.23711205e-17
  -9.23711205e-17]
 [ 5.00000000e+00  5.00000000e+00  5.00000000e+00  8.79053493e-17
   8.79053493e-17]
 [ 1.00000000e+00  1.00000000e+00  1.00000000e+00 -4.61855603e-17
  -4.61855603e-17]]


In [11]:
%%html
<!-- Embed two local JPEGs (relative paths), each rendered at 640x480. -->
<img src="./svd1.jpg" width="640" height="480">
<img src="./svd2.jpg" width="640" height="480">