# 第十四章: 利用SVD简化数据

# 14.3 利用Python实现SVD

In [1]:
from numpy import *

In [2]:
# Warm-up: factor a tiny 2x2 matrix whose second row is 7x the first.
demo_rows = [[1, 1], [7, 7]]
svd_factors = linalg.svd(demo_rows)
U, Sigma, VT = svd_factors

In [3]:
# Display U, the first factor returned by linalg.svd (2x2 for this input).
U

array([[-0.14142136, -0.98994949],
       [-0.98994949,  0.14142136]])

In [4]:
# Sigma comes back as a 1-D array of singular values, not as a diagonal matrix.
Sigma

array([ 10.,   0.])

In [5]:
# Display VT, the third factor returned by linalg.svd.
VT

array([[-0.70710678, -0.70710678],
       [-0.70710678,  0.70710678]])

In [6]:
import svdRec

In [7]:
# Fetch the example data set returned by svdRec.loadExData().
# NOTE(review): presumably a 7x5 matrix (the reconstruction printed further down has that shape) - confirm in svdRec.
Data = svdRec.loadExData()

In [8]:
# Factor the example data. This rebinds U, Sigma and VT, replacing the results of the 2x2 demo above.
U, Sigma, VT = linalg.svd(Data)

In [9]:
# Only the first two singular values are significant; the remaining three are around 1e-16 or smaller.
Sigma

array([  9.64365076e+00,   5.29150262e+00,   8.10743804e-16,
         8.49555901e-17,   7.85046229e-17])

In [10]:
# Build the 3x3 diagonal matrix holding the three leading singular values.
# diag() replaces the hand-written nested list, and asmatrix() replaces the
# `mat` alias, which was removed from the numpy namespace in NumPy 2.0.
Sig3 = asmatrix(diag(Sigma[:3]))

In [11]:
# Inspect the 3x3 diagonal matrix built in the previous cell.
Sig3

matrix([[  9.64365076e+00,   0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   5.29150262e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   8.10743804e-16]])

In [12]:
# Multiply the truncated factors back together: first 3 columns of U, Sig3, first 3 rows of VT.
# Sig3 is a numpy matrix, so `*` is matrix multiplication here rather than an element-wise product.
U[:,:3]*Sig3*VT[:3,:]

matrix([[  4.97898416e-16,  -1.50790933e-15,  -1.06376928e-15,
           2.00000000e+00,   2.00000000e+00],
        [  7.61006254e-16,  -2.51763710e-16,  -5.09242545e-16,
           3.00000000e+00,   3.00000000e+00],
        [  2.20432837e-16,  -5.22946258e-17,  -1.68138212e-16,
           1.00000000e+00,   1.00000000e+00],
        [  1.00000000e+00,   1.00000000e+00,   1.00000000e+00,
           7.69936548e-17,   7.69936548e-17],
        [  2.00000000e+00,   2.00000000e+00,   2.00000000e+00,
          -9.23711205e-17,  -9.23711205e-17],
        [  5.00000000e+00,   5.00000000e+00,   5.00000000e+00,
           8.79053493e-17,   8.79053493e-17],
        [  1.00000000e+00,   1.00000000e+00,   1.00000000e+00,
          -4.61855603e-17,  -4.61855603e-17]])

# 14.4 基于协同过滤的推荐引擎

In [13]:
from numpy import linalg as la

In [14]:
# Wrap the example data in a numpy matrix for the similarity checks below.
# asmatrix is used because the `mat` alias was removed in NumPy 2.0.
myMat = asmatrix(svdRec.loadExData())

In [15]:
# Compare column 0 with column 4, then with itself, using svdRec.ecludSim.
first_col = myMat[:, 0]
fifth_col = myMat[:, 4]
print("The similarity of eclud 0 - 4 is : ", svdRec.ecludSim(first_col, fifth_col))
print("The similarity of eclud 0 - 0 is : ", svdRec.ecludSim(first_col, first_col))

The similarity of eclud 0 - 4 is :  0.129731907557
The similarity of eclud 0 - 0 is :  1.0


In [16]:
# Compare column 0 with column 4, then with itself, using svdRec.cosSim.
first_col = myMat[:, 0]
fifth_col = myMat[:, 4]
print("The similarity of cos 0 - 4 is : ", svdRec.cosSim(first_col, fifth_col))
print("The similarity of cos 0 - 0 is : ", svdRec.cosSim(first_col, first_col))

The similarity of cos 0 - 4 is :  0.5
The similarity of cos 0 - 0 is :  1.0


In [17]:
# Compare column 0 with column 4, then with itself, using svdRec.pearsSim.
first_col = myMat[:, 0]
fifth_col = myMat[:, 4]
print("The similarity of pears 0 - 4 is : ", svdRec.pearsSim(first_col, fifth_col))
print("The similarity of pears 0 - 0 is : ", svdRec.pearsSim(first_col, first_col))

The similarity of pears 0 - 4 is :  0.205965381738
The similarity of pears 0 - 0 is :  1.0


# 14.5 示例：餐馆菜肴推荐引擎

In [18]:
# Reload the example data so the overrides in the next cell start from the unmodified values.
# asmatrix is used because the `mat` alias was removed in NumPy 2.0.
myMat = asmatrix(svdRec.loadExData())

In [19]:
# Overwrite a handful of entries: four cells become 4 and one becomes 2.
# Each assignment sets a fixed value, so re-running the cell gives the same matrix.
rating_overrides = (
    (0, 1, 4),
    (0, 0, 4),
    (1, 0, 4),
    (2, 0, 4),
    (3, 3, 2),
)
for row_idx, col_idx, new_rating in rating_overrides:
    myMat[row_idx, col_idx] = new_rating
myMat

matrix([[4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 2, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0]])

### 不使用 SVD

In [20]:
# Ask svdRec.recommend for user 2, leaving the similarity measure and estimator at their defaults.
# NOTE(review): the returned pairs are presumably (item index, estimated rating) - confirm in svdRec.
svdRec.recommend(myMat, 2)

the 1 and 0 similarity is: 1.000000
the 1 and 3 similarity is: 0.928746
the 1 and 4 similarity is: 1.000000
the 2 and 0 similarity is: 1.000000
the 2 and 3 similarity is: 1.000000
the 2 and 4 similarity is: 0.000000


[(2, 2.5), (1, 2.0243290220056256)]

In [21]:
# Same request for user 2, with svdRec.pearsSim passed as the similarity measure.
svdRec.recommend(myMat, 2, simMeas = svdRec.pearsSim)

the 1 and 0 similarity is: 1.000000
the 1 and 3 similarity is: 1.000000
the 1 and 4 similarity is: 1.000000
the 2 and 0 similarity is: 1.000000
the 2 and 3 similarity is: 1.000000
the 2 and 4 similarity is: 0.000000


[(2, 2.5), (1, 2.0)]

In [22]:
# Same request for user 2, with svdRec.ecludSim passed as the similarity measure.
svdRec.recommend(myMat, 2, simMeas = svdRec.ecludSim)

the 1 and 0 similarity is: 1.000000
the 1 and 3 similarity is: 0.309017
the 1 and 4 similarity is: 0.333333
the 2 and 0 similarity is: 1.000000
the 2 and 3 similarity is: 0.500000
the 2 and 4 similarity is: 0.000000


[(2, 3.0), (1, 2.8266504712098603)]

In [23]:
# Switch to the second example data set; this rebinds myMat, so the edited matrix above is no longer referenced.
# asmatrix is used because the `mat` alias was removed in NumPy 2.0.
myMat = asmatrix(svdRec.loadExData2())

In [24]:
from numpy import linalg as la

In [25]:
# Factor the second example matrix; U, Sigma and VT are rebound again.
# asmatrix replaces the `mat` alias (removed in NumPy 2.0); it does not copy
# when myMat is already a matrix.
U, Sigma, VT = la.svd(asmatrix(myMat))

In [26]:
# Eleven singular values, in decreasing order from about 15.8 down to about 0.07.
Sigma

array([ 15.77075346,  11.40670395,  11.03044558,   4.84639758,
         3.09292055,   2.58097379,   1.00413543,   0.72817072,
         0.43800353,   0.22082113,   0.07367823])

In [27]:
# Element-wise squares of the singular values; the cells below sum them to
# compare the leading values against the total.
Sig2 = square(Sigma)
Sig2

array([  2.48716665e+02,   1.30112895e+02,   1.21670730e+02,
         2.34875695e+01,   9.56615756e+00,   6.66142570e+00,
         1.00828796e+00,   5.30232598e-01,   1.91847092e-01,
         4.87619735e-02,   5.42848136e-03])

In [28]:
# Total of all squared singular values.
# `sum` is numpy's sum here: the wildcard numpy import in the first cell shadows the builtin.
sum(Sig2)

541.99999999999955

In [29]:
# 90% of the total: the threshold that the leading squared singular values are checked against in the next cell.
sum(Sig2) * 0.9

487.79999999999961

In [30]:
# The three largest squared singular values add up to about 500.5, which is above the 90% threshold of about 487.8.
sum(Sig2[:3])

500.50028912757926

### 使用 SVD

In [31]:
# Recommend for user 1 on the second data set, with svdRec.svdEst as the estimator.
# The similarity measure is left at recommend()'s default.
svdRec.recommend(myMat, user = 1, estMethod = svdRec.svdEst)

the 0 and 3 similarity is: 0.490950
the 0 and 5 similarity is: 0.484274
the 0 and 10 similarity is: 0.512755
the 1 and 3 similarity is: 0.491294
the 1 and 5 similarity is: 0.481516
the 1 and 10 similarity is: 0.509709
the 2 and 3 similarity is: 0.491573
the 2 and 5 similarity is: 0.482346
the 2 and 10 similarity is: 0.510584
the 4 and 3 similarity is: 0.450495
the 4 and 5 similarity is: 0.506795
the 4 and 10 similarity is: 0.512896
the 6 and 3 similarity is: 0.743699
the 6 and 5 similarity is: 0.468366
the 6 and 10 similarity is: 0.439465
the 7 and 3 similarity is: 0.482175
the 7 and 5 similarity is: 0.494716
the 7 and 10 similarity is: 0.524970
the 8 and 3 similarity is: 0.491307
the 8 and 5 similarity is: 0.491228
the 8 and 10 similarity is: 0.520290
the 9 and 3 similarity is: 0.522379
the 9 and 5 similarity is: 0.496130
the 9 and 10 similarity is: 0.493617


[(4, 3.3447149384692283), (7, 3.3294020724526963), (9, 3.328100876390069)]

In [32]:
# Same svdEst-based request for user 1, this time with svdRec.pearsSim as the similarity measure.
svdRec.recommend(myMat, user = 1, estMethod = svdRec.svdEst ,simMeas = svdRec.pearsSim)

the 0 and 3 similarity is: 0.341942
the 0 and 5 similarity is: 0.124132
the 0 and 10 similarity is: 0.116698
the 1 and 3 similarity is: 0.345560
the 1 and 5 similarity is: 0.126456
the 1 and 10 similarity is: 0.118892
the 2 and 3 similarity is: 0.345149
the 2 and 5 similarity is: 0.126190
the 2 and 10 similarity is: 0.118640
the 4 and 3 similarity is: 0.450126
the 4 and 5 similarity is: 0.528504
the 4 and 10 similarity is: 0.544647
the 6 and 3 similarity is: 0.923822
the 6 and 5 similarity is: 0.724840
the 6 and 10 similarity is: 0.710896
the 7 and 3 similarity is: 0.319482
the 7 and 5 similarity is: 0.118324
the 7 and 10 similarity is: 0.113370
the 8 and 3 similarity is: 0.334910
the 8 and 5 similarity is: 0.119673
the 8 and 10 similarity is: 0.112497
the 9 and 3 similarity is: 0.566918
the 9 and 5 similarity is: 0.590049
the 9 and 10 similarity is: 0.602380


[(4, 3.3469521867021736), (9, 3.3353796573274699), (6, 3.307193027813037)]

# 14.6 示例：基于SVD的图像压缩

In [33]:
# Run the image-compression demo from svdRec; it starts by printing the original 0/1 image matrix.
# NOTE(review): the argument 2 is presumably the number of singular values to keep - confirm in svdRec.
svdRec.imgCompress(2)

****original matrix******
00000000000000110000000000000000
00000000000011111100000000000000
00000000000111111110000000000000
00000000001111111111000000000000
00000000111111111111100000000000
00000001111111111111110000000000
00000000111111111111111000000000
00000000111111100001111100000000
00000001111111000001111100000000
00000011111100000000111100000000
00000011111100000000111110000000
00000011111100000000011110000000
00000011111100000000011110000000
00000001111110000000001111000000
00000011111110000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000011111100000000001111000000
00000001111100000000001111000000
00000001111100000000011111000000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000001111100000
00000000111110000000011111000000
00000000111110000000111111000000
00000000111111000001111110000000
00000000011111111111111110000000
00000000001111111111111110000000
00000000001111111111111110000000
00000000000111111