In [1]:
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as la

In [2]:
# Quick check of numpy's SVD on a 2x2 matrix; the result is the tuple
# (U, singular values, Vt).
la.svd([[1,2],[3,4]])

(array([[-0.40455358, -0.9145143 ],
        [-0.9145143 ,  0.40455358]]),
 array([5.4649857 , 0.36596619]),
 array([[-0.57604844, -0.81741556],
        [ 0.81741556, -0.57604844]]))

In [3]:
def loadExData():
    """Return the small 7x5 example matrix as a list of rows.

    Each of the seven rows is a list of five integers.
    """
    example_rows = [
        [1, 1, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0],
    ]
    return example_rows

In [4]:
data = loadExData()

In [5]:
# Decompose the example matrix into U, the singular values (Sigma) and Vt.
U, Sigma, Vt = la.svd(data)

In [6]:
def makeSigma(s, n=0):
    """Build a square diagonal matrix from the first ``n`` values of ``s``.

    Parameters
    ----------
    s : sequence of numbers
        Values to place on the diagonal (for example the 1-D array of
        singular values returned by ``numpy.linalg.svd``).
    n : int, optional
        How many leading values of ``s`` to use. ``0`` (the default) means
        "use all of ``s``".

    Returns
    -------
    numpy.ndarray
        ``n`` x ``n`` float array with ``s[:n]`` on the diagonal and zeros
        everywhere else.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    IndexError
        If ``n`` is larger than ``len(s)``.
    """
    values = np.asarray(s, dtype=float)
    if n == 0:
        # 0 is the sentinel for "keep every value".
        n = len(values)
    if n < 0:
        raise ValueError("n must be non-negative, got %d" % n)
    if n > len(values):
        raise IndexError(
            "requested %d diagonal values but only %d are available"
            % (n, len(values))
        )
    return np.diag(values[:n])

In [7]:
# Rebuild the matrix from the first three singular values only, i.e. the
# matrix product U[:,:3] * makeSigma(Sigma,3) * Vt[:3,:]
np.dot( np.dot( U[:,:3] , makeSigma(Sigma,3)), Vt[:3,:])

array([[ 1.00000000e+00,  1.00000000e+00, -7.29983846e-16,
         2.00000000e+00,  2.00000000e+00],
       [ 3.29252254e-16,  3.48739958e-16, -4.90333126e-16,
         3.00000000e+00,  3.00000000e+00],
       [ 1.17013776e-16,  1.23509677e-16, -1.72716042e-16,
         1.00000000e+00,  1.00000000e+00],
       [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,
         3.07181397e-16,  3.11946003e-16],
       [ 2.00000000e+00,  2.00000000e+00,  2.00000000e+00,
         2.99662761e-16,  3.09191974e-16],
       [ 5.00000000e+00,  5.00000000e+00,  5.00000000e+00,
         4.41841827e-16,  4.65990119e-16],
       [ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00,
         1.49945160e-16,  1.52005145e-16]])

In [8]:
# Similarity based on Euclidean distance
def ecludSim(inA,inB):
    """Map the Euclidean distance between two vectors into (0, 1].

    Returns ``1 / (1 + ||inA - inB||)``: identical vectors give 1.0 and the
    value shrinks toward 0 as the vectors move apart. The inputs must
    support element-wise subtraction (NumPy arrays or matrices).
    """
    distance = la.norm(inA - inB)
    return 1.0 / (1.0 + distance)

In [9]:
# Similarity based on the Pearson correlation coefficient
def pearsSim(inA,inB):
    """Pearson correlation of two vectors, rescaled from [-1, 1] to [0, 1].

    Both inputs are flattened to 1-D first, so column vectors, row vectors
    and plain 1-D arrays are all treated as a sequence of observations.
    (Using ``len()`` directly would report 1 for a 1 x n row vector and
    short-circuit to 1.0 without computing anything.)

    Parameters
    ----------
    inA, inB : array-like
        Vectors with the same number of elements.

    Returns
    -------
    float
        ``1.0`` when fewer than three observations are available (too few
        for a meaningful correlation), otherwise ``0.5 + 0.5 * r`` where
        ``r`` is the Pearson coefficient. If either vector is constant the
        coefficient is undefined and NumPy yields NaN.
    """
    a = np.asarray(inA, dtype=float).ravel()
    b = np.asarray(inB, dtype=float).ravel()
    if a.size < 3:
        return 1.0
    return 0.5 + 0.5 * np.corrcoef(a, b)[0, 1]

In [10]:
# Cosine similarity
def cosSim(inA,inB):
    """Cosine similarity of two vectors, rescaled from [-1, 1] to [0, 1].

    Both inputs are flattened to 1-D before the dot product is taken, so
    the function accepts ``np.matrix`` column vectors (as used elsewhere in
    this notebook), row vectors and plain ``ndarray``s alike. This also
    avoids converting a 1 x 1 matrix to ``float``, which newer NumPy
    versions deprecate.

    Parameters
    ----------
    inA, inB : array-like
        Vectors with the same number of elements.

    Returns
    -------
    float
        ``0.5 + 0.5 * cos(angle between inA and inB)``. If either vector
        has zero norm the division is 0/0 and the result is NaN.
    """
    a = np.asarray(inA, dtype=float).ravel()
    b = np.asarray(inB, dtype=float).ravel()
    num = float(np.dot(a, b))
    denom = la.norm(a) * la.norm(b)
    return 0.5 + 0.5 * (num / denom)

In [11]:
# np.asmatrix is the supported spelling; the np.mat alias was removed in NumPy 2.0.
datMat = np.asmatrix(data)

In [12]:
# Compare columns 1 and 2 of datMat with all three similarity measures.
ecludSim(datMat[:,1],datMat[:,2]),pearsSim(datMat[:,1],datMat[:,2]), cosSim(datMat[:,1],datMat[:,2])

(0.5, 0.9889826260092474, 0.9921254921257382)

In [13]:
# Same three measures on rows 4 and 3; the cosSim arguments are transposed
# into column vectors before the call.
ecludSim(datMat[4,:],datMat[3,:]),pearsSim(datMat[4,:],datMat[3,:]), cosSim(datMat[4,:].T,datMat[3,:].T)

(0.36602540378443865, 1.0, 1.0)

In [14]:
def standEst(dataMat, user, simMeas, item):
    """Estimate the entry ``dataMat[user, item]`` from the user's other entries.

    For every other column ``j`` where the user's row holds a nonzero
    value, ``simMeas`` is evaluated on the two columns ``item`` and ``j``,
    restricted to the rows in which both columns are greater than zero.
    The estimate is the similarity-weighted average of the user's values.

    Parameters
    ----------
    dataMat : np.matrix
        Data matrix; the ``.A`` attribute is used, so a matrix is required.
    user : int
        Row index.
    simMeas : callable
        ``simMeas(colA, colB)`` returning a numeric similarity.
    item : int
        Column index to estimate.

    Returns
    -------
    number
        Weighted average of the user's values, or ``0`` when the total
        similarity is zero.
    """
    numItems = np.shape(dataMat)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(numItems):
        rating = dataMat[user, other]
        # Skip the target column itself and columns with a zero entry.
        if other == item or rating == 0:
            continue
        bothRated = (dataMat[:, item].A > 0) & (dataMat[:, other].A > 0)
        sharedRows = np.nonzero(bothRated)[0]
        if len(sharedRows) > 0:
            similarity = simMeas(dataMat[sharedRows, item],
                                 dataMat[sharedRows, other])
        else:
            # No row has both columns filled in.
            similarity = 0
        print('the %d and %d similarity is: %f' % (item, other, similarity))
        weightSum += similarity
        weightedRatings += similarity * rating
    if weightSum == 0:
        return 0
    return weightedRatings / weightSum

In [15]:
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return up to ``N`` (item, estimated score) pairs for ``user``.

    Candidate items are the columns whose entry in the user's row equals 0.
    Each candidate is scored with ``estMethod(dataMat, user, simMeas, item)``
    and the pairs are returned sorted by score, highest first.

    Returns the string ``'you rated everything'`` when the user's row has
    no zero entries.
    """
    # Column indices holding a zero in the user's row.
    candidates = np.nonzero(dataMat[user, :].A == 0)[1]
    if len(candidates) == 0:
        return 'you rated everything'
    scored = [(item, estMethod(dataMat, user, simMeas, item))
              for item in candidates]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]

In [16]:
# np.asmatrix is the supported spelling; the np.mat alias was removed in NumPy 2.0.
myMat = np.asmatrix([[4, 4, 0, 2, 2],
                     [4, 0, 0, 3, 3],
                     [4, 0, 0, 1, 1],
                     [1, 1, 1, 2, 0],
                     [2, 2, 2, 0, 0],
                     [5, 5, 5, 0, 0],
                     [1, 1, 1, 0, 0]])


In [17]:
# Recommendations for row 2 of myMat using the default similarity and estimator.
recommend(myMat,2)

the 1 and 0 similarity is: 1.000000
the 1 and 3 similarity is: 0.928746
the 1 and 4 similarity is: 1.000000
the 2 and 0 similarity is: 1.000000
the 2 and 3 similarity is: 1.000000
the 2 and 4 similarity is: 0.000000


[(2, 2.5), (1, 2.0243290220056256)]

In [18]:
def loadExData2():
    return[[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
           [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
           [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
           [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
           [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
           [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
           [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
           [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
           [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
           [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
           [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]

In [19]:
# np.asmatrix is the supported spelling; the np.mat alias was removed in NumPy 2.0.
myMat2 = np.asmatrix(loadExData2())

In [20]:
U, Sigma, Vt = la.svd(myMat2)

In [21]:
Sigma

array([15.77075346, 11.40670395, 11.03044558,  4.84639758,  3.09292055,
        2.58097379,  1.00413543,  0.72817072,  0.43800353,  0.22082113,
        0.07367823])

In [22]:
# Sum of the squared singular values (the total "energy" of the matrix).
sig2 = Sigma ** 2
np.sum(sig2)

541.9999999999995

In [23]:
def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate ``dataMat[user, item]`` using similarities in a reduced SVD space.

    The columns of ``dataMat`` are projected onto the leading ``numSV``
    left-singular vectors (scaled by the inverse singular values); each
    column becomes a ``numSV``-dimensional vector. Similarities between the
    target column and every other column with a nonzero entry in the user's
    row are then computed on those vectors, and the estimate is the
    similarity-weighted average of the user's values.

    Parameters
    ----------
    dataMat : np.matrix
        Data matrix.
    user : int
        Row index.
    simMeas : callable
        ``simMeas(vecA, vecB)`` returning a numeric similarity; it receives
        column vectors taken from the transformed matrix.
    item : int
        Column index to estimate.
    numSV : int, optional
        Number of singular values to keep (default 4, the value that was
        previously hard-coded). It is clamped to the number of singular
        values available, so small matrices no longer raise.

    Returns
    -------
    number
        Weighted average of the user's values, or ``0`` when the total
        similarity is zero.

    Raises
    ------
    ValueError
        If ``numSV`` is smaller than 1.
    """
    if numSV < 1:
        raise ValueError("numSV must be at least 1, got %d" % numSV)
    n = np.shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, _ = la.svd(dataMat)
    k = min(numSV, len(Sigma))
    # Diagonal matrix of the k leading singular values. np.asmatrix is used
    # because the np.mat alias was removed in NumPy 2.0.
    SigK = np.asmatrix(np.diag(Sigma[:k]))
    # Transformed items: one row per column of dataMat, k values per row.
    xformedItems = np.dot(np.dot(dataMat.T, U[:, :k]), SigK.I)
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T,
                             xformedItems[j, :].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal

In [24]:
# Sum of the first three squared singular values, for comparison with the total.
np.sum(sig2[:3])

500.5002891275793

In [25]:
recommend(myMat2,1 )

the 0 and 3 similarity is: 0.000000
the 0 and 5 similarity is: 0.000000
the 0 and 10 similarity is: 1.000000
the 1 and 3 similarity is: 0.000000
the 1 and 5 similarity is: 0.000000
the 1 and 10 similarity is: 1.000000
the 2 and 3 similarity is: 0.000000
the 2 and 5 similarity is: 0.000000
the 2 and 10 similarity is: 1.000000
the 4 and 3 similarity is: 0.000000
the 4 and 5 similarity is: 0.000000
the 4 and 10 similarity is: 0.000000
the 6 and 3 similarity is: 1.000000
the 6 and 5 similarity is: 1.000000
the 6 and 10 similarity is: 1.000000
the 7 and 3 similarity is: 0.000000
the 7 and 5 similarity is: 0.000000
the 7 and 10 similarity is: 1.000000
the 8 and 3 similarity is: 0.000000
the 8 and 5 similarity is: 0.000000
the 8 and 10 similarity is: 1.000000
the 9 and 3 similarity is: 1.000000
the 9 and 5 similarity is: 1.000000
the 9 and 10 similarity is: 1.000000


[(6, 3.3333333333333335), (9, 3.3333333333333335), (0, 3.0)]

In [26]:
# Recommendations for row 1 of myMat2, using svdEst as the estimator.
recommend(myMat2, 1, estMethod = svdEst)

the 0 and 3 similarity is: 0.490950
the 0 and 5 similarity is: 0.484274
the 0 and 10 similarity is: 0.512755
the 1 and 3 similarity is: 0.491294
the 1 and 5 similarity is: 0.481516
the 1 and 10 similarity is: 0.509709
the 2 and 3 similarity is: 0.491573
the 2 and 5 similarity is: 0.482346
the 2 and 10 similarity is: 0.510584
the 4 and 3 similarity is: 0.450495
the 4 and 5 similarity is: 0.506795
the 4 and 10 similarity is: 0.512896
the 6 and 3 similarity is: 0.743699
the 6 and 5 similarity is: 0.468366
the 6 and 10 similarity is: 0.439465
the 7 and 3 similarity is: 0.482175
the 7 and 5 similarity is: 0.494716
the 7 and 10 similarity is: 0.524970
the 8 and 3 similarity is: 0.491307
the 8 and 5 similarity is: 0.491228
the 8 and 10 similarity is: 0.520290
the 9 and 3 similarity is: 0.522379
the 9 and 5 similarity is: 0.496130
the 9 and 10 similarity is: 0.493617


[(4, 3.344714938469228), (7, 3.3294020724526967), (9, 3.3281008763900686)]

In [27]:
def printMat(inMat, thresh=0.8):
    """Print a 2-D matrix as rows of 0/1 digits.

    An entry is printed as ``1`` when it is strictly greater than
    ``thresh`` and as ``0`` otherwise. Every digit is followed by a single
    space and every row ends with a newline.

    The loop bounds come from the matrix's own shape, so inputs of any size
    are printed in full (a fixed 32 x 32 grid would raise on smaller inputs
    and cut off larger ones).

    Parameters
    ----------
    inMat : 2-D array or matrix
        Values to display; must support ``inMat[i, k]`` indexing.
    thresh : float, optional
        Cut-off separating 0 from 1 (default 0.8).
    """
    numRows, numCols = np.shape(inMat)
    for i in range(numRows):
        digits = ['1' if float(inMat[i, k]) > thresh else '0'
                  for k in range(numCols)]
        # Trailing space after every digit keeps the established output format.
        print(''.join(d + ' ' for d in digits))

In [32]:
def imgCompress(numSV=3, thresh=0.8, fileName='0_5.txt'):
    """Show a digit image before and after a low-rank SVD reconstruction.

    The file is read as text with 32 digit characters per line. The matrix
    is printed via ``printMat``, decomposed with SVD, rebuilt from the
    first ``numSV`` singular values and printed again.

    Parameters
    ----------
    numSV : int, optional
        Number of singular values used for the reconstruction (default 3).
    thresh : float, optional
        Threshold forwarded to ``printMat`` (default 0.8).
    fileName : str, optional
        Path of the image text file (default ``'0_5.txt'``, the path that
        was previously hard-coded).
    """
    myl = []
    # The context manager guarantees the file handle is closed.
    with open(fileName) as imgFile:
        for line in imgFile:
            if not line.strip():
                # Ignore blank lines (e.g. a trailing newline at end of file).
                continue
            myl.append([int(line[i]) for i in range(32)])
    # np.asmatrix instead of np.mat: the alias was removed in NumPy 2.0.
    myMat = np.asmatrix(myl)
    print ("****original matrix******")
    print (' ')
    printMat(myMat, thresh)
    U,Sigma,VT = la.svd(myMat)
    SigRecon = makeSigma(Sigma,numSV)
    reconMat = np.dot(np.dot(U[:,:numSV],SigRecon),VT[:numSV,:])
    print (' ')
    print ("****reconstructed matrix using %d singular values******" % numSV)
    print (' ')
    printMat(reconMat, thresh)

In [33]:
imgCompress(3)

****original matrix******
 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 
0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 
0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 
0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 