In [1]:
import numpy as np

In [2]:
def loadExData():
    """Return a small example user-by-item rating table.

    The result is a freshly built list of 7 rows (users) with 5 integer
    ratings each (items); a 0 marks an item the user has not rated.
    """
    ratings = [
        [4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0],
    ]
    return ratings


def loadExData2():
    """Return a larger, sparser example user-by-item rating table.

    The result is a freshly built list of 11 rows (users) with 11 integer
    ratings each (items); a 0 marks an item the user has not rated.
    """
    ratings = [
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
        [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
        [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
        [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
        [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
        [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
    ]
    return ratings

In [3]:
def euclidSim(x, y):
    """Similarity derived from Euclidean distance.

    Computes ``1 / (1 + ||x - y||)``: identical inputs give 1.0 and the
    value shrinks towards 0 as the inputs move apart. ``x`` and ``y`` must
    support ``x - y`` (e.g. NumPy arrays or matrices of the same shape).
    """
    distance = np.linalg.norm(x - y)
    return 1.0 / (1.0 + distance)

In [4]:
def pearsSim(x, y):
    """Pearson-correlation similarity rescaled from [-1, 1] to [0, 1].

    Parameters
    ----------
    x, y : equally sized vectors of ratings; column vectors (n x 1) are
        what the estimators in this notebook pass in.

    Returns
    -------
    ``1.0`` when fewer than 3 samples are available (too few points for a
    meaningful correlation), otherwise ``0.5 + 0.5 * r`` where ``r`` is the
    Pearson correlation coefficient of ``x`` and ``y``.
    """
    if len(x) < 3:
        return 1.0
    # rowvar=False: each column is a variable and each row an observation,
    # so two n x 1 inputs produce a 2 x 2 correlation matrix.
    # (The previous ``np.corrcoef()(...)`` called corrcoef with no arguments
    # and raised TypeError.)
    corr = np.corrcoef(x, y, rowvar=False)
    return 0.5 + 0.5 * corr[0, 1]

In [5]:
def cosSim(x, y):
    """Cosine similarity rescaled from [-1, 1] to [0, 1].

    Parameters
    ----------
    x, y : equally sized vectors. ``np.matrix`` column vectors, 1-D arrays
        and plain sequences are all accepted; inputs are flattened first.

    Returns
    -------
    ``0.5 + 0.5 * cos(x, y)``. When either vector has zero norm the cosine
    is undefined; the neutral value ``0.5`` is returned instead of NaN so
    that downstream weighted sums and sorting stay well defined.
    """
    a = np.asarray(x).ravel()
    b = np.asarray(y).ravel()
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.5
    # np.dot on flattened vectors yields a true scalar, avoiding the
    # deprecated float() conversion of a 1 x 1 matrix.
    num = float(np.dot(a, b))
    return 0.5 + 0.5 * (num/denom)

In [6]:
# item: an item that `user` has not rated yet
def standEst(data, user, simMeans, item):
    """Estimate `user`'s rating of `item` from item-to-item similarity.

    Parameters
    ----------
    data : np.matrix of ratings, rows are users and columns are items
        (``.A`` is used on column slices, so a plain ndarray will not work).
    user : row index of the user.
    simMeans : callable ``(colA, colB) -> similarity`` applied to the two
        item columns restricted to users who rated both items.
    item : column index of the item to estimate.

    Returns
    -------
    The similarity-weighted average of the user's existing ratings, or 0
    when the similarities sum to 0.
    """
    itemCount = np.shape(data)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(itemCount):
        rating = data[user, other]
        if rating != 0:
            # `other` is an item the user has rated; find the users who
            # rated both `item` and `other`.
            ratedBoth = np.logical_and(data[:, item].A > 0, data[:, other].A > 0)
            rows = np.nonzero(ratedBoth)[0]
            if len(rows) > 0:
                # Compare the two items on the overlapping users only.
                sim = simMeans(data[rows, item], data[rows, other])
            else:
                sim = 0
            print('the %d and %d similarity is : %f'%(item, other, sim))
            # Accumulate the similarity and the similarity-weighted rating.
            weightSum += sim
            weightedRatings += sim * rating
    return 0 if weightSum == 0 else weightedRatings / weightSum

In [7]:
# Recommend items for the given user
def recommend(data, user, N=3, simMeans = cosSim, estMethod=standEst):
    """Return the top-N unrated items for `user`, best estimate first.

    Parameters
    ----------
    data : np.matrix of ratings, rows are users and columns are items.
    user : row index of the user.
    N : maximum number of recommendations to return.
    simMeans : similarity function forwarded to `estMethod`.
    estMethod : callable ``(data, user, simMeans, item) -> score``.

    Returns
    -------
    A list of ``(item, estimatedScore)`` tuples sorted by descending score,
    or the string ``'you rated everything'`` when the user has no unrated
    items.
    """
    # Column indices where this user's rating is 0, i.e. unrated items.
    candidates = np.nonzero(data[user,:].A == 0)[1]
    if len(candidates) == 0:
        return ('you rated everything')
    # Score every unrated item with the chosen estimator.
    scored = [(item, estMethod(data, user, simMeans, item)) for item in candidates]
    # Keep the N highest estimates.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]

In [8]:
# Wrap the small example ratings in an np.matrix (np.asmatrix replaces the
# np.mat alias, which was removed in NumPy 2.0).
data = np.asmatrix(loadExData())

In [9]:
# Top recommendations for the user in row 2, using the default estimator.
recommend(data, user=2)

the 1 and 0 similarity is : 1.000000
the 1 and 3 similarity is : 1.000000
the 1 and 4 similarity is : 1.000000
the 2 and 0 similarity is : 1.000000
the 2 and 3 similarity is : 0.000000
the 2 and 4 similarity is : 0.000000


[(2, 4.0), (1, 2.0)]

In [10]:
def svdEst(data, user, simMeans, item, k=4):
    """Estimate `user`'s rating of `item` using SVD-reduced item vectors.

    Every item (column of `data`) is projected into a `k`-dimensional space
    via ``data.T * U[:, :k] * inv(diag(sigma[:k]))``; similarities between
    `item` and each item the user has rated are then computed in that
    reduced space.

    Parameters
    ----------
    data : rating matrix, rows are users and columns are items; converted
        with ``np.asmatrix`` so ``*`` means matrix multiplication.
    user : row index of the user.
    simMeans : callable ``(vecA, vecB) -> similarity`` receiving two
        ``k x 1`` column vectors.
    item : column index of the item to estimate.
    k : number of leading singular values to keep (default 4, the value
        that used to be hard-coded); clipped to the number available.

    Returns
    -------
    The similarity-weighted average of the user's existing ratings, or 0
    when the similarities sum to 0.

    Notes
    -----
    The SVD is recomputed on every call. A zero among the kept singular
    values makes the diagonal matrix non-invertible and raises
    ``numpy.linalg.LinAlgError``.
    """
    data = np.asmatrix(data)
    n = np.shape(data)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, sigma, _ = np.linalg.svd(data)
    # Never ask for more singular values than the matrix actually has.
    k = min(k, len(sigma))
    sigK = np.asmatrix(np.diag(sigma[:k]))
    # Use U to map the items into the low-dimensional space (one row per item).
    itemMat = data.T * np.asmatrix(U)[:, :k] * sigK.I
    for j in range(n):
        uRating = data[user, j]
        if uRating == 0 or j == item:
            continue
        similarity = simMeans(itemMat[item,:].T, itemMat[j,:].T)
        print('the %d and %d similarity is: %f'%(item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * uRating
    if simTotal == 0:
        return 0
    else:
        return ratSimTotal/simTotal

In [11]:
# Switch to the larger example ratings (np.asmatrix replaces the np.mat
# alias, which was removed in NumPy 2.0).
data = np.asmatrix(loadExData2())

In [12]:
# Top recommendations for the user in row 1, estimated in SVD-reduced space.
recommend(data, user=1, estMethod=svdEst)

the 0 and 3 similarity is: 0.490950
the 0 and 5 similarity is: 0.484274
the 0 and 10 similarity is: 0.512755
the 1 and 3 similarity is: 0.491294
the 1 and 5 similarity is: 0.481516
the 1 and 10 similarity is: 0.509709
the 2 and 3 similarity is: 0.491573
the 2 and 5 similarity is: 0.482346
the 2 and 10 similarity is: 0.510584
the 4 and 3 similarity is: 0.450495
the 4 and 5 similarity is: 0.506795
the 4 and 10 similarity is: 0.512896
the 6 and 3 similarity is: 0.743699
the 6 and 5 similarity is: 0.468366
the 6 and 10 similarity is: 0.439465
the 7 and 3 similarity is: 0.482175
the 7 and 5 similarity is: 0.494716
the 7 and 10 similarity is: 0.524970
the 8 and 3 similarity is: 0.491307
the 8 and 5 similarity is: 0.491228
the 8 and 10 similarity is: 0.520290
the 9 and 3 similarity is: 0.522379
the 9 and 5 similarity is: 0.496130
the 9 and 10 similarity is: 0.493617


[(4, 3.344714938469228), (7, 3.329402072452697), (9, 3.328100876390069)]

In [13]:
# Run SVD on the second ratings matrix and build tmp from it.
# Each row of tmp corresponds to one item.
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
u, sigma, vt = np.linalg.svd(data)
sig4 = np.asmatrix(np.eye(4) * sigma[:4])
tmp = data.T * u[:,:4]*sig4.I

In [20]:
# Display the projected item matrix: one row per item (column of data), four columns.
tmp

matrix([[-0.45137416,  0.03084799, -0.00290108,  0.01189185],
        [-0.36239706,  0.02584428, -0.00189127,  0.01348796],
        [-0.46879252,  0.03296133, -0.00281253,  0.01656192],
        [-0.01007685, -0.34024331, -0.22728592,  0.14546051],
        [-0.01567036, -0.38750193,  0.61197998, -0.17137451],
        [-0.01664563, -0.52000097, -0.3608907 , -0.14984063],
        [-0.00474684, -0.18887149, -0.00924222,  0.94228361],
        [-0.46712774,  0.00389831,  0.03349951, -0.02080674],
        [-0.47223188,  0.02853952, -0.00504059,  0.00160266],
        [-0.01591788, -0.39205093,  0.55707516,  0.04356321],
        [-0.0552444 , -0.52034959, -0.36330956, -0.19023805]])

In [21]:
# Display the first four columns of u, the part of U used in the projection.
u[:,:4]

matrix([[-0.02173672, -0.41043862, -0.29555566, -0.31993924],
        [-0.01664767, -0.40868796, -0.29149768, -0.15138979],
        [-0.03763173, -0.27302481,  0.4269746 , -0.10978297],
        [-0.3928286 ,  0.03215633,  0.00283676,  0.02145489],
        [-0.68146521,  0.05125169,  0.00962441,  0.02067521],
        [-0.01031581, -0.35826614,  0.52908411,  0.06256753],
        [-0.60364271, -0.00222591, -0.02262313, -0.02723249],
        [-0.02078959, -0.4841342 , -0.34503998, -0.16062914],
        [-0.01290907, -0.35922701, -0.12620599,  0.9008227 ],
        [-0.00900549, -0.30733798,  0.47941858, -0.14085095],
        [-0.11812788,  0.00805012,  0.00186006, -0.00117811]])

In [22]:
# Display the 4x4 diagonal matrix holding the four leading singular values.
sig4

matrix([[15.77075346,  0.        ,  0.        ,  0.        ],
        [ 0.        , 11.40670395,  0.        ,  0.        ],
        [ 0.        ,  0.        , 11.03044558,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  4.84639758]])