In [19]:
%%html
<img src="./6-1.jpg" height=480 width=640>

In [2]:
import numpy as np

# Rating matrix: one row per customer (18), one column per dish (11).
# A value of 0 means the customer has not rated that dish.
# np.asmatrix is used instead of np.mat, which was removed in NumPy 2.0;
# the result is still an np.matrix, so `*` keeps meaning matrix multiplication.
scoreData = np.asmatrix([
    [5,2,1,4,0,0,2,4,0,0,0],
    [0,0,0,0,0,0,0,0,0,3,0],
    [1,0,5,2,0,0,3,0,3,0,1],
    [0,5,0,0,4,0,1,0,0,0,0],
    [0,0,0,0,0,4,0,0,0,4,0],
    [0,0,1,0,0,0,1,0,0,5,0],
    [5,0,2,4,2,1,0,3,0,1,0],
    [0,4,0,0,5,4,0,0,0,0,5],
    [0,0,0,0,0,0,4,0,4,5,0],
    [0,0,0,4,0,0,1,5,0,0,0],
    [0,0,0,0,4,5,0,0,0,0,3],
    [4,2,1,4,0,0,2,4,0,0,0],
    [0,1,4,1,2,1,5,0,5,0,0],
    [0,0,0,0,0,4,0,0,0,4,0],
    [2,5,0,0,4,0,0,0,0,0,0],
    [5,0,0,0,0,0,0,4,2,0,0],
    [0,2,4,0,4,3,4,0,0,0,0],
    [0,3,5,1,0,0,4,1,0,0,0]
])

In [3]:
# Similarity between dishes: cosine similarity rescaled to the range [0, 1].
def cosSim(vec_1, vec_2):
    """Return the cosine similarity of two vectors, rescaled from [-1, 1] to [0, 1].

    Parameters
    ----------
    vec_1, vec_2 : array-like
        Vectors with the same number of elements. Column matrices, row
        matrices and 1-D arrays are all accepted; inputs are flattened first.

    Returns
    -------
    float
        ``0.5 + 0.5 * cos(vec_1, vec_2)``. If either vector has zero norm the
        cosine is undefined, and the neutral midpoint ``0.5`` is returned
        instead of NaN.
    """
    # Flatten so the dot product is a true scalar regardless of whether the
    # caller passed a column matrix, a row matrix or a plain 1-D array.
    flat_1 = np.asarray(vec_1, dtype=float).ravel()
    flat_2 = np.asarray(vec_2, dtype=float).ravel()
    normprod = np.linalg.norm(flat_1) * np.linalg.norm(flat_2)
    if normprod == 0:
        # Avoid 0/0: a NaN here would poison any weighted average built on it.
        return 0.5
    dotprod = float(np.dot(flat_1, flat_2))
    return 0.5 + 0.5 * (dotprod / normprod)

In [4]:
# Smoke test: similarity between the first two columns of scoreData.
print(cosSim(scoreData[:,0], scoreData[:, 1]))

0.6523179489612356


In [5]:
# Dimensionality reduction of the real (sparse) rating matrix:
# decompose scoreData with SVD and print the singular values.
U, sigma, VT = np.linalg.svd(scoreData)
print(sigma)

[18.00984878 13.34523472 11.52884033 10.1161419   7.13556169  5.86405759
  4.87893356  3.59711712  3.28710923  2.48996847  2.06103963]


In [7]:
# 18 customers rate 11 dishes; no customer can have tried every dish.
# To cope with the sparsity of the rating matrix, its rows are compressed with
# SVD (18 -> 6): keep the smallest number of singular values whose squared sum
# exceeds 90% of the squared sum of all singular values.
ENERGY_THRESHOLD = 0.9  # required share of the total squared singular values

sigmaSquared = sigma ** 2
totalEnergy = float(np.sum(sigmaSquared))  # loop-invariant, computed once
sigmaSum = 0
k_num = 0

for k, energy in enumerate(sigmaSquared):
    sigmaSum += energy
    if sigmaSum / totalEnergy > ENERGY_THRESHOLD:
        k_num = k + 1  # k is zero-based; k_num is a count
        break
print(k_num)

6


In [9]:
# Row-compress the rating matrix: project it onto the first k_num left singular
# vectors and weight each component by its singular value (recommender systems
# usually multiply by the diagonal singular-value matrix for this purpose).
# The result scoreDataRc has k_num rows and one column per dish (6 x 11 here).
# k_num comes from the energy-threshold cell above instead of a hard-coded 6.
# np.asmatrix replaces np.mat (removed in NumPy 2.0), and `@` is matrix
# multiplication for both np.matrix and ndarray operands.
sigma_K = np.asmatrix(np.diag(sigma[:k_num]))
scoreDataRc = sigma_K @ U.T[:k_num, :] @ scoreData
print(scoreDataRc)

[[-112.4308753  -112.87222698 -124.19623361 -105.3993477  -111.288632
   -73.59389971 -135.0414711  -100.44297783  -64.70437823  -40.78142832
   -36.26815254]
 [ -72.48369701   41.51056586    2.73164141  -63.4068466    80.85031966
    74.17305344    5.56275757  -78.96337678    0.5442874    22.36535334
    43.68006783]
 [  37.12342785   37.62324399  -48.30321076   12.27825448   44.01558208
    15.58603044  -61.15421157   29.1271841   -51.75734522  -48.33639061
    24.5927832 ]
 [  17.52124987  -26.0972729   -31.74323843    6.7731707    -9.84514566
    43.42277156  -20.38567072   17.78646057   -3.58400334   75.2486827
     6.44560751]
 [  -4.65216236  -30.40184468   14.31575194    8.88222668   -3.18752866
    25.17373196   -2.36071622    3.80908229    0.60261906  -21.93806491
    14.73475607]
 [  12.3915557    -6.28064351  -10.81041971   -9.75679724    6.46828122
    -3.64007586   -1.80356759   -1.88718634   25.44954779   -5.17787313
     6.4052445 ]]


In [10]:
# Score estimation. The four parameters are: the original dish rating matrix,
# the row-compressed matrix, the user's row index in scoreData, and the dish's
# column index in scoreData.
def estScore(scoreData, scoreDataRc, userIndex, itemIndex):
    """Estimate the score a user would give to one item.

    The estimate is the similarity-weighted average of the user's non-zero
    ratings on all other items, where the weight of each rated item is its
    ``cosSim`` with the target item, computed on the columns of ``scoreDataRc``.

    Parameters
    ----------
    scoreData : 2-D matrix of ratings, indexed as ``[user, item]``.
    scoreDataRc : row-compressed matrix with the same columns as ``scoreData``.
    userIndex : row index of the user in ``scoreData``.
    itemIndex : column index of the item in ``scoreData``.

    Returns
    -------
    The weighted average rating, or ``0`` when the accumulated similarity is 0
    (for example when the user has no non-zero rating on any other item).
    """
    itemCount = np.shape(scoreData)[1]

    # Columns the user has actually rated, excluding the target item itself.
    ratedColumns = [
        col for col in range(itemCount)
        if scoreData[userIndex, col] != 0 and col != itemIndex
    ]

    weightTotal = 0
    weightedScore = 0
    for col in ratedColumns:
        weight = cosSim(scoreDataRc[:, col], scoreDataRc[:, itemIndex])
        weightTotal = float(weightTotal + weight)
        weightedScore = weightedScore + weight * scoreData[userIndex, col]

    if weightTotal == 0:
        return 0
    return weightedScore / weightTotal

In [11]:
# Dish recommendation: print an estimated score for every dish that the target
# user has not rated yet (rating == 0).
n = np.shape(scoreData)[1]
userIndex = 17  # the last row is the user to predict for

for i in range(n):
    # Use userIndex rather than a hard-coded row, so the "already rated" filter
    # and the estimate always refer to the same user.
    userScore = scoreData[userIndex, i]
    if userScore != 0:
        continue
    print("index:{},score:{}".format(i, estScore(scoreData, scoreDataRc, userIndex, i)))

index:0,score:2.634711671533117
index:4,score:2.9259893459771122
index:5,score:2.933723884808588
index:8,score:2.9657073178482745
index:9,score:2.905707343296552
index:10,score:2.9263484655262877


In [12]:
%%html
<img src="./推荐系统.jpg" height=480 width=640>