In [12]:
#..........................案例1:
#.......................... SVD简化数据（矩阵分解的一种类型）
#..........矩阵分解：Data(mxn)=U(mxm)Σ(mxn)VT(nxn)，Σ矩阵只有对角元素，这些元素就是奇异值
#..........PCA是特征值，Σ是Data*DataT的特征值的平方根
import numpy as np
U,Sigma,VT=np.linalg.svd([[1,1],[7,7]])
# np.linalg.svd returns the singular values as a 1-D array (the diagonal of Σ),
# not as the full m x n Σ matrix.
print('奇异矩阵：\n',Sigma)
# Rank-k approximation of the data: Data(m x n) ≈ U(m x k) Σ(k x k) VT(k x n).
# The products are written as real matrix products (@) so the expression stays
# correct for any k; an element-wise `*` only happens to give the same result
# through broadcasting when k == 1.
rank = 1
approxData = U[:, :rank] @ np.diag(Sigma[:rank]) @ VT[:rank, :]
# Try to rebuild the original data from the leading singular value only.
print('构建近似原数据：\n',approxData)
# 可以用“将奇异值的平方和累加到总值的90%”的原则来取主要数据

奇异矩阵：
 [1.00000000e+01 2.82797782e-16]
构建近似原数据：
 [[1. 1.]
 [7. 7.]]


In [13]:
#..........................案例2:
#.......................... 基于协同过滤的推荐引擎
#.........可以基于物品或者基于用户计算相似度，看哪个少，一般基于物品
#.........相似度：Pearson相关系数、1/（1+欧式距离）、余弦相似度
# 相似度
import numpy as np
def ecludSim(inA,inB): # similarity derived from the Euclidean distance
    """Return 1 / (1 + ||inA - inB||).

    The result is 1.0 when the two vectors are identical and decreases
    towards 0 as the norm of their difference grows.
    """
    distance = np.linalg.norm(inA - inB)
    return 1.0 / (1.0 + distance)
def pearsSim(inA,inB): # similarity derived from the Pearson correlation
    """Return the Pearson correlation of inA and inB rescaled to 0.5 + 0.5 * r.

    Inputs with fewer than three entries are reported as 1.0 without
    computing a correlation.
    """
    if len(inA) >= 3:
        # rowvar=0: each column is treated as a variable, each row as an observation.
        corr = np.corrcoef(inA, inB, rowvar = 0)
        return 0.5 + 0.5 * corr[0][1]
    return 1.0
def cosSim(inA,inB): # cosine similarity
    """Return the cosine of the angle between inA and inB rescaled to [0, 1].

    The value is 0.5 + 0.5 * cos(inA, inB): 1.0 for vectors pointing the same
    way, 0.5 for orthogonal vectors, 0.0 for opposite vectors.

    Both inputs are flattened to 1-D float arrays first, so np.matrix column
    vectors, row vectors and plain 1-D arrays are all accepted.

    If either vector has zero norm the cosine is undefined; 0.0 is returned
    so that the pair contributes no weight to a similarity-weighted average
    instead of propagating NaN.
    """
    vecA = np.asarray(inA, dtype=float).ravel()
    vecB = np.asarray(inB, dtype=float).ravel()
    denom = np.linalg.norm(vecA)*np.linalg.norm(vecB)
    if denom == 0:
        return 0.0
    # np.dot on 1-D arrays yields a scalar directly; no 1x1-matrix-to-float
    # conversion is needed.
    num = float(np.dot(vecA, vecB))
    return 0.5+0.5*(num/denom)

In [33]:
#..........................案例3:
#.......................... 推荐菜肴
# User x dish rating matrix (rows = users, columns = dishes, 0 = not rated).
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
# The literal already contains the final ratings, i.e. the base example data
# with entries (0,0), (0,1), (1,0), (2,0) set to 4 and entry (3,3) set to 2.
Mat = np.asmatrix([[4, 4, 0, 2, 2],
                   [4, 0, 0, 3, 3],
                   [4, 0, 0, 1, 1],
                   [1, 1, 1, 2, 0],
                   [2, 2, 2, 0, 0],
                   [5, 5, 5, 0, 0],
                   [1, 1, 1, 0, 0]])
def standEst(dataMat, user, simMeas, item):
    """Estimate the rating `user` would give `item` from item-item similarity.

    For every item the user has rated (non-zero entry in row `user`), the
    similarity between that item's column and the target item's column is
    computed with `simMeas`, restricted to the rows where both columns are
    greater than zero. The estimate is the similarity-weighted average of
    the user's ratings.

    Parameters:
        dataMat: np.matrix of ratings (the code relies on the `.A` attribute).
        user:    row index of the user.
        simMeas: callable(colA, colB) -> similarity.
        item:    column index of the item to estimate.

    Returns:
        The weighted-average rating, or 0 when the similarities sum to 0.

    Each computed similarity is also printed.
    """
    _, numItems = np.shape(dataMat)
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(numItems):
        rating = dataMat[user, other]
        if rating == 0:
            # The user has not rated this item, so it carries no information.
            continue
        # Rows in which both the target item and this item have a rating > 0.
        bothRated = (dataMat[:, item].A > 0) & (dataMat[:, other].A > 0)
        coRaters = np.nonzero(bothRated)[0]
        if len(coRaters) > 0:
            similarity = simMeas(dataMat[coRaters, item], dataMat[coRaters, other])
        else:
            similarity = 0
        print('the %d and %d similarity is: %f' % (item, other, similarity))
        weightSum += similarity
        weightedRatings += similarity * rating
    return weightedRatings / weightSum if simTotalNonZero(weightSum) else 0


def simTotalNonZero(total):
    """Return True when the accumulated similarity is not equal to zero."""
    return not (total == 0)
# SVD
def svdEst(dataMat, user, simMeas, item, nd=4):
    """Estimate the rating `user` would give `item` in an SVD-reduced item space.

    The rating matrix is decomposed with SVD and every item is mapped to a
    low-dimensional vector (dataMat.T * U_k * inv(Sigma_k)). The estimate is
    the similarity-weighted average of the user's existing ratings, with
    similarities computed by `simMeas` between those reduced item vectors.

    Parameters:
        dataMat: 2-D rating matrix (np.matrix or array-like; converted with
                 np.asmatrix so that `*` is a matrix product).
        user:    row index of the user.
        simMeas: callable(colA, colB) -> similarity; receives column vectors.
        item:    column index of the item to estimate.
        nd:      maximum number of singular values / dimensions to keep.

    Returns:
        The weighted-average rating, or 0 when nothing can be estimated
        (no usable singular values, or the similarities sum to 0).

    Note: the SVD is recomputed on every call.
    """
    dataMat = np.asmatrix(dataMat)
    n = np.shape(dataMat)[1]
    simTotal = 0.0; ratSimTotal = 0.0
    U, Sigma, _ = np.linalg.svd(dataMat)
    if Sigma.size == 0:
        return 0
    # Keep at most `nd` singular values, and only those that are numerically
    # non-zero: there may be fewer than `nd` of them, and inverting a (near-)
    # zero singular value would either fail or blow the item vectors up.
    tol = Sigma.max() * max(np.shape(dataMat)) * np.finfo(Sigma.dtype).eps
    k = min(nd, int(np.count_nonzero(Sigma > tol)))
    if k <= 0:
        return 0
    # np.asmatrix instead of np.mat: the np.mat alias was removed in NumPy 2.0.
    sigK = np.asmatrix(np.diag(Sigma[:k]))  # k x k diagonal matrix of singular values
    xformedItems = dataMat.T * U[:, :k] * sigK.I  # one k-dimensional row per item
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return up to N (item, estimated rating) pairs for items `user` has not rated.

    Parameters:
        dataMat:   np.matrix of ratings (the code relies on the `.A` attribute);
                   a 0 entry marks an unrated item.
        user:      row index of the user.
        N:         maximum number of recommendations to return.
        simMeas:   similarity function handed to `estMethod`.
        estMethod: callable(dataMat, user, simMeas, item) -> estimated rating.

    Returns:
        A list of (item, score) tuples sorted by score, highest first, or the
        string 'you rated everything' when the user has no unrated items.
    """
    # Column indices of the zero entries in the user's row.
    unratedItems = np.nonzero(dataMat[user, :].A == 0)[1]
    if len(unratedItems) == 0:
        return 'you rated everything'
    scored = [(item, estMethod(dataMat, user, simMeas, item)) for item in unratedItems]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]
# Recommendations for the user in row 1, using the default similarity and estimator.
print('第1行用户（物品1、2）:\n', recommend(dataMat=Mat, user=1))

# Larger 11 x 11 rating matrix (rows = users, columns = dishes, 0 = not rated).
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
Mat2 = np.asmatrix([[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
                    [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
                    [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
                    [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
                    [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
                    [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
                    [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
                    [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
                    [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
                    [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
                    [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]])
# 矩阵比较稀疏
# Decompose the rating matrix and look at the "energy" (squared singular values).
U, Sigma, VT = np.linalg.svd(Mat2)
S = Sigma * Sigma
totalEnergy = sum(S)
print('总平方和：', totalEnergy)
# Fraction of the total energy captured by the three largest singular values.
print('取前3个', sum(S[:3]) / totalEnergy)
# Then produce recommendations for user 1 with the SVD-based estimator.
print('SVD结果：\n', recommend(Mat2, 1, estMethod=svdEst))





the 1 and 0 similarity is: 1.000000
the 1 and 3 similarity is: 0.928746
the 1 and 4 similarity is: 1.000000
the 2 and 0 similarity is: 1.000000
the 2 and 3 similarity is: 1.000000
the 2 and 4 similarity is: 0.000000
第1行用户（物品1、2）:
 [(2, 3.5), (1, 3.341443007335209)]
总平方和： 541.9999999999994
取前3个 0.9234322677630624
SVD结果：
 [(4, 3.344714938469228), (7, 3.3294020724526967), (9, 3.3281008763900686)]
