# 极客时间 - 程序员基础数学课 - [第38课 - 矩阵(下)协同过滤](https://time.geekbang.org/column/article/85562)



## 基于用户的协同过滤

In [1]:
import numpy as np

# 用户 - 物品 喜好程度
X = np.array([
    [0.11, 0.20, 0.0],
    [0.81, 0.0, 0.0],
    [0.0, 0.88, 0.74],
    [0.0, 0.0, 0.42],
])

i = X.shape[0]
j = X.shape[1]

# X的转置矩阵X‘
Xt = X.transpose()

print("X矩阵:")
print(X)

print("X转置矩阵X':")
print(Xt)

X矩阵:
[[0.11 0.2  0.  ]
 [0.81 0.   0.  ]
 [0.   0.88 0.74]
 [0.   0.   0.42]]
X转置矩阵X':
[[0.11 0.81 0.   0.  ]
 [0.2  0.   0.88 0.  ]
 [0.   0.   0.74 0.42]]


In [2]:
# 得到 X 乘以 X'的结果
T = X.dot(Xt)

print("XX':")
print(T)

XX':
[[0.0521 0.0891 0.176  0.    ]
 [0.0891 0.6561 0.     0.    ]
 [0.176  0.     1.322  0.3108]
 [0.     0.     0.3108 0.1764]]


In [3]:
# 计算US，用户相似度矩阵
US = np.zeros((i, i))

for i1 in range(len(US)):
  for i2 in range(len(US[i1])):
    US[i1][i2] = T[i1][i2] / (np.sqrt(T[i1][i1] * T[i2][i2]))

print("US用户相似度矩阵:")
print(US)

US用户相似度矩阵:
[[1.         0.48191875 0.67062244 0.        ]
 [0.48191875 1.         0.         0.        ]
 [0.67062244 0.         1.         0.64359994]
 [0.         0.         0.64359994 1.        ]]


In [4]:
# 计算USP
USP = US.dot(X)

print("USP:")
print(USP)

# 计算USR，为了把USP归一化

# 把US根据行求和
rowSum = US.sum(axis=1)

print(rowSum)

USR = np.zeros(USP.shape)

for ii in range(len(USR)):
    USR[ii] = rowSum[ii]

print("USR:")
print(USR)

USP:
[[0.50035419 0.79014774 0.4962606 ]
 [0.86301106 0.09638375 0.        ]
 [0.07376847 1.01412449 1.01031197]
 [0.         0.56636794 0.89626395]]
[2.15254119 1.48191875 2.31422237 1.64359994]
USR:
[[2.15254119 2.15254119 2.15254119]
 [1.48191875 1.48191875 1.48191875]
 [2.31422237 2.31422237 2.31422237]
 [1.64359994 1.64359994 1.64359994]]


In [5]:
# 计算 P = USP/USR

P = USP / USR
print("P=USP/USR")
print(P)

P=USP/USR
[[0.23244814 0.36707671 0.23054639]
 [0.58236058 0.06503983 0.        ]
 [0.03187614 0.43821393 0.43656651]
 [0.         0.3445899  0.54530542]]


## 基于物品的协同过滤

In [6]:
import numpy as np

# 用户 - 物品 喜好程度
X = np.array([
    [0.11,0.20,0.0],
    [0.81,0.0,0.0],
    [0.0,0.88,0.74],
    [0.0,0.0,0.42],
])

i = X.shape[0]
j = X.shape[1]

# X的转置矩阵X‘
Xt = X.transpose()

print("X矩阵:")
print(X)

print("X转置矩阵X':")
print(Xt)


X矩阵:
[[0.11 0.2  0.  ]
 [0.81 0.   0.  ]
 [0.   0.88 0.74]
 [0.   0.   0.42]]
X转置矩阵X':
[[0.11 0.81 0.   0.  ]
 [0.2  0.   0.88 0.  ]
 [0.   0.   0.74 0.42]]


In [7]:
# 得到 X‘ 乘以 X的结果
T = Xt.dot(X)

print("X'X:")
print(T)

X'X:
[[0.6682 0.022  0.    ]
 [0.022  0.8144 0.6512]
 [0.     0.6512 0.724 ]]


In [8]:
# 计算IS，物品相似度矩阵
IS = np.zeros((j, j))

for j1 in range(len(IS)):
  for j2 in range(len(IS[j1])):
    IS[j1][j2] = T[j1][j2] / (np.sqrt(T[j1][j1] * T[j2][j2]))

print("IS物品相似度矩阵:")
print(IS)

IS物品相似度矩阵:
[[1.         0.02982295 0.        ]
 [0.02982295 1.         0.84805931]
 [0.         0.84805931 1.        ]]


In [9]:
# 计算ISP，用户对物品K的喜好程度
ISP = X.dot(IS)

print("ISP:")
print(ISP)

# 计算ISR，为了把ISP归一化

# 把IS根据列求和
colSum = IS.sum(axis=0)

print(rowSum)

ISR = np.zeros(ISP.shape)

for ii in range(len(ISR)):
    ISR[ii] = colSum

print("ISR:")
print(ISR)

ISP:
[[0.11596459 0.20328052 0.16961186]
 [0.81       0.02415659 0.        ]
 [0.02624419 1.50756389 1.48629219]
 [0.         0.35618491 0.42      ]]
[2.15254119 1.48191875 2.31422237 1.64359994]
ISR:
[[1.02982295 1.87788226 1.84805931]
 [1.02982295 1.87788226 1.84805931]
 [1.02982295 1.87788226 1.84805931]
 [1.02982295 1.87788226 1.84805931]]


In [10]:
# 计算 P = ISP / ISR
P = ISP / ISR

print("P=ISP/ISR:")
print(P)

P=ISP/ISR:
[[0.11260634 0.10824988 0.09177837]
 [0.78654297 0.01286374 0.        ]
 [0.02548418 0.80280001 0.80424486]
 [0.         0.18967372 0.22726543]]
