In [None]:
import numpy as np
import torch
from recsys_utils import *

In [None]:
# Load the pre-computed parameters and the ratings data, then report their sizes.
X, W, b, num_movies, num_features, num_users = load_precalc_params_small()
Y, R = load_ratings_small()

print("Y", Y.shape, "R", R.shape)
# One "name value" line per remaining quantity, in the same order as before.
for _label, _value in (
    ("X", X.shape),
    ("W", W.shape),
    ("b", b.shape),
    ("num_features", num_features),
    ("num_movies", num_movies),
    ("num_users", num_users),
):
    print(_label, _value)

# print(X)
print(W)

In [None]:
# Simple statistics can be read straight off the matrices: here, the mean of
# row 0 of Y restricted to the columns where row 0 of R is non-zero.
tsmean = Y[0][R[0].astype(bool)].mean()
print(f"Average rating for movie 1 : {tsmean:0.3f} / 5")

In [None]:
# Wrap the loaded arrays as torch tensors before defining the loss.
# Y and R are fixed data, so they are created without gradient tracking.
Y = torch.tensor(Y)
R = torch.tensor(R)

# Open question from the author: can tensors built from someone else's
# (pre-computed) parameters take part in autograd? They can, as long as
# requires_grad is switched on explicitly.
X = torch.tensor(X).requires_grad_(True)
W = torch.tensor(W).requires_grad_(True)
b = torch.tensor(b).requires_grad_(True)

# Sanity check: leaf status and gradient tracking, in the order X, W, b, Y, R.
print(*(t.is_leaf for t in (X, W, b, Y, R)))
print(*(t.requires_grad for t in (X, W, b, Y, R)))




In [None]:

# loss = torch.sum(torch.mul(R, torch.pow(torch.mm(F, W.t()) + b - Y, 2))) / 2

def loss_func(X, W, b, Y, R, lambda_):
    """
    Returns the regularised cost for the content-based filtering model.

    The cost is half the sum of squared prediction errors over the entries
    selected by R, plus lambda_/2 times the sum of squares of W and of X.

    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users)            : vector of user parameters
      Y (ndarray (num_movies,num_users)    : matrix of user ratings of movies
      R (ndarray (num_movies,num_users)    : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    # torch.Tensor(ndarray) would silently cast everything to float32 and lose
    # precision; as_tensor with an explicit dtype keeps the computation in
    # float64 and also accepts inputs that are already tensors.
    # no_grad: only a plain Python float is returned, so no graph is needed.
    with torch.no_grad():
        X, W, b, Y, R = (
            torch.as_tensor(t, dtype=torch.float64) for t in (X, W, b, Y, R)
        )
        # torch.mm is a strict 2-D matrix product (no broadcasting, unlike
        # torch.matmul); b is broadcast by the addition that follows.
        err = torch.mm(X, W.t()) + b - Y
        # R zeroes out the entries that were never rated.
        J = torch.sum(R * err ** 2) / 2
        # Regularisation on the user parameters and the item features.
        J = J + lambda_ * torch.sum(W ** 2) / 2
        J = J + lambda_ * torch.sum(X ** 2) / 2
    return J.item()

def loss_fun_tensor(X, W, b, Y, R, lambda_):
    """
    Differentiable version of the collaborative-filtering cost.

    Computes the same quantity as the scalar cost (masked squared error plus
    lambda_-weighted regularisation of W and X) but returns the result as a
    tensor so that .backward() can be called on it.

    Args:
      X (Tensor (num_movies,num_features)): item features
      W (Tensor (num_users,num_features)) : user parameters
      b (Tensor (1, num_users))           : user biases
      Y (Tensor (num_movies,num_users))   : ratings
      R (Tensor (num_movies,num_users))   : 1 where a rating exists, else 0
      lambda_ (float): regularization parameter
    Returns:
      J (Tensor, 0-dim): cost, attached to the autograd graph of its inputs
    """
    residual = torch.mm(X, W.t()) + b - Y
    # Only rated entries (R == 1) contribute to the data term.
    J = torch.sum(torch.mul(R, torch.pow(residual, 2))) / 2
    # Regularisation was previously commented out, which made lambda_ a no-op.
    J = J + lambda_ * torch.sum(torch.pow(W, 2)) / 2
    J = J + lambda_ * torch.sum(torch.pow(X, 2)) / 2
    return J


# Public tests
# Validate loss_func with test_cofi_cost_func. That name is not defined in this
# notebook, so it presumably arrives via the star import below (confirm).
from public_tests import *
test_cofi_cost_func(loss_func)


In [None]:
# One manual gradient-descent step on the regularised cost, written inline
# (the equivalent call would be loss_fun_tensor(X, W, b, Y, R, 0.1)).
lambda_ = 0.1

# Data term: squared error over the entries selected by R.
J = (R * (torch.mm(X, W.t()) + b - Y) ** 2).sum() / 2
# Regularisation terms for the user parameters and the item features.
J = J + lambda_ * (W ** 2).sum() / 2
J = J + lambda_ * (X ** 2).sum() / 2

print(J.item())
J.backward()

# Author's note: this was used to investigate why X was not a leaf node.
print(b.requires_grad)
print(b.is_leaf)
print(b.grad)

# The gradients finally come out as numbers.
# Author's question: can an optimizer simply be pointed at these tensors?
# It can, because backward() stores the gradients on the tensors themselves.
optimizer = torch.optim.SGD([W, b, X], lr=0.01)
optimizer.step()
# Clear the gradients accumulated by the backward pass above.
optimizer.zero_grad()




In [None]:
# Re-test a few small autograd examples.
# The demo uses its own variable names so that it does not overwrite the
# notebook-level `b` (the model bias tensor) with a toy scalar.
lhs = torch.tensor([2.0])
rhs = torch.tensor([3.0])
product = torch.mul(lhs, rhs)

print(product)
print(lhs.is_leaf)
print(rhs.is_leaf)
print(product.is_leaf)

# Author's note: there is an uppercase torch.Tensor and a lowercase
# torch.tensor. The lowercase factory accepts requires_grad, and it returns
# a torch.Tensor.
lhs = torch.tensor(2.0, requires_grad=True)
rhs = torch.tensor(3.0)
product = torch.mul(lhs, rhs)

print(product)
print(lhs.is_leaf)
print(lhs.requires_grad)
print(rhs.is_leaf)
print(rhs.requires_grad)
print(product.is_leaf)
print(product.requires_grad)

product.backward()
# product = lhs * rhs
# 6 = 2 * 3
# lhs.grad = d(product)/d(lhs) = rhs = 3
print(lhs.grad)


In [93]:
# The actual training loop.

def train_loop(lambda_=1, lr=0.1, epochs=200):
    """
    Fit the collaborative-filtering parameters with Adam and return them.

    Loads the data through load_precalc_params_small / load_ratings_small,
    normalises the ratings with normalizeRatings, initialises X, W and b
    randomly, and minimises the regularised masked squared-error cost.
    The cost is printed once per epoch.

    Args:
      lambda_ (float): regularization parameter (default 1)
      lr (float)     : Adam learning rate (default 0.1)
      epochs (int)   : number of optimisation steps (default 200)
    Returns:
      (X_tensor, W_tensor, b_tensor, Ymean): the three learned tensors and the
      mean returned by normalizeRatings, which callers need in order to turn
      predictions on the normalised scale back into ratings.
    """
    # Only the dimensions are used; the pre-computed X, W, b are discarded
    # because the parameters are initialised randomly below.
    _, _, _, num_movies, num_features, num_users = load_precalc_params_small()

    # The raw ratings are not in the form we want to train on, so reload
    # them and normalise.
    Y, R = load_ratings_small()
    Ynorm, Ymean = normalizeRatings(Y, R)

    # Fixed data: no gradient tracking.
    R_tensor = torch.tensor(R)
    Y_tensor = torch.tensor(Ynorm)

    # Random initial parameters (instead of the pre-computed ones).
    X_tensor = torch.randn(num_movies, num_features, requires_grad=True)
    W_tensor = torch.randn(num_users, num_features, requires_grad=True)
    b_tensor = torch.randn(1, num_users, requires_grad=True)

    # Author's notes: switching from SGD to Adam is what made training work;
    # open question why this still seems to converge more slowly than a
    # TensorFlow implementation with the same settings.
    optimizer = torch.optim.Adam([W_tensor, b_tensor, X_tensor], lr=lr)

    for _ in range(epochs):
        optimizer.zero_grad()

        # Forward: masked squared error over the rated entries.
        prediction = torch.mm(X_tensor, W_tensor.t()) + b_tensor
        J = torch.sum(torch.mul(R_tensor, torch.pow(prediction - Y_tensor, 2))) / 2
        # Regularisation.
        J = J + lambda_ * torch.sum(torch.pow(W_tensor, 2)) / 2
        J = J + lambda_ * torch.sum(torch.pow(X_tensor, 2)) / 2
        print(J.item())

        # Backward and parameter update.
        J.backward()
        optimizer.step()

    return X_tensor, W_tensor, b_tensor, Ymean


# Keep the results at notebook scope so later cells can use them.
X_tensor, W_tensor, b_tensor, Ymean = train_loop()

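# Note to self: it may be better to start by fitting a simple linear model first.
# Open question: why does the linear model not show this problem?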

251134.53989979601
184425.07524261376
137134.28863682644
103904.85995904237
80541.54141590775
64023.0742580964
52229.31058927747
43696.93973841839
37425.95398606721
32734.165354433553
29154.379977684563
26365.334110161777
24145.372534754177
22341.405333758314
20847.68215371242
19590.6415633661
18518.201680442464
17592.5446745026
16785.351320797672
16074.742839617378
15443.358714986385
14877.160650588869
14364.713586253913
13896.766587192189
13465.982095774587
13066.695000042579
12694.59137292023
12346.363255098318
12019.401481423263
11711.545013022262
11420.912593822603
11145.84802122177
10884.942572241329
10637.073192262662
10401.434959344891
10177.507855925975
9964.908713010294
9763.169501729983
9571.610728438212
9389.389145795732
9215.657604301408
9049.707328029828
8891.034126764198
8739.328814106551
8594.42438347288
8456.207941366369
8324.51647918453
8199.087617896508
8079.606526649339
7965.7612341453
7857.260768742316
7753.81898609514
7655.115438488209
7560.821268701158
7470.66048

In [None]:
# Make recommendations; this can be done for every user at once.
# X_tensor, W_tensor, b_tensor and Ymean must exist at notebook scope before
# this cell runs.

# A recommendation is just a prediction: the product of the movie feature
# vectors with the user parameter vectors, plus the user bias.
# no_grad: this is inference only, so no autograd graph is needed, and the
# result can be combined freely with non-tensor data.
with torch.no_grad():
    p = torch.mm(X_tensor, W_tensor.t()) + b_tensor
    # Add back the mean removed by normalizeRatings. as_tensor converts a
    # NumPy Ymean explicitly instead of relying on tensor/ndarray mixing.
    # NOTE(review): Ymean is presumably shaped (num_movies, 1) so that it
    # broadcasts across users - confirm against normalizeRatings.
    p = p + torch.as_tensor(Ymean)

# Each column of p corresponds to one user.
# TODO: drop the movies the user has already rated,
# then sort the remaining predictions
# and show only the top ten.

