# Load MovieLen data

In [1]:
import numpy as np
import pandas as pd
import os
# data path
data_path = '/Users/wenyi/Desktop/个人/学习/常用算法/data/ml-1m/ratings.dat'
data = pd.read_csv(data_path, sep='::', names=['user', 'item', 'rating', 'timestamp'])

  


In [2]:
data.head()

Unnamed: 0,user,item,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [3]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(data)

# MF Model

In [4]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset,DataLoader

In [14]:
class MF(nn.Module):
    def __init__(self, num_user, num_item, regulation_rate=1.0, k=18):
        super(MF, self).__init__()
        self.num_user = num_user
        self.num_item = num_item
        self.k = k
        self.regulation_rate = regulation_rate

        # embedding vector
        self.user_embed = nn.Embedding(num_user+1, self.k)
        self.item_embed = nn.Embedding(num_item+1, self.k)
        
        # init weight
        self.user_embed.weight.data.uniform_(0,0.05)
        self.item_embed.weight.data.uniform_(0,0.05)

    def forward(self, train_x):
        user_id = train_x[:,0]
        item_id = train_x[:,1]

        user_vec = self.user_embed(user_id)
        item_vec = self.item_embed(item_id)
        user_item_interaction = torch.sum(user_vec*item_vec, dim=1)
        return user_item_interaction

In [6]:
train_tensor = TensorDataset(torch.from_numpy(train[['user','item']].values), torch.from_numpy(train['rating'].values))
test_tensor = TensorDataset(torch.from_numpy(test[['user','item']].values), torch.from_numpy(test['rating'].values))

In [7]:
train_loader = DataLoader(train_tensor, batch_size=256, shuffle=True)
test_loader = DataLoader(test_tensor, batch_size=256, shuffle=True)

In [35]:
def train(model, train_loader, test_loader, epochs=3, lr=0.01, wd=0.0):
    optimizer = torch.optim.Adam(model.parameters(), lr, weight_decay=wd)
    model.train()
    min_loss = 999
    for i in range(epochs):
        print("Epoch %d" %(i+1))
        sum_loss = 0.0
        total = 0
        
        for features, labels in train_loader:
            batch = features.shape[0]
            y_pred = model(features)
            loss = F.mse_loss(y_pred, labels.view(-1, batch)[0].float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += batch
            sum_loss += batch*loss.item()
        print("train loss %.3f" %(sum_loss/total))
        test_los = test_loss(model, test_loader)
        if test_los < min_loss:
            min_loss = test_los
            print("test loss %.3f" %(test_los))
        else:
            break

In [36]:
def test_loss(model, test_loader):
    model.eval()
    sum_loss = 0.0
    min_loss = 999
    total = 0
    for features, labels in test_loader:
        batch = features.shape[0]
        y_pred = model(features)
        loss = F.mse_loss(y_pred, labels.view(-1, batch)[0].float())
        total += batch
        sum_loss += batch*loss.item()
    return sum_loss/total

In [37]:
num_user = data['user'].max()
num_item = data['item'].max()
model = MF(num_user, num_item)
train(model, train_loader, test_loader,20, 0.001)

Epoch 1
train loss 4.143
test loss 1.068
Epoch 2
train loss 0.927
test loss 0.877
Epoch 3
train loss 0.859
test loss 0.860
Epoch 4
train loss 0.852
test loss 0.856
Epoch 5
train loss 0.849
test loss 0.855
Epoch 6
train loss 0.848
test loss 0.853
Epoch 7
train loss 0.847


In [48]:
# 根据训练的模型进行推荐
# 方法：模型通过优化学习到了user_embeding和item_embedding对于每个用户可以用相应的user_embedding
# 和所有的item_embedding进行矩阵相乘得到用户对每部电影的rating，根据rating排序去topN推荐（这里要去掉用户已经评分的电影）
def recommend(user, k=10):
    """
    根据用户id给用户推荐topK个电影
    user: int 用户id
    k: int topK result
    """
    #用户对所有的movie的评分
    score = torch.sum(model.user_embed.weight[user]*model.item_embed.weight,dim=1).detach().numpy()
    score = [(val, i) for i, val in enumerate(score)]
    score = sorted(score, key=lambda x:x[0], reverse=True)
    
    # 根据评分推荐
    rec_result = []
    for val, i in score:
        if i not in train[train['user']==user]['item'].tolist():
            if len(rec_result) == k:
                break
            rec_result.append(i)
    return rec_result

In [50]:
train[train['user']==1]['item']

TypeError: 'function' object is not subscriptable

In [49]:
user = 2
k = 10
rec_result = recommend(user,k)

TypeError: 'function' object is not subscriptable