In [42]:
import datetime
import numpy as np
import pandas as pd
from collections import Counter
import heapq

import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# from torchkeras import summary, Model

import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [43]:
# Hyperparameters
num_factors = 8   # latent (embedding) dimension handed to GMF
batch_size = 64   # mini-batch size for the training DataLoader
lr = 1e-3         # learning rate for the first Adam optimizer

In [44]:
# Column names applied to both rating files below.  This list used to live only
# in a commented-out line, so a fresh kernel hit a NameError on `rnames`.
# NOTE(review): assumes each file has exactly these four columns — confirm.
rnames = ['userID', 'itemID', 'rating', 'timestamp']

# Alternative source (unused here): the raw '::'-separated MovieLens dump.
# ratings = pd.read_table(path + 'ratings.dat', sep='::', header=None, names=rnames, engine='python')

# Single place to repoint the notebook at another copy of the data.
DATA_DIR = '/Users/bhj/Documents/code/working_directory/AI-RecommenderSystem-master/Recall/NeuralCF/Data/'

# read_table uses its default (tab) separator; the files carry no header row.
train = pd.read_table(DATA_DIR + 'ml-1m.train.rating', header=None, names=rnames, engine='python')
test = pd.read_table(DATA_DIR + 'ml-1m.test.rating', header=None, names=rnames, engine='python')

# Plain Python lists of the training columns (consumed by the following cells).
users_list = train['userID'].tolist()
items_list = train['itemID'].tolist()
ratings_list = train['rating'].tolist()

# Same three columns for the held-out split.
users_test_list = test['userID'].tolist()
items_test_list = test['itemID'].tolist()
ratings_test_list = test['rating'].tolist()

In [45]:
# Sizes of the user / item embedding tables (hard-coded).
# NOTE(review): presumably the number of distinct ids in the dataset — confirm against the data.
num_users = 6040
num_items = 3706

In [46]:
# Alias the training columns under the names used when building the arrays.
user_input = users_list
item_input = items_list
labels = ratings_list

In [47]:
# Stack the two id lists as rows, then transpose so that each row is one
# (user id, item id) pair.
train_x = np.transpose(np.vstack((user_input, item_input)))
# Ratings as an ndarray, aligned row-for-row with train_x.
labels = np.array(labels)

In [48]:
# Wrap the training arrays in a Dataset and batch them with a shuffling DataLoader.
train_dataset = TensorDataset(
    torch.tensor(train_x),           # (user id, item id) rows
    torch.tensor(labels).float(),    # ratings converted to float32
)
dl_train = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)

In [58]:
class GMF(nn.Module):
    """Generalized-matrix-factorization style scorer.

    Looks up one embedding per user id and one per item id, multiplies the two
    vectors element-wise, and maps the product through a linear layer followed
    by a sigmoid, giving one value in (0, 1) per input row.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        latent_dim: embedding width shared by users and items.
        regs: accepted but not used anywhere in this class.
    """

    def __init__(self, num_users, num_items, latent_dim, regs=[0, 0]):
        super().__init__()
        self.MF_Embedding_User = nn.Embedding(num_users, latent_dim)
        self.MF_Embedding_Item = nn.Embedding(num_items, latent_dim)
        self.linear = nn.Linear(in_features=latent_dim, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        """Score a batch of rows whose column 0 is a user id and column 1 an item id.

        Returns a tensor with one sigmoid output per row (trailing dimension 1).
        """
        # Embedding lookups require integer (long) indices.
        ids = inputs.long()

        # Slice by column: ids[0] / ids[1] would select single samples instead.
        user_vec = self.MF_Embedding_User(ids[:, 0])
        item_vec = self.MF_Embedding_Item(ids[:, 1])

        # Element-wise product of the two latent vectors.
        interaction = user_vec * item_vec

        # Linear projection to one logit, squashed by the sigmoid.
        return self.sigmoid(self.linear(interaction))

In [60]:
# Build a tiny 1-user / 1-item model (latent_dim=10) just to look at its layers.
# (torchkeras alternative: summary(model, input_shape=(2,)))
model = GMF(1, 1, 10).to(device)
model

GMF(
  (MF_Embedding_User): Embedding(1, 10)
  (MF_Embedding_Item): Embedding(1, 10)
  (linear): Linear(in_features=10, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [61]:
# The real model: full-size embedding tables, moved to the selected device.
model = GMF(num_users=num_users, num_items=num_items, latent_dim=num_factors).to(device)
model

GMF(
  (MF_Embedding_User): Embedding(6040, 8)
  (MF_Embedding_Item): Embedding(3706, 8)
  (linear): Linear(in_features=8, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [68]:
def evaluate_model_me(model, testRatings):
    """Return the RMSE between ``5 * model(user, item)`` and the held-out ratings.

    Args:
        model: module mapping an ``(N, 2)`` tensor of (user id, item id) rows
            to ``N`` scores; the scores are multiplied by 5 before comparison.
        testRatings: DataFrame with ``userID``, ``itemID`` and ``rating`` columns.

    Returns:
        0-dim float tensor holding the RMSE, with no autograd graph attached.
    """
    # Evaluate on whatever device the model's weights live on (CPU if it has none),
    # so the call also works after ``model.to(device)`` selected a GPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    pairs = torch.tensor(
        testRatings[['userID', 'itemID']].to_numpy(),
        dtype=torch.long, device=target_device)
    # Explicit float targets: MSELoss compares them against float predictions.
    targets = torch.tensor(
        testRatings['rating'].to_numpy(),
        dtype=torch.float32, device=target_device)

    # No gradients are needed here; without no_grad the graph for the entire
    # test set would be built and kept alive by the returned value.
    with torch.no_grad():
        predictions = model(pairs).reshape(-1)
        mse = nn.MSELoss()(5 * predictions, targets)
        return torch.sqrt(mse)

In [62]:
# Smoke test: push one training batch through the model and print the scores.
for x, y in dl_train:
    # The model was moved to `device`, so the batch has to live there as well.
    x = x.to(device)
    with torch.no_grad():  # inspection only, no autograd graph needed
        print(model(x))
    break

tensor([[0.6151],
        [0.7772],
        [0.6162],
        [0.5962],
        [0.3009],
        [0.3870],
        [0.6419],
        [0.6659],
        [0.5364],
        [0.4572],
        [0.6781],
        [0.7658],
        [0.6009],
        [0.3341],
        [0.5436],
        [0.7983],
        [0.7142],
        [0.8712],
        [0.6437],
        [0.5127],
        [0.5830],
        [0.5788],
        [0.5292],
        [0.4882],
        [0.4307],
        [0.1962],
        [0.7406],
        [0.5971],
        [0.4481],
        [0.5653],
        [0.5008],
        [0.4597],
        [0.5370],
        [0.1038],
        [0.6946],
        [0.5957],
        [0.7276],
        [0.3172],
        [0.4496],
        [0.3748],
        [0.3461],
        [0.5511],
        [0.4664],
        [0.6041],
        [0.4056],
        [0.3122],
        [0.3695],
        [0.6960],
        [0.6121],
        [0.6124],
        [0.7112],
        [0.3552],
        [0.2633],
        [0.5279],
        [0.4425],
        [0

In [54]:
# Training setup: binary cross-entropy loss and an Adam optimizer over all model weights.
loss_func = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [69]:
# Model training
# Re-create the optimizer with the learning rate used for this run.
lr = 0.003
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

epochs = 30
log_step_freq = 5000   # print the running mean loss every this many steps

for epoch in range(epochs):

    # ---- training phase ----
    model.train()
    loss_sum = 0.0
    # The batch targets get their own name so the loop does not overwrite the
    # global `labels` array built earlier in the notebook.
    for step, (features, batch_ratings) in enumerate(dl_train, 1):

        # Keep the batch on the same device as the model.
        features = features.to(device)
        batch_ratings = batch_ratings.to(device)

        # Reset gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass; drop the trailing singleton dim so shapes match the targets.
        predictions = model(features).squeeze(-1)
        # Targets are the ratings scaled by 0.2 (evaluation scales predictions back by 5).
        # NOTE(review): presumably ratings are on a 1-5 scale so targets stay in [0, 1] — confirm.
        loss = loss_func(predictions, 0.2 * batch_ratings)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Batch-level log: running mean of the training loss.
        loss_sum += loss.item()
        if step % log_step_freq == 0:
            print(("[step = %d] loss: %.3f") %
                  (step, loss_sum/step))

    # ---- validation phase ----
    model.eval()
    with torch.no_grad():  # evaluation must not build an autograd graph
        rmse_ = evaluate_model_me(model, test)

    # Note: the value printed as "loss" on this line is the test-set RMSE.
    info = (epoch, rmse_)
    print(("\nEPOCH = %d, loss = %.3f") %info)
print('Finished Training...')

[step = 5000] loss: 0.539
[step = 10000] loss: 0.540
[step = 15000] loss: 0.540

EPOCH = 0, loss = 0.929
[step = 5000] loss: 0.538
[step = 10000] loss: 0.539
[step = 15000] loss: 0.539

EPOCH = 1, loss = 0.920
[step = 5000] loss: 0.537
[step = 10000] loss: 0.538
[step = 15000] loss: 0.538

EPOCH = 2, loss = 0.928
[step = 5000] loss: 0.537
[step = 10000] loss: 0.537
[step = 15000] loss: 0.538

EPOCH = 3, loss = 0.925
[step = 5000] loss: 0.536
[step = 10000] loss: 0.537
[step = 15000] loss: 0.537

EPOCH = 4, loss = 0.926
[step = 5000] loss: 0.536
[step = 10000] loss: 0.536
[step = 15000] loss: 0.537

EPOCH = 5, loss = 0.922
[step = 5000] loss: 0.535
[step = 10000] loss: 0.536
[step = 15000] loss: 0.537

EPOCH = 6, loss = 0.931
[step = 5000] loss: 0.535
[step = 10000] loss: 0.536
[step = 15000] loss: 0.536

EPOCH = 7, loss = 0.927
[step = 5000] loss: 0.535
[step = 10000] loss: 0.535
[step = 15000] loss: 0.536

EPOCH = 8, loss = 0.929
[step = 5000] loss: 0.534
[step = 10000] loss: 0.535
[s