In [1]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Multi-layer perceptron scorer over user and item embeddings.

    Every user index and item index is looked up in its own embedding table;
    the two vectors are concatenated, pushed through a stack of
    ReLU-activated linear layers and finally projected to one sigmoid score.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        hidden_dim: Width of each embedding vector. The first linear layer
            therefore receives ``2 * hidden_dim`` input features.
        layers: Output width(s) of the hidden linear layers. Either a single
            int (one hidden layer) or any iterable of ints (list, tuple, ...).
            An empty iterable yields no hidden layers at all.
    """

    def __init__(self, num_users, num_items, hidden_dim, layers):
        super(MLP, self).__init__()
        self.mlp_user_emb = nn.Embedding(num_users, hidden_dim)
        self.mlp_item_emb = nn.Embedding(num_items, hidden_dim)

        # Normalise `layers` into a fresh list: this accepts ints, tuples and
        # other iterables alike, and never aliases the caller's object.
        hidden_sizes = [layers] if isinstance(layers, int) else list(layers)
        sizes = [2 * hidden_dim] + hidden_sizes

        # Consecutive (in, out) pairs of `sizes` define the hidden stack.
        self.mlp_fc_layers = nn.ModuleList(
            nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes[:-1], sizes[1:])
        )
        self.mlp_last = nn.Linear(sizes[-1], 1)

    def forward(self, user_indices, item_indices):
        """Score (user, item) pairs.

        Args:
            user_indices: Integer tensor of indices into the user embedding.
            item_indices: Integer tensor of indices into the item embedding;
                it must be concatenable with the user embeddings along the
                last dimension.

        Returns:
            Tensor of sigmoid scores whose last dimension has size 1.
        """
        user_vec = self.mlp_user_emb(user_indices)
        item_vec = self.mlp_item_emb(item_indices)
        hidden = torch.cat([user_vec, item_vec], dim=-1)
        for fc in self.mlp_fc_layers:
            hidden = torch.relu(fc(hidden))
        logit = self.mlp_last(hidden)
        return torch.sigmoid(logit)

In [2]:
import pandas as pd
from utils.data import SampleGenerator

# --- Reproducibility --------------------------------------------------------
# Seed before any sampling or weight initialisation so that Restart & Run All
# produces the same numbers.
# NOTE(review): SampleGenerator's source of randomness is not visible from
# this notebook. Seeding Python's `random` and torch covers the usual cases;
# confirm against utils.data and seed numpy too if it draws from there.
import random

SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# --- Load ratings -----------------------------------------------------------
ratings = pd.read_csv('dataset/ratings.csv')
ratings = ratings.rename(columns={'movieId': 'itemId'})

# --- Re-index user and item ids to dense 0..N-1 integers --------------------
# Each distinct raw id gets a new integer id, joined back onto the ratings
# with a left merge (row order of `ratings` is preserved). Presumably this is
# so the ids can be used directly as embedding row indices — confirm against
# how SampleGenerator derives num_users / num_items.
for id_column in ('userId', 'itemId'):
    dense_column = 'new_' + id_column
    raw_ids = list(set(ratings[id_column]))
    lookup = pd.DataFrame({id_column: raw_ids,
                           dense_column: list(range(len(raw_ids)))})
    ratings = pd.merge(ratings, lookup, how='left', on=id_column)

# Swap the raw id columns for their dense replacements.
ratings = ratings.drop(['userId', 'itemId'], axis=1)
ratings = ratings.rename(columns={'new_userId': 'userId', 'new_itemId': 'itemId'})

data = SampleGenerator(ratings, implicit=True)

# --- Hyperparameters --------------------------------------------------------
hidden_dim = 128            # embedding width for users and for items
layers = [128]              # hidden layer widths of the MLP
lr = 0.001                  # Adam learning rate
batch_size = 2048
epochs = 15

num_users = data.num_users
num_items = data.num_items
num_negatives_train = 5     # passed to instance_a_train_loader
num_negatives_test = 500    # passed to instance_test_loader

cuda = torch.cuda.is_available()

# --- Model, loss, optimizer -------------------------------------------------
model = MLP(num_users, num_items, hidden_dim, layers)
# Move the model to the GPU *before* building the optimizer, as the
# torch.optim documentation recommends.
if cuda:
    model.cuda()

# The model already applies a sigmoid, so plain BCELoss is the matching loss.
criterion = nn.BCELoss()
optim = torch.optim.Adam(model.parameters(), lr)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  ratings['rating'][ratings['rating'] >0] = 1.0


In [3]:
import os
from utils.eval import Evaluation

# exist_ok avoids the check-then-create race of `exists()` + `mkdir()`.
os.makedirs("./checkpoint", exist_ok=True)

# `.to(device)` is a no-op on CPU, so one code path serves both cases.
device = torch.device("cuda" if cuda else "cpu")


def _score_loader(loader):
    """Run `model` over every batch of `loader` and flatten the results.

    Each batch is indexed positionally: element 0 holds user ids and element 1
    holds item ids. Call this inside `torch.no_grad()`.

    Returns:
        (users, items, preds): three flat Python lists in loader order.
    """
    users, items, preds = [], [], []
    for batch in loader:
        user, item = batch[0], batch[1]
        # Record the ids before moving the tensors off the CPU.
        users += user.view(-1).tolist()
        items += item.view(-1).tolist()
        pred = model(user.to(device), item.to(device))
        preds += pred.cpu().view(-1).tolist()
    return users, items, preds


# The evaluation loaders are built once and reused for every epoch.
test_loader, negative_loader = data.instance_test_loader(num_negatives=num_negatives_test, batch_size=batch_size)

for epoch in range(1, epochs + 1):
    # A new training loader is requested every epoch.
    # NOTE(review): presumably this re-draws the negative samples — confirm in utils.data.
    train_loader = data.instance_a_train_loader(num_negatives=num_negatives_train, batch_size=batch_size)

    model.train()
    total_loss = 0
    for batch in train_loader:
        user, item, rating = batch[0], batch[1], batch[2]
        user, item = user.to(device), item.to(device)
        # BCELoss needs float targets.
        rating = rating.float().to(device)

        optim.zero_grad()
        pred = model(user, item)
        loss = criterion(pred.view(-1), rating)
        loss.backward()
        optim.step()
        total_loss += loss.item()
    # total_loss is the SUM of the per-batch losses, not their average.
    print("epoch{0} loss:{1:.4f}".format(epoch, total_loss))

    # Overwrites the same file each epoch: only the latest weights are kept.
    torch.save(model.state_dict(), "./checkpoint/mlp.pt")

    model.eval()
    with torch.no_grad():
        test_users, test_items, test_preds = _score_loader(test_loader)
        neg_users, neg_items, neg_preds = _score_loader(negative_loader)

    # Named `evaluator` rather than `eval` so the builtin is not shadowed
    # for the rest of the kernel session.
    evaluator = Evaluation([test_users, test_items, test_preds,
                            neg_users, neg_items, neg_preds])
    evaluator.print_eval_score_k(10)

epoch1 loss:10.2209
recall@10:0.2530, prec@10:0.0774
epoch2 loss:1.4937
recall@10:0.2576, prec@10:0.0826
epoch3 loss:0.4831
recall@10:0.2654, prec@10:0.0866
epoch4 loss:0.2485
recall@10:0.2632, prec@10:0.0867
epoch5 loss:0.1537
recall@10:0.2643, prec@10:0.0872
epoch6 loss:0.1039
recall@10:0.2672, prec@10:0.0886
epoch7 loss:0.0767
recall@10:0.2653, prec@10:0.0884
epoch8 loss:0.0583
recall@10:0.2657, prec@10:0.0885
epoch9 loss:0.0466
recall@10:0.2692, prec@10:0.0893
epoch10 loss:0.0368
recall@10:0.2734, prec@10:0.0918
epoch11 loss:0.0296
recall@10:0.2753, prec@10:0.0923
epoch12 loss:0.0252
recall@10:0.2754, prec@10:0.0939
epoch13 loss:0.0232
recall@10:0.2743, prec@10:0.0929
epoch14 loss:0.0195
recall@10:0.2747, prec@10:0.0938
epoch15 loss:0.0158
recall@10:0.2743, prec@10:0.0934
