In [1]:
from recom.datasets import load_ml_small_rating

# load data
# not that I use leave-one-out method to construct the testing set, where
# the latest rated item is masked and added to the testing set as an evaluation.
dataset = load_ml_small_rating(need_raw=True, time_ord=True, test_perc=0.1)

# load features
ratings = dataset['raw']    
ratings_train_dict = dataset['train_dict']
ratings_test_dict = dataset['test_dict']
n_user = dataset['n_user']
n_item = dataset['n_item']
user2ix = dataset['user2ix']
ix2user = dataset['ix2user']
item2ix = dataset['item2ix']
ix2item = dataset['ix2item']

del dataset

print(f'Users: {n_user}, Items: {n_item}. Sparsity: {round(1-len(ratings)/n_user/n_item, 4)}')
print(f'User reduced from {len(user2ix.keys())} to {len(ratings_train_dict.keys())}')

Users: 609, Items: 9562. Sparsity: 0.983
User reduced from 609 to 608


In [8]:
import torch.nn as nn
from torch import Tensor, LongTensor, sigmoid, concat
from torch.nn.functional import logsigmoid


class GMF(nn.Module):
    """ General Matrix Factorization """
    def __init__(self, n_user, n_item, k_dim) -> None:
        super(GMF, self).__init__()
        # embeddings
        self.embedding_user = nn.Embedding(n_user, k_dim)
        self.embedding_item = nn.Embedding(n_item, k_dim)
        # weights
        self.linear = nn.Linear(k_dim, 1)
        # activation
        self.sigmoid = sigmoid
        # init param
        nn.init.normal_(self.embedding_user.weight, mean=0, std=1)
        nn.init.normal_(self.embedding_item.weight, mean=0, std=1)

    def pred_score(self, user, item):
        user_emb = self.embedding_user(user)
        item_emb = self.embedding_item(item)

        return user_emb*item_emb

    def forward(self, user, item):
        return self.sigmoid(self.linear(
            self.pred_score(user, item)
        ))


class MLP(nn.Module):
    """ Multi-Layer Perceptron """
    def __init__(self, n_user, n_item, out_dim 
                     , num_layers, p_dropout=0.05) -> None:
        
        super(MLP, self).__init__()
        # embeddings
        emb_dim = out_dim * num_layers
        self.embedding_user = nn.Embedding(n_user, emb_dim)
        self.embedding_item = nn.Embedding(n_item, emb_dim)
        # MLP blocks
        MLP_Layer = []
        for i in range(num_layers):
            input_size = 2 * out_dim * (num_layers-i)
            MLP_Layer.append(nn.Dropout(p_dropout))
            MLP_Layer.append(nn.Linear(input_size, input_size//2))
            MLP_Layer.append(nn.ReLU())        
        self.MLP_Layer = nn.Sequential(*MLP_Layer) # Open the MLP layer
        # Last linear layer
        self.Linear = nn.Linear(out_dim, 1)
        # activations
        self.sigmoid = sigmoid
        # init param
        nn.init.normal_(self.embedding_user.weight, mean=0, std=1)
        nn.init.normal_(self.embedding_item.weight, mean=0, std=1)

    def pred_score(self, user, item):
        user_emb = self.embedding_user(user)
        item_emb = self.embedding_item(item)
        mlp_input = concat([user_emb, item_emb], 1)

        mlp_out = self.MLP_Layer(mlp_input)

        return mlp_out

    def forward(self, user, item):
        return self.sigmoid(self.Linear(
            self.pred_score(user, item)
        ))


class NeuMF(nn.Module):
    """ Neural Matrix Factorization: Fusion of GMF and MLP """
    def __init__(self, n_user, n_item, k_dim 
                     , num_mlp_layers, p_dropout=0.05) -> None:
        super(NeuMF, self).__init__()
        self.GMF = GMF(n_user, n_item, k_dim)
        self.MLP = MLP(n_user, n_item, k_dim, num_mlp_layers, p_dropout)
        self.Linear = nn.Linear(2*k_dim, 1)
        self.sigmoid = sigmoid

    def forward(self, user, item):
        GMF_output = self.GMF.pred_score(user, item)
        MLP_output = self.MLP.pred_score(user, item)

        return self.sigmoid(self.Linear(
            concat([GMF_output, MLP_output], 1)
        ))


In [17]:
def naive_pointwise_mf_loader(rat_dict, items, batch_size, neg_size=None
                              , random_sampling=True, user_size=None, pos_size=None
                              , user_neg_dict=None):
    from random import choices
    from torch.utils.data import DataLoader

    if not isinstance(items, set):
        all_items = set(items)
    all_items = items
    
    train_data = []

    if not random_sampling: # goover all dataset
        for user in rat_dict:
            pos_items = list(rat_dict[user].keys())
            neg_candidates = list(all_items - set(pos_items)) if user_neg_dict is None \
                             else user_neg_dict[user]
            neg_items = choices(neg_candidates, k=len(pos_items)*neg_size)
            # add positive and negative candidates to training data
            train_data.extend(zip([1]*len(pos_items), [user]*len(pos_items), pos_items))
            train_data.extend(zip([0]*len(neg_items), [user]*len(neg_items), neg_items))     
                
    else:
        users = choices(list(rat_dict.keys()), k=user_size)
        for user in users:
            neg_candidates = list(all_items - set(rat_dict[user])) if user_neg_dict is None \
                             else user_neg_dict[user]
            pos_items = choices(list(rat_dict[user].keys()), k=pos_size)
            neg_items = choices(neg_candidates, k=pos_size*neg_size)
            # add positive and negative candidates to training data
            train_data.extend(zip([1]*len(pos_items), [user]*len(pos_items), pos_items))
            train_data.extend(zip([0]*len(neg_items), [user]*len(neg_items), neg_items))     
            
    return DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)


# things for sampling
items = list(range(n_item))
# first define the dictionary can accelerate sampling efficiency
user_neg_dict = {
    u:list(set(items)-set(ratings_train_dict[u].keys()))
     for u in ratings_train_dict
}

# DL1: roll over all data
dl_roll = naive_pointwise_mf_loader(
    rat_dict=ratings_train_dict, items=items, user_neg_dict=user_neg_dict
    , random_sampling=False, neg_size=4
    , batch_size=128
)

# DL2: sampling by parameters
dl_sample = naive_pointwise_mf_loader(
    rat_dict=ratings_train_dict, items=items, user_neg_dict=user_neg_dict
    , random_sampling=True, neg_size=4
    , user_size=256, pos_size=64
    , batch_size=128
)

In [37]:
gmf = GMF(n_user=n_user, n_item=n_item, k_dim=K_DIM)



target, user, item = list(dl_roll)[0]

target = target.float().to(compute_device)
user = autograd.Variable(LongTensor(user)).to(compute_device)
item = autograd.Variable(LongTensor(item)).to(compute_device)

gmf(user, item)

NameError: name 'compute_device' is not defined

In [47]:
import time
from torch import autograd, LongTensor, FloatTensor, device
from torch import optim
import numpy as np


def log_loss(input, target):
    loss = nn.BCELoss()
    return loss(input, target)


def train_model(model, opt, rat_train, n_items
                , use_random_sampling=True, neg_size=4
                , user_size=256, pos_size=32
                , use_cuda=False, n_epochs=64, batch_size=256
                , test_dict=None, metrics=None, k=None
                , report_interval=1):

    if use_cuda:
        compute_device = device('cuda')
        model.cuda()
    else:
        compute_device = device('cpu')

    # things for sampling
    items = list(range(n_item))
    # first define the dictionary can accelerate sampling efficiency
    user_neg_dict = {
        u:list(set(items)-set(rat_train[u].keys()))
        for u in rat_train}

    train_loss_by_ep = []
    test_loss_by_ep = []

    # place holder for metric
    # if metrics is not None:
    #     metrics_at_k = {metric[0]:[] for metric in metrics.items()} 
    #     test_cands = generate_testing_candidates(ratings_train_dict, n_item, n=100)

    t0 = time.time()
    for epoch in range(n_epochs):
        train_data = dl_sample = naive_pointwise_mf_loader(
                        rat_dict=rat_train, items=items
                        , user_neg_dict=user_neg_dict
                        , random_sampling=use_random_sampling
                        , neg_size=neg_size
                        , user_size=user_size, pos_size=pos_size
                        , batch_size=batch_size
                    )

        ep_loss = []
        for i, batch in enumerate(train_data):
            target, user, item = batch

            model.zero_grad()

            target = target.float().to(compute_device)
            user = autograd.Variable(LongTensor(user)).to(compute_device)
            item = autograd.Variable(LongTensor(item)).to(compute_device)

            preds = model(user, item).squeeze(-1)
            loss = log_loss(input=preds, target=target) # todo
            # print(loss)
            loss.sum().backward()
            opt.step()
            ep_loss.append(loss.data.to('cpu'))
            
        train_loss_by_ep.append(np.mean(ep_loss))

        # compute testing result
        # preds = bpr.pred_all().to('cpu')
        # for u in results:
        #     pred_items = Tensor([preds[u][i] for i in test_cands[u]])
        #     results[u] = [test_cands[u][ix] for ix in argsort(-pred_items)[:100]]

        # for metric in metrics_at_k:
        #     metrics_at_k[metric].append(metrics[metric](k, ratings_test_dict, results))

        if report_interval > 0 \
                and ((epoch+1) % report_interval == 0):
            
            t1=time.time()
            print(f'Epoch: {epoch+1}, Time: {round(t1-t0,2)},  /Average train loss {round(sum(train_loss_by_ep[-report_interval:])/report_interval, 5)}')
            # average_metrics = {metric:round(sum(metrics_at_k[metric][-report_interval:])/report_interval, 5) for metric in metrics_at_k}
            # test_metrics = ' '.join(f'{m_items[0]}:{m_items[1]}' for m_items in average_metrics.items())
            # print(f'\t\t\t/Average test metric at {k}: {test_metrics}')
            t0=time.time()

    # finish traniing, send to cpu anyway
    model = model.to('cpu') 

    # if test_dict is not None:
    #     return model, train_loss_by_ep, metrics_at_k

    return model, train_loss_by_ep, test_loss_by_ep


# from recom.model.pairwise import BPR
from torch import optim
# from recom.eval.metrics import map, hit_rate, ndcg
import warnings
warnings.filterwarnings("ignore")

K_DIM=12
NEG_SIZE=16 # 4
USER_SIZE=128
POS_SIZE=64
USE_CUDA=True
N_EPOCHES=12
BATCH_SIZE=512
INTERVAL=1

# model = GMF(n_user=n_user, n_item=n_item, k_dim=K_DIM)
# model = MLP(n_user=n_user, n_item=n_item, out_dim=K_DIM, num_layers=2)
model = NeuMF(n_user=n_user, n_item=n_item, k_dim=K_DIM, num_mlp_layers=2)

# leave one out
# opt = optim.Adam(bpr.parameters(), lr=0.002, weight_decay=0.1) # :mAP:0.0008 hit_rate:0.07566 ndcg:0.00919
# leave last 10% chronologically
# optim.Adam(bpr.parameters(), lr=0.001, weight_decay=0.05) # mAP:0.00445 hit_rate:0.12336 ndcg:0.0537
opt = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.05)
model, train_loss_by_ep, test_rmse_by_ep = train_model(
    model=model, opt=opt, rat_train=ratings_train_dict
    , n_items=n_item, use_random_sampling=True
    , neg_size=NEG_SIZE
    , user_size=USER_SIZE, pos_size=POS_SIZE
    , use_cuda=USE_CUDA, n_epochs=N_EPOCHES, batch_size=BATCH_SIZE
    , report_interval=INTERVAL
)

Epoch: 1, Time: 1.95,  /Average train loss 0.34854
Epoch: 2, Time: 1.84,  /Average train loss 0.23749
Epoch: 3, Time: 1.72,  /Average train loss 0.23475
Epoch: 4, Time: 1.88,  /Average train loss 0.23421
Epoch: 5, Time: 1.77,  /Average train loss 0.23469
Epoch: 6, Time: 1.8,  /Average train loss 0.23494
Epoch: 7, Time: 1.88,  /Average train loss 0.23531
Epoch: 8, Time: 2.13,  /Average train loss 0.23558
Epoch: 9, Time: 2.29,  /Average train loss 0.236
Epoch: 10, Time: 1.98,  /Average train loss 0.23643
Epoch: 11, Time: 1.8,  /Average train loss 0.23637
Epoch: 12, Time: 1.9,  /Average train loss 0.23697


In [53]:
ews = gmf.embedding_user(users)*gmf.embedding_item(items)
ews

tensor([[-0.0038, -0.0053,  0.0074,  0.0145,  0.0039, -0.0008, -0.0024, -0.0097,
          0.0257, -0.0035],
        [ 0.0037, -0.0005, -0.0035, -0.0139,  0.0044, -0.0017, -0.0030,  0.0145,
          0.0175,  0.0036],
        [ 0.0120,  0.0057, -0.0035, -0.0130,  0.0129,  0.0036, -0.0010,  0.0110,
         -0.0025, -0.0002],
        [-0.0003,  0.0074, -0.0044, -0.0415,  0.0022, -0.0012,  0.0007, -0.0068,
         -0.0344, -0.0021],
        [-0.0031, -0.0081,  0.0037,  0.0005, -0.0003,  0.0045, -0.0119, -0.0006,
          0.0025, -0.0038]], grad_fn=<MulBackward0>)

In [60]:
(gmf.linear.weight.data[0] * ews).sum(1)

tensor([ 0.0020,  0.0078,  0.0021,  0.0036, -0.0032], grad_fn=<SumBackward1>)

In [54]:
ews.sum(1)

tensor([ 0.0261,  0.0211,  0.0251, -0.0805, -0.0165], grad_fn=<SumBackward1>)

In [47]:
gmf.linear.bias

Parameter containing:
tensor([0.2394], requires_grad=True)

In [48]:
gmf.linear.weight

Parameter containing:
tensor([[-0.2281,  0.0944, -0.2378, -0.2669,  0.1129,  0.0884,  0.2206, -0.0763,
          0.2844,  0.1728]], requires_grad=True)