In [1]:
# !pip install PyTorchCML

In [1]:
import sys
sys.path.append("../../")

from itertools import product

from PyTorchCML import losses, models, samplers, regularizers, evaluators, trainers
import torch
from torch import nn, optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

In [2]:
# Fixed seed: makes the train/test split below repeatable, and seeds the
# global NumPy / PyTorch generators for any later code that draws from them.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# download movielens dataset (tab-separated, no header row)
movielens = pd.read_csv(
    'http://files.grouplens.org/datasets/movielens/ml-100k/u.data',
    sep='\t', header=None, index_col=None,
    names=["user_id", "item_id", "rating", "timestamp"]
)
# Shift ids down by one so they can be used as 0-based indices,
# then count the distinct users/items.
# NOTE(review): nunique() equals "max id + 1" only when the ids are
# contiguous - assumed for this dataset, confirm before swapping datasets.
movielens.user_id -= 1
movielens.item_id -= 1
n_user = movielens.user_id.nunique()
n_item = movielens.item_id.nunique()

# make implicit feedback: rating >= 4 becomes 1, everything else 0
movielens.rating = (movielens.rating >= 4).astype(int)


# train test split (seeded, so every run sees the same partition)
train, test = train_test_split(movielens, random_state=SEED)


# all user item pairs in user-major order: (0, 0), (0, 1), ..., (n_user-1, n_item-1).
# Built with vectorised NumPy calls instead of a Python-level loop over
# n_user * n_item tuples.
df_all = pd.DataFrame({
    "user_id": np.repeat(np.arange(n_user, dtype=np.int64), n_item),
    "item_id": np.tile(np.arange(n_item, dtype=np.int64), n_user),
})

# flag train pairs: after the left join, rating is NaN for every pair
# that does not occur in train
df_all = pd.merge(
    df_all,
    train[["user_id", "item_id", "rating"]],
    on=["user_id", "item_id"],
    how="left"
)

# remove train pairs: keep only the pairs absent from train, attach the
# held-out rating where one exists, and label every remaining pair 0
test = pd.merge(
    df_all[df_all.rating.isna()][["user_id", "item_id"]],
    test[["user_id", "item_id", "rating"]],
    on=["user_id", "item_id"],
    how="left"
).fillna(0)

# numpy array: only positive (rating == 1) pairs are used for training;
# the test array keeps (user_id, item_id, rating) for every candidate pair
train_set = train[train.rating == 1][["user_id", "item_id"]].values
test_set = test[["user_id", "item_id", "rating"]].values

# to torch.Tensor, placed on the GPU when one is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_set = torch.LongTensor(train_set).to(device)
test_set = torch.LongTensor(test_set).to(device)


## Default

In [3]:
# Hyper-parameters for this run.
lr, n_dim = 1e-3, 10

# Model and optimiser.
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

# Loss (margin 1) and sampler; strict_negative is switched off in this
# baseline and switched on in the "Strict Negative" section.
criterion = losses.SumTripletLoss(margin=1)
criterion = criterion.to(device)
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    device=device,
    strict_negative=False,
)

# Ranking metrics, each reported at the cut-offs listed in `ks`.
score_function_dict = dict(
    nDCG=evaluators.ndcg,
    MAP=evaluators.average_precision,
    Recall=evaluators.recall,
)
evaluator = evaluators.UserwiseEvaluator(
    test_set, score_function_dict, ks=[3, 5]
)

# Bundle everything into the trainer used by the next cell.
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler)


In [4]:
# Train for 20 epochs of 256 batches each (the progress bars below count 256
# steps per epoch). The evaluator runs before training and then every
# 10 epochs, which is why valid_scores has rows for epochs 0, 10 and 20.
trainer.fit(n_batch=256, n_epoch=20, valid_evaluator = evaluator, valid_per_epoch=10)

100%|██████████| 943/943 [00:20<00:00, 46.66it/s]
epoch1 avg_loss:0.931: 100%|██████████| 256/256 [00:06<00:00, 38.85it/s]
epoch2 avg_loss:0.753: 100%|██████████| 256/256 [00:06<00:00, 40.74it/s]
epoch3 avg_loss:0.658: 100%|██████████| 256/256 [00:06<00:00, 39.85it/s]
epoch4 avg_loss:0.597: 100%|██████████| 256/256 [00:05<00:00, 46.31it/s]
epoch5 avg_loss:0.558: 100%|██████████| 256/256 [00:07<00:00, 34.50it/s]
epoch6 avg_loss:0.525: 100%|██████████| 256/256 [00:05<00:00, 44.82it/s]
epoch7 avg_loss:0.500: 100%|██████████| 256/256 [00:06<00:00, 42.24it/s]
epoch8 avg_loss:0.476: 100%|██████████| 256/256 [00:07<00:00, 35.35it/s]
epoch9 avg_loss:0.455: 100%|██████████| 256/256 [00:07<00:00, 34.50it/s]
epoch10 avg_loss:0.433: 100%|██████████| 256/256 [00:06<00:00, 41.74it/s]
100%|██████████| 943/943 [00:21<00:00, 44.59it/s]
epoch11 avg_loss:0.412: 100%|██████████| 256/256 [00:06<00:00, 41.59it/s]
epoch12 avg_loss:0.387: 100%|██████████| 256/256 [00:06<00:00, 39.21it/s]
epoch13 avg_loss:0.36

In [5]:
# Validation metrics recorded during fit(): one row per evaluation,
# together with the epoch it was taken at and the training loss.
trainer.valid_scores

Unnamed: 0,nDCG@3,MAP@3,Recall@3,nDCG@5,MAP@5,Recall@5,epoch,loss
0,0.007423,0.012902,0.001568,0.008634,0.017922,0.002917,0,
0,0.042387,0.070078,0.006008,0.046897,0.084353,0.011537,10,0.432954
0,0.202032,0.291888,0.044877,0.202131,0.311188,0.073077,20,0.274121


## Strict Negative

In [6]:
# Hyper-parameters for this run.
lr, n_dim = 1e-3, 10

# Model and optimiser.
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

# Same loss as the baseline section; the only difference in this cell is
# that the sampler is created with strict_negative=True.
criterion = losses.SumTripletLoss(margin=1)
criterion = criterion.to(device)
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    device=device,
    strict_negative=True,
)

# Ranking metrics, each reported at the cut-offs listed in `ks`.
score_function_dict = dict(
    nDCG=evaluators.ndcg,
    MAP=evaluators.average_precision,
    Recall=evaluators.recall,
)
evaluator = evaluators.UserwiseEvaluator(
    test_set, score_function_dict, ks=[3, 5]
)

# Bundle everything into the trainer used by the next cell.
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler)


In [7]:
# Train for 20 epochs of 256 batches each (the progress bars below count 256
# steps per epoch). The evaluator runs before training and then every
# 10 epochs, which is why valid_scores has rows for epochs 0, 10 and 20.
trainer.fit(n_batch=256, n_epoch=20, valid_evaluator = evaluator, valid_per_epoch=10)

100%|██████████| 943/943 [00:18<00:00, 51.01it/s]
epoch1 avg_loss:0.949: 100%|██████████| 256/256 [00:09<00:00, 26.99it/s]
epoch2 avg_loss:0.792: 100%|██████████| 256/256 [00:09<00:00, 26.42it/s]
epoch3 avg_loss:0.697: 100%|██████████| 256/256 [00:09<00:00, 25.81it/s]
epoch4 avg_loss:0.636: 100%|██████████| 256/256 [00:09<00:00, 27.21it/s]
epoch5 avg_loss:0.597: 100%|██████████| 256/256 [00:07<00:00, 34.30it/s]
epoch6 avg_loss:0.564: 100%|██████████| 256/256 [00:07<00:00, 34.35it/s]
epoch7 avg_loss:0.538: 100%|██████████| 256/256 [00:07<00:00, 34.95it/s]
epoch8 avg_loss:0.517: 100%|██████████| 256/256 [00:08<00:00, 29.53it/s]
epoch9 avg_loss:0.494: 100%|██████████| 256/256 [00:08<00:00, 28.63it/s]
epoch10 avg_loss:0.471: 100%|██████████| 256/256 [00:07<00:00, 32.86it/s]
100%|██████████| 943/943 [00:23<00:00, 40.65it/s]
epoch11 avg_loss:0.450: 100%|██████████| 256/256 [00:07<00:00, 33.37it/s]
epoch12 avg_loss:0.425: 100%|██████████| 256/256 [00:07<00:00, 33.03it/s]
epoch13 avg_loss:0.40

In [8]:
# Validation metrics recorded during fit(): one row per evaluation,
# together with the epoch it was taken at and the training loss.
trainer.valid_scores

Unnamed: 0,nDCG@3,MAP@3,Recall@3,nDCG@5,MAP@5,Recall@5,epoch,loss
0,0.020385,0.036939,0.001419,0.018477,0.04164,0.002321,0,
0,0.059936,0.091552,0.004546,0.068632,0.110379,0.009836,10,0.47134
0,0.273629,0.369123,0.034714,0.272501,0.385817,0.05915,20,0.287908


## Global Orthogonal Regularization

In [9]:
# Hyper-parameters for this run.
lr, n_dim = 1e-3, 10

# Model and optimiser.
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

# The triplet loss is given a GlobalOrthogonalRegularizer (weight 1e-2)
# through its `regularizers` argument.
regs = [
    regularizers.GlobalOrthogonalRegularizer(weight=1e-2),
]
criterion = losses.SumTripletLoss(margin=1, regularizers=regs)
criterion = criterion.to(device)
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    device=device,
    strict_negative=True,
)

# Ranking metrics, each reported at the cut-offs listed in `ks`.
score_function_dict = dict(
    nDCG=evaluators.ndcg,
    MAP=evaluators.average_precision,
    Recall=evaluators.recall,
)
evaluator = evaluators.UserwiseEvaluator(
    test_set, score_function_dict, ks=[3, 5]
)

# Bundle everything into the trainer used by the next cell.
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler)

In [10]:
# Train for 20 epochs of 256 batches each (the progress bars below count 256
# steps per epoch). The evaluator runs before training and then every
# 10 epochs, which is why valid_scores has rows for epochs 0, 10 and 20.
trainer.fit(n_batch=256, n_epoch=20, valid_evaluator = evaluator, valid_per_epoch=10)

100%|██████████| 943/943 [00:21<00:00, 44.39it/s]
epoch1 avg_loss:0.948: 100%|██████████| 256/256 [00:11<00:00, 21.50it/s]
epoch2 avg_loss:0.794: 100%|██████████| 256/256 [00:07<00:00, 32.35it/s]
epoch3 avg_loss:0.700: 100%|██████████| 256/256 [00:07<00:00, 33.16it/s]
epoch4 avg_loss:0.638: 100%|██████████| 256/256 [00:10<00:00, 23.36it/s]
epoch5 avg_loss:0.598: 100%|██████████| 256/256 [00:07<00:00, 34.20it/s]
epoch6 avg_loss:0.565: 100%|██████████| 256/256 [00:08<00:00, 31.81it/s]
epoch7 avg_loss:0.540: 100%|██████████| 256/256 [00:07<00:00, 32.40it/s]
epoch8 avg_loss:0.516: 100%|██████████| 256/256 [00:07<00:00, 33.46it/s]
epoch9 avg_loss:0.493: 100%|██████████| 256/256 [00:13<00:00, 19.51it/s]
epoch10 avg_loss:0.470: 100%|██████████| 256/256 [00:08<00:00, 31.15it/s]
100%|██████████| 943/943 [00:24<00:00, 38.92it/s]
epoch11 avg_loss:0.449: 100%|██████████| 256/256 [00:08<00:00, 28.90it/s]
epoch12 avg_loss:0.422: 100%|██████████| 256/256 [00:08<00:00, 31.93it/s]
epoch13 avg_loss:0.40

In [11]:
# Validation metrics recorded during fit(): one row per evaluation,
# together with the epoch it was taken at and the training loss.
trainer.valid_scores

Unnamed: 0,nDCG@3,MAP@3,Recall@3,nDCG@5,MAP@5,Recall@5,epoch,loss
0,0.017032,0.029692,0.002178,0.017122,0.036462,0.003362,0,
0,0.074296,0.1155,0.005293,0.075128,0.125672,0.008741,10,0.469893
0,0.276036,0.380877,0.035444,0.276884,0.395498,0.060833,20,0.281246


## Two Stage

In [3]:
# Per-item weight = (number of rows of `train` that mention the item) ** 0.1.
# Every training row is counted, whatever its binarised rating; items that
# never occur in `train` keep a weight of 0.
neg_weight = np.zeros(n_item)
item_count = train.groupby("item_id").user_id.count()
count_index = item_count.index.to_numpy()
neg_weight[count_index] = np.power(item_count.to_numpy(), 0.1)

In [4]:
# Hyper-parameters for this run.
lr, n_dim = 1e-3, 10

# Model and optimiser.
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

# MinTripletLoss with a GlobalOrthogonalRegularizer (weight 1e-3).
regs = [
    regularizers.GlobalOrthogonalRegularizer(weight=1e-3),
]
criterion = losses.MinTripletLoss(margin=1, regularizers=regs)
criterion = criterion.to(device)

# Two-stage sampler fed with the popularity-based `neg_weight` array from the
# previous cell and n_neg_samples=5.
sampler = samplers.TwoStageSampler(
    train_set,
    n_user,
    n_item,
    neg_weight=neg_weight,
    n_neg_samples=5,
    device=device,
    strict_negative=False,
)

# Ranking metrics, each reported at the cut-offs listed in `ks`.
score_function_dict = dict(
    nDCG=evaluators.ndcg,
    MAP=evaluators.average_precision,
    Recall=evaluators.recall,
)
evaluator = evaluators.UserwiseEvaluator(
    test_set, score_function_dict, ks=[3, 5]
)

# Bundle everything into the trainer used by the next cell.
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler)

In [5]:
# Train for 20 epochs of 256 batches each (the progress bars below count 256
# steps per epoch). The evaluator runs before training and then every
# 10 epochs, which is why valid_scores has rows for epochs 0, 10 and 20.
trainer.fit(n_batch=256, n_epoch=20, valid_evaluator = evaluator, valid_per_epoch=10)

100%|██████████| 943/943 [00:27<00:00, 34.76it/s]
epoch1 avg_loss:1.495: 100%|██████████| 256/256 [00:08<00:00, 31.49it/s]
epoch2 avg_loss:1.321: 100%|██████████| 256/256 [00:07<00:00, 36.29it/s]
epoch3 avg_loss:1.207: 100%|██████████| 256/256 [00:06<00:00, 37.32it/s]
epoch4 avg_loss:1.144: 100%|██████████| 256/256 [00:16<00:00, 15.77it/s]
epoch5 avg_loss:1.108: 100%|██████████| 256/256 [00:11<00:00, 22.04it/s]
epoch6 avg_loss:1.084: 100%|██████████| 256/256 [00:12<00:00, 20.87it/s]
epoch7 avg_loss:1.074: 100%|██████████| 256/256 [00:08<00:00, 28.55it/s]
epoch8 avg_loss:1.060: 100%|██████████| 256/256 [00:06<00:00, 38.29it/s]
epoch9 avg_loss:1.050: 100%|██████████| 256/256 [00:06<00:00, 37.60it/s]
epoch10 avg_loss:1.044: 100%|██████████| 256/256 [00:05<00:00, 43.88it/s]
100%|██████████| 943/943 [00:22<00:00, 42.64it/s]
epoch11 avg_loss:1.036: 100%|██████████| 256/256 [00:06<00:00, 40.12it/s]
epoch12 avg_loss:1.030: 100%|██████████| 256/256 [00:06<00:00, 39.84it/s]
epoch13 avg_loss:1.02

In [6]:
# Validation metrics recorded during fit(): one row per evaluation,
# together with the epoch it was taken at and the training loss.
trainer.valid_scores

Unnamed: 0,nDCG@3,MAP@3,Recall@3,nDCG@5,MAP@5,Recall@5,epoch,loss
0,0.012986,0.022004,0.001167,0.013278,0.02819,0.002582,0,
0,0.207166,0.293655,0.019897,0.200064,0.305768,0.033076,10,1.043901
0,0.356546,0.484093,0.052573,0.326033,0.484409,0.074481,20,1.001474


## Model Weighted Negative Sampler

In [4]:
def svd_init(X, dim, random_state=None):
    """Build initial embeddings and biases from a truncated SVD of X.

    A rank-``dim`` truncated SVD gives ``U_`` (rows x dim) and ``V_``
    (dim x columns). The returned factors are shifted and rescaled so that

        U @ V + ub[:, None] + vb[None, :] == 2 * (U_ @ V_) - 1

    (up to floating-point error). This identity only holds when the number
    of SVD components and the scaling constant use the same dimension, so
    both are taken from ``dim`` rather than from a hard-coded value or a
    notebook-level global.

    Args :
        X : csr_matrix whose elements are 0 or 1.
        dim : number of dimensions (SVD components) of the factors.
        random_state : optional seed forwarded to TruncatedSVD; the default
            None keeps the unseeded behaviour.

    Returns :
        U : array of shape (X.shape[0], dim), row-side factors.
        V : array of shape (dim, X.shape[1]), column-side factors.
        ub : array of shape (X.shape[0],), row-side biases.
        vb : array of shape (X.shape[1],), column-side biases.
    """
    svd = TruncatedSVD(n_components=dim, random_state=random_state)
    U_ = svd.fit_transform(X)
    V_ = svd.components_

    # Scalar used to balance the magnitudes of the two sides: the average of
    # the mean per-row component sum of U_ and the mean per-column component
    # sum of V_.
    s = (U_.sum(axis=1).mean() + V_.sum(axis=0).mean()) / 2

    # Constant offsets added to the scaled factors; their cross terms are
    # cancelled by ub and vb, leaving the -1 in the identity above.
    U = 2 ** 0.5 * U_ - (1 / dim) ** 0.5 * s * np.ones_like(U_)
    V = 2 ** 0.5 * V_ + (1 / dim) ** 0.5 / s * np.ones_like(V_)
    ub = -(2 / dim) ** 0.5 * U_.sum(axis=1) / s
    vb = (2 / dim) ** 0.5 * V_.sum(axis=0) * s

    return U, V, ub, vb

In [5]:
n_dim = 10

# Binary user-item matrix of the positive training pairs.
# train_set was moved to `device` earlier; NumPy/SciPy cannot read a tensor
# that lives on a CUDA device, so bring the index columns back to host
# memory first (on a CPU-only machine .cpu() returns the same tensor).
train_pairs = train_set.cpu().numpy()
X = csr_matrix(
    (np.ones(train_pairs.shape[0]), (train_pairs[:, 0], train_pairs[:, 1])),
    shape=(n_user, n_item)
)

# SVD-based starting values for the matrix-factorisation model.
U, V, ub, vb = svd_init(X, n_dim)
neg_weight_model = models.LogitMatrixFactorization(
    n_user, n_item, n_dim, max_norm=None,
    user_embedding_init = torch.Tensor(U),
    item_embedding_init = torch.Tensor(V.T),  # V is (n_dim, n_item); pass it item-major
    user_bias_init = torch.Tensor(ub),
    item_bias_init = torch.Tensor(vb)
).to(device)

# Override the model's link_weight with 1 - sigmoid(x).
# NOTE(review): presumably the sampler uses this value as the negative-sampling
# weight, so pairs the model scores as likely positives are drawn less often -
# confirm against the sampler implementation.
neg_weight_model.link_weight = lambda x : 1 - torch.sigmoid(x)

In [6]:
# Learning rate for this run; n_dim is reused from the cell that built
# neg_weight_model.
lr = 1e-3

# Model and optimiser.
model = models.CollaborativeMetricLearning(n_user, n_item, n_dim)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

# Plain triplet loss; the sampler receives the matrix-factorisation model
# as its `neg_weight` instead of a fixed per-item array.
criterion = losses.SumTripletLoss(margin=1)
criterion = criterion.to(device)
sampler = samplers.BaseSampler(
    train_set,
    n_user,
    n_item,
    neg_weight=neg_weight_model,
    device=device,
    strict_negative=False,
)

# Ranking metrics, each reported at the cut-offs listed in `ks`.
score_function_dict = dict(
    nDCG=evaluators.ndcg,
    MAP=evaluators.average_precision,
    Recall=evaluators.recall,
)
evaluator = evaluators.UserwiseEvaluator(
    test_set, score_function_dict, ks=[3, 5]
)

# Bundle everything into the trainer used by the next cell.
trainer = trainers.BaseTrainer(model, optimizer, criterion, sampler)

In [7]:
# Train for 20 epochs of 256 batches each (the progress bars below count 256
# steps per epoch). The evaluator runs before training and then every
# 10 epochs, which is why valid_scores has rows for epochs 0, 10 and 20.
trainer.fit(n_batch=256, n_epoch=20, valid_evaluator = evaluator, valid_per_epoch=10)

100%|██████████| 943/943 [00:16<00:00, 55.70it/s]
epoch1 avg_loss:0.968: 100%|██████████| 256/256 [00:05<00:00, 44.73it/s]
epoch2 avg_loss:0.846: 100%|██████████| 256/256 [00:05<00:00, 44.17it/s]
epoch3 avg_loss:0.766: 100%|██████████| 256/256 [00:06<00:00, 36.72it/s]
epoch4 avg_loss:0.718: 100%|██████████| 256/256 [00:06<00:00, 38.69it/s]
epoch5 avg_loss:0.677: 100%|██████████| 256/256 [00:07<00:00, 34.02it/s]
epoch6 avg_loss:0.650: 100%|██████████| 256/256 [00:06<00:00, 41.09it/s]
epoch7 avg_loss:0.629: 100%|██████████| 256/256 [00:05<00:00, 46.11it/s]
epoch8 avg_loss:0.610: 100%|██████████| 256/256 [00:05<00:00, 45.69it/s]
epoch9 avg_loss:0.589: 100%|██████████| 256/256 [00:07<00:00, 34.75it/s]
epoch10 avg_loss:0.572: 100%|██████████| 256/256 [00:07<00:00, 33.05it/s]
100%|██████████| 943/943 [00:19<00:00, 47.84it/s]
epoch11 avg_loss:0.555: 100%|██████████| 256/256 [00:07<00:00, 33.20it/s]
epoch12 avg_loss:0.539: 100%|██████████| 256/256 [00:06<00:00, 39.77it/s]
epoch13 avg_loss:0.52

In [8]:
# Validation metrics recorded during fit(): one row per evaluation,
# together with the epoch it was taken at and the training loss.
trainer.valid_scores

Unnamed: 0,nDCG@3,MAP@3,Recall@3,nDCG@5,MAP@5,Recall@5,epoch,loss
0,0.016209,0.030399,0.001955,0.0169,0.038264,0.003477,0,
0,0.051292,0.078208,0.004285,0.05536,0.094836,0.00751,10,0.572125
0,0.233268,0.322552,0.030232,0.23401,0.336276,0.049536,20,0.430135
