### Initialization

In [1]:
# Experiment configuration: the dataset to load and the popularity threshold P.
# dataset_name is matched against the branches of the dataset-loading cell,
# which recognises: movielens100k, movielens20m, amazon, goodbook.
dataset_name="movielens20m" # movielens100k, movielens20m, amazon, goodbook
# P: an interaction is eligible for the "unpopular" sample when its item's
# rating count over the whole dataset is strictly below this value.
popularity_threshold=30000

In [2]:
import sys

import numpy as np

import pytrec_eval

from decimal import Decimal
import scipy.stats as st
from itertools import combinations
from spotlight.datasets import movielens, amazon, goodbooks
from spotlight.interactions import Interactions
from spotlight.cross_validation import random_train_test_split
from spotlight.evaluation import mrr_score, precision_recall_score
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel

from popularity import PopularityModel
from randomm import RandomModel


# evaluation function based on pytrec_eval
def evaluate(interactions, model, topk):
    """Evaluate ``model`` on ``interactions`` using pytrec_eval.

    Every (user, item) pair found in ``interactions`` is registered as a
    relevant judgment with relevance 1. For each distinct user the model's
    scores are ranked from highest to lowest and the ``topk`` best positions
    are submitted as that user's run; positions in the score array are used
    as item ids.

    Parameters
    ----------
    interactions
        Object exposing parallel ``user_ids`` and ``item_ids`` sequences.
    model
        Object whose ``predict(user_ids=uid)`` returns an array of scores.
    topk : int
        Number of top-scored items kept per user.

    Returns
    -------
    Whatever ``RelevanceEvaluator.evaluate`` returns for the run, computed
    over all of ``pytrec_eval.supported_measures``.
    """
    # Relevance judgments: user -> {item: 1}; ids are stringified before
    # being handed to the evaluator.
    judgments = {}
    for user, item in zip(interactions.user_ids, interactions.item_ids):
        judgments.setdefault(str(user), {})[str(item)] = 1

    scorer = pytrec_eval.RelevanceEvaluator(
        judgments, pytrec_eval.supported_measures)

    # Ranked run: user -> {item: score} restricted to the topk best items.
    ranking = {}
    for user in np.unique(interactions.user_ids):
        # Negate so that an ascending argsort yields the highest scores first.
        negated = -model.predict(user_ids=user)
        best_items = negated.argsort()[:topk]
        user_run = ranking.setdefault(str(user), {})
        for item in best_items:
            user_run[str(item)] = float(-negated[item])

    return scorer.evaluate(ranking)


### Build popular and unpopular sample sets

In [5]:
# Seed for the sampling and the train/test split below, so that a fresh
# Restart & Run All reproduces the same popular/unpopular sample sets.
RANDOM_SEED = 42
rng = np.random.RandomState(RANDOM_SEED)

if dataset_name == "movielens100k":
    ds = movielens.get_movielens_dataset(variant='100K')
elif dataset_name == "movielens20m":
    ds = movielens.get_movielens_dataset(variant='20M')
elif dataset_name == "amazon":
    ds = amazon.get_amazon_dataset()
elif dataset_name == "goodbook":
    ds = goodbooks.get_goodbooks_dataset()
    ds.ratings = ds.ratings.astype(np.float32)
else:
    # Fail fast: carrying on would either raise a NameError on `ds` or,
    # worse, silently reuse a dataset left in the kernel by an earlier run.
    raise ValueError("unknown dataset: %r" % (dataset_name,))

# find the rating frequency of each item
# (vectorised: one NumPy pass instead of a Python loop over every interaction)
unique_items, item_positions, item_counts = np.unique(
    ds.item_ids, return_inverse=True, return_counts=True)
item_freq = dict(zip(unique_items.tolist(), item_counts.tolist()))

# get the corresponding array for popularity:
# pops[k] is the rating frequency of the item of interaction k
pops = item_counts[item_positions]

ds.weights = pops.astype(np.float64)

num_interactions = len(ds.user_ids)

print("""
Total samples contains %s interactions with average popularity %.2E
""" % (format(num_interactions, ','), Decimal(np.mean(ds.weights))))

# the unpopular sample is 20% of all interactions
sample_size = int(0.2 * num_interactions)
indices = np.arange(num_interactions, dtype=np.int32)

# interactions whose item is rated fewer than `popularity_threshold` times
unpopular_mask = ds.weights < popularity_threshold
num_unpopular = int(np.count_nonzero(unpopular_mask))

print("""
Total population (popularity < %d): %s
""" % (popularity_threshold, format(num_unpopular, ',')))

if sample_size > num_unpopular:
    # Sampling without replacement cannot draw more than the pool holds.
    raise ValueError(
        "popularity_threshold=%d leaves only %d interactions, fewer than "
        "the %d required for the unpopular sample"
        % (popularity_threshold, num_unpopular, sample_size))

unpopular_indices = rng.choice(a=indices[unpopular_mask],
                               size=sample_size,
                               replace=False)

# boolean mask of everything that was NOT drawn into the unpopular sample
popular_indices = np.ones(num_interactions, dtype=bool)
popular_indices[unpopular_indices] = False


def _take(values, selector):
    """Index `values` with `selector`, passing through a missing (None) array."""
    return None if values is None else values[selector]


def _subset(selector):
    """Build the Interactions of `ds` selected by an index array or boolean mask."""
    return Interactions(user_ids=ds.user_ids[selector],
                        item_ids=ds.item_ids[selector],
                        ratings=_take(ds.ratings, selector),
                        timestamps=_take(ds.timestamps, selector),
                        weights=ds.weights[selector],
                        num_users=ds.num_users,
                        num_items=ds.num_items)


# build a popular dataset
ds_popular = _subset(popular_indices)

print("""
Popular samples contains %s interactions with average popularity %.2E
""" % (format(len(ds_popular.user_ids), ','), Decimal(np.mean(ds_popular.weights))))

ds_unpopular = _subset(unpopular_indices)

print("""
Unpopular samples contains %s interactions with average popularity %.2E
""" % (format(len(ds_unpopular.user_ids), ','), Decimal(np.mean(ds_unpopular.weights))))

# split the popular interactions so that the held-out part has (about) the
# same number of interactions as the unpopular sample
test_ratio = float(sample_size / len(ds_popular.user_ids))

ds_popular_train, ds_popular_test = random_train_test_split(
    ds_popular, test_percentage=test_ratio, random_state=rng)



Total samples contains 20,000,263 interactions with average popularity 1.35E+04


Total population (popularity < 30000): 17,417,627


Popular samples contains 16,000,211 interactions with average popularity 1.46E+04


Unpopular samples contains 4,000,052 interactions with average popularity 9.08E+03



### Train/Evaluate different models

In [6]:
# fix model's parameters
LATENT_DIM = 32
NUM_EPOCHS = 1
BATCH_SIZE = 256
L2 = 1e-6
LEARNING_RATE = 1e-3
topk = 20

# one list of per-user scores per model, in the order of `model_names`
mrr_biased = []
ndcg_biased = []
mrr_unbiased = []
ndcg_unbiased = []

model_names = ["random", "popularity", "explicit", "bpr", "warp"]


def _build_model(model_name):
    """Return a new, unfitted model for `model_name`.

    Raises ValueError for an unrecognised name, so the loop below can never
    fit and report a model left over from a previous iteration.
    """
    if model_name == "random":
        return RandomModel()
    if model_name == "popularity":
        return PopularityModel(k=topk)

    # the factorization models share every hyper-parameter except the loss
    shared = dict(embedding_dim=LATENT_DIM,
                  n_iter=NUM_EPOCHS,
                  learning_rate=LEARNING_RATE,
                  batch_size=BATCH_SIZE,
                  l2=L2)
    if model_name == "explicit":
        return ExplicitFactorizationModel(loss='regression', **shared)
    if model_name == "bpr":
        return ImplicitFactorizationModel(loss='bpr', **shared)
    if model_name == "warp":
        # reported as "warp" but trained with the 'adaptive_hinge' loss
        return ImplicitFactorizationModel(loss='adaptive_hinge', **shared)

    raise ValueError('Unknown model name: %r' % (model_name,))


def _per_user(results, measure):
    """Values of `measure` for every user in `results`, ordered by user key."""
    return [results[uid][measure] for uid in sorted(results)]


def _report(label, mrr_values, ndcg_values):
    """Print the aggregated MRR and NDCG of one test set under `label`."""
    print('\tMRR@%d  (%s): %f' %
          (topk, label, pytrec_eval.compute_aggregated_measure(measure='recip_rank', values=mrr_values)))
    print('\tNDCG@%d (%s): %f' %
          (topk, label, pytrec_eval.compute_aggregated_measure(measure='ndcg', values=ndcg_values)))


for model_name in model_names:

    model = _build_model(model_name)

    # fit the model to training set
    model.fit(ds_popular_train)

    # evaluate on the biased test set (held-out part of the popular sample)
    results = evaluate(ds_popular_test, model, topk)
    mrr_biased.append(_per_user(results, 'recip_rank'))
    ndcg_biased.append(_per_user(results, 'ndcg'))

    print()
    print('-'*5, model_name, '-'*5)
    _report('Biased', mrr_biased[-1], ndcg_biased[-1])

    # evaluate on the unbiased test set (the unpopular sample)
    results = evaluate(ds_unpopular, model, topk)
    mrr_unbiased.append(_per_user(results, 'recip_rank'))
    ndcg_unbiased.append(_per_user(results, 'ndcg'))

    _report('UnBiased', mrr_unbiased[-1], ndcg_unbiased[-1])



----- random -----
	MRR@20  (Biased): 0.003912
	NDCG@20 (Biased): 0.000752
	MRR@20  (UnBiased): 0.003965
	NDCG@20 (UnBiased): 0.000718

----- popularity -----
	MRR@20  (Biased): 0.256568
	NDCG@20 (Biased): 0.098099
	MRR@20  (UnBiased): 0.000000
	NDCG@20 (UnBiased): 0.000000

----- explicit -----
	MRR@20  (Biased): 0.179767
	NDCG@20 (Biased): 0.071821
	MRR@20  (UnBiased): 0.022733
	NDCG@20 (UnBiased): 0.008036

----- bpr -----
	MRR@20  (Biased): 0.255316
	NDCG@20 (Biased): 0.099809
	MRR@20  (UnBiased): 0.000088
	NDCG@20 (UnBiased): 0.000006

----- warp -----
	MRR@20  (Biased): 0.262649
	NDCG@20 (Biased): 0.106603
	MRR@20  (UnBiased): 0.030897
	NDCG@20 (UnBiased): 0.005154
