In [1]:
import numpy as np
import pandas as pd

from polara import get_movielens_data
from polara.lib.earlystopping import early_stopping_callback

from dataprep import split_data, assign_positions
from model import seqtf_model_build, tf_scoring
from evaluation import downvote_seen_items, model_evaluate, topn_recommendations

# Preparing Data

In [2]:
# Read the MovieLens ratings from 'ml-10m.zip'. include_time=True keeps the
# `timestamp` column that shows up in the preview below.
# NOTE(review): the timestamps are presumably what the later time-based
# split_data(..., time_q=...) call operates on — confirm in dataprep.
mldata = get_movielens_data('ml-10m.zip', include_time=True)
mldata.head()

Unnamed: 0,userid,movieid,rating,timestamp
0,1,122,5.0,838985046
1,1,185,5.0,838983525
2,1,231,5.0,838983392
3,1,292,5.0,838983421
4,1,316,5.0,838983392


In [3]:
# Split the ratings into train / validation / test packs. Later cells index
# each pack: pack[0] is the interaction frame passed to assign_positions,
# train_pack[1] is used as the user/item id index, and valid_pack[1] is passed
# to the evaluator as the holdout.
# NOTE(review): time_q=0.95 is presumably a timestamp quantile that sets the
# split point — confirm against dataprep.split_data.
train_pack, valid_pack, test_pack = split_data(mldata, time_q=0.95)

Filtered 23202 invalid observations.


In [4]:
# Number of positions; also recorded in data_description and in the model config.
n_pos = 200

# Run assign_positions over the interaction frame (element 0) of each pack,
# in train -> validation -> test order. The previews in the next cells show
# the resulting `pos` column.
training_data, testset_valid, testset = (
    assign_positions(pack[0], n_pos)
    for pack in (train_pack, valid_pack, test_pack)
)

In [5]:
# Preview the training interactions, now carrying the `pos` column.
training_data.head()

Unnamed: 0,userid,movieid,rating,timestamp,pos
18,0,582,5.0,838983339,178
5,0,325,5.0,838983392,179
2,0,228,5.0,838983392,180
4,0,313,5.0,838983392,181
3,0,289,5.0,838983421,182


In [6]:
# Preview the validation test set (same columns as the training frame).
testset_valid.head()

Unnamed: 0,userid,movieid,rating,timestamp,pos
5094,48,3662,4.0,1215134807,181
5089,48,515,3.5,1215134907,182
5093,48,2723,4.0,1215134942,183
5088,48,264,5.0,1215134949,184
5091,48,2363,5.0,1215134977,185


In [7]:
# Preview the final test set. The first rows match testset_valid, but every
# `pos` value is one lower.
# NOTE(review): presumably the test history is one interaction longer per user
# (it would include the validation holdout) — confirm in dataprep.
testset.head()

Unnamed: 0,userid,movieid,rating,timestamp,pos
5094,48,3662,4.0,1215134807,180
5089,48,515,3.5,1215134907,181
5093,48,2723,4.0,1215134942,182
5088,48,264,5.0,1215134949,183
5091,48,2363,5.0,1215134977,184


# Model

In [8]:
# Element 1 of the training pack is the id index; it is looked up by
# 'userid' / 'itemid' below.
data_index = train_pack[1]

# Field names and dimension sizes handed to the model and evaluation helpers.
data_description = {
    'userid': data_index['userid'].name,
    'itemid': data_index['itemid'].name,
    'positionid': 'pos',  # column produced by assign_positions
    'n_users': len(data_index['userid']),
    'n_items': len(data_index['itemid']),
    'n_pos': n_pos,
}
data_description

{'userid': 'userid',
 'itemid': 'movieid',
 'positionid': 'pos',
 'n_users': 64680,
 'n_items': 9857,
 'n_pos': 200}

In [9]:
# Base model configuration; the tuning cells override individual entries.
# NOTE(review): `mlrank` is presumably the multilinear rank per mode and
# `update_order` the order in which modes are updated — confirm in
# model.seqtf_model_build.
config = dict(
    mlrank=(30, 25, 5),
    n_pos=n_pos,
    max_iters=4,
    update_order=(2, 1, 0),
    growth_tol=1e-6,
    seed=42,
)

In [10]:
# Baseline build with the base config (max_iters=4, no iteration callback).
# The log below reports the growth of the core at every step.
# NOTE(review): tf_params_old is not referenced again in the visible cells.
tf_params_old = seqtf_model_build(config, training_data, data_description)

Step 0 growth of the core: 1.0
Step 1 growth of the core: 0.2592736187067988
Step 2 growth of the core: 0.033582400237606905
Step 3 growth of the core: 0.008429917653930762


# Tuning

In [11]:
def tf_evaluator(testset, holdout, data_description, core_projected, topn=10):
    """Build a per-iteration scoring callback for a fixed evaluation setup.

    Parameters
    ----------
    testset
        Test interactions; forwarded to ``tf_scoring`` and
        ``downvote_seen_items``.
    holdout
        Holdout data; forwarded to ``model_evaluate``.
    data_description
        Data description; forwarded unchanged to every helper.
    core_projected
        Forwarded to ``tf_scoring`` as the ``core_projected`` keyword.
    topn
        Recommendation list length passed to ``topn_recommendations``.

    Returns
    -------
    callable
        ``iter_evaluate(core_factors, factors)``, which returns the first
        value produced by ``model_evaluate`` (called ``hr`` in this notebook,
        presumably the hit rate).
    """
    def iter_evaluate(core_factors, factors):
        # The scoring helper receives the factors first and the core last.
        params_with_core = (*factors, core_factors)
        item_scores = tf_scoring(
            params_with_core,
            testset,
            data_description,
            core_projected=core_projected,
        )
        # Return value is ignored; presumably this adjusts item_scores in place.
        downvote_seen_items(item_scores, testset, data_description)
        recommended = topn_recommendations(item_scores, topn=topn)
        metrics = model_evaluate(recommended, holdout, data_description)
        # Only the leading metric is reported; the rest are discarded.
        hit_rate, *_ = metrics
        return hit_rate

    return iter_evaluate

In [13]:
# Validation-set evaluator with core_projected=True.
evaluator = tf_evaluator(
    testset=testset_valid,
    holdout=valid_pack[1],
    data_description=data_description,
    core_projected=True,
)

# Base config with the iteration cap raised to 15. The early-stopping callback
# scores every iteration with `evaluator` and is configured with max_fails=3.
params = seqtf_model_build(
    {**config, 'max_iters': 15},
    training_data,
    data_description,
    iter_callback=early_stopping_callback(evaluator, max_fails=3),
)

Step 0 metric score: 0.03962225415725724
Step 1 metric score: 0.042291110654896324
Step 2 metric score: 0.041469924040238144
Step 3 metric score: 0.0412646273865736
Step 4 metric score: 0.04290700061588996
Step 5 metric score: 0.04516526380619996
Step 6 metric score: 0.04557585711352905
Step 7 metric score: 0.04495996715253541
Step 8 metric score: 0.04516526380619996
Step 9 metric score: 0.045370560459864505
Metric no longer improves. Best score 0.04557585711352905, attained in 7 iterations.


In [14]:
# Validation-set evaluator with core_projected=False.
evaluator = tf_evaluator(
    testset=testset_valid,
    holdout=valid_pack[1],
    data_description=data_description,
    core_projected=False,
)

# Base config with the iteration cap raised to 15. The early-stopping callback
# scores every iteration with `evaluator` and is configured with max_fails=3.
# Note: this rebinds `params`, replacing any value it held before.
params = seqtf_model_build(
    {**config, 'max_iters': 15},
    training_data,
    data_description,
    iter_callback=early_stopping_callback(evaluator, max_fails=3),
)

Step 0 metric score: 0.033668651200985424
Step 1 metric score: 0.047834120303839045
Step 2 metric score: 0.04598645042085814
Step 3 metric score: 0.04557585711352905
Step 4 metric score: 0.04660234038185178
Metric no longer improves. Best score 0.047834120303839045, attained in 2 iterations.


In [15]:
# Validation-set evaluator with core_projected=False.
evaluator = tf_evaluator(
    testset=testset_valid,
    holdout=valid_pack[1],
    data_description=data_description,
    core_projected=False,
)

# Base config with the iteration cap raised to 15 and update_order changed
# from (2, 1, 0) to (0, 1, 2). The early-stopping callback scores every
# iteration with `evaluator` and is configured with max_fails=3.
# Note: this rebinds `params`, replacing any value it held before.
params = seqtf_model_build(
    {**config, 'max_iters': 15, 'update_order': (0, 1, 2)},
    training_data,
    data_description,
    iter_callback=early_stopping_callback(evaluator, max_fails=3),
)

Step 0 metric score: 0.02340381851775816
Step 1 metric score: 0.04434407719154178
Step 2 metric score: 0.05132416341613632
Step 3 metric score: 0.04824471361116814
Step 4 metric score: 0.04803941695750359
Step 5 metric score: 0.04639704372818723
Metric no longer improves. Best score 0.05132416341613632, attained in 3 iterations.
