In [9]:
import math
import sys

import numpy as np

sys.path.append('../')
sys.path.append('../../')
from run_utils import get_env_dataset, run_env_experiment
from run_utils import ModelTuner
from reclab.environments import Topics
from env_defaults import TOPICS_STATIC, get_len_trial
from reclab.recommenders import EASE


In [10]:
# ====Step 4====
# S3 destination for experiment artifacts.
bucket_name = 'recsys-eval'
data_dir = 'master'
overwrite = True

# Trials: one run per seed, with seeds 0 .. n_trials - 1.
n_trials = 10
trial_seeds = list(range(n_trials))
len_trial = get_len_trial(TOPICS_STATIC)

# Environment under test, built from the shared TOPICS_STATIC defaults.
environment_name = TOPICS_STATIC['name']
env = Topics(**TOPICS_STATIC['params'], **TOPICS_STATIC['optional_params'])

# Recommender under test.
recommender_name = 'EASE'
recommender_class = EASE


In [11]:
# ====Step 5====
# Fetch the dataset for `env`; it is passed to ModelTuner below as the data to tune on.
# NOTE(review): presumably the environment's initial ratings -- confirm in run_utils.
starting_data = get_env_dataset(env)


In [12]:
# ====Step 6====
# Build the tuner used for the hyperparameter grid searches below.
n_fold = 5
default_params = {}  # empty: no default parameters are supplied
tuner = ModelTuner(
    starting_data,
    default_params,
    recommender_class,
    n_fold=n_fold,
    verbose=True,
    bucket_name=bucket_name,
    data_dir=data_dir,
    environment_name=environment_name,
    recommender_name=recommender_name,
    overwrite=overwrite,
)


In [13]:

# Tune the hyperparameter.
# First pass: a coarse, evenly spaced grid of 10 values for lam in [10, 1000].
lams = np.linspace(start=10, stop=1000, num=10).tolist()
tuner.evaluate_grid(lam=lams)


Evaluating: {'lam': 10.0}
Fold 1/5, mse=10.787929042767392, rmse=3.284498293920609
Fold 2/5, mse=10.760066643355508, rmse=3.280254051648364
Fold 3/5, mse=10.850799858224315, rmse=3.294055229989976
Fold 4/5, mse=10.711384523733626, rmse=3.2728251593590554
Fold 5/5, mse=10.715816593934576, rmse=3.273502190916416
Average MSE: 10.765199332403084
Evaluating: {'lam': 120.0}
Fold 1/5, mse=10.644237549729038, rmse=3.262550773509745
Fold 2/5, mse=10.615216649353622, rmse=3.2581001595030226
Fold 3/5, mse=10.697777993557676, rmse=3.2707457855292996
Fold 4/5, mse=10.567095160958162, rmse=3.250706870967938
Fold 5/5, mse=10.570305113643144, rmse=3.251200564967216
Average MSE: 10.61892649344833
Evaluating: {'lam': 230.0}
Fold 1/5, mse=10.563941762894924, rmse=3.2502218021075
Fold 2/5, mse=10.53586742550184, rmse=3.2459000948121988
Fold 3/5, mse=10.614979435590707, rmse=3.2580637556055754
Fold 4/5, mse=10.487944206309233, rmse=3.238509565573218
Fold 5/5, mse=10.489976621931495, rmse=3.238823339105036


Unnamed: 0,lam,mse,average_mse
0,10.0,"[10.787929042767392, 10.760066643355508, 10.85...",10.765199
1,120.0,"[10.644237549729038, 10.615216649353622, 10.69...",10.618926
2,230.0,"[10.563941762894924, 10.53586742550184, 10.614...",10.538542
3,340.0,"[10.5072080584909, 10.480109329952885, 10.5575...",10.482125
4,450.0,"[10.463765368281436, 10.43747660628426, 10.513...",10.439063
5,560.0,"[10.429023593098869, 10.403381948240334, 10.47...",10.404682
6,670.0,"[10.400454828383657, 10.375327237981837, 10.45...",10.376433
7,780.0,"[10.376495996131432, 10.351777908114878, 10.42...",10.352753
8,890.0,"[10.356105745164946, 10.331715472842765, 10.40...",10.332602
9,1000.0,"[10.338552954993935, 10.31442637902544, 10.390...",10.315257


In [17]:

# Average MSE was still decreasing at lam=1000, so extend the grid upward.
lams = np.linspace(start=1000, stop=10000, num=3).tolist()
tuner.evaluate_grid(lam=lams)


Evaluating: {'lam': 1000.0}
Fold 1/5, mse=10.338552954993935, rmse=3.215362025494786
Fold 2/5, mse=10.31442637902544, rmse=3.211608067467984
Fold 3/5, mse=10.390508501307268, rmse=3.22343116900412
Fold 4/5, mse=10.266811005581715, rmse=3.20418648108716
Fold 5/5, mse=10.265983987038538, rmse=3.2040574256774077
Average MSE: 10.31525656558938
Evaluating: {'lam': 5500.0}
Fold 1/5, mse=10.201378895639092, rmse=3.193959751724979
Fold 2/5, mse=10.177856695570027, rmse=3.190275332251125
Fold 3/5, mse=10.260443872701574, rmse=3.2031927623390968
Fold 4/5, mse=10.128897220675041, rmse=3.182592845570266
Fold 5/5, mse=10.129701031473925, rmse=3.1827191254450846
Average MSE: 10.179655543211931
Evaluating: {'lam': 10000.0}
Fold 1/5, mse=10.23114019586625, rmse=3.198615356035522
Fold 2/5, mse=10.20692187275601, rmse=3.1948273619643377
Fold 3/5, mse=10.292038038960072, rmse=3.208120639714173
Fold 4/5, mse=10.157598242703115, rmse=3.1870987186943416
Fold 5/5, mse=10.159515292929592, rmse=3.1873994561287

Unnamed: 0,lam,mse,average_mse
0,1000.0,"[10.338552954993935, 10.31442637902544, 10.390...",10.315257
1,5500.0,"[10.201378895639092, 10.177856695570027, 10.26...",10.179656
2,10000.0,"[10.23114019586625, 10.20692187275601, 10.2920...",10.209443


In [18]:

# lam=10000 scored worse than lam=5500 in the previous grid; probe much larger
# values to check that the error keeps rising beyond that point.
lams = np.linspace(start=20000, stop=100000, num=3).tolist()
tuner.evaluate_grid(lam=lams)


Evaluating: {'lam': 20000.0}
Fold 1/5, mse=10.317823256018299, rmse=3.212136867572473
Fold 2/5, mse=10.292715750469743, rmse=3.208226262355843
Fold 3/5, mse=10.380292152644575, rmse=3.221846078360134
Fold 4/5, mse=10.243269126044659, rmse=3.2005107601826084
Fold 5/5, mse=10.246222965019971, rmse=3.2009721906039688
Average MSE: 10.29606465003945
Evaluating: {'lam': 60000.0}
Fold 1/5, mse=10.516105572883307, rmse=3.2428545408148213
Fold 2/5, mse=10.489844356636663, rmse=3.2388029203143347
Fold 3/5, mse=10.580097302708548, rmse=3.2527061506856945
Fold 4/5, mse=10.440586482661361, rmse=3.2311896389195978
Fold 5/5, mse=10.444528343304498, rmse=3.2317995518448384
Average MSE: 10.494232411638876
Evaluating: {'lam': 100000.0}
Fold 1/5, mse=10.601714276080898, rmse=3.256027376432959
Fold 2/5, mse=10.575109164942123, rmse=3.2519392929361586
Fold 3/5, mse=10.666078842444923, rmse=3.265896330633433
Fold 4/5, mse=10.525963449286413, rmse=3.244374122891257
Fold 5/5, mse=10.530146021732307, rmse=3.24

Unnamed: 0,lam,mse,average_mse
0,20000.0,"[10.317823256018299, 10.292715750469743, 10.38...",10.296065
1,60000.0,"[10.516105572883307, 10.489844356636663, 10.58...",10.494232
2,100000.0,"[10.601714276080898, 10.575109164942123, 10.66...",10.579802


In [19]:

# The best average MSE so far lies between lam=1000 and lam=10000;
# zero in on that range with a finer 10-point grid.
lams = np.linspace(start=2000, stop=9000, num=10).tolist()
tuner.evaluate_grid(lam=lams)


Evaluating: {'lam': 2000.0}
Fold 1/5, mse=10.24978175306962, rmse=3.2015280340908494
Fold 2/5, mse=10.226605118346296, rmse=3.1979063648497115
Fold 3/5, mse=10.30467530386892, rmse=3.210089609943766
Fold 4/5, mse=10.178588190133508, rmse=3.190389974616506
Fold 5/5, mse=10.177778602037092, rmse=3.190263092918371
Average MSE: 10.227485793491088
Evaluating: {'lam': 2777.777777777778}
Fold 1/5, mse=10.221532220194838, rmse=3.197113107194495
Fold 2/5, mse=10.198427814666726, rmse=3.1934977398875244
Fold 3/5, mse=10.27789258315675, rmse=3.205915248904242
Fold 4/5, mse=10.150104971136074, rmse=3.185922938668805
Fold 5/5, mse=10.149686751964996, rmse=3.1858573025113657
Average MSE: 10.199528868223876
Evaluating: {'lam': 3555.5555555555557}
Fold 1/5, mse=10.20786390255574, rmse=3.194974789032887
Fold 2/5, mse=10.184681208309755, rmse=3.1913447335425476
Fold 3/5, mse=10.265270625154546, rmse=3.2039461020988704
Fold 4/5, mse=10.136115688688971, rmse=3.18372669817762
Fold 5/5, mse=10.1360992183958

Unnamed: 0,lam,mse,average_mse
0,2000.0,"[10.24978175306962, 10.226605118346296, 10.304...",10.227486
1,2777.777778,"[10.221532220194838, 10.198427814666726, 10.27...",10.199529
2,3555.555556,"[10.20786390255574, 10.184681208309755, 10.265...",10.186006
3,4333.333333,"[10.202035674030457, 10.178725394735626, 10.26...",10.180256
4,5111.111111,"[10.200826454127945, 10.177375116957913, 10.25...",10.179089
5,5888.888889,"[10.202502531986418, 10.178910456580315, 10.26...",10.180789
6,6666.666667,"[10.20605088960224, 10.182323777017672, 10.265...",10.18435
7,7444.444444,"[10.210842954026532, 10.186988415745075, 10.27...",10.189148
8,8222.222222,"[10.216470498561877, 10.192496564310169, 10.27...",10.194778
9,9000.0,"[10.222658842239008, 10.19857331720103, 10.283...",10.200965


In [None]:

# Set regularization to 5000.
# The fine grid above reaches its lowest average MSE near lam ~ 5111 (10.179089)
# and changes only in the third decimal between ~4333 and ~5889, so
# presumably a round value in that range was chosen.
lam = 5000


In [None]:

# ====Step 7====
# Run the final experiment once per trial seed with the tuned recommender.
# The recommender is built from `lam`, the hyperparameter explored by the grid
# searches above. The previous call passed `shrinkage` and `neighborhood_size`,
# which are never defined in this notebook and raised a NameError.
recommender = recommender_class(lam=lam)
for seed in trial_seeds:
    run_env_experiment(
            [env],
            [recommender],
            [seed],
            len_trial,
            environment_names=[environment_name],
            recommender_names=[recommender_name],
            bucket_name=bucket_name,
            data_dir=data_dir,
            overwrite=overwrite)
