In [1]:
import math
import sys

import numpy as np

sys.path.append('../')
sys.path.append('../../')
from run_utils import get_env_dataset, run_env_experiment
from run_utils import ModelTuner
from reclab.environments import Topics
from env_defaults import TOPICS_STATIC, get_len_trial
from reclab.recommenders import SLIM


  import tqdm.autonotebook
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# ====Step 4====
# S3 storage parameters (forwarded to ModelTuner and run_env_experiment).
bucket_name = 'recsys-eval'
data_dir = 'master'
overwrite = True

# Experiment setup: one trial per seed, with seeds 0 .. n_trials - 1.
n_trials = 10
trial_seeds = list(range(n_trials))
len_trial = get_len_trial(TOPICS_STATIC)

# Environment setup: name and constructor arguments come from TOPICS_STATIC.
environment_name = TOPICS_STATIC['name']
env = Topics(**TOPICS_STATIC['params'], **TOPICS_STATIC['optional_params'])

# Recommender setup
recommender_name = 'SLIM'
recommender_class = SLIM


In [3]:
# ====Step 5====
# Dataset obtained from the environment; it is the first argument handed to
# ModelTuner in the tuning setup cell.
starting_data = get_env_dataset(env)


In [4]:
# ====Step 6====
# Recommender tuning setup.
# Each evaluated setting is scored over n_fold folds (the "Fold k/5" log lines).
n_fold = 5
# Baseline SLIM hyperparameters handed to the tuner.
default_params = {'alpha': 0.05, 'l1_ratio': 2e-5}
tuner = ModelTuner(
    starting_data,
    default_params,
    recommender_class,
    n_fold=n_fold,
    verbose=True,
    bucket_name=bucket_name,
    data_dir=data_dir,
    environment_name=environment_name,
    recommender_name=recommender_name,
    overwrite=overwrite,
)


In [8]:

# Hyperparameter search, pass 1: a coarse 4 x 4 grid over alpha and l1_ratio.
l1_ratios = np.linspace(1e-6, 1e-4, num=4).tolist()
alphas = np.linspace(0.02, 0.1, num=4).tolist()
# Keep the call as the last expression so the results table is displayed.
tuner.evaluate_grid(alpha=alphas, l1_ratio=l1_ratios)


Evaluating: {'alpha': 0.02, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.247773924417217, rmse=3.0410152785570177
Fold 2/5, mse=9.30132945016417, rmse=3.0498081005473394
Fold 3/5, mse=9.26481610409117, rmse=3.0438160430767116
Fold 4/5, mse=9.31306648352771, rmse=3.0517317187996245
Fold 5/5, mse=9.162507455209285, rmse=3.026963405000015
Average MSE: 9.25789868348191
Evaluating: {'alpha': 0.02, 'l1_ratio': 3.4e-05}
Fold 1/5, mse=9.247792314327246, rmse=3.0410183022019526
Fold 2/5, mse=9.301347828722404, rmse=3.049811113613826
Fold 3/5, mse=9.264834477979718, rmse=3.0438190613076395
Fold 4/5, mse=9.313084838942206, rmse=3.051734726174968
Fold 5/5, mse=9.162525785051656, rmse=3.0269664327593158
Average MSE: 9.257917049004647
Evaluating: {'alpha': 0.02, 'l1_ratio': 6.7e-05}
Fold 1/5, mse=9.24781070958662, rmse=3.0410213267234116
Fold 2/5, mse=9.301366199411332, rmse=3.0498141253872064
Fold 3/5, mse=9.26485285252096, rmse=3.04382207964279
Fold 4/5, mse=9.313103192740737, rmse=3.0517377332825864
Fold 5

Unnamed: 0,alpha,l1_ratio,mse,average_mse
0,0.02,1e-06,"[9.247773924417217, 9.30132945016417, 9.264816...",9.257899
1,0.02,3.4e-05,"[9.247792314327246, 9.301347828722404, 9.26483...",9.257917
2,0.02,6.7e-05,"[9.24781070958662, 9.301366199411332, 9.264852...",9.257935
3,0.02,0.0001,"[9.247829104487469, 9.301384574621917, 9.26487...",9.257954
4,0.046667,1e-06,"[9.258102070824568, 9.311843613940505, 9.27513...",9.268604
5,0.046667,3.4e-05,"[9.258144884712976, 9.31188643049762, 9.275181...",9.268646
6,0.046667,6.7e-05,"[9.258187703465172, 9.311929242043744, 9.27522...",9.268689
7,0.046667,0.0001,"[9.25823052106453, 9.311972055341126, 9.275267...",9.268732
8,0.073333,1e-06,"[9.267863616904918, 9.321843395543704, 9.28497...",9.278772
9,0.073333,3.4e-05,"[9.267930875398251, 9.321910599320061, 9.28504...",9.278839


In [9]:

# Hyperparameter search, pass 2: move both ranges below the coarse grid,
# whose smallest alpha gave the lowest average MSE.
l1_ratios = np.linspace(5e-7, 5e-6, num=4).tolist()
alphas = np.linspace(0.005, 0.015, num=4).tolist()
# Keep the call as the last expression so the results table is displayed.
tuner.evaluate_grid(alpha=alphas, l1_ratio=l1_ratios)


Evaluating: {'alpha': 0.005, 'l1_ratio': 5e-07}
Fold 1/5, mse=9.2417033227754, rmse=3.0400169938300348
Fold 2/5, mse=9.29510101463037, rmse=3.048786810295264
Fold 3/5, mse=9.258738959754561, rmse=3.0428176021172484
Fold 4/5, mse=9.306347566181165, rmse=3.0506306833474883
Fold 5/5, mse=9.156157955547929, rmse=3.0259143999042553
Average MSE: 9.251609763777884
Evaluating: {'alpha': 0.005, 'l1_ratio': 2e-06}
Fold 1/5, mse=9.241703532129188, rmse=3.0400170282630308
Fold 2/5, mse=9.29510122374383, rmse=3.0487868445898
Fold 3/5, mse=9.258739168396987, rmse=3.0428176364016606
Fold 4/5, mse=9.306347774843655, rmse=3.050630717547382
Fold 5/5, mse=9.156158164219022, rmse=3.025914434384922
Average MSE: 9.251609972666538
Evaluating: {'alpha': 0.005, 'l1_ratio': 3.5e-06}
Fold 1/5, mse=9.241703741480315, rmse=3.0400170626955885
Fold 2/5, mse=9.295101432896617, rmse=3.0487868788907857
Fold 3/5, mse=9.258739377093816, rmse=3.0428176706950114
Fold 4/5, mse=9.306347983562528, rmse=3.0506307517565165
Fold

Unnamed: 0,alpha,l1_ratio,mse,average_mse
0,0.005,5e-07,"[9.2417033227754, 9.29510101463037, 9.25873895...",9.25161
1,0.005,2e-06,"[9.241703532129188, 9.29510122374383, 9.258739...",9.25161
2,0.005,3.5e-06,"[9.241703741480315, 9.295101432896617, 9.25873...",9.25161
3,0.005,5e-06,"[9.241703950759767, 9.29510164192735, 9.258739...",9.25161
4,0.008333,5e-07,"[9.24306802749385, 9.296503426885131, 9.260115...",9.253027
5,0.008333,2e-06,"[9.243068376547654, 9.296503775142309, 9.26011...",9.253027
6,0.008333,3.5e-06,"[9.24306872541041, 9.296504123375264, 9.260116...",9.253028
7,0.008333,5e-06,"[9.243069074281317, 9.296504471739812, 9.26011...",9.253028
8,0.011667,5e-07,"[9.24442370672361, 9.297898237501, 9.261474236...",9.254432
9,0.011667,2e-06,"[9.244424194816384, 9.297898724757125, 9.26147...",9.254433


In [11]:

# Pass 3: sweep alpha just below the previous minimum (0.005).
# l1_ratio is pinned to a single value for this sweep.
l1_ratios = [1e-6]
alphas = np.linspace(0.001, 0.0049, num=4).tolist()
tuner.evaluate_grid(alpha=alphas, l1_ratio=l1_ratios)


Evaluating: {'alpha': 0.001, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.240052184737683, rmse=3.0397454144611658
Fold 2/5, mse=9.293405816945802, rmse=3.0485087857747435
Fold 3/5, mse=9.257064422284596, rmse=3.042542427359822
Fold 4/5, mse=9.304504934503829, rmse=3.0503286600797344
Fold 5/5, mse=9.154439759772108, rmse=3.025630473103434
Average MSE: 9.249893423648803
Evaluating: {'alpha': 0.0023, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.240590901062818, rmse=3.0398340252492106
Fold 2/5, mse=9.293958289041871, rmse=3.048599397927165
Fold 3/5, mse=9.25761099825093, rmse=3.0426322482763064
Fold 4/5, mse=9.305106597606773, rmse=3.050427281153703
Fold 5/5, mse=9.154998530888143, rmse=3.0257228113110664
Average MSE: 9.250453063370108
Evaluating: {'alpha': 0.0036, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.241127368682314, rmse=3.039922263592001
Fold 2/5, mse=9.294509451618739, rmse=3.048689792618911
Fold 3/5, mse=9.258155810389821, rmse=3.042721776697604
Fold 4/5, mse=9.305705318012997, rmse=3.05052541671316
Fold

Unnamed: 0,alpha,l1_ratio,mse,average_mse
0,0.001,1e-06,"[9.240052184737683, 9.293405816945802, 9.25706...",9.249893
1,0.0023,1e-06,"[9.240590901062818, 9.293958289041871, 9.25761...",9.250453
2,0.0036,1e-06,"[9.241127368682314, 9.294509451618739, 9.25815...",9.251011
3,0.0049,1e-06,"[9.241662332884822, 9.29505889938447, 9.258697...",9.251567


In [12]:

# Pass 4: keep shrinking alpha (1e-5 .. 9e-4) with l1_ratio still pinned.
l1_ratios = [1e-6]
alphas = np.linspace(0.00001, 0.0009, num=4).tolist()
tuner.evaluate_grid(alpha=alphas, l1_ratio=l1_ratios)


Evaluating: {'alpha': 1e-05, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.239640552111483, rmse=3.0396777053022386
Fold 2/5, mse=9.292983645925455, rmse=3.048439542770277
Fold 3/5, mse=9.25664721493093, rmse=3.0424738642971003
Fold 4/5, mse=9.304045783444492, rmse=3.0502533965958456
Fold 5/5, mse=9.154012932077716, rmse=3.025559936950137
Average MSE: 9.249466025698016
Evaluating: {'alpha': 0.0003066666666666667, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.239764068299744, rmse=3.039698022550882
Fold 2/5, mse=9.29311031571509, rmse=3.0484603188683774
Fold 3/5, mse=9.256772308611422, rmse=3.0424944221167314
Fold 4/5, mse=9.304183479322434, rmse=3.0502759677318436
Fold 5/5, mse=9.154141066613269, rmse=3.025581112218489
Average MSE: 9.249594247712391
Evaluating: {'alpha': 0.0006033333333333333, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.239887422159683, rmse=3.0397183129625156
Fold 2/5, mse=9.293236859120382, rmse=3.0484810740958164
Fold 3/5, mse=9.2568973585757, rmse=3.042514972613233
Fold 4/5, mse=9.30432108095301

Unnamed: 0,alpha,l1_ratio,mse,average_mse
0,1e-05,1e-06,"[9.239640552111483, 9.292983645925455, 9.25664...",9.249466
1,0.000307,1e-06,"[9.239764068299744, 9.29311031571509, 9.256772...",9.249594
2,0.000603,1e-06,"[9.239887422159683, 9.293236859120382, 9.25689...",9.249722
3,0.0009,1e-06,"[9.240010646804757, 9.293363235646122, 9.25702...",9.24985


In [15]:

# Pass 5: near-zero alpha (1e-10 .. 1e-6) with l1_ratio still pinned.
l1_ratios = [1e-6]
alphas = np.linspace(1e-10, 1e-6, num=4).tolist()
tuner.evaluate_grid(alpha=alphas, l1_ratio=l1_ratios)


Evaluating: {'alpha': 1e-10, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.239636383545779, rmse=3.039677019610106
Fold 2/5, mse=9.292979376019002, rmse=3.0484388424272186
Fold 3/5, mse=9.256642997363626, rmse=3.042473171182225
Fold 4/5, mse=9.304041141077382, rmse=3.050252635615185
Fold 5/5, mse=9.154008610640988, rmse=3.025559222795182
Average MSE: 9.249461701729356
Evaluating: {'alpha': 3.334e-07, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.23963652248157, rmse=3.0396770424638158
Fold 2/5, mse=9.29297951833548, rmse=3.0484388657697368
Fold 3/5, mse=9.256643137936935, rmse=3.042473194284041
Fold 4/5, mse=9.304041295820024, rmse=3.05025266098073
Fold 5/5, mse=9.154008754674466, rmse=3.0255592465979686
Average MSE: 9.249461845849694
Evaluating: {'alpha': 6.667e-07, 'l1_ratio': 1e-06}
Fold 1/5, mse=9.23963666141741, rmse=3.0396770653175333
Fold 2/5, mse=9.292979660652003, rmse=3.048438889112262
Fold 3/5, mse=9.256643278510294, rmse=3.0424732173858646
Fold 4/5, mse=9.304041450562682, rmse=3.0502526863462776

Unnamed: 0,alpha,l1_ratio,mse,average_mse
0,1e-10,1e-06,"[9.239636383545779, 9.292979376019002, 9.25664...",9.249462
1,3.334e-07,1e-06,"[9.23963652248157, 9.29297951833548, 9.2566431...",9.249462
2,6.667e-07,1e-06,"[9.23963666141741, 9.292979660652003, 9.256643...",9.249462
3,1e-06,1e-06,"[9.2396368003533, 9.292979802968569, 9.2566434...",9.249462


In [19]:

# Zeroing in on the best range.
# Sweeps a `lam` parameter over 10 evenly spaced values in [2000, 9000].
# NOTE(review): `lam` is not one of the parameters in `default_params`
# (alpha, l1_ratio), and the recorded average MSEs for this sweep (~10.2) are
# on a different scale from the alpha/l1_ratio grids (~9.25). This looks like
# a leftover from tuning another recommender -- confirm it applies to SLIM.
lams = np.linspace(2000, 9000, 10).tolist()
tuner.evaluate_grid(lam=lams)


Evaluating: {'lam': 2000.0}
Fold 1/5, mse=10.24978175306962, rmse=3.2015280340908494
Fold 2/5, mse=10.226605118346296, rmse=3.1979063648497115
Fold 3/5, mse=10.30467530386892, rmse=3.210089609943766
Fold 4/5, mse=10.178588190133508, rmse=3.190389974616506
Fold 5/5, mse=10.177778602037092, rmse=3.190263092918371
Average MSE: 10.227485793491088
Evaluating: {'lam': 2777.777777777778}
Fold 1/5, mse=10.221532220194838, rmse=3.197113107194495
Fold 2/5, mse=10.198427814666726, rmse=3.1934977398875244
Fold 3/5, mse=10.27789258315675, rmse=3.205915248904242
Fold 4/5, mse=10.150104971136074, rmse=3.185922938668805
Fold 5/5, mse=10.149686751964996, rmse=3.1858573025113657
Average MSE: 10.199528868223876
Evaluating: {'lam': 3555.5555555555557}
Fold 1/5, mse=10.20786390255574, rmse=3.194974789032887
Fold 2/5, mse=10.184681208309755, rmse=3.1913447335425476
Fold 3/5, mse=10.265270625154546, rmse=3.2039461020988704
Fold 4/5, mse=10.136115688688971, rmse=3.18372669817762
Fold 5/5, mse=10.1360992183958

Unnamed: 0,lam,mse,average_mse
0,2000.0,"[10.24978175306962, 10.226605118346296, 10.304...",10.227486
1,2777.777778,"[10.221532220194838, 10.198427814666726, 10.27...",10.199529
2,3555.555556,"[10.20786390255574, 10.184681208309755, 10.265...",10.186006
3,4333.333333,"[10.202035674030457, 10.178725394735626, 10.26...",10.180256
4,5111.111111,"[10.200826454127945, 10.177375116957913, 10.25...",10.179089
5,5888.888889,"[10.202502531986418, 10.178910456580315, 10.26...",10.180789
6,6666.666667,"[10.20605088960224, 10.182323777017672, 10.265...",10.18435
7,7444.444444,"[10.210842954026532, 10.186988415745075, 10.27...",10.189148
8,8222.222222,"[10.216470498561877, 10.192496564310169, 10.27...",10.194778
9,9000.0,"[10.222658842239008, 10.19857331720103, 10.283...",10.200965


In [None]:

# Set regularization to 5000.
# NOTE(review): `lam` is not read by the Step 7 cell and is not one of the
# parameters in `default_params` (alpha, l1_ratio); possibly a leftover from
# another recommender's notebook -- confirm before relying on it.
lam = 5000


In [None]:

# ====Step 7====
# Build the recommender from the hyperparameters tuned in this notebook
# (alpha, l1_ratio). `shrinkage` and `neighborhood_size` are never defined
# here, so using them would raise a NameError on a fresh Run All.
# The values below are the setting with the lowest recorded average MSE
# (alpha=1e-10, l1_ratio=1e-6 -> 9.249462). The last alpha grid is flat to
# within ~1e-6, so any alpha <= 1e-6 scored the same -- TODO confirm the
# final choice before launching the full experiment.
tuned_params = dict(alpha=1e-10, l1_ratio=1e-6)
recommender = recommender_class(**tuned_params)

# Launch one experiment per trial seed; every call shares the same
# environment, recommender instance and storage settings.
for seed in trial_seeds:
    run_env_experiment(
            [env],
            [recommender],
            [seed],
            len_trial,
            environment_names=[environment_name],
            recommender_names=[recommender_name],
            bucket_name=bucket_name,
            data_dir=data_dir,
            overwrite=overwrite)
