# User Selection Experiments

This notebook contains experiments for showing changes in recommender performance based on the frequency distribution of online users. We hypothesize that the length of a user's history should be positively correlated with the recommender's performance.

The functions below are modified versions of those in run_utils.py. I have modified them to store which users come online at each timestep. Also, at the last iteration, the recommender makes a recommendation to every user so we can compare the performance across all users.

In [24]:
import numpy as np
import sys

from run_utils import *
import datetime
import boto3

sys.path.append('../') 
from reclab.environments.topics import Topics
from reclab.recommenders import LibFM


In [3]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [26]:
def run_selection_trial(env,
              rec,
              len_trial,
              trial_seed,
              bucket=None,
              dir_name=None,
              overwrite=False):
    """Run one trial, recording which users were online at every step.

    Parameters
    ----------
    env : Environment
        The environment to use for this trial.
    rec : Recommender
        The recommender to use for this trial.
    len_trial : int
        The number of recommendation steps to run the trial for.
    trial_seed : int
        Used to seed the dynamics of the environment.
    bucket : s3.Bucket
        The S3 bucket to store the experiment results into. If this is None the results
        will not be saved in S3.
    dir_name : str
        The S3 directory to save the trial results into. Can be None if bucket is also None.
    overwrite : bool
        Whether to re-run the experiment and overwrite the trial's saved data in S3.

    Returns
    -------
    ratings : np.ndarray
        The array of all ratings made by users. ratings[i, j] is the rating
        made on round i by the j-th online user on the item recommended to them.
        When the number of online users differs between rounds this is a 1-D
        object array whose i-th entry holds the ratings of round i.
    predictions : np.ndarray
        The array of all predictions made by the recommender. predictions[i, j] is
        the prediction the recommender made on round i for the item recommended to
        the j-th online user. If the recommender does not predict ratings then each
        element is set to np.nan. Ragged rounds are stored like ``ratings``.
    dense_ratings : np.ndarray
        The array of all dense ratings across each step. dense_ratings[i] is the
        flattened array of env.dense_ratings on round i, clipped to [1, 5].
    dense_predictions : np.ndarray
        The array of all dense predictions across each step. dense_predictions[i] is the
        array of all predictions on round i for each user-item pair.
    online_users : list
        online_users[i] is whatever env.online_users() returned on round i.

    Notes
    -----
    When cached results are loaded from S3 the function returns
    ``results[1:-1]`` of s3_load_trial, which may not include ``online_users``.

    """
    if not overwrite and s3_dir_exists(bucket, dir_name):
        print('Loading past results from S3.')
        results = s3_load_trial(bucket, dir_name)
        # NOTE(review): this slice was written for the original run_utils layout;
        # confirm that s3_load_trial also returns the saved online users, otherwise
        # the cached path returns fewer values than a fresh run.
        return results[1:-1]

    # First generate the items and users to bootstrap the dataset.
    env.seed((INIT_SEED, trial_seed))
    items, users, ratings = env.reset()
    rec.reset(items, users, ratings)

    all_ratings = []
    all_predictions = []
    all_dense_ratings = []
    all_dense_predictions = []
    all_online_users = []
    # NOTE(review): the initial snapshot is pickled bytes while later snapshots are
    # deep copies of the environment; kept as-is to preserve the saved format.
    all_env_snapshots = [pickle.dumps(env)]

    # Every (user, item) pair with an empty context, used to query dense predictions.
    user_item = [(user_id, item_id, np.zeros(0))
                 for user_id in range(len(env.users))
                 for item_id in range(len(env.items))]

    # Now recommend items to users.
    for step in tqdm.autonotebook.tqdm(range(len_trial)):
        if step + 2 == len_trial:
            # Raise the rating frequency one step early; presumably the environment
            # picks the next round's online users during step(), so that every user
            # is online on the final round -- TODO confirm against the environment.
            env._rating_frequency = 1

        online_users = env.online_users()

        recommendations, predictions = rec.recommend(online_users, num_recommendations=1)
        recommendations = recommendations.flatten()
        # Dense ratings must be read before env.step() advances the environment.
        dense_ratings = np.clip(env.dense_ratings.flatten(), 1, 5)
        items, users, ratings, _ = env.step(recommendations)

        # Account for the case where the recommender doesn't predict ratings.
        if predictions is None:
            # ratings is a dict, so build one NaN per rating explicitly instead of
            # calling np.ones_like on the dict (which yields a single scalar).
            predictions = np.full(len(ratings), np.nan)
            dense_predictions = np.full(dense_ratings.shape, np.nan)
        else:
            predictions = predictions.flatten()
            dense_predictions = rec.predict(user_item)

        # Save all relevant info.
        all_ratings.append([rating for rating, _ in ratings.values()])
        all_predictions.append(predictions)
        all_dense_ratings.append(dense_ratings)
        all_dense_predictions.append(dense_predictions)
        all_env_snapshots.append(copy.deepcopy(env))
        all_online_users.append(online_users)

        rec.update(users, items, ratings)

    # Convert lists to numpy arrays, tolerating rounds with differing user counts.
    all_ratings = _stack_steps(all_ratings)
    all_predictions = _stack_steps(all_predictions)
    all_dense_ratings = _stack_steps(all_dense_ratings)
    all_dense_predictions = _stack_steps(all_dense_predictions)

    # Save content to S3 if needed.
    if bucket is not None:
        print('Saving results to S3.')
        s3_save_trial(bucket,
                      dir_name,
                      env.name,
                      rec.name,
                      rec.hyperparameters,
                      all_ratings,
                      all_predictions,
                      all_dense_ratings,
                      all_dense_predictions,
                      all_env_snapshots,
                      online_users=all_online_users)

    # TODO: We might want to return the env snapshots too.
    return all_ratings, all_predictions, all_dense_ratings, all_dense_predictions, all_online_users


def _stack_steps(rows):
    """Stack per-step rows into an array, using an object array when rows are ragged."""
    if len({np.shape(row) for row in rows}) <= 1:
        # All rows share a shape (or there are none): a regular array is safe.
        return np.array(rows)
    # Rows differ in length (e.g. a different number of online users per round).
    # np.array() rejects such input on recent NumPy, so fill an object array by hand.
    ragged = np.empty(len(rows), dtype=object)
    for index, row in enumerate(rows):
        ragged[index] = np.asarray(row)
    return ragged

In [27]:
def s3_save_trial(bucket,
                  dir_name,
                  env_name,
                  rec_name,
                  rec_hyperparameters,
                  ratings,
                  predictions,
                  dense_ratings,
                  dense_predictions,
                  env_snapshots,
                  online_users=None
                 ):
    """Save a trial in s3 within the given directory.

    Parameters
    ----------
    bucket : s3.Bucket
        The bucket handed to serialize_and_put for every artifact.
    dir_name : str
        The directory handed to serialize_and_put for every artifact.
    env_name : str
        Stored under the 'environment' key of the 'info' record.
    rec_name : str
        Stored under the 'recommender' key of the 'info' record.
    rec_hyperparameters : object
        Stored as 'rec_hyperparameters' with use_json=True.
    ratings, predictions, dense_ratings, dense_predictions, env_snapshots : object
        Trial outputs, each stored under its own name.
    online_users : object, optional
        The online users of every step. It is optional so that callers using the
        ten-argument form keep working; when None, no 'online_users' artifact is written.

    """
    info = {
        # Local wall-clock time, to the minute.
        'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
        'environment': env_name,
        'git branch': git_branch(),
        'git hash': git_hash(),
        'git username': git_username(),
        'recommender': rec_name,
    }
    serialize_and_put(bucket, dir_name, 'info', info, use_json=True)
    serialize_and_put(bucket, dir_name, 'rec_hyperparameters', rec_hyperparameters, use_json=True)

    # The remaining artifacts use serialize_and_put's default serialization.
    artifacts = (
        ('ratings', ratings),
        ('predictions', predictions),
        ('dense_ratings', dense_ratings),
        ('dense_predictions', dense_predictions),
        ('env_snapshots', env_snapshots),
    )
    for artifact_name, payload in artifacts:
        serialize_and_put(bucket, dir_name, artifact_name, payload)

    if online_users is not None:
        serialize_and_put(bucket, dir_name, 'online_users', online_users)

In [28]:
# Experiment configuration. The user and item counts are shared by the
# environment and the recommender, so they are defined once.
NUM_USERS = 1000
NUM_ITEMS = 1700
LEN_TRIAL = 500

env = Topics(num_topics=19, num_users=NUM_USERS,
             num_items=NUM_ITEMS, num_init_ratings=100000,
             user_dist_choice='norm'
            )
rec = LibFM(num_user_features=0, num_item_features=0, num_rating_features=0,
            max_num_users=NUM_USERS, max_num_items=NUM_ITEMS)
# NOTE(review): the bucket is created but not passed to run_selection_trial below,
# so this run does not persist anything to S3 -- confirm that this is intended.
bucket = boto3.resource('s3').Bucket('Alex')  # pylint: disable=no-member

# Keep the results in named variables so later cells can analyze them, instead of
# letting the notebook echo the full arrays as the cell output.
# NOTE(review): trial_seed is documented as an int but an array is passed here;
# kept unchanged to preserve the seeding of earlier runs.
(trial_ratings,
 trial_predictions,
 trial_dense_ratings,
 trial_dense_predictions,
 trial_online_users) = run_selection_trial(env, rec, LEN_TRIAL, np.arange(2))

HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "//anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3325, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-28-b561e1e1ec1b>", line 10, in <module>
    run_selection_trial(env, rec, 500, np.arange(2))
  File "<ipython-input-26-33d5523a0a33>", line 76, in run_selection_trial
    recommendations, predictions = rec.recommend(online_users, num_recommendations=1)
  File "../reclab/recommenders/recommender.py", line 260, in recommend
    all_predictions = self._predict(ratings_to_predict)
  File "../reclab/recommenders/libfm.py", line 177, in _predict
    data.append(1)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "//anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2039, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has n

KeyboardInterrupt: 