In [1]:
from irec.recommendation.agents.value_functions import LinearUCB, MostPopular, GenericThompsonSampling, EGreedy
from irec.recommendation.agents.action_selection_policies import ASPGenericGreedy, ASPGreedy, ASPEGreedy
from irec.offline_experiments.metric_evaluators import UserCumulativeInteraction
from irec.offline_experiments.evaluation_policies import FixedInteraction
from irec.recommendation.agents import SimpleEnsembleAgent, SimpleAgent
from irec.offline_experiments.metrics import Hits, EPC, Recall, ILD
from irec.environment.loader import FullData

In [2]:
import pandas as pd
import numpy as np

## Load Dataset

In [3]:
# Where the ratings file lives and how FullData should read it.
dataset = dict(
    path="datasets/MovieLens 100k/ratings.csv",
    random_seed=0,
    file_delimiter=",",
    skip_head=True,
)

# Train/test split settings handed to the loader.
# NOTE(review): 'test_consumes' presumably is a minimum number of test
# interactions per user -- confirm against the irec documentation.
splitting = dict(strategy="global", train_size=0.8, test_consumes=5)

# Settings for the extra validation split produced by the loader.
validation = dict(validation_size=0.2)

# Build the loader and materialise the train/test datasets plus the validation pair.
loader = FullData(dataset, splitting, validation)
(
    train_dataset,
    test_dataset,
    x_validation,
    y_validation,
) = loader.process()


Applying splitting strategy: global

Test shape: (16892, 4)
Train shape: (80393, 4)

Generating x_validation and y_validation: 
Test shape: (15729, 4)
Train shape: (61345, 4)


## Creating the agents

In [4]:
# Constructor keyword arguments for each value function, keyed by class name.
# The inner "LinearUCB" / "MostPopular" keys under GenericThompsonSampling are
# the names later given to the agents combined by the ensemble.
params = dict(
    LinearUCB=dict(
        alpha=1.0,
        item_var=0.01,
        iterations=20,
        num_lat=20,
        stop_criteria=0.0009,
        user_var=0.01,
        var=0.05,
    ),
    MostPopular=dict(),
    EGreedy=dict(),
    GenericThompsonSampling=dict(
        alpha_0=dict(LinearUCB=100, MostPopular=1),
        beta_0=dict(LinearUCB=100, MostPopular=1),
    ),
)

### Creating the simple agents

In [17]:
# Action selection policies: plain greedy, and epsilon-greedy with epsilon = 0.1.
asp_sa = ASPGreedy()
aspe_sa = ASPEGreedy(epsilon=0.1)

# LinearUCB value function paired with the greedy policy.
vf1 = LinearUCB(**params["LinearUCB"])
agent1 = SimpleAgent(vf1, asp_sa, name="LinearUCB")

# MostPopular value function, sharing the same greedy policy object.
vf2 = MostPopular(**params["MostPopular"])
agent2 = SimpleAgent(vf2, asp_sa, name="MostPopular")

# EGreedy value function paired with the epsilon-greedy policy.
vf3 = EGreedy(**params["EGreedy"])
agent3 = SimpleAgent(vf3, aspe_sa, name="EGreedy")

### Creating the Ensemble Agent

In [18]:
# Value function and action selection policy used by the ensemble itself.
vf_sea = GenericThompsonSampling(**params["GenericThompsonSampling"])
asp_sea = ASPGenericGreedy()

# The ensemble combines the LinearUCB and MostPopular agents built above;
# their names match the keys of alpha_0 / beta_0 in the Thompson Sampling params.
ensemble_members = [agent1, agent2]
ensemble_agent = SimpleEnsembleAgent(
    name="EnsembleAgent",
    agents=ensemble_members,
    value_function=vf_sea,
    action_selection_policy=asp_sea,
)

In [19]:
# Every agent to be run and evaluated: the three simple agents plus the ensemble.
agents = [
    agent1,
    agent2,
    agent3,
    ensemble_agent,
]

## Getting the recommendations

In [20]:
# Evaluation policy shared by all agents.
# NOTE(review): presumably 100 interactions per user with one recommended item
# each -- keep these in sync with the evaluator's num_interactions / interaction_size.
eval_policy = FixedInteraction(
    num_interactions=100,
    interaction_size=1,
    save_info=False,
)

In [21]:
# Run the evaluation policy once per agent and keep what each agent
# recommended, keyed by the agent's name.
interactions = {}
for current_agent in agents:
    print(current_agent.name)
    # evaluate() returns two values; only the first is used in this notebook.
    recommended, extra_info = eval_policy.evaluate(
        current_agent, train_dataset, test_dataset
    )
    interactions[current_agent.name] = recommended

LinearUCB
Starting LinearUCB Training


rmse=0.800: 100%|██████████| 20/20 [00:17<00:00,  1.16it/s]


Ended LinearUCB Training


LinearUCB: 100%|██████████| 18900/18900 [00:16<00:00, 1116.64it/s]


MostPopular
Starting MostPopular Training
Ended MostPopular Training


MostPopular: 100%|██████████| 18900/18900 [00:02<00:00, 8415.24it/s]


EGreedy
Starting EGreedy Training
Ended EGreedy Training


EGreedy: 100%|██████████| 18900/18900 [00:01<00:00, 10974.61it/s]


EnsembleAgent
Starting EnsembleAgent Training


rmse=0.800: 100%|██████████| 20/20 [00:20<00:00,  1.04s/it]


Ended EnsembleAgent Training


EnsembleAgent: 100%|██████████| 18900/18900 [00:19<00:00, 977.76it/s] 


### Evaluating the models

In [22]:
# Cumulative Evaluation Setup
evaluator = UserCumulativeInteraction(
    ground_truth_dataset=test_dataset,
    num_interactions=100,
    interaction_size=1,
    interactions_to_evaluate=[5, 10, 20, 50, 100],
    relevance_evaluator_threshold=3.99
)

In [None]:
# Evaluate every agent under every metric.
# Result layout: cumulative_results[metric class name][agent name] -> evaluator output.
cumulative_results = {}
for metric_class in (Hits, EPC, Recall, ILD):
    metric_key = metric_class.__name__
    for agent_name, agent_results in interactions.items():
        print(f"\nEvaluating {agent_name}\n")
        agent_metric = evaluator.evaluate(metric_class=metric_class, results=agent_results)
        # Create the per-metric dict on first use, then record this agent's values.
        cumulative_results.setdefault(metric_key, {})[agent_name] = agent_metric

In [24]:
# Sanity check: the top-level keys are the names of the evaluated metric classes.
cumulative_results.keys()

dict_keys(['Hits', 'EPC', 'Recall', 'ILD'])

In [25]:
# Sanity check: under each metric, the keys are the names of the evaluated agents.
cumulative_results["Hits"].keys()

dict_keys(['LinearUCB', 'MostPopular', 'EGreedy', 'EnsembleAgent'])

In [26]:
# Interaction cutoffs used as table columns; must match the evaluator's
# `interactions_to_evaluate` (one entry per element returned by the evaluator).
eval_cutoffs = [5, 10, 20, 50, 100]

# Build one table per metric: rows are agents ("Model"), columns are
# (metric name, cutoff) pairs, cells are NaN-ignoring means.
all_results = []
for metric_name, agent_values in cumulative_results.items():
    # Each element of `values` is a mapping (presumably user -> metric value
    # at one cutoff -- TODO confirm); collapse it to a single mean.
    mean_by_agent = {
        agent_name: [np.nanmean(list(per_cutoff.values())) for per_cutoff in values]
        for agent_name, values in agent_values.items()
    }
    # Construct the frame in one shot so the columns are numeric rather than
    # the object dtype left by filling an empty frame row by row.
    df_cumulative = pd.DataFrame.from_dict(
        mean_by_agent, orient="index", columns=eval_cutoffs
    ).rename_axis("Model")
    df_cumulative.columns = pd.MultiIndex.from_product([[metric_name], eval_cutoffs])
    all_results.append(df_cumulative)

In [27]:
# Place the per-metric tables side by side in one table with
# (metric, cutoff) columns.
# The result gets its own name so `all_results` stays the list of per-metric
# frames: rebinding it to the DataFrame made a second run of this cell fail,
# because pd.concat rejects a bare DataFrame as its first argument.
results_table = pd.concat(all_results, axis=1)
results_table

Unnamed: 0_level_0,Hits,Hits,Hits,Hits,Hits,EPC,EPC,EPC,EPC,EPC,Recall,Recall,Recall,Recall,Recall,ILD,ILD,ILD,ILD,ILD
Unnamed: 0_level_1,5,10,20,50,100,5,10,20,50,100,5,10,20,50,100,5,10,20,50,100
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
LinearUCB,2.058201,3.544974,6.396825,13.915344,22.941799,0.913138,0.922726,0.93468,0.947699,0.957061,0.061996,0.105512,0.179007,0.345512,0.527375,0.243138,0.280695,0.305294,0.329127,0.351707
MostPopular,1.666667,2.904762,5.126984,10.063492,16.703704,0.90456,0.911084,0.922276,0.938327,0.947497,0.04906,0.085504,0.13688,0.232438,0.365345,0.259048,0.282381,0.27854,0.291894,0.313466
EGreedy,1.132275,2.10582,3.650794,7.931217,13.724868,0.944704,0.947987,0.951702,0.956016,0.959761,0.027352,0.04694,0.078794,0.172411,0.300665,0.320149,0.321986,0.328036,0.34481,0.359484
EnsembleAgent,2.0,3.52381,6.402116,13.952381,22.925926,0.913026,0.920501,0.932376,0.945662,0.95518,0.056846,0.1033,0.179556,0.348687,0.533001,0.237881,0.278025,0.305048,0.328898,0.35107
