In [16]:
import logging
from pathlib import Path
import pickle
import sys

from obp.dataset.real import OpenBanditDataset
from obp.policy import EpsilonGreedy, BernoulliTS, Random
from sklearn.linear_model import LinearRegression

from sd_bandits.obp_extensions.dataset import DeezerDataset
from sd_bandits.obp_extensions.policy import ExploreThenCommit, SegmentPolicy
from sd_bandits.experiment import DeezerExperiment, OBDExperiment

In [2]:
# Timestamp format for log lines. "%H" (24-hour, zero-padded) is a standard
# strftime directive; the previous "%-I" relied on a glibc-only flag that is
# rejected on some platforms and produced an ambiguous 12-hour clock without
# an AM/PM marker.
LOG_DATE_FORMAT = "%H:%M:%S"

# Send INFO-level (and above) records from the root logger to stdout so they
# appear in the cell output as "<time> <LEVEL>: <message>".
logging.basicConfig(
    format="%(asctime)s %(levelname)s: %(message)s",
    level=logging.INFO,
    handlers=[
        logging.StreamHandler(sys.stdout),
    ],
    datefmt=LOG_DATE_FORMAT,
)

# Ad hoc Deezer experiments


In [3]:
# Input CSVs for the Deezer dataset; both paths are relative to the
# notebook's working directory.
user_features_csv = "../data/deezer_carousel_bandits/user_features.csv"
playlist_features_csv = "../data/deezer_carousel_bandits/playlist_features.csv"

deezer_dataset = DeezerDataset(user_features_csv, playlist_features_csv)

In [17]:
# Constructor arguments shared by every base policy below. They are defined
# once so the action space, batch size and seed cannot drift between entries.
base_policy_kwargs = dict(
    n_actions=deezer_dataset.n_actions,
    len_list=deezer_dataset.len_list,
    batch_size=1,
    random_state=0,
)

# Segment count used by every SegmentPolicy wrapper in this comparison.
n_segments = 100

# Policies to compare. The list order is the order in which they are handed
# to the experiment. Wrapped (inner) policies are given no explicit
# policy_name; only the SegmentPolicy wrapper is named.
policies = [
    # --- un-segmented policies ---
    Random(**base_policy_kwargs),
    EpsilonGreedy(
        **base_policy_kwargs,
        epsilon=0.01,
        policy_name="egreedy_exploit",
    ),
    EpsilonGreedy(
        **base_policy_kwargs,
        epsilon=0.1,
        policy_name="egreedy_explore",
    ),
    BernoulliTS(
        **base_policy_kwargs,
        alpha=1,
        beta=1,
        policy_name="ts_naive",
    ),
    BernoulliTS(
        **base_policy_kwargs,
        alpha=100,
        beta=100,
        policy_name="ts_pessimistic",
    ),
    # --- segment-wrapped epsilon-greedy / Thompson sampling ---
    SegmentPolicy(
        EpsilonGreedy(**base_policy_kwargs, epsilon=0.1),
        n_segments=n_segments,
        policy_name="seg_egreedy_explore",
    ),
    SegmentPolicy(
        EpsilonGreedy(**base_policy_kwargs, epsilon=0.01),
        n_segments=n_segments,
        policy_name="seg_egreedy_exploit",
    ),
    SegmentPolicy(
        BernoulliTS(**base_policy_kwargs, alpha=1, beta=1),
        n_segments=n_segments,
        policy_name="seg_ts_naive",
    ),
    SegmentPolicy(
        BernoulliTS(**base_policy_kwargs, alpha=100, beta=100),
        n_segments=n_segments,
        policy_name="seg_ts_pessimistic",
    ),
    # --- explore-then-commit, plain and segment-wrapped ---
    ExploreThenCommit(
        **base_policy_kwargs,
        min_n=20,
        policy_name="etc_exploit",
    ),
    ExploreThenCommit(
        **base_policy_kwargs,
        min_n=100,
        policy_name="etc_explore",
    ),
    SegmentPolicy(
        ExploreThenCommit(**base_policy_kwargs, min_n=20),
        n_segments=n_segments,
        policy_name="seg_etc_exploit",
    ),
    SegmentPolicy(
        ExploreThenCommit(**base_policy_kwargs, min_n=100),
        n_segments=n_segments,
        policy_name="seg_etc_explore",
    ),
]


In [18]:
# Pair each policy with its own options dict; a fresh dict is built per policy
# so the entries never share one mutable object.
policies_with_options = [
    (candidate, {"users_per_batch": 1000}) for candidate in policies
]

deezer_experiment = DeezerExperiment(
    dataset=deezer_dataset,
    policies=policies_with_options,
)

In [19]:
# Run the experiment for every configured policy. In the recorded output the
# "Done in 365.3 seconds" line is logged before the confidence-interval
# estimation starts, so expect this cell to take several minutes.
deezer_experiment.run_experiment()

11:24:14 INFO: Running experiment
11:24:14 INFO: Learning and obtaining policy feedback
11:24:14 INFO: [1 of 13] Learning and obtaining random feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:18<00:00, 5448.56it/s]


11:24:34 INFO: [2 of 13] Learning and obtaining egreedy_exploit feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:16<00:00, 5909.27it/s]


11:24:53 INFO: [3 of 13] Learning and obtaining egreedy_explore feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:17<00:00, 5599.48it/s]


11:25:12 INFO: [4 of 13] Learning and obtaining ts_naive feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:29<00:00, 3342.72it/s]


11:25:44 INFO: [5 of 13] Learning and obtaining ts_pessimistic feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:31<00:00, 3159.59it/s]


11:26:17 INFO: [6 of 13] Learning and obtaining seg_egreedy_explore feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:21<00:00, 4679.99it/s]


11:26:40 INFO: [7 of 13] Learning and obtaining seg_egreedy_exploit feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:21<00:00, 4671.34it/s]


11:27:03 INFO: [8 of 13] Learning and obtaining seg_ts_naive feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:33<00:00, 2961.82it/s]


11:27:38 INFO: [9 of 13] Learning and obtaining seg_ts_pessimistic feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:33<00:00, 2944.02it/s]


11:28:14 INFO: [10 of 13] Learning and obtaining etc_exploit feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:26<00:00, 3757.36it/s]


11:28:42 INFO: [11 of 13] Learning and obtaining etc_explore feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:29<00:00, 3413.01it/s]


11:29:12 INFO: [12 of 13] Learning and obtaining seg_etc_exploit feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:32<00:00, 3108.95it/s]


11:29:48 INFO: [13 of 13] Learning and obtaining seg_etc_explore feedback


Simulating online learning: 100%|██████████| 100000/100000 [00:30<00:00, 3285.35it/s]


11:30:20 INFO: Done in 365.3 seconds
11:30:20 INFO: Estimating reward confidence interval for random baseline feedback
11:30:20 INFO: [1 of 13] Estimating reward confindence interval for random feedback
11:30:21 INFO: [2 of 13] Estimating reward confindence interval for egreedy_exploit feedback
11:30:21 INFO: [3 of 13] Estimating reward confindence interval for egreedy_explore feedback
11:30:22 INFO: [4 of 13] Estimating reward confindence interval for ts_naive feedback
11:30:23 INFO: [5 of 13] Estimating reward confindence interval for ts_pessimistic feedback
11:30:23 INFO: [6 of 13] Estimating reward confindence interval for seg_egreedy_explore feedback
11:30:24 INFO: [7 of 13] Estimating reward confindence interval for seg_egreedy_exploit feedback
11:30:25 INFO: [8 of 13] Estimating reward confindence interval for seg_ts_naive feedback
11:30:26 INFO: [9 of 13] Estimating reward confindence interval for seg_ts_pessimistic feedback
11:30:26 INFO: [10 of 13] Estimating reward confinden

In [24]:
# Persist the experiment output so it can be analysed without re-running the
# multi-minute experiment.
output_path = Path("../experiments/ad_hoc/most_deezer.pickle")

# Create the target directory first: opening the file inside a missing folder
# would raise FileNotFoundError and the computed results would not be saved.
output_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE: pickle files can execute code when loaded; only load this file from a
# trusted location.
with output_path.open("wb") as f:
    pickle.dump(deezer_experiment.output, f)