In [4]:
import sys
import os
import numpy as np
import argparse
from pathlib import Path
from datetime import datetime
import yaml

root_dir = os.path.expanduser('~/sd_bandits')
sys.path.append(root_dir)

In [7]:
import sd_bandits.utils

In [8]:
from obp.policy import BernoulliTS, EpsilonGreedy, Random, LinEpsilonGreedy,\
                       LinTS, LinUCB, LogisticEpsilonGreedy, LogisticTS, LogisticUCB

from obp.ope.estimators import DirectMethod, DoublyRobust, DoublyRobustWithShrinkage,\
                               InverseProbabilityWeighting, ReplayMethod,\
                               SelfNormalizedDoublyRobust, SelfNormalizedInverseProbabilityWeighting,\
                               SwitchDoublyRobust, SwitchInverseProbabilityWeighting

from obp.dataset import OpenBanditDataset
from sd_bandits.deezer.dataset import DeezerDataset

In [18]:
policy_dict = {'BernoulliTS':BernoulliTS,
               'EpsilonGreedy':EpsilonGreedy,
               'Random':Random,
               'LinEpsilonGreedy':LinEpsilonGreedy,
               'LinTS':LinTS,
               'LinUCB':LinUCB,
               'LogisticEpsilonGreedy':LogisticEpsilonGreedy,
               'LogisticTS':LogisticTS,
               'LogisticUCB':LogisticUCB}

def build_policy_spec(policy_key, parameter_dict, policy_name=None, output='./policy_yamls/'):
    '''
    Constructs a yaml output file specifying the type of 
    policy and the policy paramters
    
    Parameters
    ------------
    policy_key: str
        The policy name
    parameter_dict: dict
        The dict containing {kwarg: value}
    policy_name: str
        The name of the policy+configuration, if not
        specified, will be automatically generated
        via timestamp
    output: str
        Path to directory to store
    
    Returns
    ------------
    policy: policy
        The policy with specified parameters
    '''
    #Set name via timestamp if not specified
    if policy_name==None:
        now = datetime.now()
        current_time = now.strftime("%H%M%S")
        policy_name = '{}_{}'.format(policy_key, current_time)
    
    #Build dict structure
    yaml_dict = {}
    yaml_dict['name'] = policy_name
    yaml_dict['policy_type'] = policy_key
    yaml_dict['parameters'] = parameter_dict
    
    #Set output folder
    output_folder = os.path.join(output, policy_name)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    with open(os.path.join(output_folder, 'policy_spec.yaml'), 'w') as file:
        yaml.dump(yaml_dict, file)
        
    policy = policy_dict[policy_key](**parameter_dict)
    
    return policy
    
    

In [22]:
policy_key_test = 'BernoulliTS'
policy_name_test = 'Bernoulli_test'
policy_params_test = {'n_actions':5, 'len_list':3, 'batch_size':5}

policy_test = build_policy_spec(policy_key_test, policy_params_test,\
                  policy_name=policy_name_test)

print(policy_test)

BernoulliTS(n_actions=5, len_list=3, batch_size=5, random_state=None, alpha=array([1., 1., 1., 1., 1.]), beta=array([1., 1., 1., 1., 1.]), is_zozotown_prior=False, campaign=None, policy_name='bts')


In [25]:
def load_policy_from_spec(policy_folder):
    '''
    Constructs a yaml output file specifying the type of 
    policy and the policy paramters
    
    Parameters
    ------------
    policy_folder: str
        The folder containing the policy spec
    Returns
    ------------
    policy: policy
        The policy loaded from the spec yaml
    '''
    with open(os.path.join(policy_folder, 'policy_spec.yaml'), 'r') as file:
        yaml_dict = yaml.load(file)
    
    policy_key = yaml_dict['policy_type']
    parameter_dict = yaml_dict['parameters']
    
    policy = policy_dict[policy_key](**parameter_dict)
   
    return policy
    

In [29]:
policy_load_test = './policy_yamls/Bernoulli_test'

policy_load = load_policy_from_spec(policy_load_test)
print(policy_load)

BernoulliTS(n_actions=5, len_list=3, batch_size=5, random_state=None, alpha=array([1., 1., 1., 1., 1.]), beta=array([1., 1., 1., 1., 1.]), is_zozotown_prior=False, campaign=None, policy_name='bts')


  yaml_dict = yaml.load(file)


In [11]:
estimator_dict = {'DirectMethod':DirectMethod,
                  'DoublyRobust':DoublyRobust,
                  'DoublyRobustWithShrinkage':DoublyRobustWithShrinkage,
                  'InverseProbabilityWeighting':InverseProbabilityWeighting,
                  'ReplayMethod':ReplayMethod,
                  'SelfNormalizedDoublyRobust':SelfNormalizedDoublyRobust,
                  'SelfNormalizedInverseProbabilityWeighting':SelfNormalizedInverseProbabilityWeighting,
                  'SwitchDoublyRobust':SwitchDoublyRobust,
                  'SwitchInverseProbabilityWeighting':SwitchInverseProbabilityWeighting}

policy_dict = {'BernoulliTS':BernoulliTS,
               'EpsilonGreedy':EpsilonGreedy,
               'Random':Random,
               'LinEpsilonGreedy':LinEpsilonGreedy,
               'LinTS':LinTS,
               'LinUCB':LinUCB,
               'LogisticEpsilonGreedy':LogisticEpsilonGreedy,
               'LogisticTS':LogisticTS,
               'LogisticUCB':LogisticUCB}

dataset_dict = {'obp': OpenBanditDataset,
                'deezer': DeezerDataset}


def build_obj_spec(obj_key, parameter_dict, experiment_name=None, obj_type='policy', output='./policy_yamls/'):
    '''
    Constructs a yaml output file specifying the type of 
    policy and the policy paramters
    
    Parameters
    ------------
    obj_key: str
        The policy/estimator/dataset name
    parameter_dict: dict
        The dict containing parameters for the
        obp object {kwarg: value}
    experiment_name: str
        The associated experiment name, if not
        specified, will be automatically generated
        via timestamp
    obj_type: str
        The type of OBP object, should be 'policy'
        or 'estimator', throw error otherwise
    output: str
        Path to directory to store
    
    Returns
    ------------
    obj_dict: dict
        The constructor dict for the object
    '''
    #Set name via timestamp if not specified
    if obj_type not in ['policy','estimator', 'dataset']:
        print('Invalid type: {}'.format(obj_type))
        return None
    
    if experiment_name==None:
        now = datetime.now()
        current_time = now.strftime("%H%M%S")
        experiment_name = 'experiment_{}'.format(current_time)
    
    #Build dict structure
    obj_dict = {}
    obj_dict['name'] = experiment_name
    obj_dict['type'] = obj_type
    obj_dict['key'] = obj_key
    obj_dict['parameters'] = parameter_dict
    
    #Set output folder
    output_folder = os.path.join(output, experiment_name)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    with open(os.path.join(output_folder, '{}_spec.yaml'.format(obj_type)), 'w') as file:
        yaml.dump(obj_dict, file)
        
    return obj_dict

In [31]:
def load_obj_from_spec(obj_dict_path):
    '''
    Loads policy/estimator from spec dict
    
    Parameters
    ------------
    obj_dict_path: str
        Path to configuration dict from build_obj_spec()
    Returns
    ------------
    obj: obp.policy/obp.estimator/dataset
        The policy/estimator loaded from the spec dict
    '''
    with open(obj_dict_path, 'r') as file:
        obj_dict = yaml.load(file, Loader=yaml.FullLoader)
        
    obj_name = obj_dict['name']
    obj_type = obj_dict['type']
    obj_key = obj_dict['key']
    parameter_dict = obj_dict['parameters']
    
    if obj_type=='policy':
        policy = policy_dict[obj_key](**parameter_dict)
        return policy
    elif obj_type=='estimator':
        estimator = estimator_dict[obj_key](**parameter_dict)
        return estimator
    elif obj_type=='dataset':
        parameter_dict['data_path'] = Path(parameter_dict['data_path'])
        dataset = dataset_dict[obj_key](**parameter_dict)
        return dataset
   

In [32]:
policy_key_test = 'BernoulliTS'
policy_name_test = 'save_obj_test'
policy_params_test = {'n_actions':5, 'len_list':3, 'batch_size':5}

policy_test = build_obj_spec(policy_key_test, policy_params_test, experiment_name=policy_name_test)

print(policy_test)
print(load_obj_from_spec('policy_yamls/save_obj_test/policy_spec.yaml'))

{'name': 'save_obj_test', 'type': 'policy', 'key': 'BernoulliTS', 'parameters': {'n_actions': 5, 'len_list': 3, 'batch_size': 5}}
BernoulliTS(n_actions=5, len_list=3, batch_size=5, random_state=None, alpha=array([1., 1., 1., 1., 1.]), beta=array([1., 1., 1., 1., 1.]), is_zozotown_prior=False, campaign=None, policy_name='bts')


In [33]:
est_key_test = 'DoublyRobustWithShrinkage'
est_name_test = 'save_obj_test'
est_params_test = {'lambda_':1}

est_test = build_obj_spec(est_key_test, est_params_test, experiment_name=est_name_test, obj_type='estimator')

print(est_test)
print(load_obj_from_spec('policy_yamls/save_obj_test/estimator_spec.yaml'))

{'name': 'save_obj_test', 'type': 'estimator', 'key': 'DoublyRobustWithShrinkage', 'parameters': {'lambda_': 1}}
DoublyRobustWithShrinkage(estimator_name='dr-os', lambda_=1)


In [34]:
dataset_key_test = 'obp'
dataset_name_test = 'save_obj_test'
dataset_params_test = {'data_path': '~/sd_bandits/data/obd',
                       'campaign': 'all',
                       'behavior_policy': 'bts'}

dataset_test = build_obj_spec(dataset_key_test, dataset_params_test, experiment_name=dataset_name_test, obj_type='dataset')

print(dataset_test)
print(load_obj_from_spec('policy_yamls/save_obj_test/dataset_spec.yaml'))

{'name': 'save_obj_test', 'type': 'dataset', 'key': 'obp', 'parameters': {'data_path': '~/sd_bandits/data/obd', 'campaign': 'all', 'behavior_policy': 'bts'}}
OpenBanditDataset(behavior_policy='bts', campaign='all', data_path=PosixPath('~/sd_bandits/data/obd/bts/all'), dataset_name='obd')


In [35]:
def build_experiment(experiment_name, policy, estimator, dataset, policy_params, estimator_params, dataset_params):
    '''
    Builds full experiment spec folder w/ policy, estimator, and dataset, as well as an
    
    Parameters
    ------------
    obj_dict: dict
        Obj configuration dict from build_obj_spec()
    Returns
    ------------
    obj: obp.policy/obp.estimator
        The policy/estimator loaded from the spec dict
    ''' 