diff --git a/README.md b/README.md index decc491..4115ee4 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,33 @@ be easily configured that vary assumptions about: user preferences and item familiarity; user latent state and its dynamics; and choice models and other user response behavior. We outline how RecSim offers value to RL and RS researchers and practitioners, and how it can serve as a vehicle for -academic-industrial collaboration. +academic-industrial collaboration. For a detailed description of the RecSim +architecture, please read [Ie et al.](https://arxiv.org/abs/1909.04847). Please +cite the paper if you use the code from this repository in your work. + +### Bibtex + +``` +@article{ie2019recsim, + title={RecSim: A Configurable Simulation Platform for Recommender Systems}, + author={Eugene Ie and Chih-wei Hsu and Martin Mladenov and Vihan Jain and Sanmit Narvekar and Jing Wang and Rui Wu and Craig Boutilier}, + year={2019}, + eprint={1909.04847}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} +``` + ## Disclaimer This is not an officially supported Google product. ## What's new -* **12/13/2019:** Added (abstract) classes for both multi-user environments and - agents. Added bandit algorithms for generalized linear models. + +* **12/13/2019:** Added (abstract) classes for both multi-user environments + and agents. Added bandit algorithms for generalized linear models. ## Installation and Sample Usage @@ -64,10 +81,12 @@ You can also find the simulated logs in /tmp/recsim/episode_logs.tfrecord ## Tutorials -To get started, please check out our Colab tutorials. In [**RecSim: -Overview**](recsim/colab/RecSim_Overview.ipynb), we give a brief overview about -RecSim. We then talk about each configurable component: -[**environment**](recsim/colab/RecSim_Developing_an_Environment.ipynb) and +To get started, please check out our Colab tutorials. In +[**RecSim: Overview**](recsim/colab/RecSim_Overview.ipynb), +we give a brief overview of RecSim. We then talk about each configurable +component: +[**environment**](recsim/colab/RecSim_Developing_an_Environment.ipynb) +and +[**recommender agent**](recsim/colab/RecSim_Developing_an_Agent.ipynb). 
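The Colab tutorials above walk through these pieces interactively. As a quick, hedged taste of the same flow, here is a minimal sketch using the `interest_evolution` environment (the config values are illustrative examples, not prescribed defaults):

```python
# A minimal sketch: build the interest_evolution environment and run one
# episode with a fixed slate standing in for a real agent.
from recsim.environments import interest_evolution

env_config = {  # assumed example values
    'num_candidates': 10,
    'slate_size': 2,
    'resample_documents': True,
    'seed': 0,
}
env = interest_evolution.create_environment(env_config)

observation = env.reset()
done = False
while not done:
  slate = [0, 1]  # always recommend the first two candidates
  observation, reward, done, _ = env.step(slate)
```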
## Documentation diff --git a/docs/api_docs/python/_redirects.yaml b/docs/api_docs/python/_redirects.yaml index ebf9185..2bf1a1e 100644 --- a/docs/api_docs/python/_redirects.yaml +++ b/docs/api_docs/python/_redirects.yaml @@ -1,3 +1,9 @@ redirects: +- from: /recsim/api_docs/python/recsim/environments/interest_exploration/FLAGS + to: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS +- from: /recsim/api_docs/python/recsim/environments/long_term_satisfaction/FLAGS + to: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS - from: /recsim/api_docs/python/recsim/simulator/environment/SingleUserEnvironment to: /recsim/api_docs/python/recsim/simulator/environment/Environment +- from: /recsim/api_docs/python/recsim/simulator/runner_lib/FLAGS + to: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS diff --git a/docs/api_docs/python/_toc.yaml b/docs/api_docs/python/_toc.yaml index 15d1b10..8ef5af0 100644 --- a/docs/api_docs/python/_toc.yaml +++ b/docs/api_docs/python/_toc.yaml @@ -201,6 +201,8 @@ toc: path: /recsim/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward - title: create_environment path: /recsim/api_docs/python/recsim/environments/interest_evolution/create_environment + - title: FLAGS + path: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS - title: IEvResponse path: /recsim/api_docs/python/recsim/environments/interest_evolution/IEvResponse - title: IEvUserDistributionSampler diff --git a/docs/api_docs/python/index.md b/docs/api_docs/python/index.md index 58f1905..d0ecd1b 100644 --- a/docs/api_docs/python/index.md +++ b/docs/api_docs/python/index.md @@ -77,6 +77,7 @@ * recsim.document.CandidateSet * recsim.environments * recsim.environments.interest_evolution +* recsim.environments.interest_evolution.FLAGS * recsim.environments.interest_evolution.IEvResponse * recsim.environments.interest_evolution.IEvUserDistributionSampler * recsim.environments.interest_evolution.IEvUserModel @@ -89,6 +90,7 @@ * recsim.environments.interest_evolution.create_environment * recsim.environments.interest_evolution.total_clicks_reward * recsim.environments.interest_exploration +* recsim.environments.interest_exploration.FLAGS * recsim.environments.interest_exploration.IEClusterUserSampler * recsim.environments.interest_exploration.IEDocument * recsim.environments.interest_exploration.IEResponse @@ -98,6 +100,7 @@ * recsim.environments.interest_exploration.create_environment * recsim.environments.interest_exploration.total_clicks_reward * recsim.environments.long_term_satisfaction +* recsim.environments.long_term_satisfaction.FLAGS * recsim.environments.long_term_satisfaction.LTSDocument * recsim.environments.long_term_satisfaction.LTSDocumentSampler * recsim.environments.long_term_satisfaction.LTSResponse @@ -116,6 +119,7 @@ * recsim.simulator.recsim_gym.RecSimGymEnv * recsim.simulator.runner_lib * recsim.simulator.runner_lib.EvalRunner +* recsim.simulator.runner_lib.FLAGS * recsim.simulator.runner_lib.Runner * recsim.simulator.runner_lib.TrainRunner * recsim.simulator.runner_lib.load_gin_configs diff --git a/docs/api_docs/python/recsim.md b/docs/api_docs/python/recsim.md index 6b09e9f..fa6dc20 100644 --- a/docs/api_docs/python/recsim.md +++ b/docs/api_docs/python/recsim.md @@ -5,7 +5,10 @@ # Module: recsim + + +
View diff --git a/docs/api_docs/python/recsim/agent.md b/docs/api_docs/python/recsim/agent.md index feaddf9..9d41786 100644 --- a/docs/api_docs/python/recsim/agent.md +++ b/docs/api_docs/python/recsim/agent.md @@ -5,7 +5,10 @@ # Module: recsim.agent + + +
View diff --git a/docs/api_docs/python/recsim/agent/AbstractEpisodicRecommenderAgent.md b/docs/api_docs/python/recsim/agent/AbstractEpisodicRecommenderAgent.md index 1505ec4..dd328f0 100644 --- a/docs/api_docs/python/recsim/agent/AbstractEpisodicRecommenderAgent.md +++ b/docs/api_docs/python/recsim/agent/AbstractEpisodicRecommenderAgent.md @@ -1,7 +1,6 @@
- @@ -12,50 +11,67 @@ # recsim.agent.AbstractEpisodicRecommenderAgent - + +
View source -## Class `AbstractEpisodicRecommenderAgent` - - Abstract class for recommender systems that solves episodic tasks. Inherits From: [`AbstractRecommenderAgent`](../../recsim/agent/AbstractRecommenderAgent.md) - - -

__init__

- -View -source - -```python -__init__( - action_space, - summary_writer=None + -Initializes AbstractEpisodicRecommenderAgent. - -#### Args: + -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `summary_writer`: A Tensorflow summary writer to pass to the agent - for in-agent training statistics in Tensorboard. + + + + + + + + + + + + + +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`summary_writer` + +A Tensorflow summary writer to pass to the agent +for in-agent training statistics in Tensorboard. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -64,121 +80,251 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` + Returns the agent's first action for this episode. -#### Args: + -* `observation`: numpy array, the environment's initial observation. + + + -#### Returns: + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
-* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns +an empty dictionary. +

end_episode

View source -```python -end_episode( - reward, - observation=None + Signals the end of the episode to the agent. -#### Args: - -* `reward`: An float that is the last reward from the environment. -* `observation`: numpy array that represents the last observation of - the episode. + + + + + + + + + + + + + +
Args
+`reward` + +A float that is the last reward from the environment. +
+`observation` + +numpy array that represents the last observation of the +episode. +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
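To make the episodic interface above concrete, here is a hedged sketch of a minimal subclass (the class name and random policy are ours; we assume, as in the RecSim source, that the base class derives `_slate_size` from `action_space`):

```python
# Illustrative only: an agent that recommends a uniformly random slate.
# step() returns an integer array of size _slate_size, where each element
# is an index into the list of doc_obs, per the contract documented above.
import numpy as np
from recsim.agent import AbstractEpisodicRecommenderAgent

class RandomSlateAgent(AbstractEpisodicRecommenderAgent):

  def __init__(self, action_space, seed=0):
    super(RandomSlateAgent, self).__init__(action_space)
    self._rng = np.random.RandomState(seed)

  def step(self, reward, observation):
    num_docs = len(observation['doc'])
    return self._rng.choice(num_docs, size=self._slate_size, replace=False)
```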
diff --git a/docs/api_docs/python/recsim/agent/AbstractHierarchicalAgentLayer.md b/docs/api_docs/python/recsim/agent/AbstractHierarchicalAgentLayer.md index 4f799e2..79de22a 100644 --- a/docs/api_docs/python/recsim/agent/AbstractHierarchicalAgentLayer.md +++ b/docs/api_docs/python/recsim/agent/AbstractHierarchicalAgentLayer.md @@ -1,7 +1,6 @@
- @@ -12,48 +11,59 @@ # recsim.agent.AbstractHierarchicalAgentLayer - + +
View source -## Class `AbstractHierarchicalAgentLayer` - - Parent class for stackable agent layers. Inherits From: [`AbstractRecommenderAgent`](../../recsim/agent/AbstractRecommenderAgent.md) - - -

__init__

- -View -source - -```python -__init__( - action_space, - *base_agent_ctors + + + -Initializes AbstractRecommenderAgent. + -#### Args: + + + -* `action_space`: A gym.spaces object that specifies the format of - actions. + + + + +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -62,102 +72,189 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns +an empty dictionary. +

end_episode

View source -```python -end_episode( - reward, - observation +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
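As a rough illustration of the `(action_space, *base_agent_ctors)` constructor documented above, here is a hypothetical stackable layer (the subclass, attribute names, and delegation policy are invented for exposition and are not part of the documented API):

```python
# Hypothetical sketch: a layer that delegates slate selection to a single
# base agent it instantiates itself.
from recsim.agent import AbstractHierarchicalAgentLayer

class PassThroughLayer(AbstractHierarchicalAgentLayer):

  def __init__(self, action_space, base_agent_ctor):
    super(PassThroughLayer, self).__init__(action_space, base_agent_ctor)
    # How concrete layers hold their base agents is up to the subclass;
    # `_delegate` here is our own name, not a documented attribute.
    self._delegate = base_agent_ctor(action_space)

  def step(self, reward, observation):
    return self._delegate.step(reward, observation)
```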
diff --git a/docs/api_docs/python/recsim/agent/AbstractMultiUserEpisodicRecommenderAgent.md b/docs/api_docs/python/recsim/agent/AbstractMultiUserEpisodicRecommenderAgent.md index efa4b0f..b8a7493 100644 --- a/docs/api_docs/python/recsim/agent/AbstractMultiUserEpisodicRecommenderAgent.md +++ b/docs/api_docs/python/recsim/agent/AbstractMultiUserEpisodicRecommenderAgent.md @@ -1,7 +1,6 @@
- @@ -12,46 +11,59 @@ # recsim.agent.AbstractMultiUserEpisodicRecommenderAgent - + +
View source -## Class `AbstractMultiUserEpisodicRecommenderAgent` - - - Abstract class to model a recommender agent handling multiple users. Inherits From: [`AbstractEpisodicRecommenderAgent`](../../recsim/agent/AbstractEpisodicRecommenderAgent.md) - - -

__init__

- -View -source + -```python -__init__(action_space) -``` + -Initializes AbstractMultiUserEpisodicRecommenderAgent. + -#### Args: + + + -* `action_space`: A gym.spaces object that specifies the format of - actions. + + + + +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -60,121 +72,251 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` + Returns the agent's first action for this episode. -#### Args: + -* `observation`: numpy array, the environment's initial observation. + + + -#### Returns: + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
-* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns +an empty dictionary. +

end_episode

View source -```python -end_episode( - reward, - observation=None + Signals the end of the episode to the agent. -#### Args: - -* `reward`: An float that is the last reward from the environment. -* `observation`: numpy array that represents the last observation of - the episode. + + + + + + + + + + + + + +
Args
+`reward` + +A float that is the last reward from the environment. +
+`observation` + +numpy array that represents the last observation of the +episode. +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
diff --git a/docs/api_docs/python/recsim/agent/AbstractRecommenderAgent.md b/docs/api_docs/python/recsim/agent/AbstractRecommenderAgent.md index 6ad6178..e8b10ef 100644 --- a/docs/api_docs/python/recsim/agent/AbstractRecommenderAgent.md +++ b/docs/api_docs/python/recsim/agent/AbstractRecommenderAgent.md @@ -1,7 +1,6 @@
- @@ -10,42 +9,56 @@ # recsim.agent.AbstractRecommenderAgent - + +
View source -## Class `AbstractRecommenderAgent` - - Abstract class to model a recommender system agent. - - -

__init__

- -View -source + -```python -__init__(action_space) -``` + -Initializes AbstractRecommenderAgent. + -#### Args: + + + -* `action_space`: A gym.spaces object that specifies the format of - actions. + + + + +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -54,12 +67,12 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. @@ -67,60 +80,115 @@ This is used for checkpointing. It will return a dictionary containing all non-TensorFlow objects (to be saved into a file by the caller), and it saves all TensorFlow objects into a checkpoint file. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns +an empty dictionary. +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. @@ -128,15 +196,48 @@ Restores the agent's Python objects to those specified in bundle_dict, and restores the TensorFlow objects to those specified in the checkpoint_dir. If the checkpoint_dir does not exist, will not reset the agent's state. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
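A hedged usage sketch of the checkpointing contract documented above, where `agent` is any concrete `AbstractRecommenderAgent` and the path and iteration number are arbitrary examples:

```python
# Save: TensorFlow objects go into checkpoint_dir; the remaining Python
# state comes back as a dict for the caller to persist alongside it.
bundle = agent.bundle_and_checkpoint('/tmp/recsim/ckpt', iteration_number=100)

# Restore: returns True only if unbundling was successful.
restored = agent.unbundle('/tmp/recsim/ckpt', 100, bundle)
```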
diff --git a/docs/api_docs/python/recsim/agents.md b/docs/api_docs/python/recsim/agents.md index d6528eb..a23cf68 100644 --- a/docs/api_docs/python/recsim/agents.md +++ b/docs/api_docs/python/recsim/agents.md @@ -5,7 +5,10 @@ # Module: recsim.agents + + +
View diff --git a/docs/api_docs/python/recsim/agents/agent_utils.md b/docs/api_docs/python/recsim/agents/agent_utils.md index 7e607ae..548ed20 100644 --- a/docs/api_docs/python/recsim/agents/agent_utils.md +++ b/docs/api_docs/python/recsim/agents/agent_utils.md @@ -5,7 +5,10 @@ # Module: recsim.agents.agent_utils + + +
View diff --git a/docs/api_docs/python/recsim/agents/agent_utils/GymSpaceWalker.md b/docs/api_docs/python/recsim/agents/agent_utils/GymSpaceWalker.md index 8cc9616..fe0901e 100644 --- a/docs/api_docs/python/recsim/agents/agent_utils/GymSpaceWalker.md +++ b/docs/api_docs/python/recsim/agents/agent_utils/GymSpaceWalker.md @@ -7,19 +7,23 @@ # recsim.agents.agent_utils.GymSpaceWalker - + +
View source -## Class `GymSpaceWalker` - - Class for recursively applying a given function to a gym space. + + Gym spaces have nested structure in terms of container spaces (e.g. Dict and @@ -30,26 +34,18 @@ the process. E.g., given a gym space of the form Tuple((Box(1), Box(1))) and a leaf operator f, this class can be used to transform an observation (a, b) into [f(a), f(b)]. -#### Args: + + + + + + +
gym_space: An instance of an OpenAI Gym space. leaf_op: A function taking as arguments an OpenAI Gym space and an observation conforming to that space. There are no requirements on its output. -
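A hedged sketch of the walker in action, following the constructor and `apply_and_flatten` signatures shown on this page (the spaces and leaf operator are toy examples; we assume, without asserting the exact implementation, that `apply_and_flatten` takes a list of observations and concatenates the per-leaf results):

```python
import numpy as np
from gym import spaces
from recsim.agents.agent_utils import GymSpaceWalker

space = spaces.Tuple((spaces.Box(0.0, 1.0, shape=(1,)),
                      spaces.Box(0.0, 1.0, shape=(1,))))
# leaf_op receives (leaf_space, observation); here it just copies the leaf.
walker = GymSpaceWalker(space, lambda leaf_space, obs: [np.asarray(obs)])
# For an observation (a, b), the flattened result is [f(a), f(b)].
flat = walker.apply_and_flatten([(np.array([0.2]), np.array([0.7]))])
```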

__init__

- -View -source - -```python -__init__( - gym_space, - leaf_op -) -``` - -Initialize self. See help(type(self)) for accurate signature. - ## Methods

apply_and_flatten

@@ -57,6 +53,8 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -apply_and_flatten(gym_observations) -``` + diff --git a/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md b/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md index a7ec774..9395afe 100644 --- a/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md +++ b/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md @@ -5,30 +5,27 @@ # recsim.agents.agent_utils.epsilon_greedy_exploration - + +
View source - Epsilon greedy exploration. -```python -recsim.agents.agent_utils.epsilon_greedy_exploration( - state_action_iterator, - q_function, - epsilon + Either picks a slate uniformly at random with probability epsilon, or returns a -slate with maximal Q-value. TODO(mmladenov): more verbose doc. Args: -state_action_iterator: an iterator over slate, state_action_index tuples. -q_function: a container holding Q-values of state-action pairs. epsilon: -probability of random action. Returns: slate: the picked slate. sa_index: the -index of the picked slate in the Q-value table. +slate with maximal Q-value. + +Args: + state_action_iterator: an iterator over (slate, state_action_index) tuples. + q_function: a container holding Q-values of state-action pairs. + epsilon: probability of random action. + +Returns: + slate: the picked slate. + sa_index: the index of the picked slate in the Q-value table. diff --git a/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md b/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md index 6967b66..2ab2572 100644 --- a/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md +++ b/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md @@ -5,23 +5,22 @@ # recsim.agents.agent_utils.min_count_exploration - + +
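Before the `min_count_exploration` page continues below, here is a hedged re-implementation of the epsilon-greedy contract just described, for illustration only (the real function lives in `recsim.agents.agent_utils`):

```python
import numpy as np

def epsilon_greedy_sketch(state_action_iterator, q_function, epsilon,
                          rng=np.random):
  # Materialize the (slate, state_action_index) pairs once.
  pairs = list(state_action_iterator)
  if rng.rand() < epsilon:
    return pairs[rng.randint(len(pairs))]            # explore: uniform slate
  return max(pairs, key=lambda p: q_function[p[1]])  # exploit: maximal Q-value
```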
View source - Minimum count exploration. -```python -recsim.agents.agent_utils.min_count_exploration( - state_action_iterator, - counts_function + diff --git a/docs/api_docs/python/recsim/agents/bandits.md b/docs/api_docs/python/recsim/agents/bandits.md index 980293a..437f6fc 100644 --- a/docs/api_docs/python/recsim/agents/bandits.md +++ b/docs/api_docs/python/recsim/agents/bandits.md @@ -5,7 +5,10 @@ # Module: recsim.agents.bandits + + +
View diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms.md b/docs/api_docs/python/recsim/agents/bandits/algorithms.md index 8eb5f15..ad11539 100644 --- a/docs/api_docs/python/recsim/agents/bandits/algorithms.md +++ b/docs/api_docs/python/recsim/agents/bandits/algorithms.md @@ -5,7 +5,10 @@ # Module: recsim.agents.bandits.algorithms + + +
View diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms/KLUCB.md b/docs/api_docs/python/recsim/agents/bandits/algorithms/KLUCB.md index 4008fe1..6642a9b 100644 --- a/docs/api_docs/python/recsim/agents/bandits/algorithms/KLUCB.md +++ b/docs/api_docs/python/recsim/agents/bandits/algorithms/KLUCB.md @@ -11,48 +11,61 @@ # recsim.agents.bandits.algorithms.KLUCB - + +
View source -## Class `KLUCB` - - Kullback-Leibler Upper Confidence Bounds (KL-UCB) algorithm. Inherits From: [`MABAlgorithm`](../../../../recsim/agents/bandits/algorithms/MABAlgorithm.md) + + See "The KL-UCB algorithm for bounded stochastic bandits and beyond" by Garivier and Cappe. -

__init__

- -View -source - -```python -__init__( - num_arms, - params, - seed=0 -) -``` - -Initializes MABAlgorithm. - -#### Args: - -* `num_arms`: Number of arms. Must be greater than one. -* `params`: A dictionary which includes additional parameters like - optimism_scaling. Default is an empty dictionary. -* `seed`: Random seed for this object. Default is zero. + + + + + + + + + + + + + + + + +
+`num_arms` + +Number of arms. Must be greater than one. +
+`params` + +A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. +
+`seed` + +Random seed for this object. Default is zero. +
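For intuition, a hedged statement of the KL-UCB index from the Garivier and Cappé paper cited above (for Bernoulli rewards; we do not assert exactly how `optimism_scaling` enters the implementation):

$$U_i(t) = \max\left\{ q \in [0, 1] : N_i(t)\, d\big(\hat{\mu}_i(t), q\big) \le \log t + c \log \log t \right\}$$

where $\hat{\mu}_i(t)$ is arm $i$'s empirical mean reward, $N_i(t)$ its pull count, and $d(p, q) = p \log\frac{p}{q} + (1-p) \log\frac{1-p}{1-q}$ is the Bernoulli KL divergence. `get_score(t)`, documented below, computes upper confidence bounds of this kind.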
## Methods @@ -61,18 +74,22 @@ Initializes MABAlgorithm. View source -```python -get_arm(t) -``` +

get_score

View source -```python -get_score(t) -``` + Computes upper confidence bounds of reward / pulls at round t. @@ -81,31 +98,29 @@ Computes upper confidence bounds of reward / pulls at round t. View source -```python -@staticmethod -print() -``` +

set_state

View source -```python -set_state( - pulls, - reward +

update

View source -```python -update( - arm, - reward + diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md b/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md index 4c47765..54cef67 100644 --- a/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md +++ b/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md @@ -8,55 +8,97 @@ # recsim.agents.bandits.algorithms.MABAlgorithm - + +
View source -## Class `MABAlgorithm` - - Base class for Multi-armed bandit (MAB) algorithms. + + We implement multi-armed bandit algorithms with confidence width tuning proposed in Hsu et al. https://arxiv.org/abs/1904.02664. -#### Attributes: - -* `pulls`: A numpy array which counts number of pulls of each arm -* `reward`: A numpy array which sums up reward of each arm -* `optimism_scaling`: A float specifying the confidence level. Default - value (1.0) corresponds to the exploration strategy presented in the - literature. A smaller number means less exploration and more exploitation. -* `_rng`: An instance of random.RandomState for random number - generation - -

__init__

- -View -source - -```python -__init__( - num_arms, - params, - seed=0 -) -``` - -Initializes MABAlgorithm. - -#### Args: + + + + + + + + + + + + + + + + +
+`num_arms` + +Number of arms. Must be greater than one. +
+`params` + +A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. +
+`seed` + +Random seed for this object. Default is zero. +
-* `num_arms`: Number of arms. Must be greater than one. -* `params`: A dictionary which includes additional parameters like - optimism_scaling. Default is an empty dictionary. -* `seed`: Random seed for this object. Default is zero. + + + + + + + + + + + + + + + + + + + +
+`pulls` + +A numpy array which counts the number of pulls of each arm. +
+`reward` + +A numpy array which sums up the reward of each arm. +
+`optimism_scaling` + +A float specifying the confidence level. Default value +(1.0) corresponds to the exploration strategy presented in the literature. +A smaller number means less exploration and more exploitation. +
+`_rng` + +An instance of random.RandomState for random number generation. +
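A hedged usage sketch for this interface, driving a toy Bernoulli bandit with the `UCB1` subclass documented nearby (we assume `get_arm(t)` and `update(arm, reward)` behave as their names and the method listings suggest):

```python
import numpy as np
from recsim.agents.bandits.algorithms import UCB1

true_means = [0.2, 0.5, 0.8]  # toy arm means, invented for illustration
alg = UCB1(num_arms=3, params={}, seed=0)
rng = np.random.RandomState(0)
for t in range(1, 1001):
  arm = alg.get_arm(t)                        # pick the arm with the best UCB
  reward = float(rng.rand() < true_means[arm])
  alg.update(arm, reward)                     # update pulls/reward statistics
```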
## Methods @@ -65,21 +107,19 @@ Initializes MABAlgorithm. View source -```python -set_state( - pulls, - reward +

update

View source -```python -update( - arm, - reward + diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md b/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md index 60c2eb7..9412f90 100644 --- a/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md +++ b/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md @@ -11,47 +11,60 @@ # recsim.agents.bandits.algorithms.ThompsonSampling - + +
View source -## Class `ThompsonSampling` - - Thompson Sampling algorithm for the Bernoulli bandit. Inherits From: [`MABAlgorithm`](../../../../recsim/agents/bandits/algorithms/MABAlgorithm.md) - - -See "Further Optimal Regret Bounds for Thompson Sampling" by Agrawal and Goyal. - -

__init__

- -View -source - -```python -__init__( - num_arms, - params, - seed=0 + -Initializes MABAlgorithm. + -#### Args: +See "Further Optimal Regret Bounds for Thompson Sampling" by Agrawal and Goyal. -* `num_arms`: Number of arms. Must be greater than one. -* `params`: A dictionary which includes additional parameters like - optimism_scaling. Default is an empty dictionary. -* `seed`: Random seed for this object. Default is zero. + + + + + + + + + + + + + + + + +
+`num_arms` + +Number of arms. Must be greater than one. +
+`params` + +A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. +
+`seed` + +Random seed for this object. Default is zero. +
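Conceptually, per the Agrawal and Goyal paper cited above, Bernoulli Thompson sampling scores each arm by drawing from a Beta posterior over its mean reward. A hedged sketch of that scoring step, phrased in terms of the `pulls` and `reward` arrays the base class tracks (this mirrors what `get_score` is documented to do, without asserting the exact implementation):

```python
import numpy as np

def thompson_scores(pulls, reward, rng=np.random):
  # Beta(1, 1) prior: reward counts successes, pulls - reward failures.
  return rng.beta(reward + 1.0, pulls - reward + 1.0)

# The arm with the highest sampled score would be pulled this round.
```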
## Methods @@ -60,18 +73,22 @@ Initializes MABAlgorithm. View source -```python -get_arm(t) -``` +

get_score

View source -```python -get_score(t) -``` + Samples scores from the posterior distribution. @@ -80,31 +97,29 @@ Samples scores from the posterior distribution. View source -```python -@staticmethod -print() -``` +

set_state

View source -```python -set_state( - pulls, - reward +

update

View source -```python -update( - arm, - reward + diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md b/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md index f3f8e63..663ec53 100644 --- a/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md +++ b/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md @@ -11,48 +11,61 @@ # recsim.agents.bandits.algorithms.UCB1 - + +
View source -## Class `UCB1` - - UCB1 algorithm. Inherits From: [`MABAlgorithm`](../../../../recsim/agents/bandits/algorithms/MABAlgorithm.md) + + See "Finite-time Analysis of the Multiarmed Bandit Problem" by Auer, Cesa-Bianchi, and Fischer. -

__init__

- -View -source - -```python -__init__( - num_arms, - params, - seed=0 -) -``` - -Initializes MABAlgorithm. - -#### Args: - -* `num_arms`: Number of arms. Must be greater than one. -* `params`: A dictionary which includes additional parameters like - optimism_scaling. Default is an empty dictionary. -* `seed`: Random seed for this object. Default is zero. + + + + + + + + + + + + + + + + +
+`num_arms` + +Number of arms. Must be greater than one. +
+`params` + +A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. +
+`seed` + +Random seed for this object. Default is zero. +
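For reference, the UCB1 index from the Auer, Cesa-Bianchi, and Fischer paper cited above (we would expect `optimism_scaling` to scale the confidence term, though we do not assert the exact implementation):

$$\mathrm{score}_i(t) = \hat{\mu}_i + \sqrt{\frac{2 \ln t}{N_i}}$$

where $\hat{\mu}_i$ is the empirical mean reward of arm $i$ (the class's `reward[i] / pulls[i]`) and $N_i$ its pull count.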
## Methods @@ -61,18 +74,22 @@ Initializes MABAlgorithm. View source -```python -get_arm(t) -``` +

get_score

View source -```python -get_score(t) -``` + Computes upper confidence bounds of reward / pulls at round t. @@ -81,31 +98,29 @@ Computes upper confidence bounds of reward / pulls at round t. View source -```python -@staticmethod -print() -``` +

set_state

View source -```python -set_state( - pulls, - reward +

update

View source -```python -update( - arm, - reward + diff --git a/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md b/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md index 99bdb53..023ca7b 100644 --- a/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md +++ b/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md @@ -5,7 +5,10 @@ # Module: recsim.agents.cluster_bandit_agent + + +
View diff --git a/docs/api_docs/python/recsim/agents/cluster_bandit_agent/ClusterBanditAgent.md b/docs/api_docs/python/recsim/agents/cluster_bandit_agent/ClusterBanditAgent.md index 0cc7567..9b4562f 100644 --- a/docs/api_docs/python/recsim/agents/cluster_bandit_agent/ClusterBanditAgent.md +++ b/docs/api_docs/python/recsim/agents/cluster_bandit_agent/ClusterBanditAgent.md @@ -1,7 +1,6 @@
- @@ -12,22 +11,27 @@ # recsim.agents.cluster_bandit_agent.ClusterBanditAgent - + +
View source -## Class `ClusterBanditAgent` - - An agent that recommends items with the highest UCBs of topic affinities. Inherits From: [`AbstractClickBanditLayer`](../../../recsim/agents/layers/abstract_click_bandit/AbstractClickBanditLayer.md) + + This agent assumes no knowledge of user's affinity for each topic but receives @@ -35,35 +39,73 @@ observations of user's past responses for each topic. When creating a slate, it utilizes a bandit algorithm to pick the best topics. Within the same best topic, we pick documents with the best document quality scores. -

__init__

- -```python -__init__( - *args, - **kwargs -) -``` - -Initializes a new bandit agent for clustered arm exploration. - -#### Args: - -* `observation_space`: Instance of a gym space corresponding to the - observation format. -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `alg_ctor`: A class of an MABAlgorithm for exploration, default to - UCB1. -* `ci_scaling`: A floating number specifying the scaling of confidence - bound. -* `random_seed`: An integer for random seed. -* `**kwargs`: currently unused arguments. + + + + + + + + + + + + + + + + + + + + + + + + + +
+`observation_space` + +Instance of a gym space corresponding to the +observation format. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`alg_ctor` + +A class of an MABAlgorithm for exploration, default to UCB1. +
+`ci_scaling` + +A floating number specifying the scaling of confidence bound. +
+`random_seed` + +An integer for random seed. +
+`**kwargs` + +currently unused arguments. +
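A hedged construction sketch matching the arguments above, where `env` stands for a RecSim gym environment (e.g. one built by `interest_exploration.create_environment`); the `ci_scaling` and `random_seed` values are arbitrary examples:

```python
from recsim.agents.bandits.algorithms import UCB1
from recsim.agents.cluster_bandit_agent import ClusterBanditAgent

agent = ClusterBanditAgent(env.observation_space, env.action_space,
                           alg_ctor=UCB1, ci_scaling=1.0, random_seed=0)
slate = agent.begin_episode(env.reset())
```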
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -72,104 +114,191 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns +an empty dictionary. +

end_episode

View source -```python -end_episode( - reward, - observation +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: Unused. -* `observation`: A dictionary that includes the most recent - observations and should have the following fields: - - user: A dictionary representing user's observed state. Assumes - observation['user']['sufficient_statics'] is a dictionary containing - base agent impression counts and base agent click counts. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +Unused. +
+`observation` + +A dictionary that includes the most recent observations and +should have the following fields: +- user: A dictionary representing user's observed state. Assumes +observation['user']['sufficient_statics'] is a dictionary containing +base agent impression counts and base agent click counts. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
diff --git a/docs/api_docs/python/recsim/agents/cluster_bandit_agent/GreedyClusterAgent.md b/docs/api_docs/python/recsim/agents/cluster_bandit_agent/GreedyClusterAgent.md index 25709c0..24a99e8 100644 --- a/docs/api_docs/python/recsim/agents/cluster_bandit_agent/GreedyClusterAgent.md +++ b/docs/api_docs/python/recsim/agents/cluster_bandit_agent/GreedyClusterAgent.md @@ -1,7 +1,6 @@
- @@ -12,53 +11,66 @@ # recsim.agents.cluster_bandit_agent.GreedyClusterAgent - + +
View source -## Class `GreedyClusterAgent` - - Simple agent sorting all documents of a topic according to quality. Inherits From: [`AbstractEpisodicRecommenderAgent`](../../../recsim/agent/AbstractEpisodicRecommenderAgent.md) - - - -

__init__

- -View source - -``` python -__init__( - observation_space, - action_space, - cluster_id, - **kwargs + -#### Args: - - -* `action_space`: A gym.spaces object that specifies the format of actions. -* `summary_writer`: A Tensorflow summary writer to pass to the agent - for in-agent training statistics in Tensorboard. + + + + + + + + + + + + + + +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`summary_writer` + +A Tensorflow summary writer to pass to the agent +for in-agent training statistics in Tensorboard. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -66,137 +78,246 @@ Returns boolean indicating whether this agent serves multiple users. View source -``` python -begin_episode(observation=None) -``` + Returns the agent's first action for this episode. + -#### Args: - - -* `observation`: numpy array, the environment's initial observation. - - -#### Returns: + + + + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
-* `slate`: An integer array of size _slate_size, where each element is an - index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

bundle_and_checkpoint

View source -``` python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
-#### Args: - - -* `checkpoint_dir`: A string that represents the path to the checkpoint and is - used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version and is - used when restoring replay buffer. - - -#### Returns: + + + + + + + +
Returns
A dictionary containing additional Python objects to be checkpointed by - the experiment. Each key is a string for the object name and the value - is actual object. If the checkpoint directory does not exist, returns - empty dictionary. +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns +an empty dictionary. +

end_episode

View source -``` python -end_episode( - reward, - observation=None + Signals the end of the episode to the agent. - -#### Args: - - -* `reward`: An float that is the last reward from the environment. -* `observation`: numpy array that represents the last observation of the - episode. + + + + + + + + + + + + + +
Args
+`reward` + +A float that is the last reward from the environment. +
+`observation` + +numpy array that represents the last observation of the +episode. +

step

View source -``` python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - - -* `reward`: The reward received from the agent's most recent action as a - float. -* `observation`: A dictionary that includes the most recent observations. - - -#### Returns: - + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is an - index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -``` python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Args: - - -* `checkpoint_dir`: A string that represents the path to the checkpoint and is - used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version and is - used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by the - agent. Each key is an object name and the value is the actual object. - - -#### Returns: + + + + + + + - - - +
Returns
bool, True if unbundling was successful. +
diff --git a/docs/api_docs/python/recsim/agents/dopamine.md b/docs/api_docs/python/recsim/agents/dopamine.md index 3f1e8c8..5ab93f0 100644 --- a/docs/api_docs/python/recsim/agents/dopamine.md +++ b/docs/api_docs/python/recsim/agents/dopamine.md @@ -5,7 +5,10 @@ # Module: recsim.agents.dopamine + + +
View diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent.md index 979f4d4..76cb45a 100644 --- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent.md +++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent.md @@ -5,7 +5,10 @@ # Module: recsim.agents.dopamine.dqn_agent + + +
View diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNAgentRecSim.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNAgentRecSim.md index 331af58..5e00f7e 100644 --- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNAgentRecSim.md +++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNAgentRecSim.md @@ -11,110 +11,259 @@ # recsim.agents.dopamine.dqn_agent.DQNAgentRecSim - + +
View source -## Class `DQNAgentRecSim` - - RecSim-specific Dopamine DQN agent that converts the observation space. - - -

__init__

- -View -source - -```python -__init__( - sess, - observation_space, - num_actions, - stack_size, - optimizer_name, - eval_mode, + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`sess` + +`tf.compat.v1.Session`, for executing ops. +
+`num_actions` + +int, number of actions the agent can take at any state. +
+`observation_shape` + +tuple of ints describing the observation shape. +
+`observation_dtype` + +tf.DType, specifies the type of the observations. Note +that if your inputs are continuous, you should set this to tf.float32. +
+`stack_size` + +int, number of frames to use in state stack. +
+`network` + +tf.Keras.Model, expecting 2 parameters: num_actions, +network_type. A call to this object will return an instantiation of the +network provided. The network returned can be run with different inputs +to create different outputs. See +dopamine.discrete_domains.atari_lib.NatureDQNNetwork as an example. +
+`gamma` + +float, discount factor with the usual RL meaning. +
+`update_horizon` + +int, horizon at which updates are performed, the 'n' in +n-step update. +
+`min_replay_history` + +int, number of transitions that should be experienced +before the agent begins training its value function. +
+`update_period` + +int, period between DQN updates. +
+`target_update_period` + +int, update period for the target network. +
+`epsilon_fn` + +function expecting 4 parameters: +(decay_period, step, warmup_steps, epsilon). This function should return +the epsilon value used for exploration during training. +
+`epsilon_train` + +float, the value to which the agent's epsilon is eventually +decayed during training. +
+`epsilon_eval` + +float, epsilon used when evaluating the agent. +
+`epsilon_decay_period` + +int, length of the epsilon decay schedule. +
+`tf_device` + +str, Tensorflow device on which the agent's graph is executed. +
+`eval_mode` + +bool, True for evaluation and False for training. +
+`use_staging` + +bool, when True use a staging area to prefetch the next +training batch, speeding training up by about 30%. +
+`max_tf_checkpoints_to_keep` + +int, the number of TensorFlow checkpoints to +keep. +
+`optimizer` + +`tf.compat.v1.train.Optimizer`, for training the value +function. +
+`summary_writer` + +SummaryWriter object for outputting training statistics. +Summary writing disabled if set to None. +
+`summary_writing_frequency` + +int, frequency with which summaries will be +written. Lower values will result in slower training. +
+`allow_partial_reload` + +bool, whether we allow reloading a partial agent +(for instance, only the network parameters). +
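To make the `epsilon_fn` contract concrete, here is a schedule with the documented `(decay_period, step, warmup_steps, epsilon)` signature, written to mirror Dopamine's default linear decay (a sketch, not a claim about this class's default):

```python
def linearly_decaying_epsilon(decay_period, step, warmup_steps, epsilon):
  # Pure exploration for warmup_steps, then anneal linearly from 1.0 down
  # to `epsilon` over decay_period steps, and hold at `epsilon` after that.
  steps_left = decay_period + warmup_steps - step
  bonus = (1.0 - epsilon) * steps_left / decay_period
  bonus = max(0.0, min(bonus, 1.0 - epsilon))
  return epsilon + bonus
```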
## Methods

begin_episode

-```python -begin_episode(observation) -``` + Returns the agent's first action for this episode. -#### Args: + -* `observation`: numpy array, the environment's initial observation. + + + -#### Returns: + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
+ + + + + + + + + +
Returns
int, the selected action. +

bundle_and_checkpoint

-```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. @@ -122,66 +271,129 @@ This is used for checkpointing. It will return a dictionary containing all non-TensorFlow objects (to be saved into a file by the caller), and it saves all TensorFlow objects into a checkpoint file. -#### Args: - -* `checkpoint_dir`: str, directory where TensorFlow objects will be - saved. -* `iteration_number`: int, iteration number to use for naming the - checkpoint file. + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +str, directory where TensorFlow objects will be saved. +
+`iteration_number` + +int, iteration number to use for naming the checkpoint +file. +
-#### Returns: + + + + + + + + +
Returns
A dict containing additional Python objects to be checkpointed by the experiment. If the checkpoint directory does not exist, returns None. +

end_episode

-```python -end_episode(reward) -``` + Signals the end of the episode to the agent. We store the observation of the current time step, which is the last observation of the episode. -#### Args: + + + + + -* `reward`: float, the last reward from the environment. + + + + +
Args
+`reward` + +float, the last reward from the environment. +

step

-```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: float, the reward received from the agent's most recent - action. -* `observation`: numpy array, the most recent observation. + + + + + + + + + + + + + +
Args
+`reward` + +float, the reward received from the agent's most recent action. +
+`observation` + +numpy array, the most recent observation. +
-#### Returns: + + + + + + + + +
Returns
int, the selected action. +

unbundle

-```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dictionary + Restores the agent from a checkpoint. @@ -189,14 +401,47 @@ Restores the agent's Python objects to those specified in bundle_dictionary, and restores the TensorFlow objects to those specified in the checkpoint_dir. If the checkpoint_dir does not exist, will not reset the agent's state. -#### Args: - -* `checkpoint_dir`: str, path to the checkpoint saved by tf.Save. -* `iteration_number`: int, checkpoint version, used when restoring the - replay buffer. -* `bundle_dictionary`: dict, containing additional Python objects owned - by the agent. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +str, path to the checkpoint saved by tf.Save. +
+`iteration_number` + +int, checkpoint version, used when restoring the replay +buffer. +
+`bundle_dictionary` + +dict, containing additional Python objects owned by +the agent. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNNetworkType.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNNetworkType.md index 35487a2..38213ed 100644 --- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNNetworkType.md +++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/DQNNetworkType.md @@ -1,46 +1,38 @@
-
# recsim.agents.dopamine.dqn_agent.DQNNetworkType - + +
View source -## Class `DQNNetworkType` - - dqn_network(q_values,) - - -

__new__

- -```python -@staticmethod -__new__( - _cls, + + + + + + + + +
`q_values` +
diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ObservationAdapter.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ObservationAdapter.md index 02f62d3..5cb981e 100644 --- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ObservationAdapter.md +++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ObservationAdapter.md @@ -1,44 +1,45 @@
-
# recsim.agents.dopamine.dqn_agent.ObservationAdapter - + +
View source -## Class `ObservationAdapter` - - An adapter to convert between user/doc observation and images. - - -

__init__

- -```python -__init__( - *args, - **kwargs + -Initialize self. See help(type(self)) for accurate signature. - -## Properties + + -

output_observation_space

+ + + + + + + +
+`output_observation_space` + The output observation space of the adapter. +
## Methods @@ -47,8 +48,10 @@ The output observation space of the adapter. View source -```python -encode(observation) -``` + Encode user observation and document observations to an image. diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md index 3a4c25a..bddbfef 100644 --- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md +++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md @@ -1,55 +1,65 @@
- - -
# recsim.agents.dopamine.dqn_agent.ResponseAdapter - + +
View source -## Class `ResponseAdapter` - - Custom flattening of responses to accommodate dopamine replay buffer. - - -

__init__

- -View -source + -```python -__init__(input_response_space) -``` + -Init function for ResponseAdapter. + + + + + + + + + + +
+`input_response_space` + +this is assumed to be an instance of +gym.spaces.Tuple; each element of the tuple has to be an instance +of gym.spaces.Dict consisting of feature_name: 0-d gym.spaces.Box +(single float) key-value pairs. +</td> </tr>
-#### Args: + -* `input_response_space`: this is assumed to be an instance of - gym.spaces.Tuple; each element of the tuple is has to be an instance of - gym.spaces.Dict consisting of feature_name: 0-d gym.spaces.Box (single - float) key-value pairs. + + + -## Properties + + +
`response_dtype` -

response_dtype

+
`response_names` -

response_names

+
`response_shape` -

response_shape

+
## Methods @@ -58,6 +68,8 @@ Init function for ResponseAdapter. View source -```python -encode(responses) -``` + diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md index 8703e62..00a59e9 100644 --- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md +++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md @@ -5,22 +5,19 @@ # recsim.agents.dopamine.dqn_agent.recsim_dqn_network - + +
View source - - -```python -recsim.agents.dopamine.dqn_agent.recsim_dqn_network( - user, - doc, - scope + diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md index b988380..592c297 100644 --- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md +++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md @@ -5,18 +5,19 @@ # recsim.agents.dopamine.dqn_agent.wrapped_replay_buffer - + +
View source - - -```python -recsim.agents.dopamine.dqn_agent.wrapped_replay_buffer(**kwargs) -``` + diff --git a/docs/api_docs/python/recsim/agents/full_slate_q_agent.md b/docs/api_docs/python/recsim/agents/full_slate_q_agent.md index ecbfd34..ea7a1f8 100644 --- a/docs/api_docs/python/recsim/agents/full_slate_q_agent.md +++ b/docs/api_docs/python/recsim/agents/full_slate_q_agent.md @@ -5,7 +5,10 @@ # Module: recsim.agents.full_slate_q_agent + + +
View diff --git a/docs/api_docs/python/recsim/agents/full_slate_q_agent/FullSlateQAgent.md b/docs/api_docs/python/recsim/agents/full_slate_q_agent/FullSlateQAgent.md index 2c70a3e..ad17479 100644 --- a/docs/api_docs/python/recsim/agents/full_slate_q_agent/FullSlateQAgent.md +++ b/docs/api_docs/python/recsim/agents/full_slate_q_agent/FullSlateQAgent.md @@ -1,7 +1,6 @@
- @@ -12,56 +11,100 @@ # recsim.agents.full_slate_q_agent.FullSlateQAgent - + +
View source -## Class `FullSlateQAgent` - - A recommender agent that implements full slate Q-learning based on a DQN agent. Inherits From: [`DQNAgentRecSim`](../../../recsim/agents/dopamine/dqn_agent/DQNAgentRecSim.md), [`AbstractEpisodicRecommenderAgent`](../../../recsim/agent/AbstractEpisodicRecommenderAgent.md) - - -This is a standard, nondecomposed Q-learning method that treats each slate -atomically (i.e., holistically) as a single action. -<h2 id="__init__"><code>__init__</code></h2>

__init__

- -```python -__init__( - *args, + -Initializes a FullSlateQAgent. + -#### Args: +This is a standard, nondecomposed Q-learning method that treats each slate +atomically (i.e., holistically) as a single action. -* `sess`: a Tensorflow session. -* `observation_space`: A gym.spaces object that specifies the format of - observations. -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `optimizer_name`: The name of the optimizer. -* `eval_mode`: A bool for whether the agent is in training or - evaluation mode. -* `**kwargs`: Keyword arguments to the DQNAgent. + + + + + + + + + + + + + + + + + + + + + + + + + +
+`sess` + +a Tensorflow session. +
+`observation_space` + +A gym.spaces object that specifies the format of +observations. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`optimizer_name` + +The name of the optimizer. +
+`eval_mode` + +A bool for whether the agent is in training or evaluation mode. +
+`**kwargs` + +Keyword arguments to the DQNAgent. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -70,29 +113,51 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation) -``` + Returns the agent's first action for this episode. -#### Args: + + + + + -* `observation`: numpy array, the environment's initial observation. + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
-#### Returns: + -An integer array of size _slate_size, the selected slated, each element of which -is an index in the list of doc_obs. + + + + + + + +
Returns
+An integer array of size _slate_size, the selected slate, each +element of which is an index in the list of doc_obs. +</td> </tr> </table>

bundle_and_checkpoint

-```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. @@ -100,79 +165,147 @@ This is used for checkpointing. It will return a dictionary containing all non-TensorFlow objects (to be saved into a file by the caller), and it saves all TensorFlow objects into a checkpoint file. -#### Args: - -* `checkpoint_dir`: str, directory where TensorFlow objects will be - saved. -* `iteration_number`: int, iteration number to use for naming the - checkpoint file. + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +str, directory where TensorFlow objects will be saved. +
+`iteration_number` + +int, iteration number to use for naming the checkpoint +file. +
-#### Returns: + + + + + + + + +
Returns
A dict containing additional Python objects to be checkpointed by the experiment. If the checkpoint directory does not exist, returns None. +

end_episode

View source -```python -end_episode( - reward, - observation + Signals the end of the episode to the agent. We store the observation of the current time step, which is the last observation of the episode. -#### Args: - -* `reward`: float, the last reward from the environment. -* `observation`: numpy array, the environment's initial observation. + + + + + + + + + + + + + +
Args
+`reward` + +float, the last reward from the environment. +
+`observation` + +numpy array, the last observation of the episode. +</td> </tr> </table>

step

View source -```python -step( - reward, - observation + Receives observations of environment and returns a slate. -#### Args: - -* `reward`: A double representing the overall reward to the recommended - slate. -* `observation`: A dictionary that stores all the observations - including: - - user: A list of floats representing the user's observed state - - doc: A list of observations of document features - - response: A vector valued response signal that represent user's response - to each document - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +A double representing the overall reward to the recommended slate. +
+`observation` + +A dictionary that stores all the observations including: +- user: A list of floats representing the user's observed state +- doc: A list of observations of document features +- response: A vector-valued response signal that represents the user's +response to each document +</td> </tr> </table>
-* `slate`: An integer array of size _slate_size, where each element is - an index in the list of document observvations. + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index in the list of document observations. +</td> </tr> </table>

unbundle

-```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dictionary + Restores the agent from a checkpoint. @@ -180,14 +313,47 @@ Restores the agent's Python objects to those specified in bundle_dictionary, and restores the TensorFlow objects to those specified in the checkpoint_dir. If the checkpoint_dir does not exist, will not reset the agent's state. -#### Args: - -* `checkpoint_dir`: str, path to the checkpoint saved by tf.Save. -* `iteration_number`: int, checkpoint version, used when restoring the - replay buffer. -* `bundle_dictionary`: dict, containing additional Python objects owned - by the agent. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +str, path to the checkpoint saved by tf.Save. +
+`iteration_number` + +int, checkpoint version, used when restoring the replay +buffer. +
+`bundle_dictionary` + +dict, containing additional Python objects owned by +the agent. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
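Because the agent treats each ordered slate atomically, its effective action set is every slate-size permutation of the candidate documents. A small illustration of how quickly that set grows (the numbers are made up for the example):

```python
from itertools import permutations

# Full-slate Q-learning treats every ordered slate as one atomic action,
# so the action set is every slate_size-permutation of the candidates.
num_candidates, slate_size = 5, 2
atomic_actions = list(permutations(range(num_candidates), slate_size))
print(len(atomic_actions))  # 20 ordered slates, i.e. 20 Q-values per state
```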
diff --git a/docs/api_docs/python/recsim/agents/greedy_pctr_agent.md b/docs/api_docs/python/recsim/agents/greedy_pctr_agent.md index d374dc3..171a929 100644 --- a/docs/api_docs/python/recsim/agents/greedy_pctr_agent.md +++ b/docs/api_docs/python/recsim/agents/greedy_pctr_agent.md @@ -5,7 +5,10 @@ # Module: recsim.agents.greedy_pctr_agent + + +
View diff --git a/docs/api_docs/python/recsim/agents/greedy_pctr_agent/GreedyPCTRAgent.md b/docs/api_docs/python/recsim/agents/greedy_pctr_agent/GreedyPCTRAgent.md index 3d0990a..a10b861 100644 --- a/docs/api_docs/python/recsim/agents/greedy_pctr_agent/GreedyPCTRAgent.md +++ b/docs/api_docs/python/recsim/agents/greedy_pctr_agent/GreedyPCTRAgent.md @@ -1,7 +1,6 @@
- @@ -13,22 +12,27 @@ # recsim.agents.greedy_pctr_agent.GreedyPCTRAgent - + +
View source -## Class `GreedyPCTRAgent` - - An agent that recommends slates with the highest pCTR items. Inherits From: [`AbstractEpisodicRecommenderAgent`](../../../recsim/agent/AbstractEpisodicRecommenderAgent.md) + + This agent assumes knowledge of the true underlying choice model. Note that this @@ -36,36 +40,52 @@ implicitly means it receives observations of the true user and document states. This agent myopically creates slates with items that have the highest probability of being clicked under the given choice model. -

__init__

- -View -source - -```python -__init__( - action_space, - belief_state, - choice_model=cm.MultinomialLogitChoiceModel({'no_click_mass': 5}) -) -``` - -Initializes a new greedy pCTR agent. - -#### Args: - -* `action_space`: A gym.spaces object that specifies the format of - actions -* `belief_state`: An instantiation of AbstractUserState assumed by the - agent -* `choice_model`: An instantiation of AbstractChoiceModel assumed by - the agent Default to a multinomial logit choice model with - no_click_mass = 5. + + + + + + + + + + + + + + + + +
+`action_space` + +A gym.spaces object that specifies the format of actions +
+`belief_state` + +An instantiation of AbstractUserState assumed by the agent +
+`choice_model` + +An instantiation of AbstractChoiceModel assumed by the agent. +Defaults to a multinomial logit choice model with no_click_mass = 5. +</td> </tr> </table>
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -74,142 +94,298 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` + Returns the agent's first action for this episode. -#### Args: + -* `observation`: numpy array, the environment's initial observation. + + + -#### Returns: + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
-* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns an +empty dictionary. +</td> </tr> </table>

end_episode

View source -```python -end_episode( - reward, - observation=None + Signals the end of the episode to the agent. -#### Args: - -* `reward`: An float that is the last reward from the environment. -* `observation`: numpy array that represents the last observation of - the episode. + + + + + + + + + + + + + +
Args
+`reward` + +A float that is the last reward from the environment. +</td> </tr>
+`observation` + +numpy array that represents the last observation of the +episode. +

findBestDocuments

View source -```python -findBestDocuments(scores) -``` + Returns the indices of the highest scores in sorted order. -#### Args: + -* `scores`: A list of floats representing unnormalized document scores + + + -#### Returns: + + + + +
Args
+`scores` + +A list of floats representing unnormalized document scores +
-* `sorted_indices`: A list of integers indexing the highest scores, in - sorted order + + + + + + + + + + +
Returns
+`sorted_indices` + +A list of integers indexing the highest scores, in sorted +order +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: Unused. -* `observation`: A dictionary that includes the most recent - observations and should have the following fields: - - user: A list of floats representing the user's observed state - - doc: A list of observations of document features - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +Unused. +
+`observation` + +A dictionary that includes the most recent observations and +should have the following fields: +- user: A list of floats representing the user's observed state +- doc: A list of observations of document features +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
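The greedy behaviour documented above amounts to scoring every candidate under the assumed choice model and filling the slate with the highest-pCTR items. A hedged sketch of that computation follows; the softmax-with-no-click-mass scorer is an illustrative stand-in for the agent's choice model, not its exact implementation:

```python
import numpy as np

def greedy_slate(scores, slate_size, no_click_mass=5.0):
    # pCTR under a multinomial-logit-style model with a no-click alternative.
    scores = np.asarray(scores, dtype=float)
    exp_scores = np.exp(scores)
    pctr = exp_scores / (exp_scores.sum() + np.exp(no_click_mass))
    # Indices of the highest-pCTR documents, best first.
    return np.argsort(-pctr)[:slate_size]

print(greedy_slate([1.2, 3.4, 0.5, 2.8], slate_size=2))  # -> [1 3]
```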
diff --git a/docs/api_docs/python/recsim/agents/layers.md b/docs/api_docs/python/recsim/agents/layers.md index 049ccef..8a7696a 100644 --- a/docs/api_docs/python/recsim/agents/layers.md +++ b/docs/api_docs/python/recsim/agents/layers.md @@ -5,7 +5,10 @@ # Module: recsim.agents.layers + + +
View diff --git a/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit.md b/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit.md index b187052..6d6210e 100644 --- a/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit.md +++ b/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit.md @@ -5,8 +5,10 @@ # Module: recsim.agents.layers.abstract_click_bandit + +
View source diff --git a/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit/AbstractClickBanditLayer.md b/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit/AbstractClickBanditLayer.md index ef5b859..584e51a 100644 --- a/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit/AbstractClickBanditLayer.md +++ b/docs/api_docs/python/recsim/agents/layers/abstract_click_bandit/AbstractClickBanditLayer.md @@ -1,7 +1,6 @@
- @@ -12,22 +11,28 @@ # recsim.agents.layers.abstract_click_bandit.AbstractClickBanditLayer - + +
View source -## Class `AbstractClickBanditLayer` - - A hierarchical bandit layer that treats a set of base agents as arms. Inherits From: [`AbstractHierarchicalAgentLayer`](../../../../recsim/agent/AbstractHierarchicalAgentLayer.md) + + This layer consumes a list of base agents with a priori unknown mean payoffs @@ -40,36 +45,81 @@ confidence bound as index, the AbstractClickBandit will put the partial slate of the highest-UCB base agent in first place, then the second, until the slate is complete. -<h2 id="__init__"><code>__init__</code></h2>

__init__

- -``` python -__init__( - *args, - **kwargs -) -``` - -Initializes a new bandit agent for clustered arm exploration. - - -#### Args: - - -* `observation_space`: Instance of a gym space corresponding to the - observation format. -* `action_space`: A gym.spaces object that specifies the format of actions. -* `arm_base_agent_ctors`: a list of agent constructors, each agent corresponds - to a bandit arm. -* `alg_ctor`: A class of an MABAlgorithm for exploration, default to UCB1. -* `ci_scaling`: A floating number specifying the scaling of confidence bound. -* `random_seed`: An integer for random seed. -* `**kwargs`: arguments for base agents. + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`observation_space` + +Instance of a gym space corresponding to the +observation format. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`arm_base_agent_ctors` + +a list of agent constructors; each agent corresponds +to a bandit arm. +</td> </tr>
+`alg_ctor` + +A class of an MABAlgorithm for exploration, default to UCB1. +
+`ci_scaling` + +A floating number specifying the scaling of confidence bound. +
+`random_seed` + +An integer for random seed. +
+`**kwargs` + +arguments for base agents. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -77,120 +127,187 @@ Returns boolean indicating whether this agent serves multiple users. View source -``` python -begin_episode(observation=None) -``` - - - +

bundle_and_checkpoint

View source -``` python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-#### Args: - - -* `checkpoint_dir`: A string for the directory where objects will be saved. -* `iteration_number`: An integer of iteration number to use for naming the - checkpoint file. - - -#### Returns: + + + + + + + +
Returns
A dictionary containing additional Python objects to be checkpointed by - the experiment. Each key is a string for the object name and the value - is actual object. If the checkpoint directory does not exist, returns - empty dictionary. +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns an +empty dictionary. +</td> </tr> </table>

end_episode

View source -``` python -end_episode( - reward, - observation +

step

View source -``` python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - - -* `reward`: Unused. -* `observation`: A dictionary that includes the most recent observations and - should have the following fields: - - user: A dictionary representing user's observed state. Assumes - observation['user']['sufficient_statics'] is a dictionary containing - base agent impression counts and base agent click counts. - - -#### Returns: - + + + + + + + + + + + + + +
Args
+`reward` + +Unused. +
+`observation` + +A dictionary that includes the most recent observations and +should have the following fields: +- user: A dictionary representing the user's observed state. Assumes +observation['user']['sufficient_statics'] is a dictionary containing +base agent impression counts and base agent click counts. +</td> </tr> </table>
-* `slate`: An integer array of size _slate_size, where each element is an - index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -``` python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Args: - - -* `checkpoint_dir`: A string that represents the path to the checkpoint saved - by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version and is - used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by the - agent. Each key is an object name and the value is the actual object. - - -#### Returns: + + + + + + + - - - +
Returns
bool, True if unbundling was successful. +
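The ranking rule described above (order base agents by an upper confidence bound on their click payoff) can be sketched as follows; the UCB1-style index and the example counts are illustrative assumptions, not the layer's exact arithmetic:

```python
import numpy as np

def ucb1_indices(clicks, impressions, ci_scaling=1.0):
    # Empirical click-through mean plus a scaled confidence bonus per arm.
    clicks = np.asarray(clicks, dtype=float)
    impressions = np.asarray(impressions, dtype=float)
    t = max(impressions.sum(), 1.0)
    mean = clicks / np.maximum(impressions, 1.0)
    bonus = ci_scaling * np.sqrt(2.0 * np.log(t) / np.maximum(impressions, 1.0))
    return mean + bonus

# Base agents are ranked by index, highest first, to assemble the slate.
print(np.argsort(-ucb1_indices(clicks=[3, 1, 0], impressions=[10, 4, 1])))
```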
diff --git a/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics.md b/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics.md index c303c18..67fb790 100644 --- a/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics.md +++ b/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics.md @@ -5,7 +5,10 @@ # Module: recsim.agents.layers.cluster_click_statistics + + +
View diff --git a/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics/ClusterClickStatsLayer.md b/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics/ClusterClickStatsLayer.md index 408c2a9..d37f970 100644 --- a/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics/ClusterClickStatsLayer.md +++ b/docs/api_docs/python/recsim/agents/layers/cluster_click_statistics/ClusterClickStatsLayer.md @@ -1,8 +1,6 @@
- - @@ -13,22 +11,26 @@ # recsim.agents.layers.cluster_click_statistics.ClusterClickStatsLayer - + +
View source -## Class `ClusterClickStatsLayer` - - Track impressions and clicks on a per-cluster basis and pass them down to the agent. Inherits From: [`SufficientStatisticsLayer`](../../../../recsim/agents/layers/sufficient_statistics/SufficientStatisticsLayer.md) + + This module assumes each document belongs to a single cluster and we know the @@ -36,39 +38,56 @@ number of possible clusters. We increase the impression count for a cluster each time the agent recommends a document from that cluster. We also increase the click count for a cluster if the user responds with a click. -<h2 id="__init__"><code>__init__</code></h2>

__init__

- -View -source - -```python -__init__( - base_agent_ctor, - observation_space, - action_space, - **kwargs -) -``` - -Initializes a ClusterClickStatsLayer object. - -#### Args: - -* `base_agent_ctor`: a constructor for the base agent. -* `observation_space`: a gym.spaces object specifying the format of - observations. -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `**kwargs`: arguments to pass to the downstream agent at construction - time. + + + + + + + + + + + + + + + + + + + +
+`base_agent_ctor` + +a constructor for the base agent. +
+`observation_space` + +a gym.spaces object specifying the format of +observations. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`**kwargs` + +arguments to pass to the downstream agent at construction time. +
-## Properties + -

multi_user

+ + + -Returns boolean indicating whether this agent serves multiple users. + + +
`multi_user` Returns boolean indicating whether this agent +serves multiple users.
`observation_space` -

observation_space

+
## Methods @@ -77,102 +96,188 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns an +empty dictionary. +</td> </tr> </table>

end_episode

View source -```python -end_episode( - reward, - observation +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
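A minimal sketch of the per-cluster statistic this layer maintains; the inputs (`doc_clusters`, `clicked`) are illustrative, and the real layer derives them from the slate and the user response:

```python
import numpy as np

num_clusters = 3
impressions = np.zeros(num_clusters)
clicks = np.zeros(num_clusters)

doc_clusters = [0, 2, 2]        # cluster of each recommended document
clicked = [False, True, False]  # user's click response per document
for cluster, click in zip(doc_clusters, clicked):
    impressions[cluster] += 1   # impression for every recommended document
    if click:
        clicks[cluster] += 1    # click only when the user responded

print(impressions, clicks)  # [1. 0. 2.] [0. 0. 1.]
```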
diff --git a/docs/api_docs/python/recsim/agents/layers/fixed_length_history.md b/docs/api_docs/python/recsim/agents/layers/fixed_length_history.md index f59dfed..c32196f 100644 --- a/docs/api_docs/python/recsim/agents/layers/fixed_length_history.md +++ b/docs/api_docs/python/recsim/agents/layers/fixed_length_history.md @@ -5,7 +5,10 @@ # Module: recsim.agents.layers.fixed_length_history + + +
View diff --git a/docs/api_docs/python/recsim/agents/layers/fixed_length_history/FixedLengthHistoryLayer.md b/docs/api_docs/python/recsim/agents/layers/fixed_length_history/FixedLengthHistoryLayer.md index b74a665..908a990 100644 --- a/docs/api_docs/python/recsim/agents/layers/fixed_length_history/FixedLengthHistoryLayer.md +++ b/docs/api_docs/python/recsim/agents/layers/fixed_length_history/FixedLengthHistoryLayer.md @@ -1,8 +1,6 @@
- - @@ -13,22 +11,27 @@ # recsim.agents.layers.fixed_length_history.FixedLengthHistoryLayer - + +
View source -## Class `FixedLengthHistoryLayer` - - Creates a buffer of the last k rewards and observations. Inherits From: [`SufficientStatisticsLayer`](../../../../recsim/agents/layers/sufficient_statistics/SufficientStatisticsLayer.md) + + This module introduces sufficient statistics in the form of a buffer holding the @@ -39,51 +42,87 @@ are not enough observations to fill the buffer, so they will be filled with None. Each non-vacuous element of the tuple is an instance of (a subset of) observation_space. -

__init__

- -View -source - -```python -__init__( - base_agent_ctor, - observation_space, - action_space, - history_length, - remember_user=True, - remember_response=True, - remember_doc=False, - **kwargs -) -``` - -Initializes a FixedLengthHistoryLayer object. - -#### Args: - -* `base_agent_ctor`: a constructor for the base agent. -* `observation_space`: a gym.spaces object specifying the format of - observations. -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `history_length`: positive integer number of observations to - remember. -* `remember_user`: boolean, indicates whether to track - observation_space[\'user\']. -* `remember_response`: boolean, indicates whether to track - observation_space[\'response\']. -* `remember_doc`: boolean, indicates whether to track - observation_space[\'doc\']. -* `**kwargs`: arguments to pass to the downstream agent at construction - time. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`base_agent_ctor` + +a constructor for the base agent. +
+`observation_space` + +a gym.spaces object specifying the format of +observations. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`history_length` + +positive integer number of observations to remember. +
+`remember_user` + +boolean, indicates whether to track +observation_space[\'user\']. +
+`remember_response` + +boolean, indicates whether to track +observation_space[\'response\']. +
+`remember_doc` + +boolean, indicates whether to track +observation_space[\'doc\']. +
+`**kwargs` + +arguments to pass to the downstream agent at construction time. +
-## Properties + -

multi_user

+ + + -Returns boolean indicating whether this agent serves multiple users. + + +
`multi_user` Returns boolean indicating whether this agent +serves multiple users.
`observation_space` -

observation_space

+
## Methods @@ -92,102 +131,188 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns an +empty dictionary. +</td> </tr> </table>

end_episode

View source -```python -end_episode( - reward, - observation +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
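The buffer semantics documented above (the last `history_length` observations, padded with None until enough steps have been seen) can be sketched with a deque; this illustrates the described behaviour and is not the layer's implementation:

```python
from collections import deque

history_length = 3
history = deque([None] * history_length, maxlen=history_length)

for observation in ({'user': 0.1}, {'user': 0.4}):
    history.append(observation)  # oldest entry falls out once the buffer is full

print(tuple(history))  # (None, {'user': 0.1}, {'user': 0.4})
```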
diff --git a/docs/api_docs/python/recsim/agents/layers/sufficient_statistics.md b/docs/api_docs/python/recsim/agents/layers/sufficient_statistics.md index 2290cb9..3ddbf09 100644 --- a/docs/api_docs/python/recsim/agents/layers/sufficient_statistics.md +++ b/docs/api_docs/python/recsim/agents/layers/sufficient_statistics.md @@ -5,7 +5,10 @@ # Module: recsim.agents.layers.sufficient_statistics + + +
View diff --git a/docs/api_docs/python/recsim/agents/layers/sufficient_statistics/SufficientStatisticsLayer.md b/docs/api_docs/python/recsim/agents/layers/sufficient_statistics/SufficientStatisticsLayer.md index c658c97..e109ac6 100644 --- a/docs/api_docs/python/recsim/agents/layers/sufficient_statistics/SufficientStatisticsLayer.md +++ b/docs/api_docs/python/recsim/agents/layers/sufficient_statistics/SufficientStatisticsLayer.md @@ -1,8 +1,6 @@
- - @@ -13,22 +11,27 @@ # recsim.agents.layers.sufficient_statistics.SufficientStatisticsLayer - + +
View source -## Class `SufficientStatisticsLayer` - - A module to log user responses on different clusters. Inherits From: [`AbstractHierarchicalAgentLayer`](../../../../recsim/agent/AbstractHierarchicalAgentLayer.md) + + This module assumes each document belongs to a single cluster and we know the @@ -36,42 +39,64 @@ number of possible clusters. We increase the impression count for a cluster each time the agent recommends a document from that cluster. We also increase the click count for a cluster if the user responds with a click. -<h2 id="__init__"><code>__init__</code></h2>

__init__

- -View -source - -```python -__init__( - base_agent_ctor, - observation_space, - action_space, - sufficient_statistics_space, - **kwargs -) -``` - -Initializes a UserClusterHistory object. - -#### Args: - -* `base_agent_ctor`: a constructor for the base agent. -* `observation_space`: a gym.spaces object specifying the format of - observations. -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `sufficient_statistics_space`: a gym.spaces object specifying the - format of the created sufficient statistics. -* `**kwargs`: arguments to pass to the downstream agent at construction - time. + + + + + + + + + + + + + + + + + + + + + + +
+`base_agent_ctor` + +a constructor for the base agent. +
+`observation_space` + +a gym.spaces object specifying the format of +observations. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`sufficient_statistics_space` + +a gym.spaces object specifying the format of +the created sufficient statistics. +
+`**kwargs` + +arguments to pass to the downstream agent at construction time. +
-## Properties + -

multi_user

+ + + -Returns boolean indicating whether this agent serves multiple users. + + +
`multi_user` Returns boolean indicating whether this agent +serves multiple users.
`observation_space` -

observation_space

+
## Methods @@ -80,102 +105,188 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns an +empty dictionary. +</td> </tr> </table>

end_episode

View source -```python -end_episode( - reward, - observation +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
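As a rough structural sketch, one can think of this layer as augmenting the base observation space with an extra sufficient-statistics entry before handing observations to the downstream agent; the helper name and the `'sufficient_statistics'` key are assumptions for the example, not the layer's actual internals:

```python
from gym import spaces

def augment_observation_space(observation_space, sufficient_statistics_space):
    # Copy the base Dict space and add a slot for the created statistics.
    augmented = dict(observation_space.spaces)
    augmented['sufficient_statistics'] = sufficient_statistics_space
    return spaces.Dict(augmented)

base = spaces.Dict({'user': spaces.Box(low=-1.0, high=1.0, shape=(4,))})
stats = spaces.Box(low=0.0, high=float('inf'), shape=(3,))
print(augment_observation_space(base, stats))
```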
diff --git a/docs/api_docs/python/recsim/agents/layers/temporal_aggregation.md b/docs/api_docs/python/recsim/agents/layers/temporal_aggregation.md index abcd284..da261bd 100644 --- a/docs/api_docs/python/recsim/agents/layers/temporal_aggregation.md +++ b/docs/api_docs/python/recsim/agents/layers/temporal_aggregation.md @@ -5,7 +5,10 @@ # Module: recsim.agents.layers.temporal_aggregation + + +
View diff --git a/docs/api_docs/python/recsim/agents/layers/temporal_aggregation/TemporalAggregationLayer.md b/docs/api_docs/python/recsim/agents/layers/temporal_aggregation/TemporalAggregationLayer.md index 460c2a8..9ca4ead 100644 --- a/docs/api_docs/python/recsim/agents/layers/temporal_aggregation/TemporalAggregationLayer.md +++ b/docs/api_docs/python/recsim/agents/layers/temporal_aggregation/TemporalAggregationLayer.md @@ -1,7 +1,6 @@
- @@ -12,22 +11,27 @@ # recsim.agents.layers.temporal_aggregation.TemporalAggregationLayer - + +
View source -## Class `TemporalAggregationLayer` - - Temporally aggregated reinforcement learning agent. Inherits From: [`AbstractHierarchicalAgentLayer`](../../../../recsim/agent/AbstractHierarchicalAgentLayer.md) + + A reinforcement learning agent that learns a temporally aggregated @@ -48,49 +52,92 @@ becomes non-Markovian. The two methods are not mutually exclusive and may be used in conjunction by specifying a non-unit aggregation_period and a non-zero switching_cost. -<h2 id="__init__"><code>__init__</code></h2>

__init__

- -View -source - -```python -__init__( - base_agent_ctor, - observation_space, - action_space, - gamma=0.0, - aggregation_period=1, - switching_cost=1.0, - document_comparison_fcn=None, - **kwargs -) -``` - -TemporallyAggregatedAgent init. - -#### Args: - -* `base_agent_ctor`: a constructor for the base agent. -* `observation_space`: a gym.spaces object specifying the format of - observations. -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `gamma`: geometric discounting factor between [0, 1) for the - event-level objective. -* `aggregation_period`: number of time steps to hold an action fixed. -* `switching_cost`: a non-negative penalty for switching an action. -* `document_comparison_fcn`: a function taking two document - observations and returning a Boolean value that indicates if they are - considered equivalent. This is useful for making decisions at a higher - abstraction level (e.g. comparing only document topics). If not provided, - this will default to direct observation equality. -* `**kwargs`: base_agent initialization args. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`base_agent_ctor` + +a constructor for the base agent. +
+`observation_space` + +a gym.spaces object specifying the format of +observations. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`gamma` + +geometric discounting factor between [0, 1) for the event-level +objective. +
+`aggregation_period` + +number of time steps to hold an action fixed. +
+`switching_cost` + +a non-negative penalty for switching an action. +
+`document_comparison_fcn` + +a function taking two document observations and +returning a Boolean value that indicates if they are considered +equivalent. This is useful for making decisions at a higher abstraction +level (e.g. comparing only document topics). If not provided, this will +default to direct observation equality. +
+`**kwargs` + +base_agent initialization args. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -99,110 +146,209 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns an +empty dictionary. +</td> </tr> </table>

end_episode

View source -```python -end_episode( - reward, - observation +

step

View source -```python -step( - reward, - observation + Preprocesses the reward and observation and calls base agent. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations and should have the following fields: - - user: A NumPy array representing user's observed state. Assumes it is a - concatenation of topic pull counts and topic click counts. - - doc: A NumPy array representing observations of document features. - Assumes it is a concatenation of one-hot encoding of topic_id and - document quality. - -#### Returns: - -* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs. + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations and +should have the following fields: +- user: A NumPy array representing user's observed state. Assumes it is +a concatenation of topic pull counts and topic click counts. +- doc: A NumPy array representing observations of document features. +Assumes it is a concatenation of one-hot encoding of topic_id and +document quality. +
-#### Raises: + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs. +
-* `RuntimeError`: if the agent has to hold a slate with given features - fixed for k steps but the documents needed to reconstruct that slate become - unavailable. + + + + + + + + + + +
Raises
+`RuntimeError` + +if the agent has to hold a slate with given features fixed +for k steps but the documents needed to reconstruct that slate +become unavailable. +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
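The two mechanisms documented above (re-deciding only every `aggregation_period` steps, and charging a `switching_cost` when the decision changes) can be sketched as follows; `base_policy` and the trace format are illustrative assumptions for the example:

```python
def temporally_aggregated_run(base_policy, steps, aggregation_period=3,
                              switching_cost=1.0):
    slate, total_penalty, trace = None, 0.0, []
    for t in range(steps):
        if t % aggregation_period == 0:          # only now may the action change
            new_slate = base_policy(t)
            if slate is not None and new_slate != slate:
                total_penalty += switching_cost  # penalize switching the action
            slate = new_slate
        trace.append(slate)                      # action held fixed otherwise
    return trace, total_penalty

print(temporally_aggregated_run(lambda t: (t // 4) % 2, steps=8))
# -> ([0, 0, 0, 0, 0, 0, 1, 1], 1.0)
```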
diff --git a/docs/api_docs/python/recsim/agents/random_agent.md b/docs/api_docs/python/recsim/agents/random_agent.md index 064738a..a8f0f8f 100644 --- a/docs/api_docs/python/recsim/agents/random_agent.md +++ b/docs/api_docs/python/recsim/agents/random_agent.md @@ -5,7 +5,10 @@ # Module: recsim.agents.random_agent + + +
View diff --git a/docs/api_docs/python/recsim/agents/random_agent/RandomAgent.md b/docs/api_docs/python/recsim/agents/random_agent/RandomAgent.md index 700ac72..8249fae 100644 --- a/docs/api_docs/python/recsim/agents/random_agent/RandomAgent.md +++ b/docs/api_docs/python/recsim/agents/random_agent/RandomAgent.md @@ -1,7 +1,6 @@
- @@ -12,50 +11,67 @@ # recsim.agents.random_agent.RandomAgent - + +
View source -## Class `RandomAgent` - - An agent that recommends a random slate of documents. Inherits From: [`AbstractEpisodicRecommenderAgent`](../../../recsim/agent/AbstractEpisodicRecommenderAgent.md) - - -

__init__

- -View -source - -```python -__init__( - action_space, - random_seed=0 + -Initializes AbstractEpisodicRecommenderAgent. - -#### Args: + -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `summary_writer`: A Tensorflow summary writer to pass to the agent - for in-agent training statistics in Tensorboard. + + + + + + + + + + + + + +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`summary_writer` + +A Tensorflow summary writer to pass to the agent +for in-agent training statistics in Tensorboard. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -64,121 +80,250 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` + Returns the agent's first action for this episode. -#### Args: + -* `observation`: numpy array, the environment's initial observation. + + + -#### Returns: + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
-* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns an +empty dictionary. +</td> </tr> </table>

end_episode

View source -```python -end_episode( - reward, - observation=None + Signals the end of the episode to the agent. -#### Args: - -* `reward`: An float that is the last reward from the environment. -* `observation`: numpy array that represents the last observation of - the episode. + + + + + + + + + + + + + +
Args
+`reward` + +A float that is the last reward from the environment. +</td> </tr>
+`observation` + +numpy array that represents the last observation of the +episode. +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: Unused. -* `observation`: A dictionary that includes the most recent - observation. Should include 'doc' field that includes observation of all - candidates. - -#### Returns: + + + + + + + + + + + + + +
Args
+`reward` + +Unused. +
+`observation` + +A dictionary that includes the most recent observation. +Should include a 'doc' field that contains observations of all candidates. +</td> </tr> </table>
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - and is used when we save TensorFlow objects by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
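A minimal sketch of the random-slate behaviour documented above: sample `slate_size` distinct indices into the candidate documents found under the observation's 'doc' field. The observation contents here are made up for the example:

```python
import numpy as np

rng = np.random.RandomState(0)  # fixed seed for a reproducible slate
observation = {'doc': {'doc%d' % i: [0.0] for i in range(10)}}
slate_size = 3

# Choose slate_size distinct candidate indices uniformly at random.
slate = rng.choice(len(observation['doc']), size=slate_size, replace=False)
print(slate)
```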
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent.md index 38d625c..f6284c9 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent.md @@ -5,7 +5,10 @@ # Module: recsim.agents.slate_decomp_q_agent + + +
View diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/SlateDecompQAgent.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/SlateDecompQAgent.md index 49936f1..feeccdd 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/SlateDecompQAgent.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/SlateDecompQAgent.md @@ -1,7 +1,6 @@
- @@ -12,56 +11,118 @@ # recsim.agents.slate_decomp_q_agent.SlateDecompQAgent - + +
View source -## Class `SlateDecompQAgent` - - A recommender agent that implements DQN using slate decomposition techniques. Inherits From: [`DQNAgentRecSim`](../../../recsim/agents/dopamine/dqn_agent/DQNAgentRecSim.md), [`AbstractEpisodicRecommenderAgent`](../../../recsim/agent/AbstractEpisodicRecommenderAgent.md) - - -

__init__

- -```python -__init__( - *args, - **kwargs + -#### Args: + -* `sess`: a Tensorflow session. -* `observation_space`: A gym.spaces object that specifies the format of - observations. -* `action_space`: A gym.spaces object that specifies the format of - actions. -* `optimizer_name`: The name of the optimizer. -* `select_slate_fn`: A function that selects the slate. -* `compute_target_fn`: A function that omputes the target q value. -* `stack_size`: The stack size for the replay buffer. -* `eval_mode`: A bool for whether the agent is in training or - evaluation mode. -* `**kwargs`: Keyword arguments to the DQNAgent. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`sess` + +a Tensorflow session. +
+`observation_space` + +A gym.spaces object that specifies the format of +observations. +
+`action_space` + +A gym.spaces object that specifies the format of actions. +
+`optimizer_name` + +The name of the optimizer. +
+`select_slate_fn` + +A function that selects the slate. +
+`compute_target_fn` + +A function that computes the target q value. +
+`stack_size` + +The stack size for the replay buffer. +
+`eval_mode` + +A bool for whether the agent is in training or evaluation mode. +
+`**kwargs` + +Keyword arguments to the DQNAgent. +
-## Properties + -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -70,29 +131,51 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation) -``` + Returns the agent's first action for this episode. -#### Args: + + + + + -* `observation`: numpy array, the environment's initial observation. + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
+ + -#### Returns: + + + + + + -An integer array of size _slate_size, the selected slated, each element of which -is an index in the list of doc_obs. +
Returns
+An integer array of size _slate_size, the selected slate, each +element of which is an index in the list of doc_obs. +

bundle_and_checkpoint

-```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. @@ -100,76 +183,143 @@ This is used for checkpointing. It will return a dictionary containing all non-TensorFlow objects (to be saved into a file by the caller), and it saves all TensorFlow objects into a checkpoint file. -#### Args: - -* `checkpoint_dir`: str, directory where TensorFlow objects will be - saved. -* `iteration_number`: int, iteration number to use for naming the - checkpoint file. + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +str, directory where TensorFlow objects will be saved. +
+`iteration_number` + +int, iteration number to use for naming the checkpoint +file. +
-#### Returns: + + + + + + + + +
Returns
A dict containing additional Python objects to be checkpointed by the experiment. If the checkpoint directory does not exist, returns None. +

end_episode

View source -```python -end_episode( - reward, - observation + Signals the end of the episode to the agent. We store the observation of the current time step, which is the last observation of the episode. -#### Args: - -* `reward`: float, the last reward from the environment. -* `observation`: numpy array, the environment's initial observation. + + + + + + + + + + + + + +
Args
+`reward` + +float, the last reward from the environment. +
+`observation` + +numpy array, the last observation of the episode. +

step

View source -```python -step( - reward, - observation + Records the transition and returns the agent's next action. It uses document-level user response instead of overall reward as the reward of the problem. -#### Args: - -* `reward`: unused. -* `observation`: a space.Dict that includes observation of the user - state observation, documents and user responses. + + + + +
Args
+`reward` + +unused. +
+`observation` + +a space.Dict that includes observations of the user state, +documents, and user responses. +
-#### Returns: + + + + + + + + +
Returns
Array, the selected action. +

unbundle

-```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dictionary + Restores the agent from a checkpoint. @@ -177,14 +327,47 @@ Restores the agent's Python objects to those specified in bundle_dictionary, and restores the TensorFlow objects to those specified in the checkpoint_dir. If the checkpoint_dir does not exist, will not reset the agent's state. -#### Args: - -* `checkpoint_dir`: str, path to the checkpoint saved by tf.Save. -* `iteration_number`: int, checkpoint version, used when restoring the - replay buffer. -* `bundle_dictionary`: dict, containing additional Python objects owned - by the agent. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +str, path to the checkpoint saved by tf.Save. +
+`iteration_number` + +int, checkpoint version, used when restoring the replay +buffer. +
+`bundle_dictionary` + +dict, containing additional Python objects owned by +the agent. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_probs_tf.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_probs_tf.md index 0defb2d..4510b39 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_probs_tf.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_probs_tf.md @@ -5,37 +5,68 @@ # recsim.agents.slate_decomp_q_agent.compute_probs_tf - + +
View source - Computes the selection probability and returns selected index. -```python -recsim.agents.slate_decomp_q_agent.compute_probs_tf( - slate, - scores_tf, - score_no_click_tf + This assumes scores are normalizable, i.e., scores cannot be negative. -#### Args: + + + + +
+`slate` + +a list of integers that represents the video slate. +
+`scores_tf` + +a float tensor that stores the scores of all documents. +
+`score_no_click_tf` + +a float tensor that represents the score for the action +of picking no document. +
-#### Returns: + -A float tensor that represents the probabilities of selecting each document in -the slate. + + + + + + + +
+A float tensor that represents the probabilities of selecting each document +in the slate. +
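For illustration, a NumPy sketch of the probabilities described above. The helper name `compute_probs_np` and the multinomial-proportional normalization are assumptions consistent with the surrounding docs, not the library's exact op:

```python
import numpy as np


def compute_probs_np(slate, scores, score_no_click):
  """Sketch: each slate document is selected with probability proportional
  to its (non-negative) score; the no-click action competes with mass
  score_no_click."""
  slate_scores = scores[np.asarray(slate)]
  return slate_scores / (slate_scores.sum() + score_no_click)


probs = compute_probs_np(
    slate=[0, 2], scores=np.array([3.0, 1.0, 2.0]), score_no_click=1.0)
# probs == [1/2, 1/3]; the remaining 1/6 is the no-click probability.
```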
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_greedy_q.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_greedy_q.md index 4fd726b..5c49e2c 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_greedy_q.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_greedy_q.md @@ -5,45 +5,90 @@ # recsim.agents.slate_decomp_q_agent.compute_target_greedy_q - + +
View source - Computes the optimal target Q value with the adaptive greedy algorithm. -```python -recsim.agents.slate_decomp_q_agent.compute_target_greedy_q( - reward, - gamma, - next_actions, - next_q_values, - next_states, - terminals + This algorithm corresponds to the method "GT" in Ie et al. https://arxiv.org/abs/1905.12767. -#### Args: + + + + +
+`reward` + +[batch_size] tensor, the immediate reward. +
+`gamma` + +float, discount factor with the usual RL meaning. +
+`next_actions` + +[batch_size, slate_size] tensor, the next slate. +
+`next_q_values` + +[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. +
+`next_states` + +[batch_size, 1 + num_of_documents] tensor, the features for the +user and the documents in the next step. +
+`terminals` + +[batch_size] tensor, indicating if this is a terminal step. +
-#### Returns: + + + + + + + + +
[batch_size] tensor, the target q values. +
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_optimal_q.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_optimal_q.md index c98c69d..ca87973 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_optimal_q.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_optimal_q.md @@ -5,45 +5,90 @@ # recsim.agents.slate_decomp_q_agent.compute_target_optimal_q - + +
View source - Builds an op used as a target for the Q-value. -```python -recsim.agents.slate_decomp_q_agent.compute_target_optimal_q( - reward, - gamma, - next_actions, - next_q_values, - next_states, - terminals + This algorithm corresponds to the method "OT" in Ie et al. https://arxiv.org/abs/1905.12767. -#### Args: + + + + +
+`reward` + +[batch_size] tensor, the immediate reward. +
+`gamma` + +float, discount factor with the usual RL meaning. +
+`next_actions` + +[batch_size, slate_size] tensor, the next slate. +
+`next_q_values` + +[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. +
+`next_states` + +[batch_size, 1 + num_of_documents] tensor, the features for the +user and the documents in the next step. +
+`terminals` + +[batch_size] tensor, indicating if this is a terminal step. +
-#### Returns: + + + + + + + + +
[batch_size] tensor, the target q values. +
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_sarsa.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_sarsa.md index 8ab69fc..61c5e4d 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_sarsa.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_sarsa.md @@ -5,42 +5,87 @@ # recsim.agents.slate_decomp_q_agent.compute_target_sarsa - + +
View source - Computes the SARSA target Q value. -```python -recsim.agents.slate_decomp_q_agent.compute_target_sarsa( - reward, - gamma, - next_actions, - next_q_values, - next_states, - terminals + -#### Args: + + + + + -* `reward`: [batch_size] tensor, the immediate reward. -* `gamma`: float, discount factor with the usual RL meaning. -* `next_actions`: [batch_size, slate_size] tensor, the next slate. -* `next_q_values`: [batch_size, num_of_documents] tensor, the q values - of the documents in the next step. -* `next_states`: [batch_size, 1 + num_of_documents] tensor, the - features for the user and the docuemnts in the next step. -* `terminals`: [batch_size] tensor, indicating if this is a terminal - step. + + + + + + + + + + + + + + + + + + + +
+`reward` + +[batch_size] tensor, the immediate reward. +
+`gamma` + +float, discount factor with the usual RL meaning. +
+`next_actions` + +[batch_size, slate_size] tensor, the next slate. +
+`next_q_values` + +[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. +
+`next_states` + +[batch_size, 1 + num_of_documents] tensor, the features for the +user and the documents in the next step. +
+`terminals` + +[batch_size] tensor, indicating if this is a terminal step. +
-#### Returns: + + + + + + + + +
[batch_size] tensor, the target q values. +
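All four `compute_target_*` functions share this signature. As a rough NumPy sketch of a SARSA-style target, assuming the next slate's choice probabilities weight the next-step Q-values (the library's TensorFlow op differs in detail):

```python
import numpy as np


def sarsa_target_np(reward, gamma, next_probs, next_q_values, terminals):
  """Sketch: target = r + gamma * E[Q(s', A')] on non-terminal steps.

  next_probs and next_q_values are [batch_size, slate_size] arrays holding
  the choice probabilities and Q-values of the documents on the next slate.
  """
  expected_next_q = (next_probs * next_q_values).sum(axis=1)
  return reward + gamma * expected_next_q * (1.0 - terminals.astype(float))


target = sarsa_target_np(
    reward=np.array([1.0, 0.0]),
    gamma=0.9,
    next_probs=np.array([[0.5, 0.5], [0.9, 0.1]]),
    next_q_values=np.array([[2.0, 0.0], [1.0, 3.0]]),
    terminals=np.array([False, True]))
# target == [1.9, 0.0]: the terminal transition contributes no future value.
```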
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_topk_q.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_topk_q.md index f3fb44b..703429b 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_topk_q.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/compute_target_topk_q.md @@ -5,45 +5,90 @@ # recsim.agents.slate_decomp_q_agent.compute_target_topk_q - + +
View source - Computes the optimal target Q value with the greedy algorithm. -```python -recsim.agents.slate_decomp_q_agent.compute_target_topk_q( - reward, - gamma, - next_actions, - next_q_values, - next_states, - terminals + This algorithm corresponds to the method "TT" in Ie et al. https://arxiv.org/abs/1905.12767. -#### Args: + + + + + -* `reward`: [batch_size] tensor, the immediate reward. -* `gamma`: float, discount factor with the usual RL meaning. -* `next_actions`: [batch_size, slate_size] tensor, the next slate. -* `next_q_values`: [batch_size, num_of_documents] tensor, the q values - of the documents in the next step. -* `next_states`: [batch_size, 1 + num_of_documents] tensor, the - features for the user and the docuemnts in the next step. -* `terminals`: [batch_size] tensor, indicating if this is a terminal - step. + + + + + + + + + + + + + + + + + + + +
+`reward` + +[batch_size] tensor, the immediate reward. +
+`gamma` + +float, discount factor with the usual RL meaning. +
+`next_actions` + +[batch_size, slate_size] tensor, the next slate. +
+`next_q_values` + +[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. +
+`next_states` + +[batch_size, 1 + num_of_documents] tensor, the features for the +user and the documents in the next step. +
+`terminals` + +[batch_size] tensor, indicating if this is a terminal step. +
-#### Returns: + + + + + + + + +
[batch_size] tensor, the target q values. +
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/create_agent.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/create_agent.md index 3cfc886..22d7ed1 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/create_agent.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/create_agent.md @@ -5,24 +5,21 @@ # recsim.agents.slate_decomp_q_agent.create_agent - + +
View source - - Creates a slate decomposition agent given agent name. -```python -recsim.agents.slate_decomp_q_agent.create_agent( - agent_name, - sess, - **kwargs + diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md index 919e770..62dc055 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md @@ -5,44 +5,84 @@ # recsim.agents.slate_decomp_q_agent.score_documents - + +
View source - Computes unnormalized scores given both user and document observations. -```python -recsim.agents.slate_decomp_q_agent.score_documents( - user_obs, - doc_obs, - no_click_mass=1.0, - is_mnl=False, - min_normalizer=-1.0 + Similar to score_documents_tf but works on NumPy objects. -#### Args: + + + + + -* `user_obs`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the candidate set. -* `no_click_mass`: a float indicating the mass given to a no click - option -* `is_mnl`: whether to use a multinomial logit model instead of a - multinomial proportional model. -* `min_normalizer`: A float (<= 0) used to offset the scores to be - positive when using multinomial proportional model. + + + + + + + + + + + + + + + + +
+`user_obs` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the candidate set. +
+`no_click_mass` + +a float indicating the mass given to a no click option. +
+`is_mnl` + +whether to use a multinomial logit model instead of a multinomial +proportional model. +
+`min_normalizer` + +A float (<= 0) used to offset the scores to be positive when +using multinomial proportional model. +
-#### Returns: + -A float array that stores unnormalzied scores of documents and a float number -that represents the score for the action of picking no document. + + + + + + + +
+A float array that stores unnormalized scores of documents and a float +number that represents the score for the action of picking no document. +
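A NumPy sketch consistent with the description above: inner-product scores, offset by `min_normalizer` under the proportional model or exponentiated under MNL. This illustrates the documented behavior rather than reproducing the library source:

```python
import numpy as np


def score_documents_np(user_obs, doc_obs, no_click_mass=1.0,
                       is_mnl=False, min_normalizer=-1.0):
  """Sketch: score each document by its inner product with the user vector."""
  scores = np.dot(np.asarray(doc_obs), np.asarray(user_obs))
  all_scores = np.append(scores, no_click_mass)
  if is_mnl:
    # Exponentiate for the multinomial logit model; subtracting the max
    # rescales all unnormalized scores uniformly, for numerical stability.
    all_scores = np.exp(all_scores - all_scores.max())
  else:
    # Offset so the multinomial proportional model sees non-negative scores.
    all_scores = all_scores - min_normalizer
  return all_scores[:-1], all_scores[-1]


doc_scores, no_click_score = score_documents_np(
    user_obs=np.array([1.0, 0.0]),
    doc_obs=np.array([[0.8, 0.2], [0.1, 0.9]]))
# doc_scores == [1.8, 1.1] after the -(-1.0) offset; no_click_score == 2.0.
```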
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents_tf.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents_tf.md index 2d8804e..2fbbd80 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents_tf.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents_tf.md @@ -5,26 +5,22 @@ # recsim.agents.slate_decomp_q_agent.score_documents_tf - + +
View source - Computes unnormalized scores given both user and document observations. -```python -recsim.agents.slate_decomp_q_agent.score_documents_tf( - user_obs, - doc_obs, - no_click_mass=1.0, - is_mnl=False, - min_normalizer=-1.0 + @@ -32,19 +28,63 @@ This implements both multinomial proportional model and multinomial logit model given some parameters. We also assume scores are based on inner products of user_obs and doc_obs. -#### Args: + + + + +
+`user_obs` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the candidate set. +
+`no_click_mass` + +a float indicating the mass given to a no click option. +
+`is_mnl` + +whether to use a multinomial logit model instead of a multinomial +proportional model. +
+`min_normalizer` + +A float (<= 0) used to offset the scores to be positive when +using multinomial proportional model. +
-#### Returns: + -A float tensor that stores unnormalzied scores of documents and a float tensor -that represents the score for the action of picking no document. + + + + + + + +
+A float tensor that stores unnormalized scores of documents and a float +tensor that represents the score for the action of picking no document. +
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_greedy.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_greedy.md index ec780f8..8b7679c 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_greedy.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_greedy.md @@ -5,38 +5,74 @@ # recsim.agents.slate_decomp_q_agent.select_slate_greedy - + +
View source - Selects the slate using the adaptive greedy algorithm. -```python -recsim.agents.slate_decomp_q_agent.select_slate_greedy( - slate_size, - s_no_click, - s, - q + This algorithm corresponds to the method "GS" in Ie et al. https://arxiv.org/abs/1905.12767. -#### Args: + + + + + -* `slate_size`: int, the size of the recommendation slate. -* `s_no_click`: float tensor, the score for not clicking any document. -* `s`: [num_of_documents] tensor, the scores for clicking documents. -* `q`: [num_of_documents] tensor, the predicted q values for documents. + + + + + + + + + + + + + +
+`slate_size` + +int, the size of the recommendation slate. +
+`s_no_click` + +float tensor, the score for not clicking any document. +
+`s` + +[num_of_documents] tensor, the scores for clicking documents. +
+`q` + +[num_of_documents] tensor, the predicted q values for documents. +
-#### Returns: + + + + + + + + +
[slate_size] tensor, the selected slate. +
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_optimal.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_optimal.md index 84992bf..59aa687 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_optimal.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_optimal.md @@ -5,38 +5,74 @@ # recsim.agents.slate_decomp_q_agent.select_slate_optimal - + +
View source - Selects the slate using exhaustive search. -```python -recsim.agents.slate_decomp_q_agent.select_slate_optimal( - slate_size, - s_no_click, - s, - q + This algorithm corresponds to the method "OS" in Ie et al. https://arxiv.org/abs/1905.12767. -#### Args: + + + + + -* `slate_size`: int, the size of the recommendation slate. -* `s_no_click`: float tensor, the score for not clicking any document. -* `s`: [num_of_documents] tensor, the scores for clicking documents. -* `q`: [num_of_documents] tensor, the predicted q values for documents. + + + + + + + + + + + + + +
+`slate_size` + +int, the size of the recommendation slate. +
+`s_no_click` + +float tensor, the score for not clicking any document. +
+`s` + +[num_of_documents] tensor, the scores for clicking documents. +
+`q` + +[num_of_documents] tensor, the predicted q values for documents. +
-#### Returns: + + + + + + + + +
[slate_size] tensor, the selected slate. +
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_topk.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_topk.md index 7885a6c..1ff6d55 100644 --- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_topk.md +++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/select_slate_topk.md @@ -5,38 +5,74 @@ # recsim.agents.slate_decomp_q_agent.select_slate_topk - + +
View source - Selects the slate using the top-K algorithm. -```python -recsim.agents.slate_decomp_q_agent.select_slate_topk( - slate_size, - s_no_click, - s, - q + This algorithm corresponds to the method "TS" in Ie et al. https://arxiv.org/abs/1905.12767. -#### Args: + + + + + -* `slate_size`: int, the size of the recommendation slate. -* `s_no_click`: float tensor, the score for not clicking any document. -* `s`: [num_of_documents] tensor, the scores for clicking documents. -* `q`: [num_of_documents] tensor, the predicted q values for documents. + + + + + + + + + + + + + +
+`slate_size` + +int, the size of the recommendation slate. +
+`s_no_click` + +float tensor, the score for not clicking any document. +
+`s` + +[num_of_documents] tensor, the scores for clicking documents. +
+`q` + +[num_of_documents] tensor, the predicted q values for documents. +
-#### Returns: + + + + + + + + +
[slate_size] tensor, the selected slate. +
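One plausible NumPy reading of the top-K selection above, ranking documents by score-weighted Q-values; the weighting by `s` is our assumption, so consult the source for the exact ranking rule:

```python
import numpy as np


def select_slate_topk_np(slate_size, s, q):
  """Sketch: keep the slate_size documents with the largest s * q."""
  ranking = np.argsort(-(s * q))  # indices in descending order of s * q
  return ranking[:slate_size]


slate = select_slate_topk_np(
    slate_size=2,
    s=np.array([0.1, 0.7, 0.2]),
    q=np.array([5.0, 1.0, 3.0]))
# s * q == [0.5, 0.7, 0.6], so slate == [1, 2].
```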
diff --git a/docs/api_docs/python/recsim/agents/tabular_q_agent.md b/docs/api_docs/python/recsim/agents/tabular_q_agent.md index 261c21e..4f136df 100644 --- a/docs/api_docs/python/recsim/agents/tabular_q_agent.md +++ b/docs/api_docs/python/recsim/agents/tabular_q_agent.md @@ -5,7 +5,10 @@ # Module: recsim.agents.tabular_q_agent + + +
View diff --git a/docs/api_docs/python/recsim/agents/tabular_q_agent/TabularQAgent.md b/docs/api_docs/python/recsim/agents/tabular_q_agent/TabularQAgent.md index 714cdae..ead99ec 100644 --- a/docs/api_docs/python/recsim/agents/tabular_q_agent/TabularQAgent.md +++ b/docs/api_docs/python/recsim/agents/tabular_q_agent/TabularQAgent.md @@ -1,7 +1,6 @@
- @@ -12,22 +11,29 @@ # recsim.agents.tabular_q_agent.TabularQAgent - + +
View source -## Class `TabularQAgent` - - Tabular Q-learning agent with universal function approximation. Inherits From: [`AbstractEpisodicRecommenderAgent`](../../../recsim/agent/AbstractEpisodicRecommenderAgent.md) + + This agent provides a tabular implementation of the Q-learning algorithm. To @@ -46,63 +52,130 @@ Q-function. Producing ground truth Q-functions is the main intended use of this agent, since discretization is prohibitively expensive in high-dimensional environments. -

__init__

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`observation_space` + +a gym.spaces object specifying the format of +observations. +
+`action_space` + +a gym.spaces object that specifies the format of actions. +
+`eval_mode` + +Boolean indicating whether the agent is in training or eval +mode. +
+`ignore_response` + +Boolean indicating whether the agent should ignore the +response part of the observation. +
+`discretization_bounds` + +pair of real numbers indicating the min and max +value for continuous attributes discretization. Values below the min +will all be grouped in the first bin, while values above the max will +all be grouped in the last bin. See the documentation of numpy.digitize +for further details. +
+`number_bins` + +positive integer number of bins used to discretize continuous +attributes. +
+`exploration_policy` + +either one of ['epsilon_greedy', 'min_count'] or a +custom function. +
+`exploration_temperature` + +a real number passed as parameter to the +exploration policy. +
+`learning_rate` + +a real number between 0 and 1 indicating how much to update +Q-values, i.e. Q_t+1(s,a) = (1 - learning_rate) * Q_t(s, a) ++ learning_rate * (R(s,a) + ...). +
+`gamma` + +real value between 0 and 1 indicating the discount factor of the +MDP. +
+`ordinal_slates` + +boolean indicating whether slate ordering matters, e.g. +whether the slates (1, 2) and (2, 1) should be considered different +actions. Using ordinal slates increases complexity factorially. +
+`**kwargs` + +additional arguments like eval_mode. +
-View -source + -```python -__init__( - observation_space, - action_space, - eval_mode=False, - ignore_response=True, - discretization_bounds=(0.0, 10.0), - number_bins=100, - exploration_policy='epsilon_greedy', - exploration_temperature=0.99, - learning_rate=0.1, - gamma=0.99, - **kwargs -) -``` - -TabularQAgent init. - -#### Args: - -* `observation_space`: a gym.spaces object specifying the format of - observations. -* `action_space`: a gym.spaces object that specifies the format of - actions. -* `eval_mode`: Boolean indicating whether the agent is in training or - eval mode. -* `ignore_response`: Boolean indicating whether the agent should ignore - the response part of the observation. -* `discretization_bounds`: pair of real numbers indicating the min and - max value for continuous attributes discretization. Values below the min - will all be grouped in the first bin, while values above the max will all be - grouped in the last bin. See the documentation of numpy.digitize for further - details. -* `number_bins`: positive integer number of bins used to discretize - continuous attributes. -* `exploration_policy`: either one of ['epsilon_greedy', 'min_count'] - or a custom function. TODO(mmladenov): formalize requirements of this - function. -* `exploration_temperature`: a real number passed as parameter to the - exploration policy. -* `learning_rate`: a real number between 0 and 1 indicating how much to - update Q-values, i.e. Q_t+1(s,a) = (1 - learning_rate) * Q_t(s, a) + - learning_rate * (R(s,a) + ...). -* `gamma`: real value between 0 and 1 indicating the discount factor of - the MDP. -* `**kwargs`: additional arguments like eval_mode. - -## Properties - -

multi_user

+ + + + + + + +
+`multi_user` + Returns boolean indicating whether this agent serves multiple users. +
## Methods @@ -111,130 +184,271 @@ Returns boolean indicating whether this agent serves multiple users. View source -```python -begin_episode(observation=None) -``` + Returns the agent's first action for this episode. -#### Args: + -* `observation`: numpy array, the environment's initial observation. + + + -#### Returns: + + + + +
Args
+`observation` + +numpy array, the environment's initial observation. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs. +

bundle_and_checkpoint

View source -```python -bundle_and_checkpoint( - checkpoint_dir, - iteration_number + Returns a self-contained bundle of the agent's state. -#### Args: + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string for the directory where objects will be saved. +
+`iteration_number` + +An integer of iteration number to use for naming the +checkpoint file. +
-* `checkpoint_dir`: A string for the directory where objects will be - saved. -* `iteration_number`: An integer of iteration number to use for naming - the checkpoint file. + -#### Returns: + + + + + + -A dictionary containing additional Python objects to be checkpointed by the -experiment. Each key is a string for the object name and the value is actual -object. If the checkpoint directory does not exist, returns empty dictionary. +
Returns
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is the actual object. If the checkpoint directory does not exist, returns +an empty dictionary. +

end_episode

View source -```python -end_episode( - reward, - observation + Signals the end of the episode to the agent. -#### Args: - -* `reward`: An float that is the last reward from the environment. -* `observation`: numpy array that represents the last observation of - the episode. + + + + + + + + + + + + + +
Args
+`reward` + +A float that is the last reward from the environment. +
+`observation` + +numpy array that represents the last observation of the +episode. +

step

View source -```python -step( - reward, - observation + Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations and should have the following fields: - - user: A NumPy array representing user's observed state. Assumes it is a - concatenation of topic pull counts and topic click counts. - - doc: A NumPy array representing observations of document features. - Assumes it is a concatenation of one-hot encoding of topic_id and - document quality. + + + + + + + + + + + + + +
Args
+`reward` + +The reward received from the agent's most recent action as a +float. +
+`observation` + +A dictionary that includes the most recent observations and +should have the following fields: +- user: A NumPy array representing user's observed state. Assumes it is +a concatenation of topic pull counts and topic click counts. +- doc: A NumPy array representing observations of document features. +Assumes it is a concatenation of one-hot encoding of topic_id and +document quality. +
-#### Returns: + + + + + + + + + + +
Returns
+`slate` + +An integer array of size _slate_size, where each element is an +index into the list of doc_obs. +
-* `slate`: An integer array of size _slate_size, where each element is - an index into the list of doc_obs + -#### Raises: + + + -* `ValueError`: if reward is not in [0, 1]. + + + + +
Raises
+`ValueError` + +if reward is not in [0, 1]. +

unbundle

View source -```python -unbundle( - checkpoint_dir, - iteration_number, - bundle_dict + Restores the agent from a checkpoint. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + + + + + + + + + + + + + + + +
Args
+`checkpoint_dir` + +A string that represents the path to the checkpoint saved +by tf.Save. +
+`iteration_number` + +An integer that represents the checkpoint version and is +used when restoring replay buffer. +
+`bundle_dict` + +A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. +
-#### Returns: + + + + + + + + +
Returns
bool, True if unbundling was successful. +
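As a worked illustration of the update quoted in the `learning_rate` description above, with a plain dict standing in for the agent's internal Q-table and hypothetical state/action keys:

```python
from collections import defaultdict

q_table = defaultdict(float)  # maps (state, action) keys to Q-values
learning_rate, gamma = 0.1, 0.99


def q_update(state, action, reward, next_state, next_actions):
  """Q_t+1(s,a) = (1 - lr) * Q_t(s,a) + lr * (R(s,a) + gamma * max Q_t(s',a'))."""
  best_next = max(
      (q_table[(next_state, a)] for a in next_actions), default=0.0)
  q_table[(state, action)] = (
      (1 - learning_rate) * q_table[(state, action)]
      + learning_rate * (reward + gamma * best_next))


q_update(state='s0', action=(1, 2), reward=1.0,
         next_state='s1', next_actions=[(1, 2), (2, 1)])
# q_table[('s0', (1, 2))] == 0.1, since all next-state values start at 0.
```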
diff --git a/docs/api_docs/python/recsim/choice_model.md b/docs/api_docs/python/recsim/choice_model.md index 409b650..ca55c75 100644 --- a/docs/api_docs/python/recsim/choice_model.md +++ b/docs/api_docs/python/recsim/choice_model.md @@ -5,7 +5,10 @@ # Module: recsim.choice_model + + +
View diff --git a/docs/api_docs/python/recsim/choice_model/AbstractChoiceModel.md b/docs/api_docs/python/recsim/choice_model/AbstractChoiceModel.md index afe8d62..20a3953 100644 --- a/docs/api_docs/python/recsim/choice_model/AbstractChoiceModel.md +++ b/docs/api_docs/python/recsim/choice_model/AbstractChoiceModel.md @@ -1,36 +1,40 @@
- -
# recsim.choice_model.AbstractChoiceModel - + +
View source -## Class `AbstractChoiceModel` - - Abstract class to represent the user choice model. Each user has a choice model. -## Properties + + + + + -

score_no_click

+ + +
`score_no_click` -

scores

+
`scores` + +
## Methods @@ -39,39 +43,88 @@ Each user has a choice model. View source -```python -choose_item() -``` + Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + + + + + + + + + +
Returns
+`selected_index` + +an integer indicating which item was chosen, or None if +none were selected. +

score_documents

View source -```python -score_documents( - user_state, - doc_obs + Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + + + + + + + + + + + + +
Args
+`user_state` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the slate. +
-* `scores`: A numpy array that stores the scores of all documents. -* `score_no_click`: A float that represents the score for the action of - picking no document. + + + + + + + + + + + + + +
Attributes
+`scores` + +A numpy array that stores the scores of all documents. +
+`score_no_click` + +A float that represents the score for the action of +picking no document. +
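The interface above can be mirrored by a self-contained toy model. For brevity this sketch does not subclass the real `AbstractChoiceModel`; it only reproduces the documented `scores` / `score_no_click` / `choose_item` contract:

```python
import numpy as np


class UniformChoiceModel(object):
  """Toy model mirroring the choice-model interface documented above."""

  def __init__(self, no_click_mass=1.0):
    self._no_click_mass = no_click_mass
    self._scores = None
    self._score_no_click = None

  @property
  def scores(self):
    return self._scores

  @property
  def score_no_click(self):
    return self._score_no_click

  def score_documents(self, user_state, doc_obs):
    # Every document gets the same score, regardless of user state.
    del user_state  # Unused by this toy model.
    self._scores = np.ones(len(doc_obs))
    self._score_no_click = self._no_click_mass

  def choose_item(self):
    # Sample proportionally to the scores; the extra outcome is "no click".
    all_scores = np.append(self._scores, self._score_no_click)
    selected = np.random.choice(
        len(all_scores), p=all_scores / all_scores.sum())
    return None if selected == len(all_scores) - 1 else selected
```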
diff --git a/docs/api_docs/python/recsim/choice_model/CascadeChoiceModel.md b/docs/api_docs/python/recsim/choice_model/CascadeChoiceModel.md index be30760..b5b308b 100644 --- a/docs/api_docs/python/recsim/choice_model/CascadeChoiceModel.md +++ b/docs/api_docs/python/recsim/choice_model/CascadeChoiceModel.md @@ -1,8 +1,6 @@
- - @@ -10,52 +8,61 @@ # recsim.choice_model.CascadeChoiceModel - + +
View source -## Class `CascadeChoiceModel` - - The base class for cascade choice models. Inherits From: [`NormalizableChoiceModel`](../../recsim/choice_model/NormalizableChoiceModel.md) - - -#### Attributes: - -* `attention_prob`: The probability of examining a document i given - document i - 1 not clicked. -* `score_scaling`: A multiplicative factor to convert score of document - i to the click probability of examined document i. + -#### Raises: + -* `ValueError`: if either attention_prob or base_attention_prob is - invalid. + -

__init__

+ + + -View -source + + + + +
+`ValueError` + +if either attention_prob or base_attention_prob is invalid. +
-```python -__init__(choice_features) -``` + -Initialize self. See help(type(self)) for accurate signature. + + + -## Properties + + + + +
`attention_prob` The probability of examining a document i +given that document i - 1 was not clicked.
`score_scaling` A multiplicative factor to convert score of document i to the click +probability of examined document i.
`score_no_click` -

score_no_click

+
`scores` -

scores

+
## Methods @@ -64,39 +71,87 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -choose_item() -``` + Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + + + + + + + + + +
Returns
+`selected_index` + +an integer indicating which item was chosen, or None if +none were selected. +

score_documents

View source -```python -score_documents( - user_state, - doc_obs + Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + + + + + + + + + + + + +
Args
+`user_state` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the slate. +
-* `scores`: A numpy array that stores the scores of all documents. -* `score_no_click`: A float that represents the score for the action of - picking no document. + + + + + + + + + + + + + +
Attributes
+`scores` + +A numpy array that stores the scores of all documents. +
+`score_no_click` + +A float that represents the score for the action of +picking no document. +
diff --git a/docs/api_docs/python/recsim/choice_model/ExponentialCascadeChoiceModel.md b/docs/api_docs/python/recsim/choice_model/ExponentialCascadeChoiceModel.md index 78b5241..77243cf 100644 --- a/docs/api_docs/python/recsim/choice_model/ExponentialCascadeChoiceModel.md +++ b/docs/api_docs/python/recsim/choice_model/ExponentialCascadeChoiceModel.md @@ -1,8 +1,6 @@
- - @@ -10,44 +8,45 @@ # recsim.choice_model.ExponentialCascadeChoiceModel - + +
View source -## Class `ExponentialCascadeChoiceModel` - - An exponential cascade choice model. Inherits From: [`CascadeChoiceModel`](../../recsim/choice_model/CascadeChoiceModel.md) + + Clicks the item at position i according to p(i) = attention_prob * score_scaling * exp(score(i)) by going through the slate in order, and stopping once an item has been clicked. -

__init__

- -View -source - -```python -__init__(choice_features) -``` + -Initialize self. See help(type(self)) for accurate signature. + + + -## Properties + + +
`score_no_click` -

score_no_click

+
`scores` -

scores

+
## Methods @@ -56,39 +55,86 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -choose_item() -``` + Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + + + + + + + + + +
Returns
+`selected_index` + +an integer indicating which item was chosen, or None if +none were selected. +

score_documents

View source -```python -score_documents( - user_state, - doc_obs + Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + + + + + + + + + + + + +
Args
+`user_state` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the slate. +
-* `scores`: A numpy array that stores the scores of all documents. -* `score_no_click`: A float that represents the score for the action of - picking no document. + + + + + + + + + + + + + +
Attributes
+`scores` + +A numpy array that stores the scores of all documents. +
+`score_no_click` + +A float that represents the score for the action of +picking no document. +
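A sketch of the cascade described above, assuming `score_scaling` is small enough that the per-position click probability stays in [0, 1]:

```python
import numpy as np


def exponential_cascade_choice(scores, attention_prob=0.9,
                               score_scaling=0.01, rng=None):
  """Walks the slate in order; item i is clicked with probability
  attention_prob * score_scaling * exp(scores[i]). Returns the clicked
  position, or None if nothing is clicked."""
  rng = rng or np.random.default_rng()
  for position, score in enumerate(scores):
    if rng.random() < attention_prob * score_scaling * np.exp(score):
      return position
  return None


position = exponential_cascade_choice(np.array([1.0, 2.0, 0.5]))
```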
diff --git a/docs/api_docs/python/recsim/choice_model/MultinomialLogitChoiceModel.md b/docs/api_docs/python/recsim/choice_model/MultinomialLogitChoiceModel.md index 1741a4e..547f5aa 100644 --- a/docs/api_docs/python/recsim/choice_model/MultinomialLogitChoiceModel.md +++ b/docs/api_docs/python/recsim/choice_model/MultinomialLogitChoiceModel.md @@ -1,8 +1,6 @@
- - @@ -10,48 +8,60 @@ # recsim.choice_model.MultinomialLogitChoiceModel - + +
View source -## Class `MultinomialLogitChoiceModel` - - A multinomial logit choice model. Inherits From: [`NormalizableChoiceModel`](../../recsim/choice_model/NormalizableChoiceModel.md) + + Samples item x in scores according to p(x) = exp(x) / Sum_{y in scores} exp(y) -#### Args: - -* `choice_features`: a dict that stores the features used in choice - model: `no_click_mass`: a float indicating the mass given to a no click - option. - -

__init__

- -View -source + + + + + + + + + + +
+`choice_features` + +a dict that stores the features used in choice model: +`no_click_mass`: a float indicating the mass given to a no click option. +
-```python -__init__(choice_features) -``` + -Initialize self. See help(type(self)) for accurate signature. + + + -## Properties + + +
`score_no_click` -

score_no_click

+
`scores` -

scores

+
## Methods @@ -60,39 +70,86 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -choose_item() -``` + Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + + + + + + + + + +
Returns
+`selected_index` + +an integer indicating which item was chosen, or None if +none were selected. +

score_documents

View source -```python -score_documents( - user_state, - doc_obs + Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + + + + + + + + + + + + +
Args
+`user_state` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the slate. +
-* `scores`: A numpy array that stores the scores of all documents. -* `score_no_click`: A float that represents the score for the action of - picking no document. + + + + + + + + + + + + + +
Attributes
+`scores` + +A numpy array that stores the scores of all documents. +
+`score_no_click` + +A float that represents the score for the action of +picking no document. +
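A sketch of the sampling rule above, folding the no-click option in as one extra logit carrying `no_click_mass` (how `choice_features` feeds this value is assumed here):

```python
import numpy as np


def multinomial_logit_choice(scores, no_click_mass=1.0, rng=None):
  """Samples p(x) = exp(x) / sum_y exp(y); the last outcome is "no click"."""
  rng = rng or np.random.default_rng()
  logits = np.append(scores, no_click_mass)
  # Subtracting the max leaves the probabilities unchanged (stability).
  exp_logits = np.exp(logits - logits.max())
  choice = rng.choice(len(logits), p=exp_logits / exp_logits.sum())
  return None if choice == len(logits) - 1 else choice


index = multinomial_logit_choice(np.array([0.2, 1.5, -0.3]))
```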
diff --git a/docs/api_docs/python/recsim/choice_model/MultinomialProportionalChoiceModel.md b/docs/api_docs/python/recsim/choice_model/MultinomialProportionalChoiceModel.md index c279910..cbcc123 100644 --- a/docs/api_docs/python/recsim/choice_model/MultinomialProportionalChoiceModel.md +++ b/docs/api_docs/python/recsim/choice_model/MultinomialProportionalChoiceModel.md @@ -1,8 +1,6 @@
- - @@ -10,52 +8,49 @@ # recsim.choice_model.MultinomialProportionalChoiceModel - + +
View source -## Class `MultinomialProportionalChoiceModel` - - A multinomial proportional choice function. Inherits From: [`NormalizableChoiceModel`](../../recsim/choice_model/NormalizableChoiceModel.md) + + Samples item x in scores according to p(x) = (x - min_normalizer) / sum(x - min_normalizer) -#### Attributes: - -* `min_normalizer`: A float (<= 0) used to offset the scores to be - positive. Specifically, if the scores have negative elements, then they do - not form a valid probability distribution for sampling. Subtracting the - least expected element is one heuristic for normalization. -* `no_click_mass`: An optional float indicating the mass given to a no - click option -

__init__

- -View -source + -```python -__init__(choice_features) -``` + + + -Initialize self. See help(type(self)) for accurate signature. + + +
`min_normalizer` A float (<= 0) used to offset the scores +to be positive. Specifically, if the scores have negative elements, then they do +not form a valid probability distribution for sampling. Subtracting the least +expected element is one heuristic for normalization.
+`no_click_mass` An optional float indicating the mass given to a no +click option.
`score_no_click` -## Properties +
`scores` -

score_no_click

- -

scores

+
## Methods @@ -64,39 +59,86 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -choose_item() -``` + Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + + + + + + + + + +
Returns
+`selected_index` + +an integer indicating which item was chosen, or None if +none were selected. +

score_documents

View source -```python -score_documents( - user_state, - doc_obs + Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + + + + + + + + + + + + +
Args
+`user_state` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the slate. +
-* `scores`: A numpy array that stores the scores of all documents. -* `score_no_click`: A float that represents the score for the action of - picking no document. + + + + + + + + + + + + + +
Attributes
+`scores` + +A numpy array that stores the scores of all documents. +
+`score_no_click` + +A float that represents the score for the action of +picking no document. +
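A sketch of the sampling rule above; `min_normalizer` must not exceed the smallest score so that the shifted scores form a valid distribution:

```python
import numpy as np


def multinomial_proportional_choice(scores, min_normalizer=-1.0, rng=None):
  """Samples p(x) = (x - min_normalizer) / sum(x - min_normalizer)."""
  rng = rng or np.random.default_rng()
  shifted = np.asarray(scores) - min_normalizer
  return rng.choice(len(scores), p=shifted / shifted.sum())


index = multinomial_proportional_choice(np.array([0.5, -0.5, 1.0]))
# shifted == [1.5, 0.5, 2.0], so p == [0.375, 0.125, 0.5].
```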
diff --git a/docs/api_docs/python/recsim/choice_model/NormalizableChoiceModel.md b/docs/api_docs/python/recsim/choice_model/NormalizableChoiceModel.md index 1c10121..0ca2d61 100644 --- a/docs/api_docs/python/recsim/choice_model/NormalizableChoiceModel.md +++ b/docs/api_docs/python/recsim/choice_model/NormalizableChoiceModel.md @@ -1,25 +1,21 @@
- -
# recsim.choice_model.NormalizableChoiceModel - + +
View source -## Class `NormalizableChoiceModel` - - A normalizable choice model. Inherits From: @@ -27,11 +23,19 @@ Inherits From: -## Properties + + + + + -

score_no_click

+ + +
`score_no_click` -

scores

+
`scores` + +
## Methods @@ -40,39 +44,87 @@ Inherits From: View source -```python -choose_item() -``` + Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + + + + + + + + + +
Returns
+`selected_index` + +an integer indicating which item was chosen, or None if +none were selected. +

score_documents

View source -```python -score_documents( - user_state, - doc_obs + Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + + + + + + + + + + + + +
Args
+`user_state` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the slate. +
-* `scores`: A numpy array that stores the scores of all documents. -* `score_no_click`: A float that represents the score for the action of - picking no document. + + + + + + + + + + + + + +
Attributes
+`scores` + +A numpy array that stores the scores of all documents. +
+`score_no_click` + +A float that represents the score for the action of +picking no document. +
diff --git a/docs/api_docs/python/recsim/choice_model/ProportionalCascadeChoiceModel.md b/docs/api_docs/python/recsim/choice_model/ProportionalCascadeChoiceModel.md index 651c8ed..5de0f24 100644 --- a/docs/api_docs/python/recsim/choice_model/ProportionalCascadeChoiceModel.md +++ b/docs/api_docs/python/recsim/choice_model/ProportionalCascadeChoiceModel.md @@ -1,8 +1,6 @@
- - @@ -10,44 +8,45 @@ # recsim.choice_model.ProportionalCascadeChoiceModel - + +
View source -## Class `ProportionalCascadeChoiceModel` - - A proportional cascade choice model. Inherits From: [`CascadeChoiceModel`](../../recsim/choice_model/CascadeChoiceModel.md) + + Clicks the item at position i according to attention_prob * score_scaling * (score(i) - min_normalizer) by going through the slate in order, and stopping once an item has been clicked. -

__init__

- -View -source - -```python -__init__(choice_features) -``` + -Initialize self. See help(type(self)) for accurate signature. + + + -## Properties + + +
`score_no_click` -

score_no_click

+
`scores` -

scores

+
## Methods @@ -56,39 +55,86 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -choose_item() -``` + Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + + + + + + + + + +
Returns
+`selected_index` + +an integer indicating which item was chosen, or None if +none were selected. +

score_documents

View source -```python -score_documents( - user_state, - doc_obs + Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + + + + + + + + + + + + +
Args
+`user_state` + +An instance of AbstractUserState. +
+`doc_obs` + +A numpy array that represents the observation of all documents in +the slate. +
-* `scores`: A numpy array that stores the scores of all documents. -* `score_no_click`: A float that represents the score for the action of - picking no document. + + + + + + + + + + + + + +
Attributes
+`scores` + +A numpy array that stores the scores of all documents. +
+`score_no_click` + +A float that represents the score for the action of +picking no document. +
diff --git a/docs/api_docs/python/recsim/choice_model/softmax.md b/docs/api_docs/python/recsim/choice_model/softmax.md index 69b5b65..e2d198e 100644 --- a/docs/api_docs/python/recsim/choice_model/softmax.md +++ b/docs/api_docs/python/recsim/choice_model/softmax.md @@ -5,20 +5,21 @@ # recsim.choice_model.softmax - + +
View source - - Computes the softmax of a vector. -```python -recsim.choice_model.softmax(vector) -``` + diff --git a/docs/api_docs/python/recsim/document.md b/docs/api_docs/python/recsim/document.md index e5cbf8d..d65332b 100644 --- a/docs/api_docs/python/recsim/document.md +++ b/docs/api_docs/python/recsim/document.md @@ -5,7 +5,10 @@ # Module: recsim.document + + +
View diff --git a/docs/api_docs/python/recsim/document/AbstractDocument.md b/docs/api_docs/python/recsim/document/AbstractDocument.md index e2d1aae..241da81 100644 --- a/docs/api_docs/python/recsim/document/AbstractDocument.md +++ b/docs/api_docs/python/recsim/document/AbstractDocument.md @@ -5,35 +5,29 @@ +
# recsim.document.AbstractDocument - + +
View source -## Class `AbstractDocument` - - Abstract class to represent a document and its properties. - - -

__init__

+ -View -source - -```python -__init__(doc_id) -``` - -Initialize self. See help(type(self)) for accurate signature. + ## Methods @@ -42,9 +36,10 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -create_observation() -``` + Returns observable properties of this document as a float array. @@ -53,9 +48,9 @@ Returns observable properties of this document as a float array. View source -```python -doc_id() -``` + Returns the document ID. @@ -64,9 +59,14 @@ Returns the document ID. View source -```python -@classmethod -observation_space(cls) -``` + Gym space that defines how documents are represented. + +## Class Variables + +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md b/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md index f9054e4..4a462f0 100644 --- a/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md +++ b/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md @@ -1,7 +1,6 @@
- @@ -11,40 +10,39 @@ # recsim.document.AbstractDocumentSampler - + +
View source -## Class `AbstractDocumentSampler` - - Abstract class to sample documents. - - -

__init__

- -View -source - -```python -__init__( - doc_ctor, - seed=0 + -## Properties + + -

num_clusters

+ + + + + + + +
+`num_clusters` + Returns the number of document clusters. Returns 0 if not applicable. +
## Methods @@ -53,9 +51,9 @@ Returns the number of document clusters. Returns 0 if not applicable. View source -```python -get_doc_ctor() -``` + Returns the constructor/class of the documents that will be sampled. @@ -64,18 +62,19 @@ Returns the constructor/class of the documents that will be sampled. View source -```python -reset_sampler() -``` +

sample_document

View source -```python -sample_document() -``` + Samples and returns an instantiation of AbstractDocument. @@ -84,11 +83,10 @@ Samples and returns an instantiation of AbstractDocument. View source -```python -update_state( - documents, - responses + Update document state (if needed) given user's (or users') responses. diff --git a/docs/api_docs/python/recsim/document/CandidateSet.md b/docs/api_docs/python/recsim/document/CandidateSet.md index 1626daa..b50f0d8 100644 --- a/docs/api_docs/python/recsim/document/CandidateSet.md +++ b/docs/api_docs/python/recsim/document/CandidateSet.md @@ -13,35 +13,26 @@ # recsim.document.CandidateSet - + +
View source -## Class `CandidateSet` - - Class to represent a collection of AbstractDocuments. + + The candidate set is represented as a hashmap (dictionary), with documents indexed by their document ID. -

__init__

- -View -source - -```python -__init__() -``` - -Initializes a document candidate set with 0 documents. - ## Methods

add_document

@@ -49,9 +40,11 @@ Initializes a document candidate set with 0 documents. View source -```python -add_document(document) -``` + Adds a document to the candidate set. @@ -60,9 +53,9 @@ Adds a document to the candidate set. View source -```python -create_observation() -``` + Returns a dictionary of observable features of documents. @@ -71,9 +64,9 @@ Returns a dictionary of observable features of documents. View source -```python -get_all_documents() -``` + Returns all documents. @@ -82,39 +75,64 @@ Returns all documents. View source -```python -get_documents(document_ids) -``` + Gets the documents associated with the specified document IDs. -#### Args: + + + + + + + + + + +
Args
+`document_ids` + +an array representing indices into the candidate set. +Indices can be integers or string-encoded integers. +
-* `document_ids`: an array representing indices into the candidate set. - Indices can be integers or string-encoded integers. + -#### Returns: + + + + + + -(documents) an ordered list of AbstractDocuments associated with the document -ids. +
Returns
+(documents) an ordered list of AbstractDocuments associated with the +document ids. +
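A usage sketch of the candidate set with a minimal concrete document class; `UnitDocument` is hypothetical, and only the `doc_id` constructor argument and the two overridden methods come from the docs above. Assumes `recsim` and `gym` are installed:

```python
import numpy as np
from gym import spaces

from recsim import document


class UnitDocument(document.AbstractDocument):
  """Minimal concrete document for illustration purposes."""

  def create_observation(self):
    # A single constant feature; real documents expose richer vectors.
    return np.array([1.0])

  @classmethod
  def observation_space(cls):
    return spaces.Box(0.0, 1.0, shape=(1,))


corpus = document.CandidateSet()
for doc_id in range(3):
  corpus.add_document(UnitDocument(doc_id))

print(corpus.size())                   # 3
subset = corpus.get_documents([0, 2])  # ordered list of two documents
```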

observation_space

View source -```python -observation_space() -``` +

remove_document

View source -```python -remove_document(document) -``` + Removes a document from the set (to simulate a changing corpus). @@ -123,8 +141,8 @@ Removes a document from the set (to simulate a changing corpus). View source -```python -size() -``` + Returns an integer, the number of documents in this candidate set. diff --git a/docs/api_docs/python/recsim/environments.md b/docs/api_docs/python/recsim/environments.md index 8848e77..d490527 100644 --- a/docs/api_docs/python/recsim/environments.md +++ b/docs/api_docs/python/recsim/environments.md @@ -5,7 +5,10 @@ # Module: recsim.environments + + +
View diff --git a/docs/api_docs/python/recsim/environments/interest_evolution.md b/docs/api_docs/python/recsim/environments/interest_evolution.md index 701b449..0a6d013 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution.md @@ -1,12 +1,14 @@
-
# Module: recsim.environments.interest_evolution + + +
View @@ -42,6 +44,9 @@ Class that samples videos for utility model experiment. ## Functions +[`FLAGS(...)`](../../recsim/environments/interest_evolution/FLAGS.md): Registry +of 'Flag' objects. + [`clicked_watchtime_reward(...)`](../../recsim/environments/interest_evolution/clicked_watchtime_reward.md): Calculates the total clicked watchtime from a list of responses. @@ -50,7 +55,3 @@ Creates an interest evolution environment. [`total_clicks_reward(...)`](../../recsim/environments/interest_evolution/total_clicks_reward.md): Calculates the total number of clicks from a list of responses. - -## Other Members - -* `FLAGS` diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/FLAGS.md b/docs/api_docs/python/recsim/environments/interest_evolution/FLAGS.md new file mode 100644 index 0000000..cb0dc5c --- /dev/null +++ b/docs/api_docs/python/recsim/environments/interest_evolution/FLAGS.md @@ -0,0 +1,57 @@ +
+ + +
+ +# recsim.environments.interest_evolution.FLAGS + + + + + +
+ +Registry of 'Flag' objects. + + + + + + + +A 'FlagValues' can then scan command line arguments, passing flag arguments +through to the 'Flag' objects that it owns. It also provides easy access to the +flag values. Typically only one 'FlagValues' object is needed by an application: +flags.FLAGS + +This class is heavily overloaded: + +'Flag' objects are registered via __setitem__: FLAGS['longname'] = x # register +a new flag + +The .value attribute of the registered 'Flag' objects can be accessed as +attributes of this 'FlagValues' object, through __getattr__. Both the long and +short name of the original 'Flag' objects can be used to access its value: +FLAGS.longname # parsed flag value FLAGS.x # parsed flag value (short name) + +Command line arguments are scanned and passed to the registered 'Flag' objects +through the __call__ method. Unparsed arguments, including +argv[0](e.g. the program name) are returned. argv = FLAGS(sys.argv) # scan +command line arguments + +The original registered Flag objects can be retrieved through the use of the +dictionary-like operator, __getitem__: x = FLAGS['longname'] # access the +registered Flag object + +The str() operator of a 'FlagValues' object provides help for all of the +registered 'Flag' objects. diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md index 7a17de9..8346e36 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md @@ -10,55 +10,114 @@ # recsim.environments.interest_evolution.IEvResponse - + +
View source -## Class `IEvResponse` - - Class to represent a user's response to a video. Inherits From: [`AbstractResponse`](../../../recsim/user/AbstractResponse.md) - - -#### Attributes: - -* `clicked`: A boolean indicating whether the video was clicked. -* `watch_time`: A float for fraction of the video watched. -* `liked`: A boolean indicating whether the video was liked. -* `quality`: A float indicating the quality of the video. -* `cluster_id`: A integer representing the cluster ID of the video. - -

__init__

- -View -source - -```python -__init__( - clicked=False, - watch_time=0.0, - liked=False, - quality=0.0, - cluster_id=0.0 + -Creates a new user response for a video. + -#### Args: + + + + + + + + + + + + + + + + + + + + + + +
+`clicked` + +A boolean indicating whether the video was clicked +
+`watch_time` + +A float for fraction of the video watched +
+`liked` + +A boolean indicating whether the video was liked +
+`quality` + +A float for document quality +
+`cluster_id` + +an integer for the cluster ID of the document. +
-* `clicked`: A boolean indicating whether the video was clicked -* `watch_time`: A float for fraction of the video watched -* `liked`: A boolean indicating whether the video was liked -* `quality`: A float for document quality -* `cluster_id`: a integer for the cluster ID of the document. + + + + + + + + + + + + + + + + + + + + + + +
+`clicked` + +A boolean indicating whether the video was clicked. +
+`watch_time` + +A float for fraction of the video watched. +
+`liked` + +A boolean indicating whether the video was liked. +
+`quality` + +A float indicating the quality of the video. +
+`cluster_id` + +An integer representing the cluster ID of the video. +
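A short construction sketch; the field values below are illustrative only:

```python
from recsim.environments.interest_evolution import IEvResponse

response = IEvResponse(clicked=True, watch_time=3.5, liked=True,
                       quality=1.2, cluster_id=4)
obs = response.create_observation()  # observable features of this response
spec = IEvResponse.response_space()  # spec describing a single response
```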
## Methods @@ -67,9 +126,9 @@ Creates a new user response for a video. View source -```python -create_observation() -``` + Creates a tensor observation of this response. @@ -78,14 +137,14 @@ Creates a tensor observation of this response. View source -```python -@classmethod -response_space(cls) -``` + ArraySpec that defines how a single response is represented. -## Class Members +## Class Variables * `MAX_QUALITY_SCORE = 100` * `MIN_QUALITY_SCORE = -100` diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md index 3c24e63..afa2452 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md @@ -9,37 +9,27 @@ # recsim.environments.interest_evolution.IEvUserDistributionSampler - + +
View source -## Class `IEvUserDistributionSampler` - - Class to sample users by a hardcoded distribution. Inherits From: [`AbstractUserSampler`](../../../recsim/user/AbstractUserSampler.md) - - -

__init__

- -View -source - -```python -__init__( - user_ctor=recsim.environments.interest_evolution.IEvUserState, - **kwargs + -Creates a new user state sampler. + ## Methods @@ -48,9 +38,9 @@ Creates a new user state sampler. View source -```python -get_user_ctor() -``` + Returns the constructor/class of the user states that will be sampled. @@ -59,17 +49,17 @@ Returns the constructor/class of the user states that will be sampled. View source -```python -reset_sampler() -``` +

sample_user

View source -```python -sample_user() -``` + Samples a new user, with a new set of features. diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md index 203e0fe..be9a7a3 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md @@ -15,65 +15,117 @@ # recsim.environments.interest_evolution.IEvUserModel - + +
View source -## Class `IEvUserModel` - - Class to model an interest evolution user. Inherits From: [`AbstractUserModel`](../../../recsim/user/AbstractUserModel.md) - - -Assumes the user state contains: - user_interests - time_budget - no_click_mass - -

__init__

- -View -source - -```python -__init__( - slate_size, - choice_model_ctor=None, + + + -Initializes a new user model. +Assumes the user state contains: - user_interests - time_budget - no_click_mass -#### Args: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`slate_size` + +An integer representing the size of the slate +
+`choice_model_ctor` + +A constructor function to create the user choice model. +
+`response_model_ctor` + +A constructor function to create a response. The +function should take a doc ID string as input and return an +IEvResponse object. +
+`user_state_ctor` + +A constructor to create user state +
+`no_click_mass` + +A float that will be passed to compute probability of no +click. +
+`seed` + +An integer used as the seed of the choice model. +
+`alpha_x_intercept` + +A float for the x intercept of the line used to compute +interests update factor. +
+`alpha_y_intercept` + +A float for the y intercept of the line used to compute +interests update factor. +
-* `slate_size`: An integer representing the size of the slate -* `choice_model_ctor`: A contructor function to create user choice - model. -* `response_model_ctor`: A constructor function to create response. The - function should take a string of doc ID as input and returns a IEvResponse - object. -* `user_state_ctor`: A constructor to create user state -* `no_click_mass`: A float that will be passed to compute probability - of no click. -* `seed`: A integer used as the seed of the choice model. -* `alpha_x_intercept`: A float for the x intercept of the line used to - compute interests update factor. -* `alpha_y_intercept`: A float for the y intercept of the line used to - compute interests update factor. + -#### Raises: + + + -* `Exception`: if choice_model_ctor is not specified. + + + + +
+`Exception` + +if choice_model_ctor is not specified. +
## Methods @@ -82,9 +134,9 @@ Initializes a new user model. View source -```python -create_observation() -``` + Emits observation about user's state. @@ -93,9 +145,9 @@ Emits observation about user's state. View source -```python -get_response_model_ctor() -``` + Returns a constructor for the type of response this model will create. @@ -104,9 +156,9 @@ Returns a constructor for the type of response this model will create. View source -```python -is_terminal() -``` + Returns a boolean indicating if the session is over. @@ -115,9 +167,9 @@ Returns a boolean indicating if the session is over. View source -```python -observation_space() -``` + A Gym.spaces object that describes possible user observations. @@ -126,9 +178,9 @@ A Gym.spaces object that describes possible user observations. View source -```python -reset() -``` + Resets the user. @@ -137,9 +189,9 @@ Resets the user. View source -```python -reset_sampler() -``` + Resets the sampler. @@ -148,40 +200,65 @@ Resets the sampler. View source -```python -response_space() -``` +

simulate_response

View source -```python -simulate_response(documents) -``` + Simulates the user's response to a slate of documents with choice model. -#### Args: + -* `documents`: a list of IEvVideo objects + + + -#### Returns: + + + + +
Args
+`documents` + +a list of IEvVideo objects +
-* `responses`: a list of IEvResponse objects, one for each document + + + + + + + + + + +
Returns
+`responses` + +a list of IEvResponse objects, one for each document +

update_state

View source -```python -update_state( - slate_documents, - responses + Updates the user state based on responses to the slate. @@ -190,8 +267,26 @@ update the user's interests some small step size alpha based on the user's interest in that topic. The update is either towards the video's features or away, and is determined stochastically by the user's interest in that document. -#### Args: - -* `slate_documents`: a list of IEvVideos representing the slate -* `responses`: a list of IEvResponses representing the user's response - to each video in the slate. + + + + + + + + + + + + + +
Args
+`slate_documents` + +a list of IEvVideos representing the slate +
+`responses` + +a list of IEvResponses representing the user's response to each +video in the slate. +
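`simulate_response` and `update_state` together form the model's interaction cycle. A minimal sketch, assuming `user_model` is an `IEvUserModel` configured as documented above and `slate` is a list of `IEvVideo` objects built elsewhere:

```python
# `user_model` and `slate` are assumed to have been constructed elsewhere.
responses = user_model.simulate_response(slate)  # one IEvResponse per document
user_model.update_state(slate, responses)        # nudge interests by alpha
if user_model.is_terminal():
    user_model.reset()                           # start a fresh session
```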
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserState.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserState.md index de51318..7cfb3f6 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserState.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserState.md @@ -10,49 +10,30 @@ # recsim.environments.interest_evolution.IEvUserState - + +
View source -## Class `IEvUserState` - - Class to represent interest evolution users. Inherits From: [`AbstractUserState`](../../../recsim/user/AbstractUserState.md) - - -

__init__

- -View -source - -```python -__init__( - user_interests, - time_budget=None, - score_scaling=None, - attention_prob=None, - no_click_mass=None, - keep_interact_prob=None, - min_doc_utility=None, - user_update_alpha=None, - watched_videos=None, - impressed_videos=None, - liked_videos=None, - step_penalty=None, - min_normalizer=None, - user_quality_factor=None, - document_quality_factor=None + -Initializes a new user. + ## Methods @@ -61,9 +42,9 @@ Initializes a new user. View source -```python -create_observation() -``` + Return an observation of this user's observable state. @@ -72,10 +53,10 @@ Return an observation of this user's observable state. View source -```python -@classmethod -observation_space(cls) -``` + Gym.spaces object that defines how user states are represented. @@ -84,10 +65,12 @@ Gym.spaces object that defines how user states are represented. View source -```python -score_document(doc_obs) -``` + -## Class Members +## Class Variables * `NUM_FEATURES = 20` diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md index 4c0f226..54d2e5e 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md @@ -11,47 +11,64 @@ # recsim.environments.interest_evolution.IEvVideo - + +
View source -## Class `IEvVideo` - - Class to represent an interest evolution Video. Inherits From: [`AbstractDocument`](../../../recsim/document/AbstractDocument.md) - - -#### Attributes: - -* `features`: A numpy array that stores video features. -* `cluster_id`: An integer that represents. -* `video_length`: A float for video length. -* `quality`: a float the represents document quality. -

__init__

- -View -source - -```python -__init__( - doc_id, - features, - cluster_id=None, - video_length=None, - quality=None + -Generates a random set of features for this interest evolution Video. + + + + + + + + + + + + + + + + + + + + + +
+`features` + +A numpy array that stores video features. +
+`cluster_id` + +An integer representing the video's cluster ID. +
+`video_length` + +A float for video length. +
+`quality` + +a float that represents document quality. +
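A construction sketch; the values are illustrative, and `NUM_FEATURES` is the class variable listed below:

```python
import numpy as np

from recsim.environments.interest_evolution import IEvVideo

video = IEvVideo(doc_id=0,
                 features=np.random.rand(IEvVideo.NUM_FEATURES),
                 cluster_id=3,
                 video_length=5.0,
                 quality=1.0)
print(video.doc_id(), video.create_observation())
```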
## Methods @@ -60,9 +77,9 @@ Generates a random set of features for this interest evolution Video. View source -```python -create_observation() -``` + Returns observable properties of this document as a float array. @@ -71,9 +88,9 @@ Returns observable properties of this document as a float array. View source -```python -doc_id() -``` + Returns the document ID. @@ -82,14 +99,14 @@ Returns the document ID. View source -```python -@classmethod -observation_space(cls) -``` + Gym space that defines how documents are represented. -## Class Members +## Class Variables * `MAX_VIDEO_LENGTH = 100.0` * `NUM_FEATURES = 20` diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md index 9db8eba..9c75df2 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md @@ -1,7 +1,6 @@
- @@ -11,57 +10,97 @@ # recsim.environments.interest_evolution.IEvVideoSampler - + +
View source -## Class `IEvVideoSampler` - - Class to sample interest_evolution videos. Inherits From: [`AbstractDocumentSampler`](../../../recsim/document/AbstractDocumentSampler.md) - - -

__init__

- -View -source - -```python -__init__( + -#### Args: + -* `doc_ctor`: A class/constructor for the type of videos that will be - sampled by this sampler. -* `min_feature_value`: A float for the min feature value. -* `max_feature_value`: A float for the max feature value. -* `video_length_mean`: A float for the mean of the video length. -* `video_length_std`: A float for the std deviation of video length. -* `**kwargs`: other keyword parameters for the video sampler. + + + + + + + + + + + + + + + + + + + + + + + + + +
+`doc_ctor` + +A class/constructor for the type of videos that will be sampled +by this sampler. +
+`min_feature_value` + +A float for the min feature value. +
+`max_feature_value` + +A float for the max feature value. +
+`video_length_mean` + +A float for the mean of the video length. +
+`video_length_std` + +A float for the std deviation of video length. +
+`**kwargs` + +other keyword parameters for the video sampler. +
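A usage sketch with illustrative parameter values; `doc_ctor` keeps its default:

```python
from recsim.environments.interest_evolution import IEvVideoSampler

sampler = IEvVideoSampler(min_feature_value=-1.0,
                          max_feature_value=1.0,
                          video_length_mean=4.3,
                          video_length_std=1.0)
video = sampler.sample_document()  # -> an IEvVideo
sampler.reset_sampler()            # restart the sampler's random stream
```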
-## Properties + -

num_clusters

+ + + + + + + +
+`num_clusters` + Returns the number of document clusters. Returns 0 if not applicable. +
## Methods @@ -70,9 +109,9 @@ Returns the number of document clusters. Returns 0 if not applicable. View source -```python -get_doc_ctor() -``` + Returns the constructor/class of the documents that will be sampled. @@ -81,18 +120,18 @@ Returns the constructor/class of the documents that will be sampled. View source -```python -reset_sampler() -``` +

sample_document

View source -```python -sample_document() -``` + Samples and returns an instantiation of AbstractDocument. @@ -101,11 +140,10 @@ Samples and returns an instantiation of AbstractDocument. View source -```python -update_state( - documents, - responses + Update document state (if needed) given user's (or users') responses. diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md index e453d6c..3536933 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md @@ -9,34 +9,28 @@ # recsim.environments.interest_evolution.UtilityModelUserSampler - + +
View source -## Class `UtilityModelUserSampler` - - Class that samples users for utility model experiment. Inherits From: [`AbstractUserSampler`](../../../recsim/user/AbstractUserSampler.md) - - -

__init__

- -```python -__init__( - *args, - **kwargs + -Creates a new user state sampler. + ## Methods @@ -45,9 +39,9 @@ Creates a new user state sampler. View source -```python -get_user_ctor() -``` + Returns the constructor/class of the user states that will be sampled. @@ -56,17 +50,17 @@ Returns the constructor/class of the user states that will be sampled. View source -```python -reset_sampler() -``` +

sample_user

View source -```python -sample_user() -``` + Creates a new instantiation of this user's hidden state parameters. diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md index c7c8ab7..ed0b748 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md @@ -1,7 +1,6 @@
- @@ -11,55 +10,89 @@ # recsim.environments.interest_evolution.UtilityModelVideoSampler - + +
View source -## Class `UtilityModelVideoSampler` - - Class that samples videos for utility model experiment. Inherits From: [`AbstractDocumentSampler`](../../../recsim/document/AbstractDocumentSampler.md) - - -

__init__

- -View -source - -```python -__init__( - doc_ctor=recsim.environments.interest_evolution.IEvVideo, - min_utility=-3.0, - max_utility=3.0, - video_length=4.0, - **kwargs + -#### Args: + -* `doc_ctor`: A class/constructor for the type of videos that will be - sampled by this sampler. -* `min_utility`: A float for the min utility score. -* `max_utility`: A float for the max utility score. -* `video_length`: A float for the video_length in minutes. -* `**kwargs`: other keyword parameters for the video sampler. + + + + + + + + + + + + + + + + + + + + + + +
+`doc_ctor` + +A class/constructor for the type of videos that will be sampled +by this sampler. +
+`min_utility` + +A float for the min utility score. +
+`max_utility` + +A float for the max utility score. +
+`video_length` + +A float for the video_length in minutes. +
+`**kwargs` + +other keyword parameters for the video sampler. +
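Given the documented defaults, construction is trivial; a minimal sketch:

```python
from recsim.environments.interest_evolution import UtilityModelVideoSampler

sampler = UtilityModelVideoSampler()  # min_utility=-3.0, max_utility=3.0
video = sampler.sample_document()     # a 4-minute IEvVideo under the defaults
```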
-## Properties + -

num_clusters

+ + + + + + + +
+`num_clusters` + Returns the number of document clusters. Returns 0 if not applicable. +
## Methods @@ -68,9 +101,9 @@ Returns the number of document clusters. Returns 0 if not applicable. View source -```python -get_doc_ctor() -``` + Returns the constructor/class of the documents that will be sampled. @@ -79,18 +112,18 @@ Returns the constructor/class of the documents that will be sampled. View source -```python -reset_sampler() -``` +

sample_document

View source -```python -sample_document() -``` + Samples and returns an instantiation of AbstractDocument. @@ -99,11 +132,10 @@ Samples and returns an instantiation of AbstractDocument. View source -```python -update_state( - documents, - responses + Update document state (if needed) given user's (or users') responses. diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md b/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md index 003a81d..685cb22 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md @@ -5,28 +5,53 @@ # recsim.environments.interest_evolution.clicked_watchtime_reward - + +
View source - Calculates the total clicked watchtime from a list of responses. -```python -recsim.environments.interest_evolution.clicked_watchtime_reward(responses) -``` + -#### Args: + -* `responses`: A list of IEvResponse objects + + + -#### Returns: + + + + +
+`responses` + +A list of IEvResponse objects +
-* `reward`: A float representing the total watch time from the - responses + + + + + + + + + + +
+`reward` + +A float representing the total watch time from the responses +
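A sketch of calling the aggregator directly on hand-built responses; the watch times are illustrative:

```python
from recsim.environments.interest_evolution import (IEvResponse,
                                                    clicked_watchtime_reward)

responses = [IEvResponse(clicked=True, watch_time=3.2),
             IEvResponse(clicked=False)]
print(clicked_watchtime_reward(responses))  # expected: 3.2 (clicked only)
```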
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/create_environment.md b/docs/api_docs/python/recsim/environments/interest_evolution/create_environment.md index 8235ecb..e391fa6 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/create_environment.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/create_environment.md @@ -5,20 +5,21 @@ # recsim.environments.interest_evolution.create_environment - + +
View source - - Creates an interest evolution environment. -```python -recsim.environments.interest_evolution.create_environment(env_config) -``` + diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md b/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md index 80b3c5c..a89fb87 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md @@ -5,27 +5,53 @@ # recsim.environments.interest_evolution.total_clicks_reward - + +
View source - Calculates the total number of clicks from a list of responses. -```python -recsim.environments.interest_evolution.total_clicks_reward(responses) -``` + -#### Args: + + + + + -* `responses`: A list of IEvResponse objects + + + + +
+`responses` + +A list of IEvResponse objects +
-#### Returns: + -* `reward`: A float representing the total clicks from the responses + + + + + + + + +
+`reward` + +A float representing the total clicks from the responses +
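`create_environment` is the usual entry point for exercising this module end to end; it returns a Gym-style environment. The `env_config` keys below follow the RecSim Colab tutorials, and the values are illustrative:

```python
from recsim.environments import interest_evolution

env_config = {'num_candidates': 10,
              'slate_size': 2,
              'resample_documents': True,
              'seed': 0}
env = interest_evolution.create_environment(env_config)

observation = env.reset()
# Recommend the first two candidates and observe the transition.
observation, reward, done, info = env.step([0, 1])
```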
diff --git a/docs/api_docs/python/recsim/environments/interest_exploration.md b/docs/api_docs/python/recsim/environments/interest_exploration.md index 9a46aaa..169bd47 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration.md @@ -1,12 +1,14 @@
-
# Module: recsim.environments.interest_exploration + + +
View @@ -58,12 +60,11 @@ Class to represent users. ## Functions +[`FLAGS(...)`](../../recsim/environments/interest_evolution/FLAGS.md): Registry +of 'Flag' objects. + [`create_environment(...)`](../../recsim/environments/interest_exploration/create_environment.md): Creates an interest exploration environment. [`total_clicks_reward(...)`](../../recsim/environments/interest_exploration/total_clicks_reward.md): Calculates the total number of clicks from a list of responses. - -## Other Members - -* `FLAGS` diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEClusterUserSampler.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEClusterUserSampler.md index e114c31..98a5132 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IEClusterUserSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEClusterUserSampler.md @@ -10,22 +10,28 @@ # recsim.environments.interest_exploration.IEClusterUserSampler - + +
View source -## Class `IEClusterUserSampler` - - Samples users from predetermined types with type-specific parameters. Inherits From: [`AbstractUserSampler`](../../../recsim/user/AbstractUserSampler.md) + + This sampler consumes a distribution over user types and type-specific @@ -35,37 +41,71 @@ type-specific parameters. In this case, these are the mean and scale of a lognormal distribution, i.e. the affinity of user u of type U towards an document of type D is drawn according to lognormal(mean(U,D), scale(U,D)). -#### Args: - -* `user_type_distribution`: a non-negative array of dimension equal to - the number of user types, whose entries sum to one. -* `user_document_mean_affinity_matrix`: a non-negative two-dimensional - array with dimensions number of user types by number of document topics. - Represents the mean of the affinity score of a user type to a topic. -* `user_document_stddev_affinity_matrix`: a non-negative - two-dimensional array with dimensions number of user types by number of - document topics. Represents the scale of the affinity score of a user type - to a topic. -* `user_ctor`: constructor for a user state. - -

__init__

- -```python -__init__( - *args, - **kwargs -) -``` - -Creates a new user state sampler. - -User states of the type user_ctor are sampled. - -#### Args: + + + + + + + + + + + + + + + + + + + +
+`user_type_distribution` + +a non-negative array of dimension equal to the +number of user types, whose entries sum to one. +
+`user_document_mean_affinity_matrix` + +a non-negative two-dimensional array +with dimensions number of user types by number of document topics. +Represents the mean of the affinity score of a user type to a topic. +
+`user_document_stddev_affinity_matrix` + +a non-negative two-dimensional array +with dimensions number of user types by number of document topics. +Represents the scale of the affinity score of a user type to a topic. +
+`user_ctor` + +constructor for a user state. +
-* `user_ctor`: A class/constructor for the type of user states that - will be sampled. -* `seed`: An integer for a random seed. + + + + + + + + + + + + + +
+`user_ctor` + +A class/constructor for the type of user states that will be +sampled. +
+`seed` + +An integer for a random seed. +
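A construction sketch with made-up numbers (two user types, three document topics); the keyword names follow the arguments documented above:

```python
import numpy as np

from recsim.environments.interest_exploration import (IEClusterUserSampler,
                                                      IEUserState)

sampler = IEClusterUserSampler(
    user_type_distribution=np.array([0.3, 0.7]),
    user_document_mean_affinity_matrix=np.full((2, 3), 1.0),
    user_document_stddev_affinity_matrix=np.full((2, 3), 0.1),
    user_ctor=IEUserState)
user_state = sampler.sample_user()  # affinities drawn lognormally per type
```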
## Methods @@ -74,18 +114,18 @@ User states of the type user_ctor are sampled. View source -```python -avg_affinity_given_topic() -``` +

get_user_ctor

View source -```python -get_user_ctor() -``` + Returns the constructor/class of the user states that will be sampled. @@ -94,17 +134,17 @@ Returns the constructor/class of the user states that will be sampled. View source -```python -reset_sampler() -``` +

sample_user

View source -```python -sample_user() -``` + Creates a new instantiation of this user's hidden state parameters. diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md index 87b7000..9a11838 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md @@ -6,48 +6,55 @@ +
# recsim.environments.interest_exploration.IEDocument - + +
View source -## Class `IEDocument` - - Class to represent an IE Document. Inherits From: [`AbstractDocument`](../../../recsim/document/AbstractDocument.md) - - -#### Attributes: - -* `cluster_id`: an integer representing the document cluster. -* `quality`: non-negative real number representing the quality of the - document. - -

__init__

- -View -source - -```python -__init__( - doc_id, - cluster_id, - quality + -Initialize self. See help(type(self)) for accurate signature. + + + + + + + + + + + + + + + +
+`cluster_id` + +an integer representing the document cluster. +
+`quality` + +non-negative real number representing the quality of the document. +
## Methods @@ -56,9 +63,9 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -create_observation() -``` + Returns observable properties of this document as a float array. @@ -67,9 +74,9 @@ Returns observable properties of this document as a float array. View source -```python -doc_id() -``` + Returns the document ID. @@ -78,13 +85,14 @@ Returns the document ID. View source -```python -@classmethod -observation_space(cls) -``` + Gym space that defines how documents are represented. -## Class Members +## Class Variables * `NUM_CLUSTERS = 0` +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md index 69ce4cf..5f950cc 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md @@ -9,43 +9,56 @@ # recsim.environments.interest_exploration.IEResponse - + +
View source -## Class `IEResponse` - - Class to represent a user's response to a document. Inherits From: [`AbstractResponse`](../../../recsim/user/AbstractResponse.md) - - -#### Attributes: - -* `clicked`: boolean indicating whether the item was clicked or not. -* `quality`: a float indicating the quality of the document. -* `cluster_id`: an integer representing the topic ID of the document. - -

__init__

- -View -source - -```python -__init__( - clicked=False, - quality=0.0, - cluster_id=0 + -Initialize self. See help(type(self)) for accurate signature. + + + + + + + + + + + + + + + + + + +
+`clicked` + +boolean indicating whether the item was clicked or not. +
+`quality` + +a float indicating the quality of the document. +
+`cluster_id` + +an integer representing the topic ID of the document. +
## Methods @@ -54,9 +67,9 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -create_observation() -``` + Creates a tensor observation of this response. @@ -65,13 +78,13 @@ Creates a tensor observation of this response. View source -```python -@classmethod -response_space(cls) -``` + ArraySpec that defines how a single response is represented. -## Class Members +## Class Variables * `NUM_CLUSTERS = 0` diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md b/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md index 3c3f183..3cdfc27 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md @@ -1,7 +1,6 @@
- @@ -11,54 +10,89 @@ # recsim.environments.interest_exploration.IETopicDocumentSampler - + +
View source -## Class `IETopicDocumentSampler` - - Samples documents with topic-specific quality distribution. Inherits From: [`AbstractDocumentSampler`](../../../recsim/document/AbstractDocumentSampler.md) + + Consumes a distribution over document topics and topic-specific parameters for generating a quality score (according to a lognormal distribution). -#### Args: - -* `topic_distribution`: a non-negative array of dimension equal to the - number of topics, whose entries sum to one. -* `topic_quality_mean`: a non-negative array of dimension equal to the - number of topics, representing the mean of the topic quality score. -* `topic_quality_stddev`: a non-negative array of dimension equal to - the number of topics, representing the scale of the topic quality score. -* `doc_ctor`: A class/constructor for the type of videos that will be - sampled by this sampler. - -

__init__

- -```python -__init__( - *args, - **kwargs -) -``` - -Initialize self. See help(type(self)) for accurate signature. + + + + + + + + + + + + + + + + + + + +
+`topic_distribution` + +a non-negative array of dimension equal to the +number of topics, whose entries sum to one. +
+`topic_quality_mean` + +a non-negative array of dimension equal to the +number of topics, representing the mean of the topic quality score. +
+`topic_quality_stddev` + +a non-negative array of dimension equal to the +number of topics, representing the scale of the topic quality score. +
+`doc_ctor` + +A class/constructor for the type of videos that will be sampled +by this sampler. +
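A construction sketch mirroring the arguments above; all numbers are illustrative:

```python
import numpy as np

from recsim.environments.interest_exploration import (IEDocument,
                                                      IETopicDocumentSampler)

sampler = IETopicDocumentSampler(
    topic_distribution=np.array([0.2, 0.8]),
    topic_quality_mean=np.array([0.8, 1.2]),
    topic_quality_stddev=np.array([0.1, 0.1]),
    doc_ctor=IEDocument)
document = sampler.sample_document()  # topic first, then quality given topic
```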
-## Properties + -

num_clusters

+ + + + + + + +
+`num_clusters` + Returns the number of document clusters. Returns 0 if not applicable. +
## Methods @@ -67,9 +101,9 @@ Returns the number of document clusters. Returns 0 if not applicable. View source -```python -get_doc_ctor() -``` + Returns the constructor/class of the documents that will be sampled. @@ -78,18 +112,18 @@ Returns the constructor/class of the documents that will be sampled. View source -```python -reset_sampler() -``` +

sample_document

View source -```python -sample_document() -``` + Samples the topic and then samples document features given the topic. @@ -98,11 +132,10 @@ Samples the topic and then samples document features given the topic. View source -```python -update_state( - documents, - responses + Update document state (if needed) given user's (or users') responses. diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md index d8eb990..ae88380 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md @@ -1,7 +1,6 @@
- @@ -16,21 +15,27 @@ # recsim.environments.interest_exploration.IEUserModel - + +
View source -## Class `IEUserModel` - - Class to model a user. Inherits From: [`AbstractUserModel`](../../../recsim/user/AbstractUserModel.md) + + The user in this scenario is completely characterized by a vector g of affinity @@ -42,48 +47,69 @@ on these scores. The state space consists of a vector of affinity scores which is unique to the user and static but not observable. -#### Args: - -slate_size: An integer representing the size of the slate. no_click_mass: A -float indicating the mass given to a no-click option. Must be positive, -otherwise CTR is always 1. choice_model_ctor: A contructor function to create -user choice model. user_state_ctor: A constructor to create user state. -response_model_ctor: A constructor function to create response. The function -should take a string of doc ID as input and returns a IEResponse object. seed: -an integer used as the seed in random sampling. - -

__init__

- -View -source - -```python -__init__( - slate_size, - no_click_mass=5, - choice_model_ctor=recsim.choice_model.MultinomialLogitChoiceModel, - user_state_ctor=None, - response_model_ctor=None, - seed=0 -) -``` + -Initializes a new user model. + + + -#### Args: +
-* `response_model_ctor`: A class/constructor representing the type of - responses this model will generate. -* `user_sampler`: An instance of AbstractUserSampler that can generate - initial user states from an inital state distribution. -* `slate_size`: integer number of documents that can be served to the - user at any interaction. +slate_size: An integer representing the size of the slate. no_click_mass: A +float indicating the mass given to a no-click option. choice_model_ctor: A +contructor function to create user choice model. user_state_ctor: A constructor +to create user state. response_model_ctor: A constructor function to create +response. The function should take a string of doc ID as input and returns a +IEResponse object. seed: an integer used as the seed in random sampling. + + + + + + + + + + + + + + + + + +
+`response_model_ctor` + +A class/constructor representing the type of +responses this model will generate. +
+`user_sampler` + +An instance of AbstractUserSampler that can generate +initial user states from an initial state distribution. +
+`slate_size` + +integer number of documents that can be served to the user at +any interaction. +
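A minimal construction sketch; `no_click_mass` and the multinomial logit choice model keep their documented defaults:

```python
from recsim.environments import interest_exploration as ie

user_model = ie.IEUserModel(slate_size=2,
                            user_state_ctor=ie.IEUserState,
                            response_model_ctor=ie.IEResponse,
                            seed=42)
```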
-## Properties + -

avg_user_state

+ + + + + + + +
+`avg_user_state` + Returns the prior of user state. +
## Methods @@ -92,9 +118,9 @@ Returns the prior of user state. View source -```python -create_observation() -``` + Emits observation about user's state. @@ -103,9 +129,9 @@ Emits observation about user's state. View source -```python -get_response_model_ctor() -``` + Returns a constructor for the type of response this model will create. @@ -114,9 +140,9 @@ Returns a constructor for the type of response this model will create. View source -```python -is_terminal() -``` + Returns a boolean indicating if the session is over. @@ -125,9 +151,9 @@ Returns a boolean indicating if the session is over. View source -```python -observation_space() -``` + A Gym.spaces object that describes possible user observations. @@ -136,9 +162,9 @@ A Gym.spaces object that describes possible user observations. View source -```python -reset() -``` + Resets the user. @@ -147,9 +173,9 @@ Resets the user. View source -```python -reset_sampler() -``` + Resets the sampler. @@ -158,46 +184,89 @@ Resets the sampler. View source -```python -response_space() -``` +

simulate_response

View source -```python -simulate_response(documents) -``` + Simulates the user's response to a slate of documents with choice model. -#### Args: + + + + + + + + + + +
Args
+`documents` + +a list of IEDocument objects in the slate. +
-* `documents`: a list of IEDocument objects in the slate. + -#### Returns: + + + -* `responses`: a list of IEResponse objects, one for each document. + + + + +
Returns
+`responses` + +a list of IEResponse objects, one for each document. +

update_state

View source -```python -update_state( - slate_documents, - responses + Updates the user's state based on the slate and document selected. -#### Args: + + + + + + + + + + + + + +
Args
+`slate_documents` + +A list of AbstractDocuments for items in the slate. +
+`responses` + +A list of AbstractResponses for each item in the slate. +
-* `slate_documents`: A list of AbstractDocuments for items in the - slate. -* `responses`: A list of AbstractResponses for each item in the slate. - Updates: The user's hidden state. +Updates: The user's hidden state. diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEUserState.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEUserState.md index 7a67fec..cf12bf5 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IEUserState.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEUserState.md @@ -5,42 +5,48 @@ +
# recsim.environments.interest_exploration.IEUserState - + +
View source -## Class `IEUserState` - - Class to represent users. Inherits From: [`AbstractUserState`](../../../recsim/user/AbstractUserState.md) - - -#### Attributes: + -* `topic_affinity`: a nonnegative vector holds document type affinities - which are not temporal dynamics and hidden. - -

__init__

- -View -source - -```python -__init__(topic_affinity) -``` + -Initializes a new user. + + + + + + + + + + +
+`topic_affinity` + +a nonnegative vector of document type affinities; the affinities +are hidden and have no temporal dynamics. +
## Methods @@ -49,9 +55,9 @@ Initializes a new user. View source -```python -create_observation() -``` + User's topic_affinity is not observable. @@ -60,10 +66,10 @@ User's topic_affinity is not observable. View source -```python -@staticmethod -observation_space() -``` + Gym.spaces object that defines how user states are represented. @@ -72,8 +78,14 @@ Gym.spaces object that defines how user states are represented. View source -```python -score_document(doc_obs) -``` + Returns user document affinity plus document quality. + +## Class Variables + +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md b/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md index a555897..f8050b5 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md @@ -5,20 +5,21 @@ # recsim.environments.interest_exploration.create_environment - + +
View source - - Creates an interest exploration environment. -```python -recsim.environments.interest_exploration.create_environment(env_config) -``` + diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md b/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md index 39685e8..17599a7 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md @@ -5,27 +5,53 @@ # recsim.environments.interest_exploration.total_clicks_reward - + +
View source - Calculates the total number of clicks from a list of responses. -```python -recsim.environments.interest_exploration.total_clicks_reward(responses) -``` + -#### Args: + + + + + -* `responses`: A list of IEResponse objects + + + + +
+`responses` + +A list of IEResponse objects +
-#### Returns: + -* `reward`: A float representing the total clicks from the responses + + + + + + + + +
+`reward` + +A float representing the total clicks from the responses +
diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction.md index 1107fc3..f1f7c33 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction.md @@ -1,12 +1,14 @@
-
# Module: recsim.environments.long_term_satisfaction + + +
View @@ -46,12 +48,11 @@ Class to represent users. ## Functions +[`FLAGS(...)`](../../recsim/environments/interest_evolution/FLAGS.md): Registry +of 'Flag' objects. + [`clicked_engagement_reward(...)`](../../recsim/environments/long_term_satisfaction/clicked_engagement_reward.md): Calculates the total clicked watchtime from a list of responses. [`create_environment(...)`](../../recsim/environments/long_term_satisfaction/create_environment.md): Creates a long-term satisfaction environment. - -## Other Members - -* `FLAGS` diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocument.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocument.md index 5c4dc54..174e027 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocument.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocument.md @@ -5,46 +5,49 @@ +
# recsim.environments.long_term_satisfaction.LTSDocument - + +
View source -## Class `LTSDocument` - - Class to represent an LTS Document. Inherits From: [`AbstractDocument`](../../../recsim/document/AbstractDocument.md) - - -#### Attributes: - -* `clickbait_score`: real number in [0,1] representing the - clickbaitiness of a document. - -

__init__

- -View -source - -```python -__init__( - doc_id, - clickbait_score + -Initialize self. See help(type(self)) for accurate signature. + + + + + + + + + + + + +
+`clickbait_score` + +real number in [0,1] representing the clickbaitiness of a +document. +
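A one-line construction sketch (the score is illustrative):

```python
from recsim.environments.long_term_satisfaction import LTSDocument

doc = LTSDocument(doc_id=1, clickbait_score=0.9)  # a very clickbaity item
obs = doc.create_observation()  # float array exposing the clickbait score
```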
## Methods @@ -53,9 +56,9 @@ Initialize self. See help(type(self)) for accurate signature. View source -```python -create_observation() -``` + Returns observable properties of this document as a float array. @@ -64,9 +67,9 @@ Returns observable properties of this document as a float array. View source -```python -doc_id() -``` + Returns the document ID. @@ -75,9 +78,13 @@ Returns the document ID. View source -```python -@staticmethod -observation_space() -``` + Gym space that defines how documents are represented. + +## Class Variables + +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md index 8e8ef14..a556dfb 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md @@ -1,7 +1,6 @@
- @@ -11,48 +10,54 @@ # recsim.environments.long_term_satisfaction.LTSDocumentSampler - + +
View source -## Class `LTSDocumentSampler` - - Class to sample LTSDocument documents. Inherits From: [`AbstractDocumentSampler`](../../../recsim/document/AbstractDocumentSampler.md) - + -#### Args: + -doc_ctor: A class/constructor for the type of documents that will be sampled by -this sampler. + -

__init__

+ + + -View -source - -```python -__init__( - doc_ctor=recsim.environments.long_term_satisfaction.LTSDocument, - **kwargs -) -``` +
-Initialize self. See help(type(self)) for accurate signature. +doc_ctor: A class/constructor for the type of documents that will be sampled by +this sampler. -## Properties + -

num_clusters

+ + + + + + + +
+`num_clusters` + Returns the number of document clusters. Returns 0 if not applicable. +
## Methods @@ -61,9 +66,9 @@ Returns the number of document clusters. Returns 0 if not applicable. View source -```python -get_doc_ctor() -``` + Returns the constructor/class of the documents that will be sampled. @@ -72,18 +77,18 @@ Returns the constructor/class of the documents that will be sampled. View source -```python -reset_sampler() -``` +

sample_document

View source -```python -sample_document() -``` + Samples and returns an instantiation of AbstractDocument. @@ -92,11 +97,10 @@ Samples and returns an instantiation of AbstractDocument. View source -```python -update_state( - documents, - responses + Update document state (if needed) given user's (or users') responses. diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md index 84f8123..ba9b8aa 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md @@ -9,48 +9,74 @@ # recsim.environments.long_term_satisfaction.LTSResponse - + +
View source -## Class `LTSResponse` - - Class to represent a user's response to a document. Inherits From: [`AbstractResponse`](../../../recsim/user/AbstractResponse.md) - - -#### Attributes: - -* `engagement`: real number representing the degree of engagement with - a document (e.g. watch time). -* `clicked`: boolean indicating whether the item was clicked or not. - -

__init__

- -View -source - -```python -__init__( - clicked=False, - engagement=0.0 + -Creates a new user response for a document. + -#### Args: + + + + + + + + + + + + + +
+`clicked` + +boolean indicating whether the item was clicked or not. +
+`engagement` + +real number representing the degree of engagement with a +document (e.g. watch time). +
-* `clicked`: boolean indicating whether the item was clicked or not. -* `engagement`: real number representing the degree of engagement with - a document (e.g. watch time). + + + + + + + + + + + + + +
+`engagement` + +real number representing the degree of engagement with a +document (e.g. watch time). +
+`clicked` + +boolean indicating whether the item was clicked or not. +
## Methods @@ -59,9 +85,9 @@ Creates a new user response for a document. View source -```python -create_observation() -``` + Creates a tensor observation of this response. @@ -70,13 +96,13 @@ Creates a tensor observation of this response. View source -```python -@classmethod -response_space(cls) -``` + ArraySpec that defines how a single response is represented. -## Class Members +## Class Variables * `MAX_ENGAGEMENT_MAGNITUDE = 100.0` diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md index 8402641..2c3dd01 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md @@ -9,34 +9,29 @@ # recsim.environments.long_term_satisfaction.LTSStaticUserSampler - + +
View source -## Class `LTSStaticUserSampler` - - Generates users with identical predetermined parameters. Inherits From: [`AbstractUserSampler`](../../../recsim/user/AbstractUserSampler.md) - -

__init__

- -```python -__init__( - *args, - **kwargs + -Creates a new user state sampler. + ## Methods @@ -45,9 +40,9 @@ Creates a new user state sampler. View source -```python -get_user_ctor() -``` + Returns the constructor/class of the user states that will be sampled. @@ -56,17 +51,17 @@ Returns the constructor/class of the user states that will be sampled. View source -```python -reset_sampler() -``` +

sample_user

View source -```python -sample_user() -``` + Creates a new instantiation of this user's hidden state parameters. diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md index e5077d8..6f9cfc1 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md @@ -16,21 +16,25 @@ # recsim.environments.long_term_satisfaction.LTSUserModel - + +
View source -## Class `LTSUserModel` - - Class to model a user with long-term satisfaction dynamics. Inherits From: [`AbstractUserModel`](../../../recsim/user/AbstractUserModel.md) + + Implements a controlled continuous Hidden Markov Model of the user having the @@ -60,30 +64,38 @@ A constructor to create user state. response_model_ctor: A constructor function to create response. The function should take a doc ID string as input and return an LTSResponse object. seed: an integer as the seed in random sampling.

__init__

- -View -source - -```python -__init__( - slate_size, - user_state_ctor=None, - response_model_ctor=None, - seed=0 -) -``` - -Initializes a new user model. - -#### Args: - -* `response_model_ctor`: A class/constructor representing the type of - responses this model will generate. -* `user_sampler`: An instance of AbstractUserSampler that can generate - initial user states from an inital state distribution. -* `slate_size`: integer number of documents that can be served to the - user at any interaction. + + + + + + + + + + + + + + + + +
+`response_model_ctor` + +A class/constructor representing the type of +responses this model will generate. +
+`user_sampler` + +An instance of AbstractUserSampler that can generate +initial user states from an initial state distribution. +
+`slate_size` + +integer number of documents that can be served to the user at +any interaction. +
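A minimal construction sketch using the constructor arguments documented above:

```python
from recsim.environments import long_term_satisfaction as lts

user_model = lts.LTSUserModel(slate_size=2,
                              user_state_ctor=lts.LTSUserState,
                              response_model_ctor=lts.LTSResponse,
                              seed=0)
```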
## Methods @@ -92,9 +104,9 @@ Initializes a new user model. View source -```python -create_observation() -``` + Emits observation about user's state. @@ -103,29 +115,48 @@ Emits observation about user's state. View source -```python -generate_response( - doc, - response + Generates a response to a clicked document. -#### Args: + 
Args
+`doc` + +an LTSDocument object. +
+`response` + +an LTSResponse for the document. +
-* `doc`: an LTSDocument object. -* `response`: an LTSResponse for the document. Updates: response, with - whether the document was clicked, liked, and how much of it was watched. +Updates: response, with whether the document was clicked, liked, and how much of +it was watched.

get_response_model_ctor

View source -```python -get_response_model_ctor() -``` + Returns a constructor for the type of response this model will create. @@ -134,9 +165,9 @@ Returns a constructor for the type of response this model will create. View source -```python -is_terminal() -``` + Returns a boolean indicating if the session is over. @@ -145,9 +176,9 @@ Returns a boolean indicating if the session is over. View source -```python -observation_space() -``` + A Gym.spaces object that describes possible user observations. @@ -156,9 +187,9 @@ A Gym.spaces object that describes possible user observations. View source -```python -reset() -``` + Resets the user. @@ -167,9 +198,9 @@ Resets the user. View source -```python -reset_sampler() -``` + Resets the sampler. @@ -178,45 +209,88 @@ Resets the sampler. View source -```python -response_space() -``` +

simulate_response

View source -```python -simulate_response(documents) -``` + Simulates the user's response to a slate of documents with choice model. -#### Args: + + + + + -* `documents`: a list of LTSDocument objects. + + + + +
Args
+`documents` + +a list of LTSDocument objects. +
+ + -#### Returns: + + + -* `responses`: a list of LTSResponse objects, one for each document. + + + + +
Returns
+`responses` + +a list of LTSResponse objects, one for each document. +

update_state

View source -```python -update_state( - slate_documents, - responses + Updates the user's latent state based on responses to the slate. -#### Args: - -* `slate_documents`: a list of LTSDocuments representing the slate -* `responses`: a list of LTSResponses representing the user's response - to each document in the slate. + + + + + + + + + + + + + +
Args
+`slate_documents` + +a list of LTSDocuments representing the slate +
+`responses` + +a list of LTSResponses representing the user's response to each +document in the slate. +
diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserState.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserState.md index f35d7d0..9d44cad 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserState.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserState.md @@ -5,25 +5,31 @@ +
# recsim.environments.long_term_satisfaction.LTSUserState - + +
View source -## Class `LTSUserState` - - Class to represent users. Inherits From: [`AbstractUserState`](../../../recsim/user/AbstractUserState.md) + + See the LTSUserModel class documentation for precise information about how the @@ -36,27 +42,6 @@ kale_mean: mean of engagement with non-clickbaity content. kale_stddev: standard deviation of engagement with non-clickbaity content. net_positive_exposure: starting value for NPE (NPE_0). time_budget: length of a user session. -

__init__

- -View -source - -```python -__init__( - memory_discount, - sensitivity, - innovation_stddev, - choc_mean, - choc_stddev, - kale_mean, - kale_stddev, - net_positive_exposure, - time_budget -) -``` - -Initializes a new user. - ## Methods

create_observation

@@ -64,9 +49,9 @@ Initializes a new user. View source -```python -create_observation() -``` + User's state is not observable. @@ -75,10 +60,10 @@ User's state is not observable. View source -```python -@staticmethod -observation_space() -``` + Gym.spaces object that defines how user states are represented. @@ -87,6 +72,12 @@ Gym.spaces object that defines how user states are represented. View source -```python -score_document(doc_obs) -``` + + +## Class Variables + +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md index 726ce10..f99f8cd 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md @@ -5,28 +5,53 @@ # recsim.environments.long_term_satisfaction.clicked_engagement_reward - + +
View source - Calculates the total clicked watchtime from a list of responses. -```python -recsim.environments.long_term_satisfaction.clicked_engagement_reward(responses) -``` + -#### Args: + -* `responses`: A list of LTSResponse objects + + + -#### Returns: + + + + +
+`responses` + +A list of LTSResponse objects +
-* `reward`: A float representing the total watch time from the - responses + + + + + + + + + + +
+`reward` + +A float representing the total watch time from the responses +
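A direct-call sketch on hand-built responses; the engagement values are illustrative:

```python
from recsim.environments import long_term_satisfaction as lts

responses = [lts.LTSResponse(clicked=True, engagement=4.2),
             lts.LTSResponse(clicked=False)]
print(lts.clicked_engagement_reward(responses))  # expected: 4.2
```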
diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/create_environment.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/create_environment.md index 14c5506..f3d9239 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/create_environment.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/create_environment.md @@ -5,20 +5,21 @@ # recsim.environments.long_term_satisfaction.create_environment - + +
View source - - Creates a long-term satisfaction environment. -```python -recsim.environments.long_term_satisfaction.create_environment(env_config) -``` + diff --git a/docs/api_docs/python/recsim/simulator.md b/docs/api_docs/python/recsim/simulator.md index 199d7f7..44d9547 100644 --- a/docs/api_docs/python/recsim/simulator.md +++ b/docs/api_docs/python/recsim/simulator.md @@ -5,7 +5,10 @@ # Module: recsim.simulator + + +
View diff --git a/docs/api_docs/python/recsim/simulator/environment.md b/docs/api_docs/python/recsim/simulator/environment.md index af23234..7381bf4 100644 --- a/docs/api_docs/python/recsim/simulator/environment.md +++ b/docs/api_docs/python/recsim/simulator/environment.md @@ -5,7 +5,10 @@ # Module: recsim.simulator.environment + + +
View diff --git a/docs/api_docs/python/recsim/simulator/environment/AbstractEnvironment.md b/docs/api_docs/python/recsim/simulator/environment/AbstractEnvironment.md index 24a6c54..353b6ce 100644 --- a/docs/api_docs/python/recsim/simulator/environment/AbstractEnvironment.md +++ b/docs/api_docs/python/recsim/simulator/environment/AbstractEnvironment.md @@ -1,10 +1,6 @@
- - - - @@ -13,70 +9,122 @@ # recsim.simulator.environment.AbstractEnvironment - + +
View source -## Class `AbstractEnvironment` - - Abstract class representing the recommender system environment. - - -#### Attributes: - -* `user_model`: An list or single instantiation of AbstractUserModel - representing the user/users. -* `document_sampler`: An instantiation of AbstractDocumentSampler. -* `num_candidates`: An integer representing the size of the - candidate_set. -* `slate_size`: An integer representing the slate size. -* `candidate_set`: An instantiation of CandidateSet. -* `num_clusters`: An integer representing the number of document - clusters. - -

__init__

- -View -source - -```python -__init__( - user_model, - document_sampler, - num_candidates, - slate_size, + -

candidate_set

- -

num_candidates

- -

slate_size

+ + + + + + + + + + + + + + + + + + + + + + + +
+`user_model` + +An instantiation of AbstractUserModel or list of such +instantiations +
+`document_sampler` + +An instantiation of AbstractDocumentSampler +
+`num_candidates` + +An integer representing the size of the candidate_set +
+`slate_size` + +An integer representing the slate size +
+`resample_documents` + +A boolean indicating whether to resample the candidate +set every step +
-

user_model

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+`user_model`
+
+A list or single instantiation of AbstractUserModel
+representing the user/users.
+
+`document_sampler` + +An instantiation of AbstractDocumentSampler. +
+`num_candidates` + +An integer representing the size of the candidate_set. +
+`slate_size` + +An integer representing the slate size. +
+`candidate_set` + +An instantiation of CandidateSet. +
+`num_clusters` + +An integer representing the number of document clusters. +
## Methods

@@ -85,27 +133,46 @@ Initializes a new simulation environment.

View source

-```python
-reset()
-```
+

Resets the environment and returns the first observation.

-#### Returns:
-
-* `user_obs`: An array of floats representing observations of the
-  user's current state
-* `doc_obs`: An OrderedDict of document observations keyed by document
-  ids
+
Returns
+`user_obs` + +An array of floats representing observations of the user's +current state +
+`doc_obs` + +An OrderedDict of document observations keyed by document ids +

reset_sampler

View source -```python -reset_sampler() -``` + Resets the relevant samplers of documents and user/users. @@ -114,22 +181,65 @@ Resets the relevant samplers of documents and user/users. View source -```python -step(slate) -``` + Executes the action, returns next state observation and reward. -#### Args: - -* `slate`: An integer array of size slate_size (or list of such - arrays), where each element is an index into the set of current_documents - presented. - -#### Returns: + + + + + + + + + + +
Args
+`slate` + +An integer array of size slate_size (or list of such arrays), where +each element is an index into the set of current_documents presented. +
-* `user_obs`: A gym observation representing the user's next state -* `doc_obs`: A list of observations of the documents -* `responses`: A list of AbstractResponse objects for each item in the - slate -* `done`: A boolean indicating whether the episode has terminated + + + + + + + + + + + + + + + + + + + +
Returns
+`user_obs` + +A gym observation representing the user's next state +
+`doc_obs` + +A list of observations of the documents +
+`responses` + +A list of AbstractResponse objects for each item in the slate +
+`done` + +A boolean indicating whether the episode has terminated +
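Putting the pieces together, a raw (un-wrapped) environment can be stepped directly. The sketch below assembles one from the long-term satisfaction components and assumes their constructors accept the arguments shown:

```python
from recsim.environments import long_term_satisfaction as lts
from recsim.simulator import environment

user_model = lts.LTSUserModel(
    slate_size=2,
    user_state_ctor=lts.LTSUserState,
    response_model_ctor=lts.LTSResponse)
env = environment.Environment(
    user_model,
    lts.LTSDocumentSampler(),
    num_candidates=10,
    slate_size=2,
    resample_documents=True)

user_obs, doc_obs = env.reset()
# Recommend the first two candidate documents and observe the outcome.
user_obs, doc_obs, responses, done = env.step([0, 1])
```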
diff --git a/docs/api_docs/python/recsim/simulator/environment/Environment.md b/docs/api_docs/python/recsim/simulator/environment/Environment.md index 91dea36..eaa62d1 100644 --- a/docs/api_docs/python/recsim/simulator/environment/Environment.md +++ b/docs/api_docs/python/recsim/simulator/environment/Environment.md @@ -1,10 +1,6 @@
- - - - @@ -13,77 +9,132 @@ # recsim.simulator.environment.Environment - + +
View source -## Class `Environment` - - Class to represent the environment with one user. Inherits From: [`AbstractEnvironment`](../../../recsim/simulator/environment/AbstractEnvironment.md) -### Aliases: - -* Class `recsim.simulator.environment.SingleUserEnvironment` - - - -#### Attributes: - -* `user_model`: An instantiation of AbstractUserModel that represents a - user. -* `document_sampler`: An instantiation of AbstractDocumentSampler. -* `num_candidates`: An integer representing the size of the - candidate_set. -* `slate_size`: An integer representing the slate size. -* `candidate_set`: An instantiation of CandidateSet. -* `num_clusters`: An integer representing the number of document - clusters. - -

__init__

- -View -source - -```python -__init__( - user_model, - document_sampler, - num_candidates, - slate_size, + + + -## Properties - -

candidate_set

- -

num_candidates

- -

slate_size

+ + + + + + + + + + + + + + + + + + + + + + + +
+`user_model` + +An instantiation of AbstractUserModel or list of such +instantiations +
+`document_sampler` + +An instantiation of AbstractDocumentSampler +
+`num_candidates` + +An integer representing the size of the candidate_set +
+`slate_size` + +An integer representing the slate size +
+`resample_documents` + +A boolean indicating whether to resample the candidate +set every step +
-

user_model

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+`user_model` + +An instantiation of AbstractUserModel that represents a user. +
+`document_sampler` + +An instantiation of AbstractDocumentSampler. +
+`num_candidates` + +An integer representing the size of the candidate_set. +
+`slate_size` + +An integer representing the slate size. +
+`candidate_set` + +An instantiation of CandidateSet. +
+`num_clusters` + +An integer representing the number of document clusters. +
## Methods

@@ -92,27 +143,44 @@ Initializes a new simulation environment.

View source

-```python
-reset()
-```
+

Resets the environment and returns the first observation.

-#### Returns:
-
-* `user_obs`: An array of floats representing observations of the
-  user's current state
-* `doc_obs`: An OrderedDict of document observations keyed by document
-  ids
+
Returns
+`user_obs` + +An array of floats representing observations of the user's +current state +
+`doc_obs` + +An OrderedDict of document observations keyed by document ids +

reset_sampler

View source -```python -reset_sampler() -``` + Resets the relevant samplers of documents and user/users. @@ -121,21 +189,64 @@ Resets the relevant samplers of documents and user/users. View source -```python -step(slate) -``` + Executes the action, returns next state observation and reward. -#### Args: - -* `slate`: An integer array of size slate_size, where each element is - an index into the set of current_documents presented - -#### Returns: + + + + + + + + + + +
Args
+`slate` + +An integer array of size slate_size, where each element is an index +into the set of current_documents presented +
-* `user_obs`: A gym observation representing the user's next state -* `doc_obs`: A list of observations of the documents -* `responses`: A list of AbstractResponse objects for each item in the - slate -* `done`: A boolean indicating whether the episode has terminated + + + + + + + + + + + + + + + + + + + +
Returns
+`user_obs` + +A gym observation representing the user's next state +
+`doc_obs` + +A list of observations of the documents +
+`responses` + +A list of AbstractResponse objects for each item in the slate +
+`done` + +A boolean indicating whether the episode has terminated +
diff --git a/docs/api_docs/python/recsim/simulator/environment/MultiUserEnvironment.md b/docs/api_docs/python/recsim/simulator/environment/MultiUserEnvironment.md index 366834f..27fd02e 100644 --- a/docs/api_docs/python/recsim/simulator/environment/MultiUserEnvironment.md +++ b/docs/api_docs/python/recsim/simulator/environment/MultiUserEnvironment.md @@ -1,11 +1,6 @@
- - - - - @@ -14,76 +9,131 @@ # recsim.simulator.environment.MultiUserEnvironment - + +
View source -## Class `MultiUserEnvironment` - - Class to represent environment with multiple users. Inherits From: [`AbstractEnvironment`](../../../recsim/simulator/environment/AbstractEnvironment.md) - - -#### Attributes: - -* `user_model`: A list of AbstractUserModel instances that represent - users. -* `num_users`: An integer representing the number of users. -* `document_sampler`: An instantiation of AbstractDocumentSampler. -* `num_candidates`: An integer representing the size of the - candidate_set. -* `slate_size`: An integer representing the slate size. -* `candidate_set`: An instantiation of CandidateSet. -* `num_clusters`: An integer representing the number of document - clusters. - -

__init__

- -View -source - -```python -__init__( - user_model, - document_sampler, - num_candidates, - slate_size, + -

candidate_set

- -

num_candidates

- -

num_users

- -

slate_size

+ + + + + + + + + + + + + + + + + + + + + + + +
+`user_model` + +An instantiation of AbstractUserModel or list of such +instantiations +
+`document_sampler` + +An instantiation of AbstractDocumentSampler +
+`num_candidates` + +An integer representing the size of the candidate_set +
+`slate_size` + +An integer representing the slate size +
+`resample_documents` + +A boolean indicating whether to resample the candidate +set every step +
-

user_model

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+`user_model` + +A list of AbstractUserModel instances that represent users. +
+`num_users` + +An integer representing the number of users. +
+`document_sampler` + +An instantiation of AbstractDocumentSampler. +
+`num_candidates` + +An integer representing the size of the candidate_set. +
+`slate_size` + +An integer representing the slate size. +
+`candidate_set` + +An instantiation of CandidateSet. +
+`num_clusters` + +An integer representing the number of document clusters. +
## Methods

@@ -92,27 +142,44 @@ Initializes a new simulation environment.

View source

-```python
-reset()
-```
+

Resets the environment and returns the first observation.

-#### Returns:
-
-* `user_obs`: An array of floats representing observations of the
-  user's current state
-* `doc_obs`: An OrderedDict of document observations keyed by document
-  ids
+
Returns
+`user_obs` + +An array of floats representing observations of the user's +current state +
+`doc_obs` + +An OrderedDict of document observations keyed by document ids +

reset_sampler

View source -```python -reset_sampler() -``` + Resets the relevant samplers of documents and user/users. @@ -121,23 +188,65 @@ Resets the relevant samplers of documents and user/users. View source -```python -step(slates) -``` + Executes the action, returns next state observation and reward. -#### Args: - -* `slates`: A list of slates, where each slate is an integer array of - size slate_size, where each element is an index into the set of - current_documents presented - -#### Returns: + + + + + + + + + + +
Args
+`slates`
+
+A list of slates, where each slate is an integer array of size
+slate_size in which each element is an index into the set of
+current_documents presented
+
-* `user_obs`: A list of gym observation representing all users' next - state -* `doc_obs`: A list of observations of the documents -* `responses`: A list of AbstractResponse objects for each item in the - slate -* `done`: A boolean indicating whether the episode has terminated + + + + + + + + + + + + + + + + + + + +
Returns
+`user_obs`
+
+A list of gym observations representing all users' next states
+
+`doc_obs` + +A list of observations of the documents +
+`responses` + +A list of AbstractResponse objects for each item in the slate +
+`done` + +A boolean indicating whether the episode has terminated +
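The multi-user variant differs from the single-user class mainly in that `step` consumes one slate per user. A hedged sketch, reusing per-user models built as in the single-user example:

```python
from recsim.simulator import environment

# user_models and document_sampler are assumed to be constructed as in
# the single-user example, one user model per simulated user.
multi_env = environment.MultiUserEnvironment(
    user_models,
    document_sampler,
    num_candidates=10,
    slate_size=2,
    resample_documents=True)

slates = [[0, 1], [2, 3]]  # one slate of document indices per user
user_obs, doc_obs, responses, done = multi_env.step(slates)
```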
diff --git a/docs/api_docs/python/recsim/simulator/recsim_gym.md b/docs/api_docs/python/recsim/simulator/recsim_gym.md index 6e7b3f5..d6a6e1c 100644 --- a/docs/api_docs/python/recsim/simulator/recsim_gym.md +++ b/docs/api_docs/python/recsim/simulator/recsim_gym.md @@ -5,7 +5,10 @@ # Module: recsim.simulator.recsim_gym + + +
View diff --git a/docs/api_docs/python/recsim/simulator/recsim_gym/RecSimGymEnv.md b/docs/api_docs/python/recsim/simulator/recsim_gym/RecSimGymEnv.md index 3673757..7379190 100644 --- a/docs/api_docs/python/recsim/simulator/recsim_gym/RecSimGymEnv.md +++ b/docs/api_docs/python/recsim/simulator/recsim_gym/RecSimGymEnv.md @@ -1,11 +1,6 @@
- - - - - @@ -21,112 +16,125 @@ +
# recsim.simulator.recsim_gym.RecSimGymEnv - + +
View source -## Class `RecSimGymEnv` - - Class to wrap recommender system environment to gym.Env. - - -#### Attributes: - -* `game_over`: A boolean indicating whether the current game has - finished -* `action_space`: A gym.spaces object that specifies the space for - possible actions. -* `observation_space`: A gym.spaces object that specifies the space for - possible observations. - -

__init__

- -View -source - -```python -__init__( - raw_environment, - reward_aggregator, + -

action_space

- -Returns the action space of the environment. - -Each action is a vector that specified document slate. Each element in the -vector corresponds to the index of the document in the candidate set. - -

environment

+ + + + + + + + + + + + + + + + + + + + +
+`raw_environment` + +A recsim recommender system environment. +
+`reward_aggregator` + +A function mapping a list of responses to a number. +
+`metrics_aggregator` + +A function aggregating metrics over all steps given +responses and response_names. +
+`metrics_writer` + +A function writing final metrics to TensorBoard. +
+ + + + + + + + + + + + + + + + + + + + + + +
+`game_over` + +A boolean indicating whether the current game has finished +
+`action_space` + +A gym.spaces object that specifies the space for possible +actions. +
+`observation_space` + +A gym.spaces object that specifies the space for possible +observations. +
+`environment` + Returns the recsim recommender system environment. - -

game_over

- -

observation_space

- -Returns the observation space of the environment. - -Each observation is a dictionary with three keys `user`, `doc` and `response` -that includes observation about user state, document and user response, -respectively. - -

unwrapped

- +
+`unwrapped` + Completely unwrap this env. - -#### Returns: - -* `gym.Env`: The base non-wrapped gym.Env instance +
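Wrapping a raw environment follows the constructor arguments above; the reward aggregator is a plain function over the slate responses. A minimal sketch, assuming `raw_env` is the single-user environment assembled earlier:

```python
from recsim.environments import long_term_satisfaction as lts
from recsim.simulator import recsim_gym

gym_env = recsim_gym.RecSimGymEnv(raw_env, lts.clicked_engagement_reward)
```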
## Methods -

__enter__

- -```python -__enter__() -``` - -

__exit__

- -```python -__exit__(*args) -``` -

close

View source -```python -close() -``` + Override close in your subclass to perform any necessary cleanup. @@ -138,18 +146,20 @@ when the program exits. View source -```python -extract_env_info() -``` +

render

View source -```python -render(mode='human') -``` + Renders the environment. @@ -170,9 +180,18 @@ Make sure that your class's metadata 'render.modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method. -#### Args: + + + + + + + + +
Args
mode (str): the mode to render with +
#### Example: @@ -191,24 +210,33 @@ class MyEnv(Env): metadata = {'render.modes': ['human', 'rgb_array']} View source -```python -reset() -``` + Resets the state of the environment and returns an initial observation. -#### Returns: + + + + + + + + +
Returns
observation (object): the initial observation. +

reset_metrics

View source -```python -reset_metrics() -``` + Resets every metric to zero. @@ -220,18 +248,20 @@ reset() gets called for every episode. View source -```python -reset_sampler() -``` +

seed

View source -```python -seed(seed=None) -``` + Sets the seed for this env's random number generator(s). @@ -241,21 +271,33 @@ Some environments use multiple pseudorandom number generators. We want to capture all such seeds used in order to ensure that there aren't accidental correlations between multiple generators. -#### Returns: + + + + + + + + -list: Returns the list of seeds used in this env's random number -generators. The first value in the list should be the "main" seed, or the value -which a reproducer should pass to 'seed'. Often, the main seed equals the -provided 'seed', but this won't be true if seed=None, for example. +
Returns
+list: Returns the list of seeds used in this env's random +number generators. The first value in the list should be the +"main" seed, or the value which a reproducer should pass to +'seed'. Often, the main seed equals the provided 'seed', but +this won't be true if seed=None, for example. +

step

View source -```python -step(action) -``` + Runs one timestep of the environment's dynamics. @@ -263,31 +305,51 @@ When end of episode is reached, you are responsible for calling `reset()` to reset this environment's state. Accepts an action and returns a tuple (observation, reward, done, info). -#### Args: + + + + + + + -#### Returns: +
Args
action (object): An action provided by the agent
+
-A four-tuple of (observation, reward, done, info) where: observation (object): -agent's observation that include 1. User's state features 2. Document's -observation 3. Observation about user's slate responses. reward (float) : The -amount of reward returned after previous action done (boolean): Whether the -episode has ended, in which case further step() calls will return undefined -results info (dict): Contains responses for the full slate for + + + + + + + + + +
Returns
+A four-tuple of (observation, reward, done, info) where:
+observation (object): agent's observation that includes
+1. User's state features
+2. Document's observation
+3. Observation about user's slate responses.
+reward (float): The amount of reward returned after previous action
+done (boolean): Whether the episode has ended, in which case further
+step() calls will return undefined results
+info (dict): Contains responses for the full slate for
+debugging/learning.
+
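In practice this yields the familiar gym loop; the random slate below merely exercises the interface (reusing `gym_env` from the sketch above):

```python
observation = gym_env.reset()
done = False
while not done:
  action = gym_env.action_space.sample()  # a random slate of documents
  observation, reward, done, info = gym_env.step(action)
  # observation is a dict with 'user', 'doc' and 'response' entries.
```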

update_metrics

View source -```python -update_metrics( - responses, - info=None + Updates metrics with one step responses. @@ -296,13 +358,30 @@ Updates metrics with one step responses. View source -```python -write_metrics(add_summary_fn) -``` + Writes metrics to TensorBoard by calling add_summary_fn. -## Class Members +

__enter__

+ + + +

__exit__

+ + + +## Class Variables * `metadata` * `reward_range` +* `spec = None` diff --git a/docs/api_docs/python/recsim/simulator/runner_lib.md b/docs/api_docs/python/recsim/simulator/runner_lib.md index 2e1ea52..affd243 100644 --- a/docs/api_docs/python/recsim/simulator/runner_lib.md +++ b/docs/api_docs/python/recsim/simulator/runner_lib.md @@ -1,12 +1,14 @@
-
# Module: recsim.simulator.runner_lib + + +
View @@ -27,9 +29,8 @@ that handles running the training. ## Functions +[`FLAGS(...)`](../../recsim/environments/interest_evolution/FLAGS.md): Registry +of 'Flag' objects. + [`load_gin_configs(...)`](../../recsim/simulator/runner_lib/load_gin_configs.md): Loads gin configuration files. - -## Other Members - -* `FLAGS` diff --git a/docs/api_docs/python/recsim/simulator/runner_lib/EvalRunner.md b/docs/api_docs/python/recsim/simulator/runner_lib/EvalRunner.md index 47f2ff4..0288a08 100644 --- a/docs/api_docs/python/recsim/simulator/runner_lib/EvalRunner.md +++ b/docs/api_docs/python/recsim/simulator/runner_lib/EvalRunner.md @@ -7,49 +7,83 @@ # recsim.simulator.runner_lib.EvalRunner - + +
View source -## Class `EvalRunner` - - Object that handles running the evaluation. Inherits From: [`Runner`](../../../recsim/simulator/runner_lib/Runner.md) - - -See main.py for a simple example to evaluate an agent. - -

__init__

- -```python -__init__( - *args, - **kwargs + -Initializes the Runner object in charge of running a full experiment. + -#### Args: +See main.py for a simple example to evaluate an agent. -* `base_dir`: str, the base directory to host all required - sub-directories. -* `create_agent_fn`: A function that takes as args a Tensorflow session - and an environment, and returns an agent. -* `env`: A Gym environment for running the experiments. -* `episode_log_file`: Path to output simulated episodes in - tf.SequenceExample. Disable logging if episode_log_file is an empty string. -* `checkpoint_file_prefix`: str, the prefix to use for checkpoint - files. -* `max_steps_per_episode`: int, maximum number of steps after which an - episode terminates. + + + + + + + + + + + + + + + + + + + + + + + + + +
+`base_dir` + +str, the base directory to host all required sub-directories. +
+`create_agent_fn` + +A function that takes as args a Tensorflow session and an +environment, and returns an agent. +
+`env` + +A Gym environment for running the experiments. +
+`episode_log_file` + +Path to output simulated episodes in tf.SequenceExample. +Disable logging if episode_log_file is an empty string. +
+`checkpoint_file_prefix` + +str, the prefix to use for checkpoint files. +
+`max_steps_per_episode` + +int, maximum number of steps after which an episode +terminates. +
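A typical invocation, modeled on the repository's usage examples; the extra `max_eval_episodes` and `test_mode` keyword arguments are assumed to pass through `**kwargs`:

```python
from recsim.agents import random_agent
from recsim.environments import interest_evolution
from recsim.simulator import runner_lib

def create_agent(sess, environment, eval_mode, summary_writer=None):
  return random_agent.RandomAgent(environment.action_space, random_seed=0)

env_config = {'num_candidates': 10, 'slate_size': 2,
              'resample_documents': True, 'seed': 0}

runner = runner_lib.EvalRunner(
    base_dir='/tmp/recsim_eval',
    create_agent_fn=create_agent,
    env=interest_evolution.create_environment(env_config),
    max_eval_episodes=5,
    test_mode=True)
runner.run_experiment()
```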
## Methods @@ -58,8 +92,8 @@ Initializes the Runner object in charge of running a full experiment. View source -```python -run_experiment() -``` + Runs a full experiment, spread over multiple iterations. diff --git a/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md b/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md index 74dff31..364138a 100644 --- a/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md +++ b/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md @@ -6,46 +6,80 @@ # recsim.simulator.runner_lib.Runner - + +
View source -## Class `Runner` - - Object that handles running experiments. + + Here we use the term 'experiment' to mean simulating interactions between the agent and the environment and reporting some statistics pertaining to these interactions. -

__init__

+ -```python -__init__( - *args, - **kwargs -) -``` - -Initializes the Runner object in charge of running a full experiment. - -#### Args: - -* `base_dir`: str, the base directory to host all required - sub-directories. -* `create_agent_fn`: A function that takes as args a Tensorflow session - and an environment, and returns an agent. -* `env`: A Gym environment for running the experiments. -* `episode_log_file`: Path to output simulated episodes in - tf.SequenceExample. Disable logging if episode_log_file is an empty string. -* `checkpoint_file_prefix`: str, the prefix to use for checkpoint - files. -* `max_steps_per_episode`: int, maximum number of steps after which an - episode terminates. + + + + + + + + + + + + + + + + + + + + + + + +
+`base_dir` + +str, the base directory to host all required sub-directories. +
+`create_agent_fn` + +A function that takes as args a Tensorflow session and an +environment, and returns an agent. +
+`env` + +A Gym environment for running the experiments. +
+`episode_log_file` + +Path to output simulated episodes in tf.SequenceExample. +Disable logging if episode_log_file is an empty string. +
+`checkpoint_file_prefix` + +str, the prefix to use for checkpoint files. +
+`max_steps_per_episode` + +int, maximum number of steps after which an episode +terminates. +
diff --git a/docs/api_docs/python/recsim/simulator/runner_lib/TrainRunner.md b/docs/api_docs/python/recsim/simulator/runner_lib/TrainRunner.md index 80f6052..628eb79 100644 --- a/docs/api_docs/python/recsim/simulator/runner_lib/TrainRunner.md +++ b/docs/api_docs/python/recsim/simulator/runner_lib/TrainRunner.md @@ -7,49 +7,82 @@ # recsim.simulator.runner_lib.TrainRunner - + +
View source -## Class `TrainRunner` - - Object that handles running the training. Inherits From: [`Runner`](../../../recsim/simulator/runner_lib/Runner.md) - - -See main.py for a simple example to train an agent. - -

__init__

- -```python -__init__( - *args, - **kwargs + -Initializes the Runner object in charge of running a full experiment. + -#### Args: +See main.py for a simple example to train an agent. -* `base_dir`: str, the base directory to host all required - sub-directories. -* `create_agent_fn`: A function that takes as args a Tensorflow session - and an environment, and returns an agent. -* `env`: A Gym environment for running the experiments. -* `episode_log_file`: Path to output simulated episodes in - tf.SequenceExample. Disable logging if episode_log_file is an empty string. -* `checkpoint_file_prefix`: str, the prefix to use for checkpoint - files. -* `max_steps_per_episode`: int, maximum number of steps after which an - episode terminates. + + + + + + + + + + + + + + + + + + + + + + + + + +
+`base_dir` + +str, the base directory to host all required sub-directories. +
+`create_agent_fn` + +A function that takes as args a Tensorflow session and an +environment, and returns an agent. +
+`env` + +A Gym environment for running the experiments. +
+`episode_log_file` + +Path to output simulated episodes in tf.SequenceExample. +Disable logging if episode_log_file is an empty string. +
+`checkpoint_file_prefix` + +str, the prefix to use for checkpoint files. +
+`max_steps_per_episode` + +int, maximum number of steps after which an episode +terminates. +
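Training mirrors evaluation; `max_training_steps` and `num_iterations` are assumed to be forwarded through `**kwargs`, following the repository's usage examples (`create_agent` and `env_config` as in the EvalRunner sketch above):

```python
runner = runner_lib.TrainRunner(
    base_dir='/tmp/recsim_train',
    create_agent_fn=create_agent,
    env=interest_evolution.create_environment(env_config),
    episode_log_file='',
    max_training_steps=50,
    num_iterations=10)
runner.run_experiment()
```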
## Methods @@ -58,8 +91,8 @@ Initializes the Runner object in charge of running a full experiment. View source -```python -run_experiment() -``` + Runs a full experiment, spread over multiple iterations. diff --git a/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md b/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md index 1e0ae49..9dc6c62 100644 --- a/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md +++ b/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md @@ -5,29 +5,46 @@ # recsim.simulator.runner_lib.load_gin_configs - + +
View source - Loads gin configuration files. -```python -recsim.simulator.runner_lib.load_gin_configs( - gin_files, - gin_bindings + -#### Args: - -* `gin_files`: list, of paths to the gin configuration files for this - experiment. -* `gin_bindings`: list, of gin parameter bindings to override the - values in the config files. + + + + + + + + + + + + + +
+`gin_files`
+
+list of paths to the gin configuration files for this
+experiment.
+
+`gin_bindings`
+
+list of gin parameter bindings to override the values in the
+config files.
+
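For example (both the file path and the binding target below are hypothetical):

```python
from recsim.simulator import runner_lib

runner_lib.load_gin_configs(
    gin_files=['configs/my_experiment.gin'],          # hypothetical path
    gin_bindings=['TrainRunner.num_iterations = 5'])  # hypothetical binding
```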
diff --git a/docs/api_docs/python/recsim/user.md b/docs/api_docs/python/recsim/user.md index 493dbac..8763270 100644 --- a/docs/api_docs/python/recsim/user.md +++ b/docs/api_docs/python/recsim/user.md @@ -5,7 +5,10 @@ # Module: recsim.user + + +
View diff --git a/docs/api_docs/python/recsim/user/AbstractResponse.md b/docs/api_docs/python/recsim/user/AbstractResponse.md index f36e733..d99ebe6 100644 --- a/docs/api_docs/python/recsim/user/AbstractResponse.md +++ b/docs/api_docs/python/recsim/user/AbstractResponse.md @@ -7,17 +7,15 @@ # recsim.user.AbstractResponse - + +
View source -## Class `AbstractResponse` - - Abstract class to model a user response. @@ -29,9 +27,10 @@ Abstract class to model a user response. View source -```python -create_observation() -``` + Creates a tensor observation of this response. @@ -40,9 +39,10 @@ Creates a tensor observation of this response. View source -```python -@staticmethod -response_space() -``` + ArraySpec that defines how a single response is represented. diff --git a/docs/api_docs/python/recsim/user/AbstractUserModel.md b/docs/api_docs/python/recsim/user/AbstractUserModel.md index feb698f..ecca708 100644 --- a/docs/api_docs/python/recsim/user/AbstractUserModel.md +++ b/docs/api_docs/python/recsim/user/AbstractUserModel.md @@ -15,44 +15,57 @@ # recsim.user.AbstractUserModel - + +
View source -## Class `AbstractUserModel` - - Abstract class to represent an encoding of a user's dynamics. - - -

__init__

- -View -source - -```python -__init__( - response_model_ctor, - user_sampler, - slate_size + -#### Args: + -* `response_model_ctor`: A class/constructor representing the type of - responses this model will generate. -* `user_sampler`: An instance of AbstractUserSampler that can generate - initial user states from an inital state distribution. -* `slate_size`: integer number of documents that can be served to the - user at any interaction. + + + + + + + + + + + + + + + + +
+`response_model_ctor` + +A class/constructor representing the type of +responses this model will generate. +
+`user_sampler`
+
+An instance of AbstractUserSampler that can generate
+initial user states from an initial state distribution.
+
+`slate_size` + +integer number of documents that can be served to the user at +any interaction. +
## Methods

@@ -61,9 +74,9 @@ Initializes a new user model.

View source

-```python
-create_observation()
-```
+

Emits observation about user's state.

@@ -72,9 +85,9 @@ Emits observation about user's state.

View source

-```python
-get_response_model_ctor()
-```
+

Returns a constructor for the type of response this model will create.

@@ -83,9 +96,10 @@ Returns a constructor for the type of response this model will create.

View source

-```python
-is_terminal()
-```
+

Returns a boolean indicating whether this session is over.

@@ -94,9 +108,9 @@ Returns a boolean indicating whether this session is over.

View source

-```python
-observation_space()
-```
+

A Gym.spaces object that describes possible user observations.

@@ -105,9 +119,9 @@ A Gym.spaces object that describes possible user observations.

View source

-```python
-reset()
-```
+

Resets the user.

@@ -116,9 +130,9 @@ Resets the user.

View source

-```python
-reset_sampler()
-```
+

Resets the sampler.

@@ -127,49 +141,91 @@ Resets the sampler.

View source

-```python
-response_space()
-```
+

simulate_response

View source -```python -simulate_response(documents) -``` + Simulates the user's response to a slate of documents. This could involve simulating models of attention, as well as random sampling for selection from scored documents. -#### Args: + + + + + -* `documents`: a list of AbstractDocuments + + + + +
Args
+`documents` + +a list of AbstractDocuments +
-#### Returns: + + + + + + + + +
Returns
(response) a list of AbstractResponse objects for each slate item +
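A concrete override might score each document against the user state and mark the best one as clicked. The attribute names below (`_user_state`, `_response_model_ctor`), together with a `score_document` method on the user state, are assumptions for the sake of the sketch:

```python
import numpy as np

def simulate_response(self, documents):
  """Greedy sketch: the user clicks the highest-scoring document."""
  responses = [self._response_model_ctor() for _ in documents]
  scores = [self._user_state.score_document(doc.create_observation())
            for doc in documents]
  responses[int(np.argmax(scores))].clicked = True
  return responses
```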

update_state

View source -```python -update_state( - slate_documents, - responses + Updates the user's state based on the slate and document selected. -#### Args: + + + + + + + + + + + + + +
Args
+`slate_documents` + +A list of AbstractDocuments for items in the slate. +
+`responses` + +A list of AbstractResponses for each item in the slate. +
-* `slate_documents`: A list of AbstractDocuments for items in the - slate. -* `responses`: A list of AbstractResponses for each item in the slate. - Updates: The user's hidden state. +Updates: The user's hidden state. diff --git a/docs/api_docs/python/recsim/user/AbstractUserSampler.md b/docs/api_docs/python/recsim/user/AbstractUserSampler.md index ee28d8a..bb8aa39 100644 --- a/docs/api_docs/python/recsim/user/AbstractUserSampler.md +++ b/docs/api_docs/python/recsim/user/AbstractUserSampler.md @@ -9,42 +9,47 @@ # recsim.user.AbstractUserSampler - + +
View source -## Class `AbstractUserSampler` - - Abstract class to sample users. - - -

__init__

- -View -source - -```python -__init__( - user_ctor, - seed=0 + -#### Args: - -* `user_ctor`: A class/constructor for the type of user states that - will be sampled. -* `seed`: An integer for a random seed. + + + + + + + + + + + + + + +
+`user_ctor` + +A class/constructor for the type of user states that will be +sampled. +
+`seed` + +An integer for a random seed. +
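Subclassing is straightforward. The sketch below assumes the base class stores the constructor as `self._user_ctor` and exposes a seeded `numpy.random.RandomState` as `self._rng`; the one-feature user state is hypothetical:

```python
import numpy as np
from gym import spaces
from recsim import user

class SimpleUserState(user.AbstractUserState):
  """Hypothetical user state with a single observable interest feature."""

  def __init__(self, interest):
    self.interest = interest

  def create_observation(self):
    return np.array([self.interest], dtype=np.float32)

  @staticmethod
  def observation_space():
    return spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)

class SimpleUserSampler(user.AbstractUserSampler):

  def __init__(self, user_ctor=SimpleUserState, seed=0):
    super(SimpleUserSampler, self).__init__(user_ctor, seed=seed)

  def sample_user(self):
    return self._user_ctor(interest=self._rng.uniform(-1.0, 1.0))

sampled_state = SimpleUserSampler().sample_user()
```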
## Methods @@ -53,9 +58,9 @@ User states of the type user_ctor are sampled. View source -```python -get_user_ctor() -``` + Returns the constructor/class of the user states that will be sampled. @@ -64,17 +69,18 @@ Returns the constructor/class of the user states that will be sampled. View source -```python -reset_sampler() -``` +

sample_user

View source -```python -sample_user() -``` + Creates a new instantiation of this user's hidden state parameters. diff --git a/docs/api_docs/python/recsim/user/AbstractUserState.md b/docs/api_docs/python/recsim/user/AbstractUserState.md index 0948bbc..cbab8a7 100644 --- a/docs/api_docs/python/recsim/user/AbstractUserState.md +++ b/docs/api_docs/python/recsim/user/AbstractUserState.md @@ -3,21 +3,20 @@ +
# recsim.user.AbstractUserState - + +
View source -## Class `AbstractUserState` - - Abstract class to represent a user's state. @@ -29,28 +28,42 @@ Abstract class to represent a user's state. View source -```python -create_observation() -``` + Generates obs of underlying state to simulate partial observability. -#### Returns: + + + + + -* `obs`: A float array of the observed user features. + + + + +
Returns
+`obs` + +A float array of the observed user features. +

observation_space

View source -```python -@staticmethod -observation_space() -``` + Gym.spaces object that defines how user states are represented. +## Class Variables - - +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/utils.md b/docs/api_docs/python/recsim/utils.md index 7a46883..390a557 100644 --- a/docs/api_docs/python/recsim/utils.md +++ b/docs/api_docs/python/recsim/utils.md @@ -5,7 +5,10 @@ # Module: recsim.utils + + +
View diff --git a/docs/api_docs/python/recsim/utils/aggregate_video_cluster_metrics.md b/docs/api_docs/python/recsim/utils/aggregate_video_cluster_metrics.md index 0cb76de..87eefb3 100644 --- a/docs/api_docs/python/recsim/utils/aggregate_video_cluster_metrics.md +++ b/docs/api_docs/python/recsim/utils/aggregate_video_cluster_metrics.md @@ -5,34 +5,64 @@ # recsim.utils.aggregate_video_cluster_metrics - + +
View source - Aggregates the video cluster metrics with one step responses. -```python -recsim.utils.aggregate_video_cluster_metrics( - responses, - metrics, - info=None + -#### Args: + + + + + -* `responses`: a dictionary of names, observed responses. -* `metrics`: A dictionary mapping from metric_name to its value in - float. -* `info`: Additional info for computing metrics (ignored here) + + + + + + + + + + +
+`responses`
+
+a dictionary mapping response names to observed responses.
+
+`metrics` + +A dictionary mapping from metric_name to its value in float. +
+`info` + +Additional info for computing metrics (ignored here) +
-#### Returns: + + + + + + + + +
A dictionary storing metrics after aggregation. +
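This helper and its companion `write_video_cluster_metrics` (documented next) are designed to plug into RecSimGymEnv's metrics hooks. The wiring below mirrors how the bundled interest_evolution environment is assembled and is offered as an assumption; `raw_env` and `reward_aggregator` are as in the earlier sketches:

```python
from recsim import utils
from recsim.simulator import recsim_gym

gym_env = recsim_gym.RecSimGymEnv(
    raw_env,
    reward_aggregator,
    metrics_aggregator=utils.aggregate_video_cluster_metrics,
    metrics_writer=utils.write_video_cluster_metrics)
```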
diff --git a/docs/api_docs/python/recsim/utils/write_video_cluster_metrics.md b/docs/api_docs/python/recsim/utils/write_video_cluster_metrics.md index 53aeadb..c845cee 100644 --- a/docs/api_docs/python/recsim/utils/write_video_cluster_metrics.md +++ b/docs/api_docs/python/recsim/utils/write_video_cluster_metrics.md @@ -5,23 +5,21 @@ # recsim.utils.write_video_cluster_metrics - + +
View source - - Writes average video cluster metrics using add_summary_fn. -```python -recsim.utils.write_video_cluster_metrics( - metrics, - add_summary_fn + diff --git a/recsim/agents/slate_decomp_q_agent.py b/recsim/agents/slate_decomp_q_agent.py index f289f79..0a6d9cc 100644 --- a/recsim/agents/slate_decomp_q_agent.py +++ b/recsim/agents/slate_decomp_q_agent.py @@ -1,5 +1,4 @@ # coding=utf-8 -# coding=utf-8 # Copyright 2019 The RecSim Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -172,7 +171,7 @@ def set_element(v, i, x): numerator = numerator + tf.gather(s * q, k) denominator = denominator + tf.gather(s, k) - output_slate = tf.compat.v1.where(tf.equal(mask, 0)) + output_slate = tf.where(tf.equal(mask, 0)) return output_slate @@ -352,13 +351,13 @@ def compute_target_topk_q(reward, gamma, next_actions, next_q_values, # Get the expected Q-value of the slate containing top-K items. # [batch_size, slate_size] - next_q_values_selected = tf.compat.v1.batch_gather( + next_q_values_selected = tf.batch_gather( next_q_values, tf.cast(topk_optimal_slate, dtype=tf.int32)) # Get normalized affinity scores on the slate. # [batch_size, slate_size] - scores_selected = tf.compat.v1.batch_gather( - scores, tf.cast(topk_optimal_slate, dtype=tf.int32)) + scores_selected = tf.batch_gather(scores, + tf.cast(topk_optimal_slate, dtype=tf.int32)) next_q_target_topk = tf.reduce_sum( input_tensor=next_q_values_selected * scores_selected, axis=1) / ( @@ -475,9 +474,9 @@ def __init__(self, abstract_agent.AbstractEpisodicRecommenderAgent.__init__(self, action_space) # The doc score is a [num_candidates] vector. - self._doc_affinity_scores_ph = tf.compat.v1.placeholder( + self._doc_affinity_scores_ph = tf.placeholder( tf.float32, (self._num_candidates,), name='doc_affinity_scores_ph') - self._prob_no_click_ph = tf.compat.v1.placeholder( + self._prob_no_click_ph = tf.placeholder( tf.float32, (), name='prob_no_click_ph') self._select_slate_fn = select_slate_fn @@ -496,7 +495,7 @@ def __init__(self, def _network_adapter(self, states, scope): self._validate_states(states) - with tf.compat.v1.name_scope('network'): + with tf.name_scope('network'): # Since we decompose the slate optimization into an item-level # optimization problem, the observation space is the user state # observation plus all documents' observations. In the Dopamine DQN agent @@ -513,7 +512,7 @@ def _network_adapter(self, states, scope): return dqn_agent.DQNNetworkType(q_values) def _build_networks(self): - with tf.compat.v1.name_scope('networks'): + with tf.name_scope('networks'): self._replay_net_outputs = self._network_adapter(self._replay.states, 'Online') self._replay_next_target_net_outputs = self._network_adapter( @@ -533,7 +532,7 @@ def _build_train_op(self): # slate_q_values: [B, S] # replay_click_q: [B] click_indicator = self._replay.rewards[:, :, self._click_response_index] - slate_q_values = tf.compat.v1.batch_gather( + slate_q_values = tf.batch_gather( self._replay_net_outputs.q_values, tf.cast(self._replay.actions, dtype=tf.int32)) # Only get the Q from the clicked document. @@ -545,8 +544,7 @@ def _build_train_op(self): target = tf.stop_gradient(self._build_target_q_op()) clicked = tf.reduce_sum(input_tensor=click_indicator, axis=1) - clicked_indices = tf.squeeze( - tf.compat.v1.where(tf.equal(clicked, 1)), axis=1) + clicked_indices = tf.squeeze(tf.where(tf.equal(clicked, 1)), axis=1) # clicked_indices is a vector and tf.gather selects the batch dimension. 
q_clicked = tf.gather(replay_click_q, clicked_indices) target_clicked = tf.gather(target, clicked_indices) @@ -554,8 +552,8 @@ def _build_train_op(self): def get_train_op(): loss = tf.reduce_mean(input_tensor=tf.square(q_clicked - target_clicked)) if self.summary_writer is not None: - with tf.compat.v1.variable_scope('Losses'): - tf.compat.v1.summary.scalar('Loss', loss) + with tf.variable_scope('Losses'): + tf.summary.scalar('Loss', loss) return loss @@ -613,25 +611,24 @@ def _build_select_slate_op(self): p_no_click = self._prob_no_click_ph p = self._doc_affinity_scores_ph q = self._net_outputs.q_values[0] - with tf.compat.v1.name_scope('select_slate'): + with tf.name_scope('select_slate'): self._output_slate = self._select_slate_fn(self._slate_size, p_no_click, p, q) - self._output_slate = tf.compat.v1.Print( + self._output_slate = tf.Print( self._output_slate, [tf.constant('cp 1'), self._output_slate, p, q], summarize=10000) self._output_slate = tf.reshape(self._output_slate, (self._slate_size,)) - self._action_counts = tf.compat.v1.get_variable( + self._action_counts = tf.get_variable( 'action_counts', shape=[self._num_candidates], - initializer=tf.compat.v1.zeros_initializer()) + initializer=tf.zeros_initializer()) output_slate = tf.reshape(self._output_slate, [-1]) output_one_hot = tf.one_hot(output_slate, self._num_candidates) update_ops = [] for i in range(self._slate_size): - update_ops.append( - tf.compat.v1.assign_add(self._action_counts, output_one_hot[i])) + update_ops.append(tf.assign_add(self._action_counts, output_one_hot[i])) self._select_action_update_op = tf.group(*update_ops) def _select_action(self): @@ -660,7 +657,7 @@ def _select_action(self): observation = self._raw_observation user_obs = observation['user'] doc_obs = np.array(list(observation['doc'].values())) - tf.compat.v1.logging.debug('cp 1: %s, %s', doc_obs, observation) + tf.logging.debug('cp 1: %s, %s', doc_obs, observation) # TODO(cwhsu): Use score_documents_tf() and remove score_documents(). scores, score_no_click = score_documents(user_obs, doc_obs) output_slate, _ = self._sess.run( @@ -697,8 +694,8 @@ def _build_replay_buffer(self, use_staging): def _add_summary(self, tag, value): if self.summary_writer: - summary = tf.compat.v1.Summary( - value=[tf.compat.v1.Summary.Value(tag=tag, simple_value=value)]) + summary = tf.Summary( + value=[tf.Summary.Value(tag=tag, simple_value=value)]) self.summary_writer.add_summary(summary, self.training_steps) def begin_episode(self, observation): diff --git a/recsim/environments/interest_exploration.py b/recsim/environments/interest_exploration.py index 9d6885a..c8aeca1 100644 --- a/recsim/environments/interest_exploration.py +++ b/recsim/environments/interest_exploration.py @@ -71,7 +71,6 @@ class IEUserModel(user.AbstractUserModel): Args: slate_size: An integer representing the size of the slate. no_click_mass: A float indicating the mass given to a no-click option. - Must be positive, otherwise CTR is always 1. choice_model_ctor: A contructor function to create user choice model. user_state_ctor: A constructor to create user state. response_model_ctor: A constructor function to create response. 
The @@ -87,8 +86,6 @@ def __init__(self, user_state_ctor=None, response_model_ctor=None, seed=0): - if no_click_mass < 0: - raise ValueError('no_click_mass must be positive.') super(IEUserModel, self).__init__(response_model_ctor, IEClusterUserSampler( user_ctor=user_state_ctor, seed=seed), slate_size) diff --git a/setup.py b/setup.py index 2035764..10b974f 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ setup( name='recsim', - version='0.2.3', + version='0.2.4', author='The RecSim Team', author_email='no-reply@google.com', description=recsim_description,