diff --git a/README.md b/README.md
index decc491..4115ee4 100644
--- a/README.md
+++ b/README.md
@@ -10,16 +10,33 @@ be easily configured that vary assumptions about: user preferences and item
familiarity; user latent state and its dynamics; and choice models and other
user response behavior. We outline how RecSim offers value to RL and RS
researchers and practitioners, and how it can serve as a vehicle for
-academic-industrial collaboration.
+academic-industrial collaboration. For a detailed description of the RecSim
+architecture, please read [Ie et al.](https://arxiv.org/abs/1909.04847). Please
+cite the paper if you use the code from this repository in your work.
+
+### Bibtex
+
+```
+@article{ie2019recsim,
+ title={RecSim: A Configurable Simulation Platform for Recommender Systems},
+ author={Eugene Ie and Chih-wei Hsu and Martin Mladenov and Vihan Jain and Sanmit Narvekar and Jing Wang and Rui Wu and Craig Boutilier},
+ year={2019},
+ eprint={1909.04847},
+ archivePrefix={arXiv},
+ primaryClass={cs.LG}
+}
+```
+
## Disclaimer
This is not an officially supported Google product.
## What's new
-* **12/13/2019:** Added (abstract) classes for both multi-user environments and
- agents. Added bandit algorithms for generalized linear models.
+
+* **12/13/2019:** Added (abstract) classes for both multi-user environments
+ and agents. Added bandit algorithms for generalized linear models.
## Installation and Sample Usage
@@ -64,10 +81,12 @@ You could also find the simulated logs in /tmp/recsim/episode_logs.tfrecord
## Tutorials
-To get started, please check out our Colab tutorials. In [**RecSim:
-Overview**](recsim/colab/RecSim_Overview.ipynb), we give a brief overview about
-RecSim. We then talk about each configurable component:
-[**environment**](recsim/colab/RecSim_Developing_an_Environment.ipynb) and
+To get started, please check out our Colab tutorials. In
+[**RecSim: Overview**](recsim/colab/RecSim_Overview.ipynb),
+we give a brief overview of RecSim. We then talk about each configurable
+component:
+[**environment**](recsim/colab/RecSim_Developing_an_Environment.ipynb)
+and
[**recommender agent**](recsim/colab/RecSim_Developing_an_Agent.ipynb).
## Documentation
diff --git a/docs/api_docs/python/_redirects.yaml b/docs/api_docs/python/_redirects.yaml
index ebf9185..2bf1a1e 100644
--- a/docs/api_docs/python/_redirects.yaml
+++ b/docs/api_docs/python/_redirects.yaml
@@ -1,3 +1,9 @@
redirects:
+- from: /recsim/api_docs/python/recsim/environments/interest_exploration/FLAGS
+ to: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS
+- from: /recsim/api_docs/python/recsim/environments/long_term_satisfaction/FLAGS
+ to: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS
- from: /recsim/api_docs/python/recsim/simulator/environment/SingleUserEnvironment
to: /recsim/api_docs/python/recsim/simulator/environment/Environment
+- from: /recsim/api_docs/python/recsim/simulator/runner_lib/FLAGS
+ to: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS
diff --git a/docs/api_docs/python/_toc.yaml b/docs/api_docs/python/_toc.yaml
index 15d1b10..8ef5af0 100644
--- a/docs/api_docs/python/_toc.yaml
+++ b/docs/api_docs/python/_toc.yaml
@@ -201,6 +201,8 @@ toc:
path: /recsim/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward
- title: create_environment
path: /recsim/api_docs/python/recsim/environments/interest_evolution/create_environment
+ - title: FLAGS
+ path: /recsim/api_docs/python/recsim/environments/interest_evolution/FLAGS
- title: IEvResponse
path: /recsim/api_docs/python/recsim/environments/interest_evolution/IEvResponse
- title: IEvUserDistributionSampler
diff --git a/docs/api_docs/python/index.md b/docs/api_docs/python/index.md
index 58f1905..d0ecd1b 100644
--- a/docs/api_docs/python/index.md
+++ b/docs/api_docs/python/index.md
@@ -77,6 +77,7 @@
* recsim.document.CandidateSet
* recsim.environments
* recsim.environments.interest_evolution
+* recsim.environments.interest_evolution.FLAGS
* recsim.environments.interest_evolution.IEvResponse
* recsim.environments.interest_evolution.IEvUserDistributionSampler
* recsim.environments.interest_evolution.IEvUserModel
@@ -89,6 +90,7 @@
* recsim.environments.interest_evolution.create_environment
* recsim.environments.interest_evolution.total_clicks_reward
* recsim.environments.interest_exploration
+* recsim.environments.interest_exploration.FLAGS
* recsim.environments.interest_exploration.IEClusterUserSampler
* recsim.environments.interest_exploration.IEDocument
* recsim.environments.interest_exploration.IEResponse
@@ -98,6 +100,7 @@
* recsim.environments.interest_exploration.create_environment
* recsim.environments.interest_exploration.total_clicks_reward
* recsim.environments.long_term_satisfaction
+* recsim.environments.long_term_satisfaction.FLAGS
* recsim.environments.long_term_satisfaction.LTSDocument
* recsim.environments.long_term_satisfaction.LTSDocumentSampler
* recsim.environments.long_term_satisfaction.LTSResponse
@@ -116,6 +119,7 @@
* recsim.simulator.recsim_gym.RecSimGymEnv
* recsim.simulator.runner_lib
* recsim.simulator.runner_lib.EvalRunner
+* recsim.simulator.runner_lib.FLAGS
* recsim.simulator.runner_lib.Runner
* recsim.simulator.runner_lib.TrainRunner
* recsim.simulator.runner_lib.load_gin_configs
diff --git a/docs/api_docs/python/recsim.md b/docs/api_docs/python/recsim.md
index 6b09e9f..fa6dc20 100644
--- a/docs/api_docs/python/recsim.md
+++ b/docs/api_docs/python/recsim.md
@@ -5,7 +5,10 @@
# Module: recsim
+
+
__init__
+recsim.agent.AbstractEpisodicRecommenderAgent(
+ action_space, summary_writer=None
)
-```
+
-Initializes AbstractEpisodicRecommenderAgent.
-
-#### Args:
+
-* `action_space`: A gym.spaces object that specifies the format of
- actions.
-* `summary_writer`: A Tensorflow summary writer to pass to the agent
- for in-agent training statistics in Tensorboard.
+
+
+ Args | |
---|---|
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`summary_writer` + | ++A Tensorflow summary writer to pass to the agent +for in-agent training statistics in Tensorboard. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
-* `observation`: numpy array, the environment's initial observation.
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation=None
)
-```
+
Signals the end of the episode to the agent.
-#### Args:
-
-* `reward`: An float that is the last reward from the environment.
-* `observation`: numpy array that represents the last observation of
- the episode.
+
+
+ Args | |
---|---|
+`reward` + | ++A float that is the last reward from the environment. + | +
+`observation` + | ++numpy array that represents the last observation of the +episode. + | +
step
+Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + +@abc.abstractmethod
+step( + reward, observation ) -``` +
Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- and is used when we save TensorFlow objects by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__init__
+recsim.agent.AbstractHierarchicalAgentLayer(
+ action_space, *base_agent_ctors
)
-```
+
+
+
-Initializes AbstractRecommenderAgent.
+
-#### Args:
+ Args | |
---|---|
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
step
+Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + +@abc.abstractmethod
+step( + reward, observation ) -``` +
Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- saved by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__init__
+recsim.agent.AbstractMultiUserEpisodicRecommenderAgent(
+ action_space
+)
+
-```python
-__init__(action_space)
-```
+
-Initializes AbstractMultiUserEpisodicRecommenderAgent.
+
-#### Args:
+ Args | |
---|---|
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
-* `observation`: numpy array, the environment's initial observation.
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation=None
)
-```
+
Signals the end of the episode to the agent.
-#### Args:
-
-* `reward`: An float that is the last reward from the environment.
-* `observation`: numpy array that represents the last observation of
- the episode.
+
+
+ Args | |
---|---|
+`reward` + | ++A float that is the last reward from the environment. + | +
+`observation` + | ++numpy array that represents the last observation of the +episode. + | +
step
+Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + +@abc.abstractmethod
+step( + reward, observation ) -``` +
Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- and is used when we save TensorFlow objects by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__init__
+recsim.agent.AbstractRecommenderAgent(
+ action_space
+)
+
-```python
-__init__(action_space)
-```
+
-Initializes AbstractRecommenderAgent.
+
-#### Args:
+ Args | |
---|---|
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+Returns a self-contained bundle of the agent's state. @@ -67,60 +80,115 @@ This is used for checkpointing. It will return a dictionary containing all non-TensorFlow objects (to be saved into a file by the caller), and it saves all TensorFlow objects into a checkpoint file. -#### Args: + + +@abc.abstractmethod
+bundle_and_checkpoint( + checkpoint_dir, iteration_number ) -``` +
Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
step
+Records the most recent transition and returns the agent's next action. We store the observation of the last time step since we want to store it with the reward. -#### Args: - -* `reward`: The reward received from the agent's most recent action as - a float. -* `observation`: A dictionary that includes the most recent - observations. - -#### Returns: + + +@abc.abstractmethod
+step( + reward, observation ) -``` +
Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+Restores the agent from a checkpoint. @@ -128,15 +196,48 @@ Restores the agent's Python objects to those specified in bundle_dict, and restores the TensorFlow objects to those specified in the checkpoint_dir. If the checkpoint_dir does not exist, will not reset the agent's state. -#### Args: - -* `checkpoint_dir`: A string that represents the path to the checkpoint - saved by tf.Save. -* `iteration_number`: An integer that represents the checkpoint version - and is used when restoring replay buffer. -* `bundle_dict`: A dict containing additional Python objects owned by - the agent. Each key is an object name and the value is the actual object. + + +@abc.abstractmethod
+unbundle( + checkpoint_dir, iteration_number, bundle_dict ) -``` +
Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.agent_utils.GymSpaceWalker(
+ gym_space, leaf_op
+)
+
+
Gym spaces have nested structure in terms of container spaces (e.g. Dict and
@@ -30,26 +34,18 @@ the proces. E.g., given a gym space of the form Tuple((Box(1), Box(1)) and a
leaf operator f, this class can is used to transform an observation (a, b) to
[f(a), f(b)].
-#### Args:
+
+
+ Args |
---|
__init__
apply_and_flatten
+apply_and_flatten(
+ gym_observations
+)
+
diff --git a/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md b/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md
index a7ec774..9395afe 100644
--- a/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md
+++ b/docs/api_docs/python/recsim/agents/agent_utils/epsilon_greedy_exploration.md
@@ -5,30 +5,27 @@
# recsim.agents.agent_utils.epsilon_greedy_exploration
-
+
+recsim.agents.agent_utils.epsilon_greedy_exploration(
+ state_action_iterator, q_function, epsilon
)
-```
+
Either picks a slate uniformly at random with probability epsilon, or returns a
-slate with maximal Q-value. TODO(mmladenov): more verbose doc. Args:
-state_action_iterator: an iterator over slate, state_action_index tuples.
-q_function: a container holding Q-values of state-action pairs. epsilon:
-probability of random action. Returns: slate: the picked slate. sa_index: the
-index of the picked slate in the Q-value table.
+slate with maximal Q-value. Args: state_action_iterator: an iterator over slate,
+state_action_index tuples. q_function: a container holding Q-values of
+state-action pairs. epsilon: probability of random action. Returns: slate: the
+picked slate. sa_index: the index of the picked slate in the Q-value table.
diff --git a/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md b/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md
index 6967b66..2ab2572 100644
--- a/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md
+++ b/docs/api_docs/python/recsim/agents/agent_utils/min_count_exploration.md
@@ -5,23 +5,22 @@
# recsim.agents.agent_utils.min_count_exploration
-
+
+recsim.agents.agent_utils.min_count_exploration(
+ state_action_iterator, counts_function
)
-```
+
diff --git a/docs/api_docs/python/recsim/agents/bandits.md b/docs/api_docs/python/recsim/agents/bandits.md
index 980293a..437f6fc 100644
--- a/docs/api_docs/python/recsim/agents/bandits.md
+++ b/docs/api_docs/python/recsim/agents/bandits.md
@@ -5,7 +5,10 @@
# Module: recsim.agents.bandits
+
+
+recsim.agents.bandits.algorithms.KLUCB(
+ num_arms, params, seed=0
+)
+
+
See "The KL-UCB algorithm for bounded stochastic bandits and beyond" by Garivier
and Cappe.
-__init__
Args | |
---|---|
+`num_arms` + | ++Number of arms. Must be greater than one. + | +
+`params` + | ++A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. + | +
+`seed` + | ++Random seed for this object. Default is zero. + | +
+get_arm(
+ t
+)
+
get_score
+get_score(
+ t
+)
+
Computes upper confidence bounds of reward / pulls at round t.
@@ -81,31 +98,29 @@ Computes upper confidence bounds of reward / pulls at round t.
View
source
-```python
-@staticmethod
-print()
-```
++@staticmethod
+print() +
set_state
+set_state(
+ pulls, reward
)
-```
+
update
+update(
+ arm, reward
)
-```
+
diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md b/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md
index 4c47765..54cef67 100644
--- a/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md
+++ b/docs/api_docs/python/recsim/agents/bandits/algorithms/MABAlgorithm.md
@@ -8,55 +8,97 @@
# recsim.agents.bandits.algorithms.MABAlgorithm
-
+
+recsim.agents.bandits.algorithms.MABAlgorithm(
+ num_arms, params, seed=0
+)
+
+
We implement multi-armed bandit algorithms with confidence width tuning proposed
in Hsu et al. https://arxiv.org/abs/1904.02664.
-#### Attributes:
-
-* `pulls`: A numpy array which counts number of pulls of each arm
-* `reward`: A numpy array which sums up reward of each arm
-* `optimism_scaling`: A float specifying the confidence level. Default
- value (1.0) corresponds to the exploration strategy presented in the
- literature. A smaller number means less exploration and more exploitation.
-* `_rng`: An instance of random.RandomState for random number
- generation
-
-__init__
Args | |
---|---|
+`num_arms` + | ++Number of arms. Must be greater than one. + | +
+`params` + | ++A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. + | +
+`seed` + | ++Random seed for this object. Default is zero. + | +
Attributes | |
---|---|
+`pulls` + | ++A numpy array which counts number of pulls of each arm + | +
+`reward` + | ++A numpy array which sums up reward of each arm + | +
+`optimism_scaling` + | ++A float specifying the confidence level. Default value +(1.0) corresponds to the exploration strategy presented in the literature. +A smaller number means less exploration and more exploitation. + | +
+`_rng` + | ++An instance of random.RandomState for random number generation + | +
+set_state(
+ pulls, reward
)
-```
+
update
+update(
+ arm, reward
)
-```
+
diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md b/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md
index 60c2eb7..9412f90 100644
--- a/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md
+++ b/docs/api_docs/python/recsim/agents/bandits/algorithms/ThompsonSampling.md
@@ -11,47 +11,60 @@
# recsim.agents.bandits.algorithms.ThompsonSampling
-
+
__init__
+recsim.agents.bandits.algorithms.ThompsonSampling(
+ num_arms, params, seed=0
)
-```
+
-Initializes MABAlgorithm.
+
-#### Args:
+See "Further Optimal Regret Bounds for Thompson Sampling" by Agrawal and Goyal.
-* `num_arms`: Number of arms. Must be greater than one.
-* `params`: A dictionary which includes additional parameters like
- optimism_scaling. Default is an empty dictionary.
-* `seed`: Random seed for this object. Default is zero.
+
+
+ Args | |
---|---|
+`num_arms` + | ++Number of arms. Must be greater than one. + | +
+`params` + | ++A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. + | +
+`seed` + | ++Random seed for this object. Default is zero. + | +
+get_arm(
+ t
+)
+
get_score
+get_score(
+ t
+)
+
Samples scores from the posterior distribution.
@@ -80,31 +97,29 @@ Samples scores from the posterior distribution.
View
source
-```python
-@staticmethod
-print()
-```
++@staticmethod
+print() +
set_state
+set_state(
+ pulls, reward
)
-```
+
update
+update(
+ arm, reward
)
-```
+
diff --git a/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md b/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md
index f3f8e63..663ec53 100644
--- a/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md
+++ b/docs/api_docs/python/recsim/agents/bandits/algorithms/UCB1.md
@@ -11,48 +11,61 @@
# recsim.agents.bandits.algorithms.UCB1
-
+
+recsim.agents.bandits.algorithms.UCB1(
+ num_arms, params, seed=0
+)
+
+
See "Finite-time Analysis of the Multiarmed Bandit Problem" by Auer,
Cesa-Bianchi, and Fischer.
-__init__
Args | |
---|---|
+`num_arms` + | ++Number of arms. Must be greater than one. + | +
+`params` + | ++A dictionary which includes additional parameters like +optimism_scaling. Default is an empty dictionary. + | +
+`seed` + | ++Random seed for this object. Default is zero. + | +
+get_arm(
+ t
+)
+
get_score
+get_score(
+ t
+)
+
Computes upper confidence bounds of reward / pulls at round t.
@@ -81,31 +98,29 @@ Computes upper confidence bounds of reward / pulls at round t.
View
source
-```python
-@staticmethod
-print()
-```
++@staticmethod
+print() +
set_state
+set_state(
+ pulls, reward
)
-```
+
update
+update(
+ arm, reward
)
-```
+
diff --git a/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md b/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md
index 99bdb53..023ca7b 100644
--- a/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md
+++ b/docs/api_docs/python/recsim/agents/cluster_bandit_agent.md
@@ -5,7 +5,10 @@
# Module: recsim.agents.cluster_bandit_agent
+
+
+recsim.agents.cluster_bandit_agent.ClusterBanditAgent(
+ observation_space, action_space, alg_ctor=recsim.agents.bandits.algorithms.UCB1,
+ ci_scaling=1.0, random_seed=0, **kwargs
+)
+
+
This agent assumes no knowledge of user's affinity for each topic but receives
@@ -35,35 +39,73 @@ observations of user's past responses for each topic. When creating a slate, it
utilizes a bandit algorithm to pick the best topics. Within the same best topic,
we pick documents with the best document quality scores.
-__init__
Args | |
---|---|
+`observation_space` + | ++Instance of a gym space corresponding to the +observation format. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`alg_ctor` + | ++A class of an MABAlgorithm for exploration, default to UCB1. + | +
+`ci_scaling` + | ++A floating number specifying the scaling of confidence bound. + | +
+`random_seed` + | ++An integer for random seed. + | +
+`**kwargs` + | ++currently unused arguments. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: Unused.
-* `observation`: A dictionary that includes the most recent
- observations and should have the following fields:
- - user: A dictionary representing user's observed state. Assumes
- observation['user']['sufficient_statics'] is a dictionary containing
- base agent impression counts and base agent click counts.
-
-#### Returns:
+
+
+ Args | |
---|---|
+`reward` + | ++Unused. + | +
+`observation` + | ++A dictionary that includes the most recent observations and +should have the following fields: +- user: A dictionary representing user's observed state. Assumes +observation['user']['sufficient_statics'] is a dictionary containing +base agent impression counts and base agent click counts. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- saved by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__init__
+recsim.agents.cluster_bandit_agent.GreedyClusterAgent(
+ observation_space, action_space, cluster_id, **kwargs
)
-```
-
-Initializes AbstractEpisodicRecommenderAgent.
-
+
-#### Args:
-
-
-* `action_space`: A gym.spaces object that specifies the format of actions.
-* `summary_writer`: A Tensorflow summary writer to pass to the agent
- for in-agent training statistics in Tensorboard.
+
+
+
+ Args | |
---|---|
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`summary_writer` + | ++A Tensorflow summary writer to pass to the agent +for in-agent training statistics in Tensorboard. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
Returns the agent's first action for this episode.
+
-#### Args:
-
-
-* `observation`: numpy array, the environment's initial observation.
-
-
-#### Returns:
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
Returns | |
---|---|
A dictionary containing additional Python objects to be checkpointed by - the experiment. Each key is a string for the object name and the value - is actual object. If the checkpoint directory does not exist, returns - empty dictionary. +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation=None
)
-```
+
Signals the end of the episode to the agent.
-
-#### Args:
-
-
-* `reward`: An float that is the last reward from the environment.
-* `observation`: numpy array that represents the last observation of the
- episode.
+
+
+ Args | |
---|---|
+`reward` + | ++An float that is the last reward from the environment. + | +
+`observation` + | ++numpy array that represents the last observation of the +episode. + | +
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it
with the reward.
-#### Args:
-
-
-* `reward`: The reward received from the agent's most recent action as a
- float.
-* `observation`: A dictionary that includes the most recent observations.
-
-
-#### Returns:
-
+
+
+ Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__init__
+recsim.agents.dopamine.dqn_agent.DQNAgentRecSim(
+ sess, observation_space, num_actions, stack_size, optimizer_name, eval_mode,
**kwargs
)
-```
-
-Initializes the agent and constructs the components of its graph.
-
-#### Args:
-
-* `sess`: `tf.Session`, for executing ops.
-* `num_actions`: int, number of actions the agent can take at any
- state.
-* `observation_shape`: tuple of ints describing the observation shape.
-* `observation_dtype`: tf.DType, specifies the type of the
- observations. Note that if your inputs are continuous, you should set this
- to tf.float32.
-* `stack_size`: int, number of frames to use in state stack.
-* `network`: tf.Keras.Model, expecting 2 parameters: num_actions,
- network_type. A call to this object will return an instantiation of the
- network provided. The network returned can be run with different inputs to
- create different outputs. See
- dopamine.discrete_domains.atari_lib.NatureDQNNetwork as an example.
-* `gamma`: float, discount factor with the usual RL meaning.
-* `update_horizon`: int, horizon at which updates are performed, the
- 'n' in n-step update.
-* `min_replay_history`: int, number of transitions that should be
- experienced before the agent begins training its value function.
-* `update_period`: int, period between DQN updates.
-* `target_update_period`: int, update period for the target network.
-* `epsilon_fn`: function expecting 4 parameters: (decay_period, step,
- warmup_steps, epsilon). This function should return the epsilon value used
- for exploration during training.
-* `epsilon_train`: float, the value to which the agent's epsilon is
- eventually decayed during training.
-* `epsilon_eval`: float, epsilon used when evaluating the agent.
-* `epsilon_decay_period`: int, length of the epsilon decay schedule.
-* `tf_device`: str, Tensorflow device on which the agent's graph is
- executed.
-* `eval_mode`: bool, True for evaluation and False for training.
-* `use_staging`: bool, when True use a staging area to prefetch the
- next training batch, speeding training up by about 30%.
-* `max_tf_checkpoints_to_keep`: int, the number of TensorFlow
- checkpoints to keep.
-* `optimizer`: `tf.train.Optimizer`, for training the value function.
-* `summary_writer`: SummaryWriter object for outputting training
- statistics. Summary writing disabled if set to None.
-* `summary_writing_frequency`: int, frequency with which summaries will
- be written. Lower values will result in slower training.
-* `allow_partial_reload`: bool, whether we allow reloading a partial
- agent (for instance, only the network parameters).
+
+
+
+
+
+ Args | |
---|---|
+`sess` + | ++`tf.compat.v1.Session`, for executing ops. + | +
+`num_actions` + | ++int, number of actions the agent can take at any state. + | +
+`observation_shape` + | ++tuple of ints describing the observation shape. + | +
+`observation_dtype` + | ++tf.DType, specifies the type of the observations. Note +that if your inputs are continuous, you should set this to tf.float32. + | +
+`stack_size` + | ++int, number of frames to use in state stack. + | +
+`network` + | ++tf.Keras.Model, expecting 2 parameters: num_actions, +network_type. A call to this object will return an instantiation of the +network provided. The network returned can be run with different inputs +to create different outputs. See +dopamine.discrete_domains.atari_lib.NatureDQNNetwork as an example. + | +
+`gamma` + | ++float, discount factor with the usual RL meaning. + | +
+`update_horizon` + | ++int, horizon at which updates are performed, the 'n' in +n-step update. + | +
+`min_replay_history` + | ++int, number of transitions that should be experienced +before the agent begins training its value function. + | +
+`update_period` + | ++int, period between DQN updates. + | +
+`target_update_period` + | ++int, update period for the target network. + | +
+`epsilon_fn` + | ++function expecting 4 parameters: +(decay_period, step, warmup_steps, epsilon). This function should return +the epsilon value used for exploration during training. + | +
+`epsilon_train` + | ++float, the value to which the agent's epsilon is eventually +decayed during training. + | +
+`epsilon_eval` + | ++float, epsilon used when evaluating the agent. + | +
+`epsilon_decay_period` + | ++int, length of the epsilon decay schedule. + | +
+`tf_device` + | ++str, Tensorflow device on which the agent's graph is executed. + | +
+`eval_mode` + | ++bool, True for evaluation and False for training. + | +
+`use_staging` + | ++bool, when True use a staging area to prefetch the next +training batch, speeding training up by about 30%. + | +
+`max_tf_checkpoints_to_keep` + | ++int, the number of TensorFlow checkpoints to +keep. + | +
+`optimizer` + | ++`tf.compat.v1.train.Optimizer`, for training the value +function. + | +
+`summary_writer` + | ++SummaryWriter object for outputting training statistics. +Summary writing disabled if set to None. + | +
+`summary_writing_frequency` + | ++int, frequency with which summaries will be +written. Lower values will result in slower training. + | +
+`allow_partial_reload` + | ++bool, whether we allow reloading a partial agent +(for instance, only the network parameters). + | +
begin_episode
+begin_episode(
+ observation
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
-* `observation`: numpy array, the environment's initial observation.
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
int, the selected action. + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
@@ -122,66 +271,129 @@ This is used for checkpointing. It will return a dictionary containing all
non-TensorFlow objects (to be saved into a file by the caller), and it saves all
TensorFlow objects into a checkpoint file.
-#### Args:
-
-* `checkpoint_dir`: str, directory where TensorFlow objects will be
- saved.
-* `iteration_number`: int, iteration number to use for naming the
- checkpoint file.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++str, directory where TensorFlow objects will be saved. + | +
+`iteration_number` + | ++int, iteration number to use for naming the checkpoint +file. + | +
Returns | |
---|---|
A dict containing additional Python objects to be checkpointed by the experiment. If the checkpoint directory does not exist, returns None. + | +
end_episode
+end_episode(
+ reward
+)
+
Signals the end of the episode to the agent.
We store the observation of the current time step, which is the last observation
of the episode.
-#### Args:
+
+
+ Args | |
---|---|
+`reward` + | ++float, the last reward from the environment. + | +
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: float, the reward received from the agent's most recent
- action.
-* `observation`: numpy array, the most recent observation.
+
+
+ Args | |
---|---|
+`reward` + | ++float, the reward received from the agent's most recent action. + | +
+`observation` + | ++numpy array, the most recent observation. + | +
Returns | |
---|---|
int, the selected action. + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dictionary
)
-```
+
Restores the agent from a checkpoint.
@@ -189,14 +401,47 @@ Restores the agent's Python objects to those specified in bundle_dictionary, and
restores the TensorFlow objects to those specified in the checkpoint_dir. If the
checkpoint_dir does not exist, will not reset the agent's state.
-#### Args:
-
-* `checkpoint_dir`: str, path to the checkpoint saved by tf.Save.
-* `iteration_number`: int, checkpoint version, used when restoring the
- replay buffer.
-* `bundle_dictionary`: dict, containing additional Python objects owned
- by the agent.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++str, path to the checkpoint saved by tf.Save. + | +
+`iteration_number` + | ++int, checkpoint version, used when restoring the replay +buffer. + | +
+`bundle_dictionary` + | ++dict, containing additional Python objects owned by +the agent. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__new__
+recsim.agents.dopamine.dqn_agent.DQNNetworkType(
q_values
)
-```
-
-Create new instance of dqn_network(q_values,)
-
-## Properties
-
-q_values
-
-
+
+
+
+ Attributes | |
---|---|
`q_values` | + | +
__init__
+recsim.agents.dopamine.dqn_agent.ObservationAdapter(
+ input_observation_space, stack_size=1
)
-```
+
-Initialize self. See help(type(self)) for accurate signature.
-
-## Properties
+
+
-output_observation_space
Attributes | |
---|---|
+`output_observation_space` + | +The output observation space of the adapter. + | +
+encode(
+ observation
+)
+
Encode user observation and document observations to an image.
diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md
index 3a4c25a..bddbfef 100644
--- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md
+++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/ResponseAdapter.md
@@ -1,55 +1,65 @@
__init__
+recsim.agents.dopamine.dqn_agent.ResponseAdapter(
+ input_response_space
+)
+
-```python
-__init__(input_response_space)
-```
+
-Init function for ResponseAdapter.
+
+
+ Args | |
---|---|
+`input_response_space` + | ++this is assumed to be an instance of +gym.spaces.Tuple; each element of the tuple is has to be an instance +of gym.spaces.Dict consisting of feature_name: 0-d gym.spaces.Box +(single float) key-value pairs. + | +
Attributes | |
---|---|
`response_dtype` |
-
+ |
`response_names` |
-
+ |
`response_shape` |
-
+ |
+
+encode(
+ responses
+)
+
diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md
index 8703e62..00a59e9 100644
--- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md
+++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/recsim_dqn_network.md
@@ -5,22 +5,19 @@
# recsim.agents.dopamine.dqn_agent.recsim_dqn_network
-
+
+recsim.agents.dopamine.dqn_agent.recsim_dqn_network(
+ user, doc, scope
)
-```
+
diff --git a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md
index b988380..592c297 100644
--- a/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md
+++ b/docs/api_docs/python/recsim/agents/dopamine/dqn_agent/wrapped_replay_buffer.md
@@ -5,18 +5,19 @@
# recsim.agents.dopamine.dqn_agent.wrapped_replay_buffer
-
+
+recsim.agents.dopamine.dqn_agent.wrapped_replay_buffer(
+ **kwargs
+)
+
diff --git a/docs/api_docs/python/recsim/agents/full_slate_q_agent.md b/docs/api_docs/python/recsim/agents/full_slate_q_agent.md
index ecbfd34..ea7a1f8 100644
--- a/docs/api_docs/python/recsim/agents/full_slate_q_agent.md
+++ b/docs/api_docs/python/recsim/agents/full_slate_q_agent.md
@@ -5,7 +5,10 @@
# Module: recsim.agents.full_slate_q_agent
+
+
__init__
+recsim.agents.full_slate_q_agent.FullSlateQAgent(
+ sess, observation_space, action_space, optimizer_name='', eval_mode=False,
**kwargs
)
-```
+
-Initializes a FullSlateQAgent.
+
-#### Args:
+This is a standard, nondecomposed Q-learning method that treats each slate
+atomically (i.e., holistically) as a single action.
-* `sess`: a Tensorflow session.
-* `observation_space`: A gym.spaces object that specifies the format of
- observations.
-* `action_space`: A gym.spaces object that specifies the format of
- actions.
-* `optimizer_name`: The name of the optimizer.
-* `eval_mode`: A bool for whether the agent is in training or
- evaluation mode.
-* `**kwargs`: Keyword arguments to the DQNAgent.
+
+
+ Args | |
---|---|
+`sess` + | ++a Tensorflow session. + | +
+`observation_space` + | ++A gym.spaces object that specifies the format of +observations. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`optimizer_name` + | ++The name of the optimizer. + | +
+`eval_mode` + | ++A bool for whether the agent is in training or evaluation mode. + | +
+`**kwargs` + | ++Keyword arguments to the DQNAgent. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
+
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+An integer array of size _slate_size, the selected slated, each +element of which is an index in the list of doc_obs. + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
@@ -100,79 +165,147 @@ This is used for checkpointing. It will return a dictionary containing all
non-TensorFlow objects (to be saved into a file by the caller), and it saves all
TensorFlow objects into a checkpoint file.
-#### Args:
-
-* `checkpoint_dir`: str, directory where TensorFlow objects will be
- saved.
-* `iteration_number`: int, iteration number to use for naming the
- checkpoint file.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++str, directory where TensorFlow objects will be saved. + | +
+`iteration_number` + | ++int, iteration number to use for naming the checkpoint +file. + | +
Returns | |
---|---|
A dict containing additional Python objects to be checkpointed by the experiment. If the checkpoint directory does not exist, returns None. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
Signals the end of the episode to the agent.
We store the observation of the current time step, which is the last observation
of the episode.
-#### Args:
-
-* `reward`: float, the last reward from the environment.
-* `observation`: numpy array, the environment's initial observation.
+
+
+ Args | |
---|---|
+`reward` + | ++float, the last reward from the environment. + | +
+`observation` + | ++numpy array, the environment's initial observation. + | +
step
+step(
+ reward, observation
)
-```
+
Receives observations of environment and returns a slate.
-#### Args:
-
-* `reward`: A double representing the overall reward to the recommended
- slate.
-* `observation`: A dictionary that stores all the observations
- including:
- - user: A list of floats representing the user's observed state
- - doc: A list of observations of document features
- - response: A vector valued response signal that represent user's response
- to each document
-
-#### Returns:
+
+
+ Args | |
---|---|
+`reward` + | ++A double representing the overall reward to the recommended slate. + | +
+`observation` + | ++A dictionary that stores all the observations including: +- user: A list of floats representing the user's observed state +- doc: A list of observations of document features +- response: A vector valued response signal that represent user's +response to each document + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index in the list of document observvations. + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dictionary
)
-```
+
Restores the agent from a checkpoint.
@@ -180,14 +313,47 @@ Restores the agent's Python objects to those specified in bundle_dictionary, and
restores the TensorFlow objects to those specified in the checkpoint_dir. If the
checkpoint_dir does not exist, will not reset the agent's state.
-#### Args:
-
-* `checkpoint_dir`: str, path to the checkpoint saved by tf.Save.
-* `iteration_number`: int, checkpoint version, used when restoring the
- replay buffer.
-* `bundle_dictionary`: dict, containing additional Python objects owned
- by the agent.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++str, path to the checkpoint saved by tf.Save. + | +
+`iteration_number` + | ++int, checkpoint version, used when restoring the replay +buffer. + | +
+`bundle_dictionary` + | ++dict, containing additional Python objects owned by +the agent. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.greedy_pctr_agent.GreedyPCTRAgent(
+ action_space, belief_state,
+ choice_model=cm.MultinomialLogitChoiceModel({'no_click_mass': 5})
+)
+
+
This agent assumes knowledge of the true underlying choice model. Note that this
@@ -36,36 +40,52 @@ implicitly means it receives observations of the true user and document states.
This agent myopically creates slates with items that have the highest
probability of being clicked under the given choice model.
-__init__
Args | |
---|---|
+`action_space` + | ++A gym.spaces object that specifies the format of actions + | +
+`belief_state` + | ++An instantiation of AbstractUserState assumed by the agent + | +
+`choice_model` + | ++An instantiation of AbstractChoiceModel assumed by the agent +Default to a multinomial logit choice model with no_click_mass = 5. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
-* `observation`: numpy array, the environment's initial observation.
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation=None
)
-```
+
Signals the end of the episode to the agent.
-#### Args:
-
-* `reward`: An float that is the last reward from the environment.
-* `observation`: numpy array that represents the last observation of
- the episode.
+
+
+ Args | |
---|---|
+`reward` + | ++An float that is the last reward from the environment. + | +
+`observation` + | ++numpy array that represents the last observation of the +episode. + | +
findBestDocuments
+findBestDocuments(
+ scores
+)
+
Returns the indices of the highest scores in sorted order.
-#### Args:
+
-* `scores`: A list of floats representing unnormalized document scores
+ Args | |
---|---|
+`scores` + | ++A list of floats representing unnormalized document scores + | +
Returns | |
---|---|
+`sorted_indices` + | ++A list of integers indexing the highest scores, in sorted +order + | +
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: Unused.
-* `observation`: A dictionary that includes the most recent
- observations and should have the following fields:
- - user: A list of floats representing the user's observed state
- - doc: A list of observations of document features
-
-#### Returns:
+
+
+ Args | |
---|---|
+`reward` + | ++Unused. + | +
+`observation` + | ++A dictionary that includes the most recent observations and +should have the following fields: +- user: A list of floats representing the user's observed state +- doc: A list of observations of document features + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- and is used when we save TensorFlow objects by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.layers.abstract_click_bandit.AbstractClickBanditLayer(
+ observation_space, action_space, arm_base_agent_ctors,
+ alg_ctor=recsim.agents.bandits.algorithms.UCB1, ci_scaling=1.0, random_seed=0,
+ **kwargs
+)
+
+
This layer consumes a list of base agents with apriori unknown mean payoffs
@@ -40,36 +45,81 @@ confidence bound as index, the AbstractClickBandit will put the partial slate
of the highest-UCB base agent in first place, then the second, until the slate
is complete.
-__init__
Args | |
---|---|
+`observation_space` + | ++Instance of a gym space corresponding to the +observation format. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`arm_base_agent_ctors` + | ++a list of agent constructors, each agent corresponds +to a bandit arm. + | +
+`alg_ctor` + | ++A class of an MABAlgorithm for exploration, default to UCB1. + | +
+`ci_scaling` + | ++A floating number specifying the scaling of confidence bound. + | +
+`random_seed` + | ++An integer for random seed. + | +
+`**kwargs` + | ++arguments for base agents. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
A dictionary containing additional Python objects to be checkpointed by - the experiment. Each key is a string for the object name and the value - is actual object. If the checkpoint directory does not exist, returns - empty dictionary. +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
-
-
-
+
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it
with the reward.
-#### Args:
-
-
-* `reward`: Unused.
-* `observation`: A dictionary that includes the most recent observations and
- should have the following fields:
- - user: A dictionary representing user's observed state. Assumes
- observation['user']['sufficient_statics'] is a dictionary containing
- base agent impression counts and base agent click counts.
-
-
-#### Returns:
-
+
+
+ Args | |
---|---|
+`reward` + | ++Unused. + | +
+`observation` + | ++A dictionary that includes the most recent observations and +should have the following fields: +- user: A dictionary representing user's observed state. Assumes +observation['user']['sufficient_statics'] is a dictionary containing +base agent impression counts and base agent click counts. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.layers.cluster_click_statistics.ClusterClickStatsLayer(
+ base_agent_ctor, observation_space, action_space, **kwargs
+)
+
+
This module assumes each document belongs to single cluster and we know the
@@ -36,39 +38,56 @@ number of possible clusters. Every time we increase impression count for a
cluster if the agent recommends a document from that cluster. We also increase
click count for a cluster if user responds a click.
-__init__
Args | |
---|---|
+`base_agent_ctor` + | ++a constructor for the base agent. + | +
+`observation_space` + | ++a gym.spaces object specifying the format of +observations. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`**kwargs` + | ++arguments to pass to the downstream agent at construction time. + | +
multi_user
Attributes | |
---|---|
`multi_user` | Returns boolean indicating whether this agent +serves multiple users. |
`observation_space` |
-
+ |
+
+begin_episode(
+ observation=None
+)
+
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: The reward received from the agent's most recent action as
- a float.
-* `observation`: A dictionary that includes the most recent
- observations.
-
-#### Returns:
+
+
+ Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- saved by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.layers.fixed_length_history.FixedLengthHistoryLayer(
+ base_agent_ctor, observation_space, action_space, history_length,
+ remember_user=True, remember_response=True, remember_doc=False, **kwargs
+)
+
+
This module introduces sufficient statistics in the form of a buffer holding the
@@ -39,51 +42,87 @@ are not enough observations to fill the buffer, so they will be filled with
None. Each non-vacuous element of the tuple is an instance of (a subset of)
observation_space.
-__init__
Args | |
---|---|
+`base_agent_ctor` + | ++a constructor for the base agent. + | +
+`observation_space` + | ++a gym.spaces object specifying the format of +observations. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`history_length` + | ++positive integer number of observations to remember. + | +
+`remember_user` + | ++boolean, indicates whether to track +observation_space[\'user\']. + | +
+`remember_response` + | ++boolean, indicates whether to track +observation_space[\'response\']. + | +
+`remember_doc` + | ++boolean, indicates whether to track +observation_space[\'doc\']. + | +
+`**kwargs` + | ++arguments to pass to the downstream agent at construction time. + | +
multi_user
Attributes | |
---|---|
`multi_user` | Returns boolean indicating whether this agent +serves multiple users. |
`observation_space` |
-
+ |
+
+begin_episode(
+ observation=None
+)
+
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: The reward received from the agent's most recent action as
- a float.
-* `observation`: A dictionary that includes the most recent
- observations.
-
-#### Returns:
+
+
+ Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- saved by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.layers.sufficient_statistics.SufficientStatisticsLayer(
+ base_agent_ctor, observation_space, action_space, sufficient_statistics_space,
+ **kwargs
+)
+
+
This module assumes each document belongs to single cluster and we know the
@@ -36,42 +39,64 @@ number of possible clusters. Every time we increase impression count for a
cluster if the agent recommends a document from that cluster. We also increase
click count for a cluster if user responds a click.
-__init__
Args | |
---|---|
+`base_agent_ctor` + | ++a constructor for the base agent. + | +
+`observation_space` + | ++a gym.spaces object specifying the format of +observations. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`sufficient_statistics_space` + | ++a gym.spaces object specifying the format of +the created sufficient statistics. + | +
+`**kwargs` + | ++arguments to pass to the downstream agent at construction time. + | +
multi_user
Attributes | |
---|---|
`multi_user` | Returns boolean indicating whether this agent +serves multiple users. |
`observation_space` |
-
+ |
+
+begin_episode(
+ observation=None
+)
+
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: The reward received from the agent's most recent action as
- a float.
-* `observation`: A dictionary that includes the most recent
- observations.
-
-#### Returns:
+
+
+ Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- saved by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.layers.temporal_aggregation.TemporalAggregationLayer(
+ base_agent_ctor, observation_space, action_space, gamma=0.0,
+ aggregation_period=1, switching_cost=1.0, document_comparison_fcn=None, **kwargs
+)
+
+
A reinforcement learning agent that implements learns a temporally aggregated
@@ -48,49 +52,92 @@ becomes non-Markovian.
The two methods are not mutually exclusive and may be used in conjunction by
specifying a non-unit aggregation_period and a non-zero switching_cost.
-__init__
Args | |
---|---|
+`base_agent_ctor` + | ++a constructor for the base agent. + | +
+`observation_space` + | ++a gym.spaces object specifying the format of +observations. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`gamma` + | ++geometric discounting factor between [0, 1) for the event-level +objective. + | +
+`aggregation_period` + | ++number of time steps to hold an action fixed. + | +
+`switching_cost` + | ++a non-negative penalty for switching an action. + | +
+`document_comparison_fcn` + | ++a function taking two document observations and +returning a Boolean value that indicates if they are considered +equivalent. This is useful for making decisions at a higher abstraction +level (e.g. comparing only document topics). If not provided, this will +default to direct observation equality. + | +
+`**kwargs` + | ++base_agent initialization args. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
step
+step(
+ reward, observation
)
-```
+
Preprocesses the reward and observation and calls base agent.
-#### Args:
-
-* `reward`: The reward received from the agent's most recent action as
- a float.
-* `observation`: A dictionary that includes the most recent
- observations and should have the following fields:
- - user: A NumPy array representing user's observed state. Assumes it is a
- concatenation of topic pull counts and topic click counts.
- - doc: A NumPy array representing observations of document features.
- Assumes it is a concatenation of one-hot encoding of topic_id and
- document quality.
-
-#### Returns:
-
-* `slate`: An integer array of size _slate_size, where each element is
- an index into the list of doc_obs.
+
+
+ Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations and +should have the following fields: +- user: A NumPy array representing user's observed state. Assumes it is +a concatenation of topic pull counts and topic click counts. +- doc: A NumPy array representing observations of document features. +Assumes it is a concatenation of one-hot encoding of topic_id and +document quality. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs. + | +
Raises | |
---|---|
+`RuntimeError` + | ++if the agent has to hold a slate with given features fixed +for k steps but the documents needed to reconstruct that slate +become unavailable. + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- saved by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__init__
+recsim.agents.random_agent.RandomAgent(
+ action_space, random_seed=0
)
-```
+
-Initializes AbstractEpisodicRecommenderAgent.
-
-#### Args:
+
-* `action_space`: A gym.spaces object that specifies the format of
- actions.
-* `summary_writer`: A Tensorflow summary writer to pass to the agent
- for in-agent training statistics in Tensorboard.
+
+
+ Args | |
---|---|
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`summary_writer` + | ++A Tensorflow summary writer to pass to the agent +for in-agent training statistics in Tensorboard. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
-* `observation`: numpy array, the environment's initial observation.
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation=None
)
-```
+
Signals the end of the episode to the agent.
-#### Args:
-
-* `reward`: An float that is the last reward from the environment.
-* `observation`: numpy array that represents the last observation of
- the episode.
+
+
+ Args | |
---|---|
+`reward` + | ++An float that is the last reward from the environment. + | +
+`observation` + | ++numpy array that represents the last observation of the +episode. + | +
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: Unused.
-* `observation`: A dictionary that includes the most recent
- observation. Should include 'doc' field that includes observation of all
- candidates.
-
-#### Returns:
+
+
+ Args | |
---|---|
+`reward` + | ++Unused. + | +
+`observation` + | ++A dictionary that includes the most recent observation. +Should include 'doc' field that includes observation of all candidates. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- and is used when we save TensorFlow objects by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint and is +used when we save TensorFlow objects by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
__init__
+recsim.agents.slate_decomp_q_agent.SlateDecompQAgent(
+ sess, observation_space, action_space, optimizer_name='', select_slate_fn=None,
+ compute_target_fn=None, stack_size=1, eval_mode=False, **kwargs
)
-```
-
-Initializes SlateDecompQAgent.
+
-#### Args:
+
-* `sess`: a Tensorflow session.
-* `observation_space`: A gym.spaces object that specifies the format of
- observations.
-* `action_space`: A gym.spaces object that specifies the format of
- actions.
-* `optimizer_name`: The name of the optimizer.
-* `select_slate_fn`: A function that selects the slate.
-* `compute_target_fn`: A function that omputes the target q value.
-* `stack_size`: The stack size for the replay buffer.
-* `eval_mode`: A bool for whether the agent is in training or
- evaluation mode.
-* `**kwargs`: Keyword arguments to the DQNAgent.
+
+
+ Args | |
---|---|
+`sess` + | ++a Tensorflow session. + | +
+`observation_space` + | ++A gym.spaces object that specifies the format of +observations. + | +
+`action_space` + | ++A gym.spaces object that specifies the format of actions. + | +
+`optimizer_name` + | ++The name of the optimizer. + | +
+`select_slate_fn` + | ++A function that selects the slate. + | +
+`compute_target_fn` + | ++A function that omputes the target q value. + | +
+`stack_size` + | ++The stack size for the replay buffer. + | +
+`eval_mode` + | ++A bool for whether the agent is in training or evaluation mode. + | +
+`**kwargs` + | ++Keyword arguments to the DQNAgent. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
+
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+An integer array of size _slate_size, the selected slated, each +element of which is an index in the list of doc_obs. + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
@@ -100,76 +183,143 @@ This is used for checkpointing. It will return a dictionary containing all
non-TensorFlow objects (to be saved into a file by the caller), and it saves all
TensorFlow objects into a checkpoint file.
-#### Args:
-
-* `checkpoint_dir`: str, directory where TensorFlow objects will be
- saved.
-* `iteration_number`: int, iteration number to use for naming the
- checkpoint file.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++str, directory where TensorFlow objects will be saved. + | +
+`iteration_number` + | ++int, iteration number to use for naming the checkpoint +file. + | +
Returns | |
---|---|
A dict containing additional Python objects to be checkpointed by the experiment. If the checkpoint directory does not exist, returns None. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
Signals the end of the episode to the agent.
We store the observation of the current time step, which is the last observation
of the episode.
-#### Args:
-
-* `reward`: float, the last reward from the environment.
-* `observation`: numpy array, the environment's initial observation.
+
+
+ Args | |
---|---|
+`reward` + | ++float, the last reward from the environment. + | +
+`observation` + | ++numpy array, the environment's initial observation. + | +
step
+step(
+ reward, observation
)
-```
+
Records the transition and returns the agent's next action.
It uses document-level user response instead of overral reward as the reward of
the problem.
-#### Args:
-
-* `reward`: unused.
-* `observation`: a space.Dict that includes observation of the user
- state observation, documents and user responses.
+
+
+ Args | |
---|---|
+`reward` + | ++unused. + | +
+`observation` + | ++a space.Dict that includes observation of the user state +observation, documents and user responses. + | +
Returns | |
---|---|
Array, the selected action. + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dictionary
)
-```
+
Restores the agent from a checkpoint.
@@ -177,14 +327,47 @@ Restores the agent's Python objects to those specified in bundle_dictionary, and
restores the TensorFlow objects to those specified in the checkpoint_dir. If the
checkpoint_dir does not exist, will not reset the agent's state.
-#### Args:
-
-* `checkpoint_dir`: str, path to the checkpoint saved by tf.Save.
-* `iteration_number`: int, checkpoint version, used when restoring the
- replay buffer.
-* `bundle_dictionary`: dict, containing additional Python objects owned
- by the agent.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++str, path to the checkpoint saved by tf.Save. + | +
+`iteration_number` + | ++int, checkpoint version, used when restoring the replay +buffer. + | +
+`bundle_dictionary` + | ++dict, containing additional Python objects owned by +the agent. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
+recsim.agents.slate_decomp_q_agent.compute_probs_tf(
+ slate, scores_tf, score_no_click_tf
)
-```
+
This assumes scores are normalizable, e.g., scores cannot be negative.
-#### Args:
+
+
+ Args | |
---|---|
+`slate` + | ++a list of integers that represents the video slate. + | +
+`scores_tf` + | ++a float tensor that stores the scores of all documents. + | +
+`score_no_click_tf` + | ++a float tensor that represents the score for the action +of picking no document. + | +
Returns | |
---|---|
+A float tensor that represents the probabilities of selecting each document +in the slate. + | +
+recsim.agents.slate_decomp_q_agent.compute_target_greedy_q(
+ reward, gamma, next_actions, next_q_values, next_states, terminals
)
-```
+
This algorithm corresponds to the method "GT" in Ie et al.
https://arxiv.org/abs/1905.12767..
-#### Args:
+
+
+ Args | |
---|---|
+`reward` + | ++[batch_size] tensor, the immediate reward. + | +
+`gamma` + | ++float, discount factor with the usual RL meaning. + | +
+`next_actions` + | ++[batch_size, slate_size] tensor, the next slate. + | +
+`next_q_values` + | ++[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. + | +
+`next_states` + | ++[batch_size, 1 + num_of_documents] tensor, the features for the +user and the docuemnts in the next step. + | +
+`terminals` + | ++[batch_size] tensor, indicating if this is a terminal step. + | +
Returns | |
---|---|
[batch_size] tensor, the target q values. + | +
+recsim.agents.slate_decomp_q_agent.compute_target_optimal_q(
+ reward, gamma, next_actions, next_q_values, next_states, terminals
)
-```
+
This algorithm corresponds to the method "OT" in Ie et al.
https://arxiv.org/abs/1905.12767..
-#### Args:
+
+
+ Args | |
---|---|
+`reward` + | ++[batch_size] tensor, the immediate reward. + | +
+`gamma` + | ++float, discount factor with the usual RL meaning. + | +
+`next_actions` + | ++[batch_size, slate_size] tensor, the next slate. + | +
+`next_q_values` + | ++[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. + | +
+`next_states` + | ++[batch_size, 1 + num_of_documents] tensor, the features for the +user and the docuemnts in the next step. + | +
+`terminals` + | ++[batch_size] tensor, indicating if this is a terminal step. + | +
Returns | |
---|---|
[batch_size] tensor, the target q values. + | +
+recsim.agents.slate_decomp_q_agent.compute_target_sarsa(
+ reward, gamma, next_actions, next_q_values, next_states, terminals
)
-```
+
-#### Args:
+
+
+ Args | |
---|---|
+`reward` + | ++[batch_size] tensor, the immediate reward. + | +
+`gamma` + | ++float, discount factor with the usual RL meaning. + | +
+`next_actions` + | ++[batch_size, slate_size] tensor, the next slate. + | +
+`next_q_values` + | ++[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. + | +
+`next_states` + | ++[batch_size, 1 + num_of_documents] tensor, the features for the +user and the docuemnts in the next step. + | +
+`terminals` + | ++[batch_size] tensor, indicating if this is a terminal step. + | +
Returns | |
---|---|
[batch_size] tensor, the target q values. + | +
+recsim.agents.slate_decomp_q_agent.compute_target_topk_q(
+ reward, gamma, next_actions, next_q_values, next_states, terminals
)
-```
+
This algorithm corresponds to the method "TT" in Ie et al.
https://arxiv.org/abs/1905.12767.
-#### Args:
+
+
+ Args | |
---|---|
+`reward` + | ++[batch_size] tensor, the immediate reward. + | +
+`gamma` + | ++float, discount factor with the usual RL meaning. + | +
+`next_actions` + | ++[batch_size, slate_size] tensor, the next slate. + | +
+`next_q_values` + | ++[batch_size, num_of_documents] tensor, the q values of the +documents in the next step. + | +
+`next_states` + | ++[batch_size, 1 + num_of_documents] tensor, the features for the +user and the docuemnts in the next step. + | +
+`terminals` + | ++[batch_size] tensor, indicating if this is a terminal step. + | +
Returns | |
---|---|
[batch_size] tensor, the target q values. + | +
+recsim.agents.slate_decomp_q_agent.create_agent(
+ agent_name, sess, **kwargs
)
-```
+
diff --git a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md
index 919e770..62dc055 100644
--- a/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md
+++ b/docs/api_docs/python/recsim/agents/slate_decomp_q_agent/score_documents.md
@@ -5,44 +5,84 @@
# recsim.agents.slate_decomp_q_agent.score_documents
-
+
+recsim.agents.slate_decomp_q_agent.score_documents(
+ user_obs, doc_obs, no_click_mass=1.0, is_mnl=False, min_normalizer=-1.0
)
-```
+
Similar to score_documents_tf but works on NumPy objects.
-#### Args:
+
+
+ Args | |
---|---|
+`user_obs` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the candidate set. + | +
+`no_click_mass` + | ++a float indicating the mass given to a no click option + | +
+`is_mnl` + | ++whether to use a multinomial logit model instead of a multinomial +proportional model. + | +
+`min_normalizer` + | ++A float (<= 0) used to offset the scores to be positive when +using multinomial proportional model. + | +
Returns | |
---|---|
+A float array that stores unnormalzied scores of documents and a float +number that represents the score for the action of picking no document. + | +
+recsim.agents.slate_decomp_q_agent.score_documents_tf(
+ user_obs, doc_obs, no_click_mass=1.0, is_mnl=False, min_normalizer=-1.0
)
-```
+
@@ -32,19 +28,63 @@ This implements both multinomial proportional model and multinormial logit model
given some parameters. We also assume scores are based on inner products of
user_obs and doc_obs.
-#### Args:
+
+
+ Args | |
---|---|
+`user_obs` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the candidate set. + | +
+`no_click_mass` + | ++a float indicating the mass given to a no click option + | +
+`is_mnl` + | ++whether to use a multinomial logit model instead of a multinomial +proportional model. + | +
+`min_normalizer` + | ++A float (<= 0) used to offset the scores to be positive when +using multinomial proportional model. + | +
Returns | |
---|---|
+A float tensor that stores unnormalzied scores of documents and a float +tensor that represents the score for the action of picking no document. + | +
+recsim.agents.slate_decomp_q_agent.select_slate_greedy(
+ slate_size, s_no_click, s, q
)
-```
+
This algorithm corresponds to the method "GS" in Ie et al.
https://arxiv.org/abs/1905.12767.
-#### Args:
+
+
+ Args | |
---|---|
+`slate_size` + | ++int, the size of the recommendation slate. + | +
+`s_no_click` + | ++float tensor, the score for not clicking any document. + | +
+`s` + | ++[num_of_documents] tensor, the scores for clicking documents. + | +
+`q` + | ++[num_of_documents] tensor, the predicted q values for documents. + | +
Returns | |
---|---|
[slate_size] tensor, the selected slate. + | +
+recsim.agents.slate_decomp_q_agent.select_slate_optimal(
+ slate_size, s_no_click, s, q
)
-```
+
This algorithm corresponds to the method "OS" in Ie et al.
https://arxiv.org/abs/1905.12767.
-#### Args:
+
+
+ Args | |
---|---|
+`slate_size` + | ++int, the size of the recommendation slate. + | +
+`s_no_click` + | ++float tensor, the score for not clicking any document. + | +
+`s` + | ++[num_of_documents] tensor, the scores for clicking documents. + | +
+`q` + | ++[num_of_documents] tensor, the predicted q values for documents. + | +
Returns | |
---|---|
[slate_size] tensor, the selected slate. + | +
+recsim.agents.slate_decomp_q_agent.select_slate_topk(
+ slate_size, s_no_click, s, q
)
-```
+
This algorithm corresponds to the method "TS" in Ie et al.
https://arxiv.org/abs/1905.12767.
-#### Args:
+
+
+ Args | |
---|---|
+`slate_size` + | ++int, the size of the recommendation slate. + | +
+`s_no_click` + | ++float tensor, the score for not clicking any document. + | +
+`s` + | ++[num_of_documents] tensor, the scores for clicking documents. + | +
+`q` + | ++[num_of_documents] tensor, the predicted q values for documents. + | +
Returns | |
---|---|
[slate_size] tensor, the selected slate. + | +
+recsim.agents.tabular_q_agent.TabularQAgent(
+ observation_space, action_space, eval_mode=False, ignore_response=True,
+ discretization_bounds=(0.0, 10.0), number_bins=100,
+ exploration_policy='epsilon_greedy', exploration_temperature=0.99,
+ learning_rate=0.1, gamma=0.99, ordinal_slates=False, **kwargs
+)
+
+
This agent provides a tabular implementation of the Q-learning algorithm. To
@@ -46,63 +52,130 @@ Q-function. Producing ground truth Q-functions is the main intended use of this
agent, since discretization is prohibitively expensive in high-dimensional
environments.
-__init__
Args | |
---|---|
+`observation_space` + | ++a gym.spaces object specifying the format of +observations. + | +
+`action_space` + | ++a gym.spaces object that specifies the format of actions. + | +
+`eval_mode` + | ++Boolean indicating whether the agent is in training or eval +mode. + | +
+`ignore_response` + | ++Boolean indicating whether the agent should ignore the +response part of the observation. + | +
+`discretization_bounds` + | ++pair of real numbers indicating the min and max +value for continuous attributes discretization. Values below the min +will all be grouped in the first bin, while values above the max will +all be grouped in the last bin. See the documentation of numpy.digitize +for further details. + | +
+`number_bins` + | ++positive integer number of bins used to discretize continuous +attributes. + | +
+`exploration_policy` + | ++either one of ['epsilon_greedy', 'min_count'] or a +custom function. +function. + | +
+`exploration_temperature` + | ++a real number passed as parameter to the +exploration policy. + | +
+`learning_rate` + | ++a real number between 0 and 1 indicating how much to update +Q-values, i.e. Q_t+1(s,a) = (1 - learning_rate) * Q_t(s, a) ++ learning_rate * (R(s,a) + ...). + | +
+`gamma` + | ++real value between 0 and 1 indicating the discount factor of the +MDP. + | +
+`ordinal_slates` + | ++boolean indicating whether slate ordering matters, e.g. +whether the slates (1, 2) and (2, 1) should be considered different +actions. Using ordinal slates increases complexity factorially. + | +
+`**kwargs` + | ++additional arguments like eval_mode. + | +
multi_user
Attributes | |
---|---|
+`multi_user` + | +Returns boolean indicating whether this agent serves multiple users. + | +
+begin_episode(
+ observation=None
+)
+
Returns the agent's first action for this episode.
-#### Args:
+
-* `observation`: numpy array, the environment's initial observation.
+ Args | |
---|---|
+`observation` + | ++numpy array, the environment's initial observation. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
bundle_and_checkpoint
+bundle_and_checkpoint(
+ checkpoint_dir, iteration_number
)
-```
+
Returns a self-contained bundle of the agent's state.
-#### Args:
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string for the directory where objects will be saved. + | +
+`iteration_number` + | ++An integer of iteration number to use for naming the +checkpoint file. + | +
Returns | |
---|---|
+A dictionary containing additional Python objects to be checkpointed by +the experiment. Each key is a string for the object name and the value +is actual object. If the checkpoint directory does not exist, returns +empty dictionary. + | +
end_episode
+end_episode(
+ reward, observation
)
-```
+
Signals the end of the episode to the agent.
-#### Args:
-
-* `reward`: An float that is the last reward from the environment.
-* `observation`: numpy array that represents the last observation of
- the episode.
+
+
+ Args | |
---|---|
+`reward` + | ++An float that is the last reward from the environment. + | +
+`observation` + | ++numpy array that represents the last observation of the +episode. + | +
step
+step(
+ reward, observation
)
-```
+
Records the most recent transition and returns the agent's next action.
We store the observation of the last time step since we want to store it with
the reward.
-#### Args:
-
-* `reward`: The reward received from the agent's most recent action as
- a float.
-* `observation`: A dictionary that includes the most recent
- observations and should have the following fields:
- - user: A NumPy array representing user's observed state. Assumes it is a
- concatenation of topic pull counts and topic click counts.
- - doc: A NumPy array representing observations of document features.
- Assumes it is a concatenation of one-hot encoding of topic_id and
- document quality.
+
+
+ Args | |
---|---|
+`reward` + | ++The reward received from the agent's most recent action as a +float. + | +
+`observation` + | ++A dictionary that includes the most recent observations and +should have the following fields: +- user: A NumPy array representing user's observed state. Assumes it is +a concatenation of topic pull counts and topic click counts. +- doc: A NumPy array representing observations of document features. +Assumes it is a concatenation of one-hot encoding of topic_id and +document quality. + | +
Returns | |
---|---|
+`slate` + | ++An integer array of size _slate_size, where each element is an +index into the list of doc_obs + | +
Raises | |
---|---|
+`ValueError` + | ++if reward is not in [0, 1]. + | +
unbundle
+unbundle(
+ checkpoint_dir, iteration_number, bundle_dict
)
-```
+
Restores the agent from a checkpoint.
-#### Args:
-
-* `checkpoint_dir`: A string that represents the path to the checkpoint
- saved by tf.Save.
-* `iteration_number`: An integer that represents the checkpoint version
- and is used when restoring replay buffer.
-* `bundle_dict`: A dict containing additional Python objects owned by
- the agent. Each key is an object name and the value is the actual object.
+
+
+ Args | |
---|---|
+`checkpoint_dir` + | ++A string that represents the path to the checkpoint saved +by tf.Save. + | +
+`iteration_number` + | ++An integer that represents the checkpoint version and is +used when restoring replay buffer. + | +
+`bundle_dict` + | ++A dict containing additional Python objects owned by the +agent. Each key is an object name and the value is the actual object. + | +
Returns | |
---|---|
bool, True if unbundling was successful. + | +
Attributes | |
---|---|
`score_no_click` |
-
+ |
`scores` | + + | +
+Returns selected index of document in the slate. -#### Returns: - -* `selected_index`: a integer indicating which item was chosen, or None - if none were selected. + + +@abc.abstractmethod
+choose_item() +
Returns | |
---|---|
+`selected_index` + | ++a integer indicating which item was chosen, or None if +none were selected. + | +
score_documents
+Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + +@abc.abstractmethod
+score_documents( + user_state, doc_obs ) -``` +
Args | |
---|---|
+`user_state` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the slate. + | +
Attributes | |
---|---|
+`scores` + | ++A numpy array that stores the scores of all documents. + | +
+`score_no_click` + | ++A float that represents the score for the action of +picking no document. + | +
+recsim.choice_model.CascadeChoiceModel(
+ choice_features
+)
+
-#### Raises:
+
-* `ValueError`: if either attention_prob or base_attention_prob is
- invalid.
+
-__init__
Raises | |
---|---|
+`ValueError` + | ++if either attention_prob or base_attention_prob is invalid. + | +
Attributes | |
---|---|
`attention_prob` | The probability of examining a document i +given document i - 1 not clicked. |
`score_scaling` | +A multiplicative factor to convert score of document i to the click +probability of examined document i. |
`score_no_click` | +
-
+ |
`scores` |
-
+ |
+
+choose_item()
+
Returns selected index of document in the slate.
-#### Returns:
-
-* `selected_index`: a integer indicating which item was chosen, or None
- if none were selected.
+
+
+ Returns | |
---|---|
+`selected_index` + | ++a integer indicating which item was chosen, or None if +none were selected. + | +
score_documents
+Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + +@abc.abstractmethod
+score_documents( + user_state, doc_obs ) -``` +
Args | |
---|---|
+`user_state` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the slate. + | +
Attributes | |
---|---|
+`scores` + | ++A numpy array that stores the scores of all documents. + | +
+`score_no_click` + | ++A float that represents the score for the action of +picking no document. + | +
+recsim.choice_model.ExponentialCascadeChoiceModel(
+ choice_features
+)
+
+
Clicks the item at position i according to p(i) = attention_prob * score_scaling
* exp(score(i)) by going through the slate in order, and stopping once an item
has been clicked.
-__init__
Attributes | |
---|---|
`score_no_click` |
-
+ |
`scores` |
-
+ |
+
+choose_item()
+
Returns selected index of document in the slate.
-#### Returns:
-
-* `selected_index`: a integer indicating which item was chosen, or None
- if none were selected.
+
+
+ Returns | |
---|---|
+`selected_index` + | ++a integer indicating which item was chosen, or None if +none were selected. + | +
score_documents
+score_documents(
+ user_state, doc_obs
)
-```
+
Computes unnormalized scores of documents in the slate given user state.
-#### Args:
-
-* `user_state`: An instance of AbstractUserState.
-* `doc_obs`: A numpy array that represents the observation of all
- documents in the slate.
-
-#### Attributes:
+
+
+ Args | |
---|---|
+`user_state` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the slate. + | +
Attributes | |
---|---|
+`scores` + | ++A numpy array that stores the scores of all documents. + | +
+`score_no_click` + | ++A float that represents the score for the action of +picking no document. + | +
+recsim.choice_model.MultinomialLogitChoiceModel(
+ choice_features
+)
+
+
Samples item x in scores according to p(x) = exp(x) / Sum_{y in scores} exp(y)
-#### Args:
-
-* `choice_features`: a dict that stores the features used in choice
- model: `no_click_mass`: a float indicating the mass given to a no click
- option.
-
-__init__
Args | |
---|---|
+`choice_features` + | ++a dict that stores the features used in choice model: +`no_click_mass`: a float indicating the mass given to a no click option. + | +
Attributes | |
---|---|
`score_no_click` |
-
+ |
`scores` |
-
+ |
+
+choose_item()
+
Returns selected index of document in the slate.
-#### Returns:
-
-* `selected_index`: a integer indicating which item was chosen, or None
- if none were selected.
+
+
+ Returns | |
---|---|
+`selected_index` + | ++a integer indicating which item was chosen, or None if +none were selected. + | +
score_documents
+score_documents(
+ user_state, doc_obs
)
-```
+
Computes unnormalized scores of documents in the slate given user state.
-#### Args:
-
-* `user_state`: An instance of AbstractUserState.
-* `doc_obs`: A numpy array that represents the observation of all
- documents in the slate.
-
-#### Attributes:
+
+
+ Args | |
---|---|
+`user_state` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the slate. + | +
Attributes | |
---|---|
+`scores` + | ++A numpy array that stores the scores of all documents. + | +
+`score_no_click` + | ++A float that represents the score for the action of +picking no document. + | +
+recsim.choice_model.MultinomialProportionalChoiceModel(
+ choice_features
+)
+
+
Samples item x in scores according to p(x) = x - min_normalizer / sum(x -
min_normalizer)
-#### Attributes:
-
-* `min_normalizer`: A float (<= 0) used to offset the scores to be
- positive. Specifically, if the scores have negative elements, then they do
- not form a valid probability distribution for sampling. Subtracting the
- least expected element is one heuristic for normalization.
-* `no_click_mass`: An optional float indicating the mass given to a no
- click option
-
-__init__
Attributes | |
---|---|
`min_normalizer` | A float (<= 0) used to offset the scores +to be positive. Specifically, if the scores have negative elements, then they do +not form a valid probability distribution for sampling. Subtracting the least +expected element is one heuristic for normalization. |
+`no_click_mass` | An optional float indicating the mass given to a no +click option |
`score_no_click` | -## Properties + |
`scores` |
-
-
- |
+
+choose_item()
+
Returns selected index of document in the slate.
-#### Returns:
-
-* `selected_index`: a integer indicating which item was chosen, or None
- if none were selected.
+
+
+ Returns | |
---|---|
+`selected_index` + | ++a integer indicating which item was chosen, or None if +none were selected. + | +
score_documents
+score_documents(
+ user_state, doc_obs
)
-```
+
Computes unnormalized scores of documents in the slate given user state.
-#### Args:
-
-* `user_state`: An instance of AbstractUserState.
-* `doc_obs`: A numpy array that represents the observation of all
- documents in the slate.
-
-#### Attributes:
+
+
+ Args | |
---|---|
+`user_state` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the slate. + | +
Attributes | |
---|---|
+`scores` + | ++A numpy array that stores the scores of all documents. + | +
+`score_no_click` + | ++A float that represents the score for the action of +picking no document. + | +
Attributes | |
---|---|
`score_no_click` |
-
+ |
`scores` | + + | +
+choose_item()
+
Returns selected index of document in the slate.
-#### Returns:
-
-* `selected_index`: a integer indicating which item was chosen, or None
- if none were selected.
+
+
+ Returns | |
---|---|
+`selected_index` + | ++a integer indicating which item was chosen, or None if +none were selected. + | +
score_documents
+Computes unnormalized scores of documents in the slate given user state. -#### Args: - -* `user_state`: An instance of AbstractUserState. -* `doc_obs`: A numpy array that represents the observation of all - documents in the slate. - -#### Attributes: + + +@abc.abstractmethod
+score_documents( + user_state, doc_obs ) -``` +
Args | |
---|---|
+`user_state` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the slate. + | +
Attributes | |
---|---|
+`scores` + | ++A numpy array that stores the scores of all documents. + | +
+`score_no_click` + | ++A float that represents the score for the action of +picking no document. + | +
+recsim.choice_model.ProportionalCascadeChoiceModel(
+ choice_features
+)
+
+
Clicks the item at position i according to attention_prob * score_scaling *
(score(i) - min_normalizer) by going through the slate in order, and stopping
once an item has been clicked.
-__init__
Attributes | |
---|---|
`score_no_click` |
-
+ |
`scores` |
-
+ |
+
+choose_item()
+
Returns selected index of document in the slate.
-#### Returns:
-
-* `selected_index`: a integer indicating which item was chosen, or None
- if none were selected.
+
+
+ Returns | |
---|---|
+`selected_index` + | ++a integer indicating which item was chosen, or None if +none were selected. + | +
score_documents
+score_documents(
+ user_state, doc_obs
)
-```
+
Computes unnormalized scores of documents in the slate given user state.
-#### Args:
-
-* `user_state`: An instance of AbstractUserState.
-* `doc_obs`: A numpy array that represents the observation of all
- documents in the slate.
-
-#### Attributes:
+
+
+ Args | |
---|---|
+`user_state` + | ++An instance of AbstractUserState. + | +
+`doc_obs` + | ++A numpy array that represents the observation of all documents in +the slate. + | +
Attributes | |
---|---|
+`scores` + | ++A numpy array that stores the scores of all documents. + | +
+`score_no_click` + | ++A float that represents the score for the action of +picking no document. + | +
+recsim.choice_model.softmax(
+ vector
+)
+
diff --git a/docs/api_docs/python/recsim/document.md b/docs/api_docs/python/recsim/document.md
index e5cbf8d..d65332b 100644
--- a/docs/api_docs/python/recsim/document.md
+++ b/docs/api_docs/python/recsim/document.md
@@ -5,7 +5,10 @@
# Module: recsim.document
+
+
__init__
+recsim.document.AbstractDocument(
+ doc_id
+)
+
-View
-source
-
-```python
-__init__(doc_id)
-```
-
-Initialize self. See help(type(self)) for accurate signature.
+
## Methods
@@ -42,9 +36,10 @@ Initialize self. See help(type(self)) for accurate signature.
View
source
-```python
-create_observation()
-```
++Returns observable properties of this document as a float array. @@ -53,9 +48,9 @@ Returns observable properties of this document as a float array. View source -```python -doc_id() -``` +@abc.abstractmethod
+create_observation() +
+doc_id()
+
Returns the document ID.
@@ -64,9 +59,14 @@ Returns the document ID.
View
source
-```python
-@classmethod
-observation_space(cls)
-```
++Gym space that defines how documents are represented. + +## Class Variables + +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md b/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md index f9054e4..4a462f0 100644 --- a/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md +++ b/docs/api_docs/python/recsim/document/AbstractDocumentSampler.md @@ -1,7 +1,6 @@@classmethod
+@abc.abstractmethod
+observation_space() +
__init__
+recsim.document.AbstractDocumentSampler(
+ doc_ctor, seed=0
)
-```
-
-Initialize self. See help(type(self)) for accurate signature.
+
-## Properties
+
+
-num_clusters
Attributes | |
---|---|
+`num_clusters` + | +Returns the number of document clusters. Returns 0 if not applicable. + | +
+get_doc_ctor()
+
Returns the constructor/class of the documents that will be sampled.
@@ -64,18 +62,19 @@ Returns the constructor/class of the documents that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_document
+Samples and return an instantiation of AbstractDocument. @@ -84,11 +83,10 @@ Samples and return an instantiation of AbstractDocument. View source -```python -update_state( - documents, - responses +@abc.abstractmethod
+sample_document() +
+update_state(
+ documents, responses
)
-```
+
Update document state (if needed) given user's (or users') responses.
diff --git a/docs/api_docs/python/recsim/document/CandidateSet.md b/docs/api_docs/python/recsim/document/CandidateSet.md
index 1626daa..b50f0d8 100644
--- a/docs/api_docs/python/recsim/document/CandidateSet.md
+++ b/docs/api_docs/python/recsim/document/CandidateSet.md
@@ -13,35 +13,26 @@
# recsim.document.CandidateSet
-
+
+recsim.document.CandidateSet()
+
+
The candidate set is represented as a hashmap (dictionary), with documents
indexed by their document ID.
-__init__
add_document
+add_document(
+ document
+)
+
Adds a document to the candidate set.
@@ -60,9 +53,9 @@ Adds a document to the candidate set.
View
source
-```python
-create_observation()
-```
+
+create_observation()
+
Returns a dictionary of observable features of documents.
@@ -71,9 +64,9 @@ Returns a dictionary of observable features of documents.
View
source
-```python
-get_all_documents()
-```
+
+get_all_documents()
+
Returns all documents.
@@ -82,39 +75,64 @@ Returns all documents.
View
source
-```python
-get_documents(document_ids)
-```
+
+get_documents(
+ document_ids
+)
+
Gets the documents associated with the specified document IDs.
-#### Args:
+
+
+ Args | |
---|---|
+`document_ids` + | ++an array representing indices into the candidate set. +Indices can be integers or string-encoded integers. + | +
Returns | |
---|---|
+(documents) an ordered list of AbstractDocuments associated with the +document ids. + | +
observation_space
+observation_space()
+
remove_document
+remove_document(
+ document
+)
+
Removes a document from the set (to simulate a changing corpus).
@@ -123,8 +141,8 @@ Removes a document from the set (to simulate a changing corpus).
View
source
-```python
-size()
-```
+
+size()
+
Returns an integer, the number of documents in this candidate set.
diff --git a/docs/api_docs/python/recsim/environments.md b/docs/api_docs/python/recsim/environments.md
index 8848e77..d490527 100644
--- a/docs/api_docs/python/recsim/environments.md
+++ b/docs/api_docs/python/recsim/environments.md
@@ -5,7 +5,10 @@
# Module: recsim.environments
+
+
+recsim.environments.interest_evolution.FLAGS(
+ argv, known_only=False
+)
+
+
+
+
+A 'FlagValues' can then scan command line arguments, passing flag arguments
+through to the 'Flag' objects that it owns. It also provides easy access to the
+flag values. Typically only one 'FlagValues' object is needed by an application:
+flags.FLAGS
+
+This class is heavily overloaded:
+
+'Flag' objects are registered via __setitem__: FLAGS['longname'] = x # register
+a new flag
+
+The .value attribute of the registered 'Flag' objects can be accessed as
+attributes of this 'FlagValues' object, through __getattr__. Both the long and
+short name of the original 'Flag' objects can be used to access its value:
+FLAGS.longname # parsed flag value FLAGS.x # parsed flag value (short name)
+
+Command line arguments are scanned and passed to the registered 'Flag' objects
+through the __call__ method. Unparsed arguments, including
+argv[0](e.g. the program name) are returned. argv = FLAGS(sys.argv) # scan
+command line arguments
+
+The original registered Flag objects can be retrieved through the use of the
+dictionary-like operator, __getitem__: x = FLAGS['longname'] # access the
+registered Flag object
+
+The str() operator of a 'FlagValues' object provides help for all of the
+registered 'Flag' objects.
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md
index 7a17de9..8346e36 100644
--- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md
+++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvResponse.md
@@ -10,55 +10,114 @@
# recsim.environments.interest_evolution.IEvResponse
-
+
__init__
+recsim.environments.interest_evolution.IEvResponse(
+ clicked=False, watch_time=0.0, liked=False, quality=0.0, cluster_id=0.0
)
-```
+
-Creates a new user response for a video.
+
-#### Args:
+
+
+ Args | |
---|---|
+`clicked` + | ++A boolean indicating whether the video was clicked + | +
+`watch_time` + | ++A float for fraction of the video watched + | +
+`liked` + | ++A boolean indicating whether the video was liked + | +
+`quality` + | ++A float for document quality + | +
+`cluster_id` + | ++a integer for the cluster ID of the document. + | +
Attributes | |
---|---|
+`clicked` + | ++A boolean indicating whether the video was clicked. + | +
+`watch_time` + | ++A float for fraction of the video watched. + | +
+`liked` + | ++A boolean indicating whether the video was liked. + | +
+`quality` + | ++A float indicating the quality of the video. + | +
+`cluster_id` + | ++A integer representing the cluster ID of the video. + | +
+create_observation()
+
Creates a tensor observation of this response.
@@ -78,14 +137,14 @@ Creates a tensor observation of this response.
View
source
-```python
-@classmethod
-response_space(cls)
-```
++ArraySpec that defines how a single response is represented. -## Class Members +## Class Variables * `MAX_QUALITY_SCORE = 100` * `MIN_QUALITY_SCORE = -100` diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md index 3c24e63..afa2452 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserDistributionSampler.md @@ -9,37 +9,27 @@ # recsim.environments.interest_evolution.IEvUserDistributionSampler - +@classmethod
+response_space() +
__init__
+recsim.environments.interest_evolution.IEvUserDistributionSampler(
+ user_ctor=recsim.environments.interest_evolution.IEvUserState, **kwargs
)
-```
+
-Creates a new user state sampler.
+
## Methods
@@ -48,9 +38,9 @@ Creates a new user state sampler.
View
source
-```python
-get_user_ctor()
-```
+
+get_user_ctor()
+
Returns the constructor/class of the user states that will be sampled.
@@ -59,17 +49,17 @@ Returns the constructor/class of the user states that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_user
+sample_user()
+
Samples a new user, with a new set of features.
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md
index 203e0fe..be9a7a3 100644
--- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md
+++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvUserModel.md
@@ -15,65 +15,117 @@
# recsim.environments.interest_evolution.IEvUserModel
-
+
__init__
+recsim.environments.interest_evolution.IEvUserModel(
+ slate_size, choice_model_ctor=None,
response_model_ctor=recsim.environments.interest_evolution.IEvResponse,
user_state_ctor=recsim.environments.interest_evolution.IEvUserState,
- no_click_mass=1.0,
- seed=0,
- alpha_x_intercept=1.0,
- alpha_y_intercept=0.3
+ no_click_mass=1.0, seed=0, alpha_x_intercept=1.0, alpha_y_intercept=0.3
)
-```
+
+
+
-Initializes a new user model.
+Assumes the user state contains: - user_interests - time_budget - no_click_mass
-#### Args:
+
+
+ Args | |
---|---|
+`slate_size` + | ++An integer representing the size of the slate + | +
+`choice_model_ctor` + | ++A contructor function to create user choice model. + | +
+`response_model_ctor` + | ++A constructor function to create response. The +function should take a string of doc ID as input and returns a +IEvResponse object. + | +
+`user_state_ctor` + | ++A constructor to create user state + | +
+`no_click_mass` + | ++A float that will be passed to compute probability of no +click. + | +
+`seed` + | ++A integer used as the seed of the choice model. + | +
+`alpha_x_intercept` + | ++A float for the x intercept of the line used to compute +interests update factor. + | +
+`alpha_y_intercept` + | ++A float for the y intercept of the line used to compute +interests update factor. + | +
Raises | |
---|---|
+`Exception` + | ++if choice_model_ctor is not specified. + | +
+create_observation()
+
Emits obesrvation about user's state.
@@ -93,9 +145,9 @@ Emits obesrvation about user's state.
View
source
-```python
-get_response_model_ctor()
-```
+
+get_response_model_ctor()
+
Returns a constructor for the type of response this model will create.
@@ -104,9 +156,9 @@ Returns a constructor for the type of response this model will create.
View
source
-```python
-is_terminal()
-```
+
+is_terminal()
+
Returns a boolean indicating if the session is over.
@@ -115,9 +167,9 @@ Returns a boolean indicating if the session is over.
View
source
-```python
-observation_space()
-```
+
+observation_space()
+
A Gym.spaces object that describes possible user observations.
@@ -126,9 +178,9 @@ A Gym.spaces object that describes possible user observations.
View
source
-```python
-reset()
-```
+
+reset()
+
Resets the user.
@@ -137,9 +189,9 @@ Resets the user.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
Resets the sampler.
@@ -148,40 +200,65 @@ Resets the sampler.
View
source
-```python
-response_space()
-```
+
+response_space()
+
simulate_response
+simulate_response(
+ documents
+)
+
Simulates the user's response to a slate of documents with choice model.
-#### Args:
+
-* `documents`: a list of IEvVideo objects
+ Args | |
---|---|
+`documents` + | ++a list of IEvVideo objects + | +
Returns | |
---|---|
+`responses` + | ++a list of IEvResponse objects, one for each document + | +
update_state
+update_state(
+ slate_documents, responses
)
-```
+
Updates the user state based on responses to the slate.
@@ -190,8 +267,26 @@ update the user's interests some small step size alpha based on the user's
interest in that topic. The update is either towards the video's features or
away, and is determined stochastically by the user's interest in that document.
-#### Args:
-
-* `slate_documents`: a list of IEvVideos representing the slate
-* `responses`: a list of IEvResponses representing the user's response
- to each video in the slate.
+
+
+ Args | |
---|---|
+`slate_documents` + | ++a list of IEvVideos representing the slate + | +
+`responses` + | ++a list of IEvResponses representing the user's response to each +video in the slate. + | +
__init__
+recsim.environments.interest_evolution.IEvUserState(
+ user_interests, time_budget=None, score_scaling=None, attention_prob=None,
+ no_click_mass=None, keep_interact_prob=None, min_doc_utility=None,
+ user_update_alpha=None, watched_videos=None, impressed_videos=None,
+ liked_videos=None, step_penalty=None, min_normalizer=None,
+ user_quality_factor=None, document_quality_factor=None
)
-```
+
-Initializes a new user.
+
## Methods
@@ -61,9 +42,9 @@ Initializes a new user.
View
source
-```python
-create_observation()
-```
+
+create_observation()
+
Return an observation of this user's observable state.
@@ -72,10 +53,10 @@ Return an observation of this user's observable state.
View
source
-```python
-@classmethod
-observation_space(cls)
-```
++Gym.spaces object that defines how user states are represented. @@ -84,10 +65,12 @@ Gym.spaces object that defines how user states are represented. View source -```python -score_document(doc_obs) -``` +@classmethod
+observation_space() +
+score_document(
+ doc_obs
+)
+
-## Class Members
+## Class Variables
* `NUM_FEATURES = 20`
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md
index 4c0f226..54d2e5e 100644
--- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md
+++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideo.md
@@ -11,47 +11,64 @@
# recsim.environments.interest_evolution.IEvVideo
-
+
__init__
+recsim.environments.interest_evolution.IEvVideo(
+ doc_id, features, cluster_id=None, video_length=None, quality=None
)
-```
+
-Generates a random set of features for this interest evolution Video.
+
+
+
+
+ Attributes | |
---|---|
+`features` + | ++A numpy array that stores video features. + | +
+`cluster_id` + | ++An integer that represents. + | +
+`video_length` + | ++A float for video length. + | +
+`quality` + | ++a float the represents document quality. + | +
+create_observation()
+
Returns observable properties of this document as a float array.
@@ -71,9 +88,9 @@ Returns observable properties of this document as a float array.
View
source
-```python
-doc_id()
-```
+
+doc_id()
+
Returns the document ID.
@@ -82,14 +99,14 @@ Returns the document ID.
View
source
-```python
-@classmethod
-observation_space(cls)
-```
++Gym space that defines how documents are represented. -## Class Members +## Class Variables * `MAX_VIDEO_LENGTH = 100.0` * `NUM_FEATURES = 20` diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md index 9db8eba..9c75df2 100644 --- a/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_evolution/IEvVideoSampler.md @@ -1,7 +1,6 @@@classmethod
+observation_space() +
__init__
+recsim.environments.interest_evolution.IEvVideoSampler(
doc_ctor=recsim.environments.interest_evolution.IEvVideo,
- min_feature_value=-1.0,
- max_feature_value=1.0,
- video_length_mean=4.3,
- video_length_std=1.0,
- **kwargs
+ min_feature_value=-1.0, max_feature_value=1.0, video_length_mean=4.3,
+ video_length_std=1.0, **kwargs
)
-```
-
-Creates a new interest evolution video sampler.
+
-#### Args:
+
-* `doc_ctor`: A class/constructor for the type of videos that will be
- sampled by this sampler.
-* `min_feature_value`: A float for the min feature value.
-* `max_feature_value`: A float for the max feature value.
-* `video_length_mean`: A float for the mean of the video length.
-* `video_length_std`: A float for the std deviation of video length.
-* `**kwargs`: other keyword parameters for the video sampler.
+
+
+ Args | |
---|---|
+`doc_ctor` + | ++A class/constructor for the type of videos that will be sampled +by this sampler. + | +
+`min_feature_value` + | ++A float for the min feature value. + | +
+`max_feature_value` + | ++A float for the max feature value. + | +
+`video_length_mean` + | ++A float for the mean of the video length. + | +
+`video_length_std` + | ++A float for the std deviation of video length. + | +
+`**kwargs` + | ++other keyword parameters for the video sampler. + | +
num_clusters
Attributes | |
---|---|
+`num_clusters` + | +Returns the number of document clusters. Returns 0 if not applicable. + | +
+get_doc_ctor()
+
Returns the constructor/class of the documents that will be sampled.
@@ -81,18 +120,18 @@ Returns the constructor/class of the documents that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_document
+sample_document()
+
Samples and return an instantiation of AbstractDocument.
@@ -101,11 +140,10 @@ Samples and return an instantiation of AbstractDocument.
View
source
-```python
-update_state(
- documents,
- responses
+
+update_state(
+ documents, responses
)
-```
+
Update document state (if needed) given user's (or users') responses.
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md
index e453d6c..3536933 100644
--- a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md
+++ b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelUserSampler.md
@@ -9,34 +9,28 @@
# recsim.environments.interest_evolution.UtilityModelUserSampler
-
+
__init__
+recsim.environments.interest_evolution.UtilityModelUserSampler(
+ user_ctor=recsim.environments.interest_evolution.IEvUserState,
+ document_quality_factor=1.0, no_click_mass=1.0, min_normalizer=-1.0, **kwargs
)
-```
+
-Creates a new user state sampler.
+
## Methods
@@ -45,9 +39,9 @@ Creates a new user state sampler.
View
source
-```python
-get_user_ctor()
-```
+
+get_user_ctor()
+
Returns the constructor/class of the user states that will be sampled.
@@ -56,17 +50,17 @@ Returns the constructor/class of the user states that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_user
+sample_user()
+
Creates a new instantiation of this user's hidden state parameters.
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md
index c7c8ab7..ed0b748 100644
--- a/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md
+++ b/docs/api_docs/python/recsim/environments/interest_evolution/UtilityModelVideoSampler.md
@@ -1,7 +1,6 @@
__init__
+recsim.environments.interest_evolution.UtilityModelVideoSampler(
+ doc_ctor=recsim.environments.interest_evolution.IEvVideo, min_utility=-3.0,
+ max_utility=3.0, video_length=4.0, **kwargs
)
-```
-
-Creates a new utility model video sampler.
+
-#### Args:
+
-* `doc_ctor`: A class/constructor for the type of videos that will be
- sampled by this sampler.
-* `min_utility`: A float for the min utility score.
-* `max_utility`: A float for the max utility score.
-* `video_length`: A float for the video_length in minutes.
-* `**kwargs`: other keyword parameters for the video sampler.
+
+
+ Args | |
---|---|
+`doc_ctor` + | ++A class/constructor for the type of videos that will be sampled +by this sampler. + | +
+`min_utility` + | ++A float for the min utility score. + | +
+`max_utility` + | ++A float for the max utility score. + | +
+`video_length` + | ++A float for the video_length in minutes. + | +
+`**kwargs` + | ++other keyword parameters for the video sampler. + | +
num_clusters
Attributes | |
---|---|
+`num_clusters` + | +Returns the number of document clusters. Returns 0 if not applicable. + | +
+get_doc_ctor()
+
Returns the constructor/class of the documents that will be sampled.
@@ -79,18 +112,18 @@ Returns the constructor/class of the documents that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_document
+sample_document()
+
Samples and return an instantiation of AbstractDocument.
@@ -99,11 +132,10 @@ Samples and return an instantiation of AbstractDocument.
View
source
-```python
-update_state(
- documents,
- responses
+
+update_state(
+ documents, responses
)
-```
+
Update document state (if needed) given user's (or users') responses.
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md b/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md
index 003a81d..685cb22 100644
--- a/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md
+++ b/docs/api_docs/python/recsim/environments/interest_evolution/clicked_watchtime_reward.md
@@ -5,28 +5,53 @@
# recsim.environments.interest_evolution.clicked_watchtime_reward
-
+
+recsim.environments.interest_evolution.clicked_watchtime_reward(
+ responses
+)
+
-#### Args:
+
-* `responses`: A list of IEvResponse objects
+ Args | |
---|---|
+`responses` + | ++A list of IEvResponse objects + | +
Returns | |
---|---|
+`reward` + | ++A float representing the total watch time from the responses + | +
+recsim.environments.interest_evolution.create_environment(
+ env_config
+)
+
diff --git a/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md b/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md
index 80b3c5c..a89fb87 100644
--- a/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md
+++ b/docs/api_docs/python/recsim/environments/interest_evolution/total_clicks_reward.md
@@ -5,27 +5,53 @@
# recsim.environments.interest_evolution.total_clicks_reward
-
+
+recsim.environments.interest_evolution.total_clicks_reward(
+ responses
+)
+
-#### Args:
+
+
+ Args | |
---|---|
+`responses` + | ++A list of IEvResponse objects + | +
Returns | |
---|---|
+`reward` + | ++A float representing the total clicks from the responses + | +
+recsim.environments.interest_exploration.IEClusterUserSampler(
+ user_type_distribution=(0.3, 0.7), user_document_mean_affinity_matrix=((0.1,
+ 0.7), (0.7, 0.1)), user_document_stddev_affinity_matrix=((0.1, 0.1), (0.1,
+ 0.1)), user_ctor=recsim.environments.interest_exploration.IEUserState, **kwargs
+)
+
+
This sampler consumes a distribution over user types and type-specific
@@ -35,37 +41,71 @@ type-specific parameters. In this case, these are the mean and scale of a
lognormal distribution, i.e. the affinity of user u of type U towards an
document of type D is drawn according to lognormal(mean(U,D), scale(U,D)).
-#### Args:
-
-* `user_type_distribution`: a non-negative array of dimension equal to
- the number of user types, whose entries sum to one.
-* `user_document_mean_affinity_matrix`: a non-negative two-dimensional
- array with dimensions number of user types by number of document topics.
- Represents the mean of the affinity score of a user type to a topic.
-* `user_document_stddev_affinity_matrix`: a non-negative
- two-dimensional array with dimensions number of user types by number of
- document topics. Represents the scale of the affinity score of a user type
- to a topic.
-* `user_ctor`: constructor for a user state.
-
-__init__
Args | |
---|---|
+`user_type_distribution` + | ++a non-negative array of dimension equal to the +number of user types, whose entries sum to one. + | +
+`user_document_mean_affinity_matrix` + | ++a non-negative two-dimensional array +with dimensions number of user types by number of document topics. +Represents the mean of the affinity score of a user type to a topic. + | +
+`user_document_stddev_affinity_matrix` + | ++a non-negative two-dimensional array +with dimensions number of user types by number of document topics. +Represents the scale of the affinity score of a user type to a topic. + | +
+`user_ctor` + | ++constructor for a user state. + | +
Args | |
---|---|
+`user_ctor` + | ++A class/constructor for the type of user states that will be +sampled. + | +
+`seed` + | ++An integer for a random seed. + | +
+avg_affinity_given_topic()
+
get_user_ctor
+get_user_ctor()
+
Returns the constructor/class of the user states that will be sampled.
@@ -94,17 +134,17 @@ Returns the constructor/class of the user states that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_user
+sample_user()
+
Creates a new instantiation of this user's hidden state parameters.
diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md
index 87b7000..9a11838 100644
--- a/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md
+++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEDocument.md
@@ -6,48 +6,55 @@
+
__init__
+recsim.environments.interest_exploration.IEDocument(
+ doc_id, cluster_id, quality
)
-```
+
-Initialize self. See help(type(self)) for accurate signature.
+
+
+
+
+ Attributes | |
---|---|
+`cluster_id` + | ++an integer representing the document cluster. + | +
+`quality` + | ++non-negative real number representing the quality of the document. + | +
+create_observation()
+
Returns observable properties of this document as a float array.
@@ -67,9 +74,9 @@ Returns observable properties of this document as a float array.
View
source
-```python
-doc_id()
-```
+
+doc_id()
+
Returns the document ID.
@@ -78,13 +85,14 @@ Returns the document ID.
View
source
-```python
-@classmethod
-observation_space(cls)
-```
++Gym space that defines how documents are represented. -## Class Members +## Class Variables * `NUM_CLUSTERS = 0` +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md index 69ce4cf..5f950cc 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEResponse.md @@ -9,43 +9,56 @@ # recsim.environments.interest_exploration.IEResponse - +@classmethod
+observation_space() +
__init__
+recsim.environments.interest_exploration.IEResponse(
+ clicked=False, quality=0.0, cluster_id=0
)
-```
+
-Initialize self. See help(type(self)) for accurate signature.
+
+
+
+
+ Attributes | |
---|---|
+`clicked` + | ++boolean indicating whether the item was clicked or not. + | +
+`quality` + | ++a float indicating the quality of the document. + | +
+`cluster_id` + | ++an integer representing the topic ID of the document. + | +
+create_observation()
+
Creates a tensor observation of this response.
@@ -65,13 +78,13 @@ Creates a tensor observation of this response.
View
source
-```python
-@classmethod
-response_space(cls)
-```
++ArraySpec that defines how a single response is represented. -## Class Members +## Class Variables * `NUM_CLUSTERS = 0` diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md b/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md index 3c3f183..3cdfc27 100644 --- a/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md +++ b/docs/api_docs/python/recsim/environments/interest_exploration/IETopicDocumentSampler.md @@ -1,7 +1,6 @@@classmethod
+response_space() +
+recsim.environments.interest_exploration.IETopicDocumentSampler(
+ topic_distribution=(0.2, 0.8), topic_quality_mean=(0.8, 0.2),
+ topic_quality_stddev=(0.1, 0.1),
+ doc_ctor=recsim.environments.interest_exploration.IEDocument, **kwargs
+)
+
+
Consumes a distribution over document topics and topic-specific parameters for
generating a quality score (according to a lognormal distribution).
-#### Args:
-
-* `topic_distribution`: a non-negative array of dimension equal to the
- number of topics, whose entries sum to one.
-* `topic_quality_mean`: a non-negative array of dimension equal to the
- number of topics, representing the mean of the topic quality score.
-* `topic_quality_stddev`: a non-negative array of dimension equal to
- the number of topics, representing the scale of the topic quality score.
-* `doc_ctor`: A class/constructor for the type of videos that will be
- sampled by this sampler.
-
-__init__
Args | |
---|---|
+`topic_distribution` + | ++a non-negative array of dimension equal to the +number of topics, whose entries sum to one. + | +
+`topic_quality_mean` + | ++a non-negative array of dimension equal to the +number of topics, representing the mean of the topic quality score. + | +
+`topic_quality_stddev` + | ++a non-negative array of dimension equal to the +number of topics, representing the scale of the topic quality score. + | +
+`doc_ctor` + | ++A class/constructor for the type of videos that will be sampled +by this sampler. + | +
num_clusters
Attributes | |
---|---|
+`num_clusters` + | +Returns the number of document clusters. Returns 0 if not applicable. + | +
+get_doc_ctor()
+
Returns the constructor/class of the documents that will be sampled.
@@ -78,18 +112,18 @@ Returns the constructor/class of the documents that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_document
+sample_document()
+
Samples the topic and then samples document features given the topic.
@@ -98,11 +132,10 @@ Samples the topic and then samples document features given the topic.
View
source
-```python
-update_state(
- documents,
- responses
+
+update_state(
+ documents, responses
)
-```
+
Update document state (if needed) given user's (or users') responses.
diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md b/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md
index d8eb990..ae88380 100644
--- a/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md
+++ b/docs/api_docs/python/recsim/environments/interest_exploration/IEUserModel.md
@@ -1,7 +1,6 @@
+recsim.environments.interest_exploration.IEUserModel(
+ slate_size, no_click_mass=5,
+ choice_model_ctor=recsim.choice_model.MultinomialLogitChoiceModel,
+ user_state_ctor=None, response_model_ctor=None, seed=0
+)
+
+
The user in this scenario is completely characterized by a vector g of affinity
@@ -42,48 +47,69 @@ on these scores.
The state space consists of a vector of affinity scores which is unique to the
user and static but not observable.
-#### Args:
-
-slate_size: An integer representing the size of the slate. no_click_mass: A
-float indicating the mass given to a no-click option. Must be positive,
-otherwise CTR is always 1. choice_model_ctor: A contructor function to create
-user choice model. user_state_ctor: A constructor to create user state.
-response_model_ctor: A constructor function to create response. The function
-should take a string of doc ID as input and returns a IEResponse object. seed:
-an integer used as the seed in random sampling.
-
-__init__
Args |
---|
Args | |
---|---|
+`response_model_ctor` + | ++A class/constructor representing the type of +responses this model will generate. + | +
+`user_sampler` + | ++An instance of AbstractUserSampler that can generate +initial user states from an initial state distribution. + | +
+`slate_size` + | ++integer number of documents that can be served to the user at +any interaction. + | +
avg_user_state
Attributes | |
---|---|
+`avg_user_state` + | +Returns the prior of user state. + | +
+create_observation()
+
Emits obesrvation about user's state.
@@ -103,9 +129,9 @@ Emits obesrvation about user's state.
View
source
-```python
-get_response_model_ctor()
-```
+
+get_response_model_ctor()
+
Returns a constructor for the type of response this model will create.
@@ -114,9 +140,9 @@ Returns a constructor for the type of response this model will create.
View
source
-```python
-is_terminal()
-```
+
+is_terminal()
+
Returns a boolean indicating if the session is over.
@@ -125,9 +151,9 @@ Returns a boolean indicating if the session is over.
View
source
-```python
-observation_space()
-```
+
+observation_space()
+
A Gym.spaces object that describes possible user observations.
@@ -136,9 +162,9 @@ A Gym.spaces object that describes possible user observations.
View
source
-```python
-reset()
-```
+
+reset()
+
Resets the user.
@@ -147,9 +173,9 @@ Resets the user.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
Resets the sampler.
@@ -158,46 +184,89 @@ Resets the sampler.
View
source
-```python
-response_space()
-```
+
+response_space()
+
simulate_response
+simulate_response(
+ documents
+)
+
Simulates the user's response to a slate of documents with choice model.
-#### Args:
+
+
+ Args | |
---|---|
+`documents` + | ++a list of IEDocument objects in the slate. + | +
Returns | |
---|---|
+`responses` + | ++a list of IEResponse objects, one for each document. + | +
update_state
+update_state(
+ slate_documents, responses
)
-```
+
Updates the user's state based on the slate and document selected.
-#### Args:
+
+
+ Args | |
---|---|
+`slate_documents` + | ++A list of AbstractDocuments for items in the slate. + | +
+`responses` + | ++A list of AbstractResponses for each item in the slate. + | +
+recsim.environments.interest_exploration.IEUserState(
+ topic_affinity
+)
+
-* `topic_affinity`: a nonnegative vector holds document type affinities
- which are not temporal dynamics and hidden.
-
-__init__
Attributes | |
---|---|
+`topic_affinity` + | ++a nonnegative vector holds document type affinities which +are not temporal dynamics and hidden. + | +
+create_observation()
+
User's topic_affinity is not observable.
@@ -60,10 +66,10 @@ User's topic_affinity is not observable.
View
source
-```python
-@staticmethod
-observation_space()
-```
++Gym.spaces object that defines how user states are represented. @@ -72,8 +78,14 @@ Gym.spaces object that defines how user states are represented. View source -```python -score_document(doc_obs) -``` +@staticmethod
+observation_space() +
+score_document(
+ doc_obs
+)
+
Returns user document affinity plus document quality.
+
+## Class Variables
+
+* `NUM_FEATURES = None`
diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md b/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md
index a555897..f8050b5 100644
--- a/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md
+++ b/docs/api_docs/python/recsim/environments/interest_exploration/create_environment.md
@@ -5,20 +5,21 @@
# recsim.environments.interest_exploration.create_environment
-
+
+recsim.environments.interest_exploration.create_environment(
+ env_config
+)
+
diff --git a/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md b/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md
index 39685e8..17599a7 100644
--- a/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md
+++ b/docs/api_docs/python/recsim/environments/interest_exploration/total_clicks_reward.md
@@ -5,27 +5,53 @@
# recsim.environments.interest_exploration.total_clicks_reward
-
+
+recsim.environments.interest_exploration.total_clicks_reward(
+ responses
+)
+
-#### Args:
+
+
+ Args | |
---|---|
+`responses` + | ++A list of IEResponse objects + | +
Returns | |
---|---|
+`reward` + | ++A float representing the total clicks from the responses + | +
__init__
+recsim.environments.long_term_satisfaction.LTSDocument(
+ doc_id, clickbait_score
)
-```
+
-Initialize self. See help(type(self)) for accurate signature.
+
+
+
+
+ Attributes | |
---|---|
+`clickbait_score` + | ++real number in [0,1] representing the clickbaitiness of a +document. + | +
+create_observation()
+
Returns observable properties of this document as a float array.
@@ -64,9 +67,9 @@ Returns observable properties of this document as a float array.
View
source
-```python
-doc_id()
-```
+
+doc_id()
+
Returns the document ID.
@@ -75,9 +78,13 @@ Returns the document ID.
View
source
-```python
-@staticmethod
-observation_space()
-```
++Gym space that defines how documents are represented. + +## Class Variables + +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md index 8e8ef14..a556dfb 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSDocumentSampler.md @@ -1,7 +1,6 @@@staticmethod
+observation_space() +
+recsim.environments.long_term_satisfaction.LTSDocumentSampler(
+ doc_ctor=recsim.environments.long_term_satisfaction.LTSDocument, **kwargs
+)
+
-#### Args:
+
-doc_ctor: A class/constructor for the type of documents that will be sampled by
-this sampler.
+
-__init__
Args |
---|
num_clusters
Attributes | |
---|---|
+`num_clusters` + | +Returns the number of document clusters. Returns 0 if not applicable. + | +
+get_doc_ctor()
+
Returns the constructor/class of the documents that will be sampled.
@@ -72,18 +77,18 @@ Returns the constructor/class of the documents that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_document
+sample_document()
+
Samples and return an instantiation of AbstractDocument.
@@ -92,11 +97,10 @@ Samples and return an instantiation of AbstractDocument.
View
source
-```python
-update_state(
- documents,
- responses
+
+update_state(
+ documents, responses
)
-```
+
Update document state (if needed) given user's (or users') responses.
diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md
index 84f8123..ba9b8aa 100644
--- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md
+++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSResponse.md
@@ -9,48 +9,74 @@
# recsim.environments.long_term_satisfaction.LTSResponse
-
+
__init__
+recsim.environments.long_term_satisfaction.LTSResponse(
+ clicked=False, engagement=0.0
)
-```
+
-Creates a new user response for a document.
+
-#### Args:
+
+
+ Args | |
---|---|
+`clicked` + | ++boolean indicating whether the item was clicked or not. + | +
+`engagement` + | ++real number representing the degree of engagement with a +document (e.g. watch time). + | +
Attributes | |
---|---|
+`engagement` + | ++real number representing the degree of engagement with a +document (e.g. watch time). + | +
+`clicked` + | ++boolean indicating whether the item was clicked or not. + | +
+create_observation()
+
Creates a tensor observation of this response.
@@ -70,13 +96,13 @@ Creates a tensor observation of this response.
View
source
-```python
-@classmethod
-response_space(cls)
-```
++ArraySpec that defines how a single response is represented. -## Class Members +## Class Variables * `MAX_ENGAGEMENT_MAGNITUDE = 100.0` diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md index 8402641..2c3dd01 100644 --- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md +++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSStaticUserSampler.md @@ -9,34 +9,29 @@ # recsim.environments.long_term_satisfaction.LTSStaticUserSampler - +@classmethod
+response_space() +
__init__
+recsim.environments.long_term_satisfaction.LTSStaticUserSampler(
+ user_ctor=recsim.environments.long_term_satisfaction.LTSUserState,
+ memory_discount=0.7, sensitivity=0.01, innovation_stddev=0.05, choc_mean=5.0,
+ choc_stddev=1.0, kale_mean=4.0, kale_stddev=1.0, time_budget=60, **kwargs
)
-```
+
-Creates a new user state sampler.
+
## Methods
@@ -45,9 +40,9 @@ Creates a new user state sampler.
View
source
-```python
-get_user_ctor()
-```
+
+get_user_ctor()
+
Returns the constructor/class of the user states that will be sampled.
@@ -56,17 +51,17 @@ Returns the constructor/class of the user states that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_user
+sample_user()
+
Creates a new instantiation of this user's hidden state parameters.
diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md
index e5077d8..6f9cfc1 100644
--- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md
+++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/LTSUserModel.md
@@ -16,21 +16,25 @@
# recsim.environments.long_term_satisfaction.LTSUserModel
-
+
+recsim.environments.long_term_satisfaction.LTSUserModel(
+ slate_size, user_state_ctor=None, response_model_ctor=None, seed=0
+)
+
+
Implements a controlled continuous Hidden Markov Model of the user having the
@@ -60,30 +64,38 @@ A constructor to create user state. response_model_ctor: A constructor function
to create response. The function should take a string of doc ID as input and
returns a LTSResponse object. seed: an integer as the seed in random sampling.
-__init__
Args | |
---|---|
+`response_model_ctor` + | ++A class/constructor representing the type of +responses this model will generate. + | +
+`user_sampler` + | ++An instance of AbstractUserSampler that can generate +initial user states from an initial state distribution. + | +
+`slate_size` + | ++integer number of documents that can be served to the user at +any interaction. + | +
+create_observation()
+
Emits obesrvation about user's state.
@@ -103,29 +115,48 @@ Emits obesrvation about user's state.
View
source
-```python
-generate_response(
- doc,
- response
+
+generate_response(
+ doc, response
)
-```
+
Generates a response to a clicked document.
-#### Args:
+
+
+ Args | |
---|---|
+`doc` + | ++an LTSDocument object. + | +
+`response` + | ++an LTSResponse for the document. + | +
get_response_model_ctor
+get_response_model_ctor()
+
Returns a constructor for the type of response this model will create.
@@ -134,9 +165,9 @@ Returns a constructor for the type of response this model will create.
View
source
-```python
-is_terminal()
-```
+
+is_terminal()
+
Returns a boolean indicating if the session is over.
@@ -145,9 +176,9 @@ Returns a boolean indicating if the session is over.
View
source
-```python
-observation_space()
-```
+
+observation_space()
+
A Gym.spaces object that describes possible user observations.
@@ -156,9 +187,9 @@ A Gym.spaces object that describes possible user observations.
View
source
-```python
-reset()
-```
+
+reset()
+
Resets the user.
@@ -167,9 +198,9 @@ Resets the user.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
Resets the sampler.
@@ -178,45 +209,88 @@ Resets the sampler.
View
source
-```python
-response_space()
-```
+
+response_space()
+
simulate_response
+simulate_response(
+ documents
+)
+
Simulates the user's response to a slate of documents with choice model.
-#### Args:
+
+
+ Args | |
---|---|
+`documents` + | ++a list of LTSDocument objects. + | +
Returns | |
---|---|
+`responses` + | ++a list of LTSResponse objects, one for each document. + | +
update_state
+update_state(
+ slate_documents, responses
)
-```
+
Updates the user's latent state based on responses to the slate.
-#### Args:
-
-* `slate_documents`: a list of LTSDocuments representing the slate
-* `responses`: a list of LTSResponses representing the user's response
- to each document in the slate.
+
+
+ Args | |
---|---|
+`slate_documents` + | ++a list of LTSDocuments representing the slate + | +
+`responses` + | ++a list of LTSResponses representing the user's response to each +document in the slate. + | +
+recsim.environments.long_term_satisfaction.LTSUserState(
+ memory_discount, sensitivity, innovation_stddev, choc_mean, choc_stddev,
+ kale_mean, kale_stddev, net_positive_exposure, time_budget
+)
+
+
See the LTSUserModel class documentation for precise information about how the
@@ -36,27 +42,6 @@ kale_mean: mean of engagement with non-clickbaity content. kale_stddev: standard
deviation of engagement with non-clickbaity content. net_positive_exposure:
starting value for NPE (NPE_0). time_budget: length of a user session.
-__init__
create_observation
+create_observation()
+
User's state is not observable.
@@ -75,10 +60,10 @@ User's state is not observable.
View
source
-```python
-@staticmethod
-observation_space()
-```
++Gym.spaces object that defines how user states are represented. @@ -87,6 +72,12 @@ Gym.spaces object that defines how user states are represented. View source -```python -score_document(doc_obs) -``` +@staticmethod
+observation_space() +
+score_document(
+ doc_obs
+)
+
+
+## Class Variables
+
+* `NUM_FEATURES = None`
diff --git a/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md b/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md
index 726ce10..f99f8cd 100644
--- a/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md
+++ b/docs/api_docs/python/recsim/environments/long_term_satisfaction/clicked_engagement_reward.md
@@ -5,28 +5,53 @@
# recsim.environments.long_term_satisfaction.clicked_engagement_reward
-
+
+recsim.environments.long_term_satisfaction.clicked_engagement_reward(
+ responses
+)
+
-#### Args:
+
-* `responses`: A list of LTSResponse objects
+ Args | |
---|---|
+`responses` + | ++A list of LTSResponse objects + | +
Returns | |
---|---|
+`reward` + | ++A float representing the total watch time from the responses + | +
+recsim.environments.long_term_satisfaction.create_environment(
+ env_config
+)
+
diff --git a/docs/api_docs/python/recsim/simulator.md b/docs/api_docs/python/recsim/simulator.md
index 199d7f7..44d9547 100644
--- a/docs/api_docs/python/recsim/simulator.md
+++ b/docs/api_docs/python/recsim/simulator.md
@@ -5,7 +5,10 @@
# Module: recsim.simulator
+
+
__init__
+recsim.simulator.environment.AbstractEnvironment(
+ user_model, document_sampler, num_candidates, slate_size,
resample_documents=True
)
-```
-
-Initializes a new simulation environment.
-
-#### Args:
-
-* `user_model`: An instantiation of AbstractUserModel or list of such
- instantiations
-* `document_sampler`: An instantiation of AbstractDocumentSampler
-* `num_candidates`: An integer representing the size of the
- candidate_set
-* `slate_size`: An integer representing the slate size
-* `resample_documents`: A boolean indicating whether to resample the
- candidate set every step
-
-## Properties
+
-candidate_set
num_candidates
slate_size
Args | |
---|---|
+`user_model` + | ++An instantiation of AbstractUserModel or list of such +instantiations + | +
+`document_sampler` + | ++An instantiation of AbstractDocumentSampler + | +
+`num_candidates` + | ++An integer representing the size of the candidate_set + | +
+`slate_size` + | ++An integer representing the slate size + | +
+`resample_documents` + | ++A boolean indicating whether to resample the candidate +set every step + | +
user_model
Attributes | |
---|---|
+`user_model` + | ++An list or single instantiation of AbstractUserModel +representing the user/users. + | +
+`document_sampler` + | ++An instantiation of AbstractDocumentSampler. + | +
+`num_candidates` + | ++An integer representing the size of the candidate_set. + | +
+`slate_size` + | ++An integer representing the slate size. + | +
+`candidate_set` + | ++An instantiation of CandidateSet. + | +
+`num_clusters` + | ++An integer representing the number of document clusters. + | +
+Resets the environment and return the first observation. -#### Returns: - -* `user_obs`: An array of floats representing observations of the - user's current state -* `doc_obs`: An OrderedDict of document observations keyed by document - ids + + +@abc.abstractmethod
+reset() +
Returns | |
---|---|
+`user_obs` + | ++An array of floats representing observations of the user's +current state + | +
+`doc_obs` + | ++An OrderedDict of document observations keyed by document ids + | +
reset_sampler
+Resets the relevant samplers of documents and user/users. @@ -114,22 +181,65 @@ Resets the relevant samplers of documents and user/users. View source -```python -step(slate) -``` +@abc.abstractmethod
+reset_sampler() +
+Executes the action, returns next state observation and reward. -#### Args: - -* `slate`: An integer array of size slate_size (or list of such - arrays), where each element is an index into the set of current_documents - presented. - -#### Returns: + + +@abc.abstractmethod
+step( + slate +) +
Args | |
---|---|
+`slate` + | ++An integer array of size slate_size (or list of such arrays), where +each element is an index into the set of current_documents presented. + | +
Returns | |
---|---|
+`user_obs` + | ++A gym observation representing the user's next state + | +
+`doc_obs` + | ++A list of observations of the documents + | +
+`responses` + | ++A list of AbstractResponse objects for each item in the slate + | +
+`done` + | ++A boolean indicating whether the episode has terminated + | +
__init__
+recsim.simulator.environment.Environment(
+ user_model, document_sampler, num_candidates, slate_size,
resample_documents=True
)
-```
-
-Initializes a new simulation environment.
-
-#### Args:
-
-* `user_model`: An instantiation of AbstractUserModel or list of such
- instantiations
-* `document_sampler`: An instantiation of AbstractDocumentSampler
-* `num_candidates`: An integer representing the size of the
- candidate_set
-* `slate_size`: An integer representing the slate size
-* `resample_documents`: A boolean indicating whether to resample the
- candidate set every step
+
-## Properties
-
-candidate_set
num_candidates
slate_size
Args | |
---|---|
+`user_model` + | ++An instantiation of AbstractUserModel or list of such +instantiations + | +
+`document_sampler` + | ++An instantiation of AbstractDocumentSampler + | +
+`num_candidates` + | ++An integer representing the size of the candidate_set + | +
+`slate_size` + | ++An integer representing the slate size + | +
+`resample_documents` + | ++A boolean indicating whether to resample the candidate +set every step + | +
user_model
Attributes | |
---|---|
+`user_model` + | ++An instantiation of AbstractUserModel that represents a user. + | +
+`document_sampler` + | ++An instantiation of AbstractDocumentSampler. + | +
+`num_candidates` + | ++An integer representing the size of the candidate_set. + | +
+`slate_size` + | ++An integer representing the slate size. + | +
+`candidate_set` + | ++An instantiation of CandidateSet. + | +
+`num_clusters` + | ++An integer representing the number of document clusters. + | +
+reset()
+
Resets the environment and return the first observation.
-#### Returns:
-
-* `user_obs`: An array of floats representing observations of the
- user's current state
-* `doc_obs`: An OrderedDict of document observations keyed by document
- ids
+
+
+ Returns | |
---|---|
+`user_obs` + | ++An array of floats representing observations of the user's +current state + | +
+`doc_obs` + | ++An OrderedDict of document observations keyed by document ids + | +
reset_sampler
+reset_sampler()
+
Resets the relevant samplers of documents and user/users.
@@ -121,21 +189,64 @@ Resets the relevant samplers of documents and user/users.
View
source
-```python
-step(slate)
-```
+
+step(
+ slate
+)
+
Executes the action, returns next state observation and reward.
-#### Args:
-
-* `slate`: An integer array of size slate_size, where each element is
- an index into the set of current_documents presented
-
-#### Returns:
+
+
+ Args | |
---|---|
+`slate` + | ++An integer array of size slate_size, where each element is an index +into the set of current_documents presented + | +
Returns | |
---|---|
+`user_obs` + | ++A gym observation representing the user's next state + | +
+`doc_obs` + | ++A list of observations of the documents + | +
+`responses` + | ++A list of AbstractResponse objects for each item in the slate + | +
+`done` + | ++A boolean indicating whether the episode has terminated + | +
__init__
+recsim.simulator.environment.MultiUserEnvironment(
+ user_model, document_sampler, num_candidates, slate_size,
resample_documents=True
)
-```
-
-Initializes a new simulation environment.
-
-#### Args:
-
-* `user_model`: An instantiation of AbstractUserModel or list of such
- instantiations
-* `document_sampler`: An instantiation of AbstractDocumentSampler
-* `num_candidates`: An integer representing the size of the
- candidate_set
-* `slate_size`: An integer representing the slate size
-* `resample_documents`: A boolean indicating whether to resample the
- candidate set every step
-
-## Properties
+
-candidate_set
num_candidates
num_users
slate_size
Args | |
---|---|
+`user_model` + | ++An instantiation of AbstractUserModel or list of such +instantiations + | +
+`document_sampler` + | ++An instantiation of AbstractDocumentSampler + | +
+`num_candidates` + | ++An integer representing the size of the candidate_set + | +
+`slate_size` + | ++An integer representing the slate size + | +
+`resample_documents` + | ++A boolean indicating whether to resample the candidate +set every step + | +
user_model
Attributes | |
---|---|
+`user_model` + | ++A list of AbstractUserModel instances that represent users. + | +
+`num_users` + | ++An integer representing the number of users. + | +
+`document_sampler` + | ++An instantiation of AbstractDocumentSampler. + | +
+`num_candidates` + | ++An integer representing the size of the candidate_set. + | +
+`slate_size` + | ++An integer representing the slate size. + | +
+`candidate_set` + | ++An instantiation of CandidateSet. + | +
+`num_clusters` + | ++An integer representing the number of document clusters. + | +
+reset()
+
Resets the environment and return the first observation.
-#### Returns:
-
-* `user_obs`: An array of floats representing observations of the
- user's current state
-* `doc_obs`: An OrderedDict of document observations keyed by document
- ids
+
+
+ Returns | |
---|---|
+`user_obs` + | ++An array of floats representing observations of the user's +current state + | +
+`doc_obs` + | ++An OrderedDict of document observations keyed by document ids + | +
reset_sampler
+reset_sampler()
+
Resets the relevant samplers of documents and user/users.
@@ -121,23 +188,65 @@ Resets the relevant samplers of documents and user/users.
View
source
-```python
-step(slates)
-```
+
+step(
+ slates
+)
+
Executes the action, returns next state observation and reward.
-#### Args:
-
-* `slates`: A list of slates, where each slate is an integer array of
- size slate_size, where each element is an index into the set of
- current_documents presented
-
-#### Returns:
+
+
+ Args | |
---|---|
+`slates` + | ++A list of slates, where each slate is an integer array of size +slate_size, where each element is an index into the set of +current_documents presented + | +
Returns | |
---|---|
+`user_obs` + | ++A list of gym observation representing all users' next state + | +
+`doc_obs` + | ++A list of observations of the documents + | +
+`responses` + | ++A list of AbstractResponse objects for each item in the slate + | +
+`done` + | ++A boolean indicating whether the episode has terminated + | +
__init__
+recsim.simulator.recsim_gym.RecSimGymEnv(
+ raw_environment, reward_aggregator,
metrics_aggregator=_dummy_metrics_aggregator,
metrics_writer=_dummy_metrics_writer
)
-```
-
-Initializes a RecSim environment conforming to gym.Env.
-
-#### Args:
-
-* `raw_environment`: A recsim recommender system environment.
-* `reward_aggregator`: A function mapping a list of responses to a
- number.
-* `metrics_aggregator`: A function aggregating metrics over all steps
- given responses and response_names.
-* `metrics_writer`: A function writing final metrics to TensorBoard.
-
-## Properties
+
-action_space
environment
Args | |
---|---|
+`raw_environment` + | ++A recsim recommender system environment. + | +
+`reward_aggregator` + | ++A function mapping a list of responses to a number. + | +
+`metrics_aggregator` + | ++A function aggregating metrics over all steps given +responses and response_names. + | +
+`metrics_writer` + | ++A function writing final metrics to TensorBoard. + | +
Attributes | |
---|---|
+`game_over` + | ++A boolean indicating whether the current game has finished + | +
+`action_space` + | ++A gym.spaces object that specifies the space for possible +actions. + | +
+`observation_space` + | ++A gym.spaces object that specifies the space for possible +observations. + | +
+`environment` + | +
Returns the recsim recommender system environment.
-
-
-
- |
+
+`unwrapped` + | +Completely unwrap this env. - -#### Returns: - -* `gym.Env`: The base non-wrapped gym.Env instance + | +
__enter__
__exit__
close
+close()
+
Override close in your subclass to perform any necessary cleanup.
@@ -138,18 +146,20 @@ when the program exits.
View
source
-```python
-extract_env_info()
-```
+
+extract_env_info()
+
render
+render(
+ mode='human'
+)
+
Renders the environment.
@@ -170,9 +180,18 @@ Make sure that your class's metadata 'render.modes' key includes the list of
supported modes. It's recommended to call super() in implementations to use the
functionality of this method.
-#### Args:
+
+ Args | |
---|---|
mode (str): the mode to render with + | +
+reset()
+
Resets the state of the environment and returns an initial observation.
-#### Returns:
+
+ Returns | |
---|---|
observation (object): the initial observation. + | +
reset_metrics
+reset_metrics()
+
Resets every metric to zero.
@@ -220,18 +248,20 @@ reset() gets called for every episode.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
seed
+seed(
+ seed=None
+)
+
Sets the seed for this env's random number generator(s).
@@ -241,21 +271,33 @@ Some environments use multiple pseudorandom number generators. We want to
capture all such seeds used in order to ensure that there aren't accidental
correlations between multiple generators.
-#### Returns:
+
+
+ Returns | |
---|---|
+list |
+
step
+step(
+ action
+)
+
Runs one timestep of the environment's dynamics.
@@ -263,31 +305,51 @@ When end of episode is reached, you are responsible for calling `reset()` to
reset this environment's state. Accepts an action and returns a tuple
(observation, reward, done, info).
-#### Args:
+
+ Args | |
---|---|
action (object): An action provided by the environment + | +
Returns | |
---|---|
+A four-tuple of (observation, reward, done, info) where: +observation (object): agent's observation that include +1. User's state features +2. Document's observation +3. Observation about user's slate responses. +reward (float) : The amount of reward returned after previous action +done (boolean): Whether the episode has ended, in which case further +step() calls will return undefined results +info (dict): Contains responses for the full slate for debugging/learning. + | +
update_metrics
+update_metrics(
+ responses, info=None
)
-```
+
Updates metrics with one step responses.
@@ -296,13 +358,30 @@ Updates metrics with one step responses.
View
source
-```python
-write_metrics(add_summary_fn)
-```
+
+write_metrics(
+ add_summary_fn
+)
+
Writes metrics to TensorBoard by calling add_summary_fn.
-## Class Members
+__enter__
+__enter__()
+
+
+__exit__
+__exit__(
+ *args
+)
+
+
+## Class Variables
* `metadata`
* `reward_range`
+* `spec = None`
diff --git a/docs/api_docs/python/recsim/simulator/runner_lib.md b/docs/api_docs/python/recsim/simulator/runner_lib.md
index 2e1ea52..affd243 100644
--- a/docs/api_docs/python/recsim/simulator/runner_lib.md
+++ b/docs/api_docs/python/recsim/simulator/runner_lib.md
@@ -1,12 +1,14 @@
__init__
+recsim.simulator.runner_lib.EvalRunner(
+ max_eval_episodes=125000, test_mode=False, min_interval_secs=30,
+ train_base_dir=None, **kwargs
)
-```
+
-Initializes the Runner object in charge of running a full experiment.
+
-#### Args:
+See main.py for a simple example to evaluate an agent.
-* `base_dir`: str, the base directory to host all required
- sub-directories.
-* `create_agent_fn`: A function that takes as args a Tensorflow session
- and an environment, and returns an agent.
-* `env`: A Gym environment for running the experiments.
-* `episode_log_file`: Path to output simulated episodes in
- tf.SequenceExample. Disable logging if episode_log_file is an empty string.
-* `checkpoint_file_prefix`: str, the prefix to use for checkpoint
- files.
-* `max_steps_per_episode`: int, maximum number of steps after which an
- episode terminates.
+
+
+ Args | |
---|---|
+`base_dir` + | ++str, the base directory to host all required sub-directories. + | +
+`create_agent_fn` + | ++A function that takes as args a Tensorflow session and an +environment, and returns an agent. + | +
+`env` + | ++A Gym environment for running the experiments. + | +
+`episode_log_file` + | ++Path to output simulated episodes in tf.SequenceExample. +Disable logging if episode_log_file is an empty string. + | +
+`checkpoint_file_prefix` + | ++str, the prefix to use for checkpoint files. + | +
+`max_steps_per_episode` + | ++int, maximum number of steps after which an episode +terminates. + | +
+run_experiment()
+
Runs a full experiment, spread over multiple iterations.
diff --git a/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md b/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md
index 74dff31..364138a 100644
--- a/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md
+++ b/docs/api_docs/python/recsim/simulator/runner_lib/Runner.md
@@ -6,46 +6,80 @@
# recsim.simulator.runner_lib.Runner
-
+
+recsim.simulator.runner_lib.Runner(
+ base_dir, create_agent_fn, env, episode_log_file='',
+ checkpoint_file_prefix='ckpt', max_steps_per_episode=27000
+)
+
+
Here we use the term 'experiment' to mean simulating interactions between the
agent and the environment and reporting some statistics pertaining to these
interactions.
-__init__
Args | |
---|---|
+`base_dir` + | ++str, the base directory to host all required sub-directories. + | +
+`create_agent_fn` + | ++A function that takes as args a Tensorflow session and an +environment, and returns an agent. + | +
+`env` + | ++A Gym environment for running the experiments. + | +
+`episode_log_file` + | ++Path to output simulated episodes in tf.SequenceExample. +Disable logging if episode_log_file is an empty string. + | +
+`checkpoint_file_prefix` + | ++str, the prefix to use for checkpoint files. + | +
+`max_steps_per_episode` + | ++int, maximum number of steps after which an episode +terminates. + | +
__init__
+recsim.simulator.runner_lib.TrainRunner(
+ max_training_steps=250000, num_iterations=100, checkpoint_frequency=1, **kwargs
)
-```
+
-Initializes the Runner object in charge of running a full experiment.
+
-#### Args:
+See main.py for a simple example to train an agent.
-* `base_dir`: str, the base directory to host all required
- sub-directories.
-* `create_agent_fn`: A function that takes as args a Tensorflow session
- and an environment, and returns an agent.
-* `env`: A Gym environment for running the experiments.
-* `episode_log_file`: Path to output simulated episodes in
- tf.SequenceExample. Disable logging if episode_log_file is an empty string.
-* `checkpoint_file_prefix`: str, the prefix to use for checkpoint
- files.
-* `max_steps_per_episode`: int, maximum number of steps after which an
- episode terminates.
+
+
+ Args | |
---|---|
+`base_dir` + | ++str, the base directory to host all required sub-directories. + | +
+`create_agent_fn` + | ++A function that takes as args a Tensorflow session and an +environment, and returns an agent. + | +
+`env` + | ++A Gym environment for running the experiments. + | +
+`episode_log_file` + | ++Path to output simulated episodes in tf.SequenceExample. +Disable logging if episode_log_file is an empty string. + | +
+`checkpoint_file_prefix` + | ++str, the prefix to use for checkpoint files. + | +
+`max_steps_per_episode` + | ++int, maximum number of steps after which an episode +terminates. + | +
+run_experiment()
+
Runs a full experiment, spread over multiple iterations.
diff --git a/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md b/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md
index 1e0ae49..9dc6c62 100644
--- a/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md
+++ b/docs/api_docs/python/recsim/simulator/runner_lib/load_gin_configs.md
@@ -5,29 +5,46 @@
# recsim.simulator.runner_lib.load_gin_configs
-
+
+recsim.simulator.runner_lib.load_gin_configs(
+ gin_files, gin_bindings
)
-```
+
-#### Args:
-
-* `gin_files`: list, of paths to the gin configuration files for this
- experiment.
-* `gin_bindings`: list, of gin parameter bindings to override the
- values in the config files.
+
+
+ Args | |
---|---|
+`gin_files` + | ++list, of paths to the gin configuration files for this +experiment. + | +
+`gin_bindings` + | ++list, of gin parameter bindings to override the values in the +config files. + | +
+Creates a tensor observation of this response. @@ -40,9 +39,10 @@ Creates a tensor observation of this response. View source -```python -@staticmethod -response_space() -``` +@abc.abstractmethod
+create_observation() +
+ArraySpec that defines how a single response is represented. diff --git a/docs/api_docs/python/recsim/user/AbstractUserModel.md b/docs/api_docs/python/recsim/user/AbstractUserModel.md index feb698f..ecca708 100644 --- a/docs/api_docs/python/recsim/user/AbstractUserModel.md +++ b/docs/api_docs/python/recsim/user/AbstractUserModel.md @@ -15,44 +15,57 @@ # recsim.user.AbstractUserModel - +@staticmethod
+@abc.abstractmethod
+response_space() +
__init__
+recsim.user.AbstractUserModel(
+ response_model_ctor, user_sampler, slate_size
)
-```
-
-Initializes a new user model.
+
-#### Args:
+
-* `response_model_ctor`: A class/constructor representing the type of
- responses this model will generate.
-* `user_sampler`: An instance of AbstractUserSampler that can generate
- initial user states from an inital state distribution.
-* `slate_size`: integer number of documents that can be served to the
- user at any interaction.
+
+
+ Args | |
---|---|
+`response_model_ctor` + | ++A class/constructor representing the type of +responses this model will generate. + | +
+`user_sampler` + | ++An instance of AbstractUserSampler that can generate +initial user states from an inital state distribution. + | +
+`slate_size` + | ++integer number of documents that can be served to the user at +any interaction. + | +
+create_observation()
+
Emits obesrvation about user's state.
@@ -72,9 +85,9 @@ Emits obesrvation about user's state.
View
source
-```python
-get_response_model_ctor()
-```
+
+get_response_model_ctor()
+
Returns a constructor for the type of response this model will create.
@@ -83,9 +96,10 @@ Returns a constructor for the type of response this model will create.
View
source
-```python
-is_terminal()
-```
++Returns a boolean indicating whether this session is over. @@ -94,9 +108,9 @@ Returns a boolean indicating whether this session is over. View source -```python -observation_space() -``` +@abc.abstractmethod
+is_terminal() +
+observation_space()
+
A Gym.spaces object that describes possible user observations.
@@ -105,9 +119,9 @@ A Gym.spaces object that describes possible user observations.
View
source
-```python
-reset()
-```
+
+reset()
+
Resets the user.
@@ -116,9 +130,9 @@ Resets the user.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
Resets the sampler.
@@ -127,49 +141,91 @@ Resets the sampler.
View
source
-```python
-response_space()
-```
+
+response_space()
+
simulate_response
+Simulates the user's response to a slate of documents. This could involve simulating models of attention, as well as random sampling for selection from scored documents. -#### Args: + + +@abc.abstractmethod
+simulate_response( + documents +) +
Args | |
---|---|
+`documents` + | ++a list of AbstractDocuments + | +
Returns | |
---|---|
(response) a list of AbstractResponse objects for each slate item + | +
update_state
+Updates the user's state based on the slate and document selected. -#### Args: + + +@abc.abstractmethod
+update_state( + slate_documents, responses ) -``` +
Args | |
---|---|
+`slate_documents` + | ++A list of AbstractDocuments for items in the slate. + | +
+`responses` + | ++A list of AbstractResponses for each item in the slate. + | +
__init__
+recsim.user.AbstractUserSampler(
+ user_ctor, seed=0
)
-```
-
-Creates a new user state sampler.
-
-User states of the type user_ctor are sampled.
+
-#### Args:
-
-* `user_ctor`: A class/constructor for the type of user states that
- will be sampled.
-* `seed`: An integer for a random seed.
+
+
+
+ Args | |
---|---|
+`user_ctor` + | ++A class/constructor for the type of user states that will be +sampled. + | +
+`seed` + | ++An integer for a random seed. + | +
+get_user_ctor()
+
Returns the constructor/class of the user states that will be sampled.
@@ -64,17 +69,18 @@ Returns the constructor/class of the user states that will be sampled.
View
source
-```python
-reset_sampler()
-```
+
+reset_sampler()
+
sample_user
+Creates a new instantiation of this user's hidden state parameters. diff --git a/docs/api_docs/python/recsim/user/AbstractUserState.md b/docs/api_docs/python/recsim/user/AbstractUserState.md index 0948bbc..cbab8a7 100644 --- a/docs/api_docs/python/recsim/user/AbstractUserState.md +++ b/docs/api_docs/python/recsim/user/AbstractUserState.md @@ -3,21 +3,20 @@ +@abc.abstractmethod
+sample_user() +
+Generates obs of underlying state to simulate partial observability. -#### Returns: + + +@abc.abstractmethod
+create_observation() +
Returns | |
---|---|
+`obs` + | ++A float array of the observed user features. + | +
observation_space
+Gym.spaces object that defines how user states are represented. +## Class Variables - - +* `NUM_FEATURES = None` diff --git a/docs/api_docs/python/recsim/utils.md b/docs/api_docs/python/recsim/utils.md index 7a46883..390a557 100644 --- a/docs/api_docs/python/recsim/utils.md +++ b/docs/api_docs/python/recsim/utils.md @@ -5,7 +5,10 @@ # Module: recsim.utils + +@staticmethod
+@abc.abstractmethod
+observation_space() +
+recsim.utils.aggregate_video_cluster_metrics(
+ responses, metrics, info=None
)
-```
+
-#### Args:
+
+
+ Args | |
---|---|
+`responses` + | ++a dictionary of names, observed responses. + | +
+`metrics` + | ++A dictionary mapping from metric_name to its value in float. + | +
+`info` + | ++Additional info for computing metrics (ignored here) + | +
Returns | |
---|---|
A dictionary storing metrics after aggregation. + | +
+recsim.utils.write_video_cluster_metrics(
+ metrics, add_summary_fn
)
-```
+
diff --git a/recsim/agents/slate_decomp_q_agent.py b/recsim/agents/slate_decomp_q_agent.py
index f289f79..0a6d9cc 100644
--- a/recsim/agents/slate_decomp_q_agent.py
+++ b/recsim/agents/slate_decomp_q_agent.py
@@ -1,5 +1,4 @@
# coding=utf-8
-# coding=utf-8
# Copyright 2019 The RecSim Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -172,7 +171,7 @@ def set_element(v, i, x):
numerator = numerator + tf.gather(s * q, k)
denominator = denominator + tf.gather(s, k)
- output_slate = tf.compat.v1.where(tf.equal(mask, 0))
+ output_slate = tf.where(tf.equal(mask, 0))
return output_slate
@@ -352,13 +351,13 @@ def compute_target_topk_q(reward, gamma, next_actions, next_q_values,
# Get the expected Q-value of the slate containing top-K items.
# [batch_size, slate_size]
- next_q_values_selected = tf.compat.v1.batch_gather(
+ next_q_values_selected = tf.batch_gather(
next_q_values, tf.cast(topk_optimal_slate, dtype=tf.int32))
# Get normalized affinity scores on the slate.
# [batch_size, slate_size]
- scores_selected = tf.compat.v1.batch_gather(
- scores, tf.cast(topk_optimal_slate, dtype=tf.int32))
+ scores_selected = tf.batch_gather(scores,
+ tf.cast(topk_optimal_slate, dtype=tf.int32))
next_q_target_topk = tf.reduce_sum(
input_tensor=next_q_values_selected * scores_selected, axis=1) / (
@@ -475,9 +474,9 @@ def __init__(self,
abstract_agent.AbstractEpisodicRecommenderAgent.__init__(self, action_space)
# The doc score is a [num_candidates] vector.
- self._doc_affinity_scores_ph = tf.compat.v1.placeholder(
+ self._doc_affinity_scores_ph = tf.placeholder(
tf.float32, (self._num_candidates,), name='doc_affinity_scores_ph')
- self._prob_no_click_ph = tf.compat.v1.placeholder(
+ self._prob_no_click_ph = tf.placeholder(
tf.float32, (), name='prob_no_click_ph')
self._select_slate_fn = select_slate_fn
@@ -496,7 +495,7 @@ def __init__(self,
def _network_adapter(self, states, scope):
self._validate_states(states)
- with tf.compat.v1.name_scope('network'):
+ with tf.name_scope('network'):
# Since we decompose the slate optimization into an item-level
# optimization problem, the observation space is the user state
# observation plus all documents' observations. In the Dopamine DQN agent
@@ -513,7 +512,7 @@ def _network_adapter(self, states, scope):
return dqn_agent.DQNNetworkType(q_values)
def _build_networks(self):
- with tf.compat.v1.name_scope('networks'):
+ with tf.name_scope('networks'):
self._replay_net_outputs = self._network_adapter(self._replay.states,
'Online')
self._replay_next_target_net_outputs = self._network_adapter(
@@ -533,7 +532,7 @@ def _build_train_op(self):
# slate_q_values: [B, S]
# replay_click_q: [B]
click_indicator = self._replay.rewards[:, :, self._click_response_index]
- slate_q_values = tf.compat.v1.batch_gather(
+ slate_q_values = tf.batch_gather(
self._replay_net_outputs.q_values,
tf.cast(self._replay.actions, dtype=tf.int32))
# Only get the Q from the clicked document.
@@ -545,8 +544,7 @@ def _build_train_op(self):
target = tf.stop_gradient(self._build_target_q_op())
clicked = tf.reduce_sum(input_tensor=click_indicator, axis=1)
- clicked_indices = tf.squeeze(
- tf.compat.v1.where(tf.equal(clicked, 1)), axis=1)
+ clicked_indices = tf.squeeze(tf.where(tf.equal(clicked, 1)), axis=1)
# clicked_indices is a vector and tf.gather selects the batch dimension.
q_clicked = tf.gather(replay_click_q, clicked_indices)
target_clicked = tf.gather(target, clicked_indices)
@@ -554,8 +552,8 @@ def _build_train_op(self):
def get_train_op():
loss = tf.reduce_mean(input_tensor=tf.square(q_clicked - target_clicked))
if self.summary_writer is not None:
- with tf.compat.v1.variable_scope('Losses'):
- tf.compat.v1.summary.scalar('Loss', loss)
+ with tf.variable_scope('Losses'):
+ tf.summary.scalar('Loss', loss)
return loss
@@ -613,25 +611,24 @@ def _build_select_slate_op(self):
p_no_click = self._prob_no_click_ph
p = self._doc_affinity_scores_ph
q = self._net_outputs.q_values[0]
- with tf.compat.v1.name_scope('select_slate'):
+ with tf.name_scope('select_slate'):
self._output_slate = self._select_slate_fn(self._slate_size, p_no_click,
p, q)
- self._output_slate = tf.compat.v1.Print(
+ self._output_slate = tf.Print(
self._output_slate, [tf.constant('cp 1'), self._output_slate, p, q],
summarize=10000)
self._output_slate = tf.reshape(self._output_slate, (self._slate_size,))
- self._action_counts = tf.compat.v1.get_variable(
+ self._action_counts = tf.get_variable(
'action_counts',
shape=[self._num_candidates],
- initializer=tf.compat.v1.zeros_initializer())
+ initializer=tf.zeros_initializer())
output_slate = tf.reshape(self._output_slate, [-1])
output_one_hot = tf.one_hot(output_slate, self._num_candidates)
update_ops = []
for i in range(self._slate_size):
- update_ops.append(
- tf.compat.v1.assign_add(self._action_counts, output_one_hot[i]))
+ update_ops.append(tf.assign_add(self._action_counts, output_one_hot[i]))
self._select_action_update_op = tf.group(*update_ops)
def _select_action(self):
@@ -660,7 +657,7 @@ def _select_action(self):
observation = self._raw_observation
user_obs = observation['user']
doc_obs = np.array(list(observation['doc'].values()))
- tf.compat.v1.logging.debug('cp 1: %s, %s', doc_obs, observation)
+ tf.logging.debug('cp 1: %s, %s', doc_obs, observation)
# TODO(cwhsu): Use score_documents_tf() and remove score_documents().
scores, score_no_click = score_documents(user_obs, doc_obs)
output_slate, _ = self._sess.run(
@@ -697,8 +694,8 @@ def _build_replay_buffer(self, use_staging):
def _add_summary(self, tag, value):
if self.summary_writer:
- summary = tf.compat.v1.Summary(
- value=[tf.compat.v1.Summary.Value(tag=tag, simple_value=value)])
+ summary = tf.Summary(
+ value=[tf.Summary.Value(tag=tag, simple_value=value)])
self.summary_writer.add_summary(summary, self.training_steps)
def begin_episode(self, observation):
diff --git a/recsim/environments/interest_exploration.py b/recsim/environments/interest_exploration.py
index 9d6885a..c8aeca1 100644
--- a/recsim/environments/interest_exploration.py
+++ b/recsim/environments/interest_exploration.py
@@ -71,7 +71,6 @@ class IEUserModel(user.AbstractUserModel):
Args:
slate_size: An integer representing the size of the slate.
no_click_mass: A float indicating the mass given to a no-click option.
- Must be positive, otherwise CTR is always 1.
choice_model_ctor: A contructor function to create user choice model.
user_state_ctor: A constructor to create user state.
response_model_ctor: A constructor function to create response. The
@@ -87,8 +86,6 @@ def __init__(self,
user_state_ctor=None,
response_model_ctor=None,
seed=0):
- if no_click_mass < 0:
- raise ValueError('no_click_mass must be positive.')
super(IEUserModel, self).__init__(response_model_ctor, IEClusterUserSampler(
user_ctor=user_state_ctor, seed=seed), slate_size)
diff --git a/setup.py b/setup.py
index 2035764..10b974f 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@
setup(
name='recsim',
- version='0.2.3',
+ version='0.2.4',
author='The RecSim Team',
author_email='no-reply@google.com',
description=recsim_description,