- Values, including both state and action-values;
- Values for non-linear generalizations of the Bellman equations;
- Return Distributions, aka distributional value functions;
- General Value Functions, for cumulants other than the main reward;
- Policies, via policy-gradients in both continuous and discrete action spaces.
rlax
categorical_double_q_learning categorical_l2_project categorical_q_learning categorical_td_learning discounted_returns double_q_learning expected_sarsa general_off_policy_returns_from_action_values general_off_policy_returns_from_q_and_v lambda_returns leaky_vtrace leaky_vtrace_td_error_and_advantage n_step_bootstrapped_returns persistent_q_learning q_lambda q_learning quantile_expected_sarsa quantile_q_learning quantile_regression_loss qv_learning qv_max retrace retrace_continuous sarsa sarsa_lambda td_lambda td_learning transformed_general_off_policy_returns_from_action_values transformed_lambda_returns transformed_n_step_q_learning transformed_n_step_returns transformed_q_lambda transformed_retrace truncated_generalized_advantage_estimation vtrace vtrace_td_error_and_advantage
categorical_double_q_learning
categorical_l2_project
categorical_q_learning
categorical_td_learning
discounted_returns
double_q_learning
expected_sarsa
general_off_policy_returns_from_action_values
general_off_policy_returns_from_q_and_v
lambda_returns
leaky_vtrace
n_step_bootstrapped_returns
leaky_vtrace_td_error_and_advantage
persistent_q_learning
q_lambda
q_learning
quantile_expected_sarsa
quantile_q_learning
qv_learning
qv_max
retrace
retrace_continuous
sarsa
sarsa_lambda
td_lambda
td_learning
transformed_general_off_policy_returns_from_action_values
transformed_lambda_returns
transformed_n_step_q_learning
transformed_n_step_returns
transformed_q_lambda
transformed_retrace
truncated_generalized_advantage_estimation
vtrace
rlax
clipped_surrogate_pg_loss compute_parametric_kl_penalty_and_dual_loss constant_policy_targets dpg_loss entropy_loss mpo_loss mpo_compute_weights_and_temperature_loss policy_gradient_loss qpg_loss rm_loss rpg_loss sampled_policy_distillation_loss vmpo_compute_weights_and_temperature_loss vmpo_loss zero_policy_targets
clipped_surrogate_pg_loss
compute_parametric_kl_penalty_and_dual_loss
dpg_loss
entropy_loss
mpo_compute_weights_and_temperature_loss
mpo_loss
policy_gradient_loss
qpg_loss
rm_loss
rpg_loss
constant_policy_targets
zero_policy_targets
sampled_policy_distillation_loss
vmpo_compute_weights_and_temperature_loss
vmpo_loss
rlax
add_dirichlet_noise add_gaussian_noise add_ornstein_uhlenbeck_noise episodic_memory_intrinsic_rewards knn_query
add_dirichlet_noise
add_gaussian_noise
add_ornstein_uhlenbeck_noise
episodic_memory_intrinsic_rewards
knn_query
rlax
AllSum batched_index clip_gradient create_ema fix_step_type_on_interruptions lhs_broadcast one_hot embed_oar replace_masked transpose_first_axis_to_last transpose_last_axis_to_first tree_fn tree_map_zipped tree_replace_masked tree_select tree_split_key tree_split_leaves conditional_update periodic_update
AllSum
batched_index
clip_gradient
create_ema
lhs_broadcast
one_hot
embed_oar
fix_step_type_on_interruptions
transpose_first_axis_to_last
transpose_last_axis_to_first
replace_masked
tree_map_zipped
tree_replace_masked
tree_select
tree_split_key
tree_split_leaves
conditional_update
periodic_update
rlax
pixel_control_rewards feature_control_rewards
pixel_control_rewards
feature_control_rewards
rlax
extract_subsequences sample_start_indices
extract_subsequences
sample_start_indices
rlax
art normalize pop popart unnormalize unnormalize_linear
art
normalize
pop
popart
unnormalize
unnormalize_linear
rlax
compose_tx DISCOUNT_TRANSFORM_PAIR HYPERBOLIC_SIN_PAIR identity IDENTITY_PAIR logit muzero_pair power sigmoid signed_expm1 signed_hyperbolic SIGNED_HYPERBOLIC_PAIR signed_logp1 SIGNED_LOGP1_PAIR signed_parabolic transform_from_2hot transform_to_2hot twohot_pair TxPair unbiased_transform_pair
identity
logit
power
sigmoid
signed_expm1
signed_hyperbolic
signed_logp1
signed_parabolic
transform_from_2hot
transform_to_2hot
rlax
l2_loss likelihood log_loss huber_loss pixel_control_loss
l2_loss
likelihood
log_loss
huber_loss
pixel_control_loss
rlax
categorical_cross_entropy categorical_importance_sampling_ratios categorical_kl_divergence categorical_sample clipped_entropy_softmax epsilon_greedy gaussian_diagonal greedy multivariate_normal_kl_divergence softmax squashed_gaussian
categorical_cross_entropy
categorical_importance_sampling_ratios
categorical_kl_divergence
categorical_sample
clipped_entropy_softmax
epsilon_greedy
gaussian_diagonal
greedy
multivariate_normal_kl_divergence
softmax
squashed_gaussian