# Experiment
This notebook contains code to execute the experiments described in the paper. 

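To reproduce a specific experiment, select it with `model_type` and `experiment_type` in the configuration cell below. For some experiments you may also need to change parameters such as the batch size (for streaming).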

The largest experiments only take about 90 minutes to execute on CPU.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from environment.stimulus_response_env import StimulusResponseEnv
from environment.mouse_garden_env import MouseGardenEnv
from disentangled.disentangled_agent import DisentangledAgentConfig, DisentangledAgent
from disentangled.entangled_agent import EntangledAgentConfig, EntangledAgent
from agent.q_learning_agent import QLearningAgent

import logging
logging.basicConfig(level=logging.INFO)


In [None]:
# ---------------------------------------------------------------------------
# Experiment selection.
# Each pair below has one active assignment and one commented-out alternative;
# move the comment marker to switch.
# ---------------------------------------------------------------------------
MODEL_TYPE_DISENTANGLED = "Disentangled"
MODEL_TYPE_ENTANGLED = "Entangled"
model_type = MODEL_TYPE_DISENTANGLED
#model_type = MODEL_TYPE_ENTANGLED

EXPERIMENT_TYPE_ZERO_SHOT = "Zero-shot"
EXPERIMENT_TYPE_FEW_SHOT = "Few-shot"
experiment_type = EXPERIMENT_TYPE_ZERO_SHOT
#experiment_type = EXPERIMENT_TYPE_FEW_SHOT

# ---------------------------------------------------------------------------
# Step budgets.
# The initial training budget depends on the model: the entangled model gets
# twice as many steps as the disentangled one.
# ---------------------------------------------------------------------------
if model_type == MODEL_TYPE_ENTANGLED:
    TRAINING_STEPS = 12000
elif model_type == MODEL_TYPE_DISENTANGLED:
    TRAINING_STEPS = 6000

EVALUATE_STEPS = 1000

# Step counts for the successive few-shot training rounds. Each entry is the
# number of *additional* steps in that round, so the running totals are
# 10, 20, 40, 80, 160, 320, 640, 1280, 2560.
few_shot_training_steps = [10, 10, 20, 40, 80, 160, 320, 640, 1280]

# ---------------------------------------------------------------------------
# Hyperparameters shared by both agent configurations.
# ---------------------------------------------------------------------------
BATCH_SIZE = 16
LOG_PERIOD = 100
TOKEN_EMBEDDING_SIZE = 100
HISTORY_SIZE = 6
EPSILON_GREEDY = 0.1
DISCOUNT = 0.9

# Build the agent for the selected model type.
# Both branches pass the same logging, environment, step-count, batch-size,
# embedding, history, discount and epsilon-greedy settings. They differ in the
# optimizer learning rate, the encoding method, and the model-specific
# parameters at the end of each config.
if model_type == MODEL_TYPE_DISENTANGLED:
    agent_config = DisentangledAgentConfig(

        # BaseAgent
        log_prefix = "stimulus-response",
        log_period = LOG_PERIOD,
        log_combined = True,
        environment_id = MouseGardenEnv.ENV_NAME,
        random_policy = False,  # Use to measure random policy empirically
        print_parameters = False,

        steps_training = TRAINING_STEPS,
        steps_evaluate = EVALUATE_STEPS,

        batch_size = BATCH_SIZE,
        # NOTE(review): the optimizer learning rate is zero here; presumably
        # this agent learns through learning_rate_values instead - confirm.
        optimizer_learning_rate = 0.0, 

        # Episodic agent
        token_embedding_size = TOKEN_EMBEDDING_SIZE,
        history_size = HISTORY_SIZE,

        # QLearningAgent
        encoding_method = QLearningAgent.ENCODING_METHOD_RECURRENT,
        encoded_observation_size = 200,
        encoded_action_size = 200,
        discount = DISCOUNT,
        epsilon_greedy = EPSILON_GREEDY,

        # Disentangled agent
        memory_size = 10000,
        # Alternative sparsity values that were tried are kept commented out.
        #sparsity = 64,
        sparsity = 32,
        #sparsity = 16,
        learning_rate_values = 0.1,
    )

    agent = DisentangledAgent(agent_config)
elif model_type == MODEL_TYPE_ENTANGLED:
    agent_config = EntangledAgentConfig(

        # BaseAgent
        log_prefix = "stimulus-response",
        log_period = LOG_PERIOD,
        log_combined = True,
        environment_id = MouseGardenEnv.ENV_NAME,
        random_policy = False,  # Use to measure random policy empirically
        print_parameters = False,

        steps_training = TRAINING_STEPS,
        steps_evaluate = EVALUATE_STEPS,

        batch_size = BATCH_SIZE,

        # Univariate param. sweep
        #optimizer_learning_rate = 0.01,  # Fails to learn
        #optimizer_learning_rate = 0.001,  # Learns but can't adapt during few-shot
        optimizer_learning_rate = 0.0001,  # Best but slow

        # Episodic agent
        token_embedding_size = TOKEN_EMBEDDING_SIZE,
        history_size = HISTORY_SIZE,

        # QLearningAgent
        # Univariate param. sweep
        #encoding_method = QLearningAgent.ENCODING_METHOD_RECURRENT,
        encoding_method = QLearningAgent.ENCODING_METHOD_FLATTEN,
        encoded_observation_size = 200,
        encoded_action_size = 200,
        discount = DISCOUNT,
        epsilon_greedy = EPSILON_GREEDY,

        # Entangled agent (model architecture and regularisation)
        model_layers = 2,
        model_bias = True,
        model_nonlinearity = "leaky-relu",
        model_input_layer_norm = True,
        model_input_dropout = 0.25,
        model_input_weight_clip = 0.0,

        # Univariate param. sweep
        #model_hidden_size = 4000,  # worse
        model_hidden_size = 2000,
        model_hidden_dropout = 0.25,
    )

    agent = EntangledAgent(agent_config)
else:
    # Fail early rather than continue without an agent.
    raise ValueError("Model type not recognized.")

In [None]:

# Choose the scenario lists for the selected experiment:
#   scenarios_exclude - scenarios filtered out of the initial training phase
#   scenarios_include - the only scenarios used in the "new data" phase
# Both lists are read as globals by filter_scenarios().

# Few-shot experiment:
# We train on all classes, except poisonous red foods.
# In the few-shot phase, we stop training on the original data,
# and only train on red foods (if we train on only poisonous red food,
# the model will rightly learn that all red foods are poisonous).
# We then measure performance on *all* classes to make sure it hasn't reduced.
if experiment_type == EXPERIMENT_TYPE_FEW_SHOT:
    print("Few shot experiment.")
    scenarios_exclude = ["deadly nightshade", "fly agaric mushroom", ]  # BAD Red food
    scenarios_include = ["deadly nightshade", "fly agaric mushroom", "tomato", "radish"]  # ALL Red food

# Zero-shot experiment
# Remove all the dangerous birds from the training set except one.
# We can zero-shot the test set by leveraging similar experiences with predatory birds.
elif experiment_type == EXPERIMENT_TYPE_ZERO_SHOT:
    print("Zero shot experiment.")
    scenarios_exclude = ["eagle", "falcon"]  # Excluded from training set
    scenarios_include = ["eagle", "falcon"]  # Included in test set

else:
    # Without this, an unrecognised experiment type would leave the scenario
    # lists undefined, or stale from an earlier run of this cell in the same
    # kernel, and the experiment would fail late or run on the wrong data.
    raise ValueError("Experiment type not recognized.")

def filter_scenarios(exclude_scenarios):
    """Apply a scenario filter to the agent's environments, then reset the agent.

    Args:
        exclude_scenarios: If truthy, filter out the scenarios listed in the
            global ``scenarios_exclude`` and set no include list. Otherwise
            restrict the environments to the global ``scenarios_include`` and
            set no exclude list.
    """
    # Exactly one of the two lists is active; the other is passed as None.
    if exclude_scenarios:
        include_only, leave_out = None, scenarios_exclude
    else:
        include_only, leave_out = scenarios_include, None

    StimulusResponseEnv.set_scenario_filter_for_envs(
        envs=agent.envs,
        scenario_include=include_only,
        scenario_exclude=leave_out,
    )
    agent.reset()

def evaluate():
    """Evaluate the agent twice: on the training scenarios, then on the new ones.

    Each pass sets the scenario filter, sets ``agent.config.epsilon_greedy``
    to 0 so the optimal action is selected, and calls ``agent.do_evaluate()``.
    On return, ``epsilon_greedy`` is still 0 and the "new data" filter is
    still active.
    """
    # (message, exclude_scenarios flag) for each evaluation pass, in order.
    passes = (
        ("Evaluating on training data...", True),
        ("Evaluating on new data...", False),
    )
    for message, use_exclude_filter in passes:
        print(message)
        filter_scenarios(exclude_scenarios=use_exclude_filter)
        agent.config.epsilon_greedy = 0  # Select optimal action
        agent.do_evaluate()

def training(exclude_scenarios: bool, steps: int):
    """Run one training phase of ``steps`` steps under the chosen scenario filter.

    Args:
        exclude_scenarios: True trains on the original data, with the
            scenarios in ``scenarios_exclude`` filtered out. False trains only
            on the scenarios in ``scenarios_include`` (the new data).
        steps: Number of training steps; written to
            ``agent.config.steps_training`` before training starts.
    """
    # Report which data this phase actually uses. The message used to say
    # "new data" even for the initial phase on the original data.
    phase = "original" if exclude_scenarios else "new"
    print(f"Training on {phase} data...")

    filter_scenarios(exclude_scenarios)
    # evaluate() sets epsilon_greedy to 0, so restore exploration before training.
    agent.config.epsilon_greedy = EPSILON_GREEDY
    agent.config.steps_training = steps
    agent.do_training()

def few_shot_training():
    """Run the few-shot rounds, evaluating after each one.

    For every entry in ``few_shot_training_steps``, train for that many
    additional steps on the included scenarios only, then call ``evaluate()``.
    The running total of few-shot steps is printed before each round.
    """
    total_steps = 0
    for increment in few_shot_training_steps:
        total_steps = total_steps + increment
        print(f"Few-shot training: +{increment} = {total_steps}")
        training(steps=increment, exclude_scenarios=False)
        evaluate()



In [None]:
# Initial training phase: the scenarios in scenarios_exclude are filtered out.
training(exclude_scenarios = True, steps = TRAINING_STEPS)

In [None]:
# Evaluate on the training scenarios, then on the scenarios in scenarios_include.
evaluate()


In [None]:
# The few-shot rounds run only for the few-shot experiment; this cell does
# nothing for the zero-shot experiment.
if experiment_type == EXPERIMENT_TYPE_FEW_SHOT:
    few_shot_training()