In [None]:
# --- Load Agents --- #
from agents.agent_reinforce.agent import REINFORCEAgent
from agents.agent_deepqn.agent import DeepQAgent
# from agents.agent_ddpg.agent import DDPG_Agent
# from agents.agent_ppo.agent import PPOAgent

# --- Load Environments --- #
from environment.grid import GridEnv

# --- Load Necessary --- #
from collections import deque
import numpy as np
from utilities.helper import flatten
from utilities.profiler import profile

# --- Load Training --- #
from experiments.experiment import Experiment

%load_ext autoreload
%autoreload 2


# Experiments

In [None]:
from agents.agent_reinforce.agent import REINFORCEAgentConfig
from agents.agent_deepqn.agent import DeepQAgentConfig


In [None]:
    # Run-length settings shared by every experiment-building cell below:
    # max_t is forwarded as Experiment(max_t=...), num_episodes as Experiment(num_episodes=...).
    # NOTE(review): both are 1, which looks like a quick smoke-run setting - confirm before real training.
    max_t = 1
    num_episodes = 1

    # Free-form reference note (kept verbatim; as the last expression it is also this cell's output).
    # NOTE(review): the building cells pass config[2]/config[3] as fixed_start/fixed_goals and
    # config[5] as HIDDEN_LAYER_SIZE, while this note lists agents_start/goals_start and
    # actor_critic - confirm which naming is current.
    """
    order of config: num_agent, grid_size, agents_start, goals_start, prob_right_direction, actor_critic 
    init environment: num_agent = 2, grid_size = 8, prob_right_direction = 1, agents_start = [], goals_start=[], render_board = False
    init of dqn:  state_size, action_size, config = DeepQAgentConfig(), seed = 1, samp_frames=1
    init of rei:  state_size, action_size, config = REINFORCEAgentConfig(), seed = 1 <- no hidden layer!
    """

# Experiment Batch 1

In [None]:
from experiments.experiment_list import exp1_dqn, exp1_rei

## Experiment Batch 1 - DQN

In [None]:
# Build (but do not run) one Experiment per entry of exp1_dqn.
# Each entry is read positionally: [0] num_agent, [1] grid_size, [2] fixed_start,
# [3] fixed_goals, [4] prob_right_direction, [5] HIDDEN_LAYER_SIZE of the agent config.
dqn_experiments = []
for i, config in enumerate(exp1_dqn):

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = DeepQAgentConfig(HIDDEN_LAYER_SIZE=config[5])
    agent = DeepQAgent(
        env.state_space,
        env.action_space.n,
        seed=1,
        config=agent_config,
    )
    # NOTE(review): every slot references the same agent object, so all grid
    # agents share one learner instance - confirm this sharing is intended.
    agents = [agent for _ in range(env.num_agent)]

    # --- Experiment --- #
    new_experiment = Experiment(
        name="Batch 1 - DQN - Exp_{}".format(i),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.,
        save_states_every=0,
        experiment_num=i,
    )
    dqn_experiments.append(new_experiment)

# Sanity check: number of experiments queued.
print(len(dqn_experiments))

In [None]:
# Run every experiment currently held in dqn_experiments, in list order
# (the Batch 1 DQN list when the notebook is executed top to bottom).
# run() is unpacked into (score_history, state_history); both names are rebound
# on each iteration, so only the last experiment's values remain after the loop.
for experiment in dqn_experiments:
    score_history, state_history = experiment.run()
    # Persisting results is currently disabled; uncomment the next line to re-enable it.
    #experiment.save(score_history, state_history, display = False)

## Experiment Batch 1 - REINFORCE

In [None]:
# Build (but do not run) one Experiment per entry of exp1_rei.
# Each entry is read positionally: [0] num_agent, [1] grid_size, [2] fixed_start,
# [3] fixed_goals, [4] prob_right_direction, [5] HIDDEN_LAYER_SIZE of the agent config.
rei_experiments = []
for i, config in enumerate(exp1_rei):

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = REINFORCEAgentConfig(HIDDEN_LAYER_SIZE=config[5])
    agent = REINFORCEAgent(
        env.state_space,
        env.action_space.n,
        seed=1,
        config=agent_config,
    )
    # NOTE(review): every slot references the same agent object, so all grid
    # agents share one learner instance - confirm this sharing is intended.
    agents = [agent for _ in range(env.num_agent)]

    # --- Experiment --- #
    new_experiment = Experiment(
        name="Batch 1 - REI - Exp_{}".format(i),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.,
        save_states_every=0,
        experiment_num=i,
    )
    rei_experiments.append(new_experiment)

# Sanity check: number of experiments queued.
print(len(rei_experiments))

In [None]:
# Run every experiment currently held in rei_experiments, in list order
# (the Batch 1 REINFORCE list when the notebook is executed top to bottom).
# run() is unpacked into (score_history, state_history); both names are rebound
# on each iteration, so only the last experiment's values remain after the loop.
for experiment in rei_experiments:
    score_history, state_history = experiment.run()
    # Persisting results is currently disabled; uncomment the next line to re-enable it.
    #experiment.save(score_history, state_history, display = False)

# Experiment Batch 2

In [None]:
from experiments.experiment_list import exp2_dqn, exp2_rei

## Experiment Batch 2 - DQN

In [None]:
# Build (but do not run) one Experiment per entry of exp2_dqn.
# Rebinding dqn_experiments discards the list built for the previous batch.
# Each entry is read positionally: [0] num_agent, [1] grid_size, [2] fixed_start,
# [3] fixed_goals, [4] prob_right_direction, [5] HIDDEN_LAYER_SIZE of the agent config.
dqn_experiments = []
for i, config in enumerate(exp2_dqn):

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = DeepQAgentConfig(HIDDEN_LAYER_SIZE=config[5])
    agent = DeepQAgent(
        env.state_space,
        env.action_space.n,
        seed=1,
        config=agent_config,
    )
    # NOTE(review): every slot references the same agent object, so all grid
    # agents share one learner instance - confirm this sharing is intended.
    agents = [agent for _ in range(env.num_agent)]

    # --- Experiment --- #
    new_experiment = Experiment(
        name="Batch 2 - DQN - Exp_{}".format(i),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.,
        save_states_every=0,
        experiment_num=i,
    )
    dqn_experiments.append(new_experiment)

# Sanity check: number of experiments queued.
print(len(dqn_experiments))

In [None]:
# Run every experiment currently held in dqn_experiments, in list order
# (the Batch 2 DQN list when the notebook is executed top to bottom).
# run() is unpacked into (score_history, state_history); both names are rebound
# on each iteration, so only the last experiment's values remain after the loop.
for experiment in dqn_experiments:
    score_history, state_history = experiment.run()

## Experiment Batch 2 - REINFORCE

In [None]:
# Build (but do not run) one Experiment per entry of exp2_rei.
# Rebinding rei_experiments discards the list built for the previous batch.
# Each entry is read positionally: [0] num_agent, [1] grid_size, [2] fixed_start,
# [3] fixed_goals, [4] prob_right_direction, [5] HIDDEN_LAYER_SIZE of the agent config.
rei_experiments = []
# Must iterate the Batch 2 list: looping over exp1_rei here would re-run the
# Batch 1 configurations under "Batch 2 - REI" names.
for i, config in enumerate(exp2_rei):

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = REINFORCEAgentConfig(HIDDEN_LAYER_SIZE=config[5])
    agent = REINFORCEAgent(
        env.state_space,
        env.action_space.n,
        seed=1,
        config=agent_config,
    )
    # NOTE(review): every slot references the same agent object, so all grid
    # agents share one learner instance - confirm this sharing is intended.
    agents = [agent for _ in range(env.num_agent)]

    # --- Experiment --- #
    new_experiment = Experiment(
        name="Batch 2 - REI - Exp_{}".format(i),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.,
        save_states_every=0,
        experiment_num=i,
    )
    rei_experiments.append(new_experiment)

# Sanity check: number of experiments queued.
print(len(rei_experiments))

In [None]:
# Run every experiment currently held in rei_experiments, in list order
# (the Batch 2 REINFORCE list when the notebook is executed top to bottom).
# run() is unpacked into (score_history, state_history); both names are rebound
# on each iteration, so only the last experiment's values remain after the loop.
for experiment in rei_experiments:
    score_history, state_history = experiment.run()
    # Persisting results is currently disabled; uncomment the next line to re-enable it.
    #experiment.save(score_history, state_history, display = False)

# Experiment Batch 3

In [None]:
from experiments.experiment_list import exp3_dqn, exp3_rei

## Experiment Batch 3 - DQN

In [None]:
# Build (but do not run) one Experiment per entry of exp3_dqn.
# Rebinding dqn_experiments discards the list built for the previous batch.
# Each entry is read positionally: [0] num_agent, [1] grid_size, [2] fixed_start,
# [3] fixed_goals, [4] prob_right_direction, [5] HIDDEN_LAYER_SIZE of the agent config.
dqn_experiments = []
for i, config in enumerate(exp3_dqn):

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = DeepQAgentConfig(HIDDEN_LAYER_SIZE=config[5])
    agent = DeepQAgent(
        env.state_space,
        env.action_space.n,
        seed=1,
        config=agent_config,
    )
    # NOTE(review): every slot references the same agent object, so all grid
    # agents share one learner instance - confirm this sharing is intended.
    agents = [agent for _ in range(env.num_agent)]

    # --- Experiment --- #
    new_experiment = Experiment(
        name="Batch 3 - DQN - Exp_{}".format(i),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.,
        save_states_every=0,
        experiment_num=i,
    )
    dqn_experiments.append(new_experiment)

# Sanity check: number of experiments queued.
print(len(dqn_experiments))

In [None]:
# Run every experiment currently held in dqn_experiments, in list order
# (the Batch 3 DQN list when the notebook is executed top to bottom).
# run() is unpacked into (score_history, state_history); both names are rebound
# on each iteration, so only the last experiment's values remain after the loop.
for experiment in dqn_experiments:
    score_history, state_history = experiment.run()
    # Persisting results is currently disabled; uncomment the next line to re-enable it.
    #experiment.save(score_history, state_history, display = False)

## Experiment Batch 3 - REINFORCE

In [None]:
# Build (but do not run) one Experiment per entry of exp3_rei.
# Rebinding rei_experiments discards the list built for the previous batch.
# Each entry is read positionally: [0] num_agent, [1] grid_size, [2] fixed_start,
# [3] fixed_goals, [4] prob_right_direction, [5] HIDDEN_LAYER_SIZE of the agent config.
rei_experiments = []
for i, config in enumerate(exp3_rei):

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = REINFORCEAgentConfig(HIDDEN_LAYER_SIZE=config[5])
    agent = REINFORCEAgent(
        env.state_space,
        env.action_space.n,
        seed=1,
        config=agent_config,
    )
    # NOTE(review): every slot references the same agent object, so all grid
    # agents share one learner instance - confirm this sharing is intended.
    agents = [agent for _ in range(env.num_agent)]

    # --- Experiment --- #
    new_experiment = Experiment(
        name="Batch 3 - REI - Exp_{}".format(i),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.,
        save_states_every=0,
        experiment_num=i,
    )
    rei_experiments.append(new_experiment)

# Sanity check: number of experiments queued.
print(len(rei_experiments))

In [None]:
# Run every experiment currently held in rei_experiments, in list order
# (the Batch 3 REINFORCE list when the notebook is executed top to bottom).
# run() is unpacked into (score_history, state_history); both names are rebound
# on each iteration, so only the last experiment's values remain after the loop.
for experiment in rei_experiments:
    score_history, state_history = experiment.run()
    # Persisting results is currently disabled; uncomment the next line to re-enable it.
    #experiment.save(score_history, state_history, display = False)

# Experiment Batch 4 - To be decided later!

In [None]:
from experiments.experiment_list import exp4_dqn, exp4_rei