In [1]:
# --- Load Agents --- #
from agents.agent_reinforce.agent import REINFORCEAgent
from agents.agent_deepqn.agent import DeepQAgent
from agents.agent_ddpg.agent import DDPG_Agent

# --- Load Environments --- #
from environment.grid import GridEnv

# --- Load Necessary --- #
from collections import deque
import numpy as np
from utilities.helper import flatten


# --- Load Training --- #
from experiments.experiment import Experiment

%load_ext autoreload
%autoreload 2


  return torch._C._cuda_getDeviceCount() > 0


# Experiments with config files

In [6]:
def choose_random(num_agent, grid_size, randomized, distribution):
    if not randomized: return [[] for _ in range(num_agent)]
    else: 
        new_distribution = distribution.copy()
        np.random.shuffle(new_distribution[:grid_size])
        return new_distribution[:num_agent]

In [7]:
from experiments.experiment_list import exp_config_ddpg, exp_config_ppo, exp_config_dqn, possible_starts, possible_goals
from agents.agent_ddpg.config import DDPG_AgentConfig


In [8]:
ddpg_experiments = []
for i, config in enumerate(exp_config_ddpg):
    """
    order of config: num_agent, grid_size, agents_start, goals_start, prob_right_direction, actor_critic 
    init environment: num_agent = 2, grid_size = 8, prob_right_direction = 1, agents_start = [], goals_start=[], render_board = False
    init of ddpg: state_size, action_size, seed=1, config=DDPG_AgentConfig()
    """
    agents_start = choose_random(config[0], config[1], config[2], possible_starts)
    goals_start = choose_random(config[0], config[1], config[3], possible_goals)

    
    env = GridEnv(num_agent = config[0], grid_size = config[1], agents_start = agents_start, goals_start=goals_start, prob_right_direction = config[4], render_board = False)

    agent_config = DDPG_AgentConfig(ACTOR_H=config[5], CRITIC_H=config[5])
    agents = [DDPG_Agent(env.state_space, env.action_space.n, seed=1, config=agent_config) for _ in range(env.num_agent)]

    ddpg_experiments.append(Experiment(name="DDPG Exp {}".format(i), environment= env, agents=agents, max_t=1, num_episodes=1, goal = 0.))

print(len(ddpg_experiments))

(2, 3, True, False, 1, [16])
[(1, 1), (1, 3)]
[[], []]
[(1, 1), (1, 3)]
[[0.3333333333333333, 0.3333333333333333], [0.3333333333333333, 1.0]]
1


In [None]:
for experiment in ddpg_experiments:
    score_history, state_history = experiment.run()
    #experiment.save(score_history, state_history, display = False)