In [1]:
# --- Load Agents --- #
from agents.agent_reinforce.agent import REINFORCEAgent
from agents.agent_deepqn.agent import DeepQAgent
from agents.agent_ddpg.agent import DDPG_Agent
from agents.agent_ppo.agent import PPOAgent

# --- Load Environments --- #
from environment.grid import GridEnv

# --- Load Necessary --- #
from collections import deque
import numpy as np
from utilities.helper import flatten
from utilities.profiler import profile

# --- Load Training --- #
from experiments.experiment import Experiment

%load_ext autoreload
%autoreload 2


  return torch._C._cuda_getDeviceCount() > 0


# Experiments with config files

In [None]:
def choose_random(num_agent, grid_size, randomized, distribution):
    """Pick `num_agent` entries at random from the head of `distribution`.

    Args:
        num_agent: Number of entries to return.
        grid_size: Only the first `grid_size` entries of `distribution`
            are candidates.
        randomized: When falsy, nothing is sampled.
        distribution: Candidate sequence; must support `.copy()` and
            slicing. The shuffle is applied to a copy, not to this object.

    Returns:
        `[]` when `randomized` is falsy; otherwise the first `num_agent`
        entries of a shuffled copy of `distribution[:grid_size]`.
    """
    if randomized:
        candidates = distribution.copy()[:grid_size]
        # Shuffles in place and draws from NumPy's global RNG state.
        np.random.shuffle(candidates)
        return candidates[:num_agent]
    return []

In [None]:
from experiments.experiment_list import exp_config_ddpg, exp_config_ppo, exp_config_dqn, possible_starts, possible_goals
from agents.agent_ddpg.config import DDPG_AgentConfig
from agents.agent_deepqn.agent import DeepQAgentConfig


In [None]:
# Build one DDPG experiment per entry of `exp_config_ddpg`.
#
# Each `config` is read positionally:
#   config[0] -> GridEnv(num_agent=...)
#   config[1] -> GridEnv(grid_size=...)
#   config[2] -> GridEnv(fixed_start=...)
#   config[3] -> GridEnv(fixed_goals=...)
#   config[4] -> GridEnv(prob_right_direction=...)
#   config[5] -> DDPG_AgentConfig(ACTOR_H=..., CRITIC_H=...) (same value for both)
ddpg_experiments = []
for i, config in enumerate(exp_config_ddpg):
    # Alternative start/goal selection, currently disabled:
    # agents_start = choose_random(config[0], config[1], config[2], possible_starts)
    # goals_start = choose_random(config[0], config[1], config[3], possible_goals)

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = DDPG_AgentConfig(ACTOR_H=config[5], CRITIC_H=config[5])
    agents = [
        DDPG_Agent(env.state_space.shape[0], env.action_space.n, seed=1, config=agent_config)
        for _ in range(env.num_agent)
    ]

    # --- Experiment --- #
    num_episodes = 30
    max_t = 75
    # `brain_name` is intentionally not passed: no such variable is defined in
    # this notebook (it raised NameError on a fresh kernel), and the other
    # Experiment(...) calls in this notebook are constructed without it.
    new_experiment = Experiment(
        name="DDPG Exp_{}".format(str(i)),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.5,
        save_states_every=0,
        experiment_num=i,
    )
    ddpg_experiments.append(new_experiment)

print(len(ddpg_experiments))

In [None]:
# Run every DDPG experiment in order and save its score/state histories
# (saving with display disabled).
for experiment in ddpg_experiments:
    score_history, state_history = experiment.run()
    experiment.save(
        score_history,
        state_history,
        display=False,
    )

# DQN experiments

In [1]:
# Build one DQN experiment per entry of `exp_config_dqn`.
#
# Each `config` is read positionally:
#   config[0] -> GridEnv(num_agent=...)
#   config[1] -> GridEnv(grid_size=...)
#   config[2] -> GridEnv(fixed_start=...)
#   config[3] -> GridEnv(fixed_goals=...)
#   config[4] -> GridEnv(prob_right_direction=...)
# Every agent is a DeepQAgent built with a default DeepQAgentConfig().
dqn_experiments = []
for i, config in enumerate(exp_config_dqn):

    # --- Env --- #
    env = GridEnv(
        num_agent=config[0],
        grid_size=config[1],
        fixed_start=config[2],
        fixed_goals=config[3],
        prob_right_direction=config[4],
        render_board=False,
    )

    # --- Agents --- #
    agent_config = DeepQAgentConfig()
    agents = [
        DeepQAgent(env.state_space.shape[0], env.action_space.n, seed=1, config=agent_config)
        for _ in range(env.num_agent)
    ]

    # --- Experiment --- #
    num_episodes = 100
    max_t = 200
    # The experiment name encodes the environment settings it was built from;
    # states are saved every tenth of the episode budget.
    new_experiment = Experiment(
        name="DQN Exp_{}__Agents_{}__Grid_{}__Agent fixed_{}__Goal fixed_{}__Stoch_{}".format(
            i,
            str(config[0]),
            str(config[1]),
            str(config[2]),
            str(config[3]),
            str(config[4]),
        ),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=num_episodes,
        goal=0.,
        save_states_every=int(num_episodes / 10),
    )
    dqn_experiments.append(new_experiment)

print(len(dqn_experiments))

NameError: name 'exp_config_dqn' is not defined

In [2]:
# Run every DQN experiment in order and save its score/state histories
# (saving with display disabled).
for experiment in dqn_experiments:
    score_history, state_history = experiment.run()
    experiment.save(
        score_history,
        state_history,
        display=False,
    )

# Running a single experiment

In [13]:
# Set up a single experiment with PPO agents on a 5x5 grid with one agent.
# NOTE(review): max_t = 2 is a very short horizon — presumably a debugging
# value; confirm before using this cell for real results.
max_t = 2

env = GridEnv(
    num_agent=1,
    grid_size=5,
    fixed_start=True,
    fixed_goals=True,
    prob_right_direction=1,
    render_board=False,
)

# One PPO agent per environment agent; each gets the same `max_t` as the experiment.
agents = [
    PPOAgent(env.state_space, env.action_space.n, seed=1, mode="training", max_t=max_t)
    for _ in range(env.num_agent)
]

# The agents here are PPOAgent instances, so the experiment is labelled "PPO"
# (it was previously labelled "DDPG", which mislabelled the saved results).
experiments = [
    Experiment(
        name="PPO Exp {}".format(1),
        environment=env,
        agents=agents,
        max_t=max_t,
        num_episodes=10,
        goal=0.,
        save_states_every=10,
        experiment_num=1,
    )
]

In [14]:
# Run each experiment in `experiments` and save its score/state histories
# using the default save options.
for experiment in experiments:
    score_history, state_history = experiment.run()
    experiment.save(
        score_history,
        state_history,
    )

this is true
rewards:  torch.Size([2, 1])
(8, 2)


new_probs:  torch.Size([2, 4])
old_probs:  torch.Size([2, 4])
ratio:  torch.Size([2, 4])
g_clamped size:  torch.Size([2, 4])
R_norm_future:  torch.Size([8, 2])
g_PPO:  torch.Size([2, 4])
  actions = torch.tensor(actions, dtype=torch.int8, device=device)
  rewards = torch.tensor(rewards, dtype=torch.int8, device=device)
  old_probs = torch.tensor(old_probs, dtype=torch.int8, device=device)


RuntimeError: The size of tensor a (8) must match the size of tensor b (4) at non-singleton dimension 0