# Jupyter notebook sample

In [2]:
import os

import imageio
import gymnasium as gym
import numpy as np
import torch
from agilerl.algorithms.td3 import TD3
from agilerl.components.replay_buffer import ReplayBuffer
from agilerl.hpo.mutation import Mutations
from agilerl.hpo.tournament import TournamentSelection
from agilerl.training.train_off_policy import train_off_policy
#from agilerl.utils.utils import create_population, make_vect_envs
from tqdm import trange

In [3]:
# Initial hyperparameters
INIT_HP = {
    "ALGO": "TD3",
    "POP_SIZE": 4,  # Population size
    "BATCH_SIZE": 128,  # Batch size
    "LR_ACTOR": 0.0001,  # Actor learning rate
    "LR_CRITIC": 0.001,  # Critic learning rate
    "O_U_NOISE": True,  # Ornstein-Uhlenbeck action noise
    "EXPL_NOISE": 0.1,  # Action noise scale
    "MEAN_NOISE": 0.0,  # Mean action noise
    "THETA": 0.15,  # Rate of mean reversion in OU noise
    "DT": 0.01,  # Timestep for OU noise
    "GAMMA": 0.99,  # Discount factor
    "MEMORY_SIZE": 100_000,  # Max memory buffer size
    "POLICY_FREQ": 2,  # Policy network update frequency
    "LEARN_STEP": 1,  # Learning frequency
    "TAU": 0.005,  # For soft update of target parametersy
    # Swap image channels dimension from last to first [H, W, C] -> [C, H, W]
    "CHANNELS_LAST": False,  # Use with RGB states
    "EPISODES": 1000,  # Number of episodes to train for
    "EVO_EPOCHS": 20,  # Evolution frequency, i.e. evolve after every 20 episodes
    "TARGET_SCORE": 200.0,  # Target score that will beat the environment
    "EVO_LOOP": 3,  # Number of evaluation episodes
    "MAX_STEPS": 500,  # Maximum number of steps an agent takes in an environment
    "LEARNING_DELAY": 1000,  # Steps before starting learning
    "EVO_STEPS": 10000,  # Evolution frequency
    "EVAL_STEPS": None,  # Number of evaluation steps per episode
    "EVAL_LOOP": 1,  # Number of evaluation episodes
    "TOURN_SIZE": 2,  # Tournament size
    "ELITISM": True,  # Elitism in tournament selection
}

# Mutation parameters
MUT_P = {
    # Mutation probabilities
    "NO_MUT": 0.4,  # No mutation
    "ARCH_MUT": 0.2,  # Architecture mutation
    "NEW_LAYER": 0.2,  # New layer mutation
    "PARAMS_MUT": 0.2,  # Network parameters mutation
    "ACT_MUT": 0.2,  # Activation layer mutation
    "RL_HP_MUT": 0.2,  # Learning HP mutation
    # Learning HPs to choose from
    "RL_HP_SELECTION": ["lr", "batch_size", "learn_step"],
    "MUT_SD": 0.1,  # Mutation strength
    "RAND_SEED": 42,  # Random seed
    # Define max and min limits for mutating RL hyperparams
    "MIN_LR": 0.0001,
    "MAX_LR": 0.01,
    "MIN_BATCH_SIZE": 8,
    "MAX_BATCH_SIZE": 1024,
    "MIN_LEARN_STEP": 1,
    "MAX_LEARN_STEP": 16,
}

In [6]:
from agilerl.utils.utils import make_vect_envs, create_population
import torch

ImportError: cannot import name 'make_vect_envs' from 'agilerl.utils.utils' (/opt/anaconda3/envs/lunarLanding/lib/python3.8/site-packages/agilerl/utils/utils.py)

In [4]:


num_envs=8
env = make_vect_envs("LunarLanderContinuous-v2", num_envs=num_envs)  # Create environment
try:
    state_dim = env.single_observation_space.n, # Discrete observation space
    one_hot = True  # Requires one-hot encoding
except Exception:
    state_dim = env.single_observation_space.shape  # Continuous observation space
    one_hot = False  # Does not require one-hot encoding
try:
    action_dim = env.single_action_space.n  # Discrete action space
except Exception:
    action_dim = env.single_action_space.shape[0]  # Continuous action space

INIT_HP["MAX_ACTION"] = float(env.single_action_space.high[0])
INIT_HP["MIN_ACTION"] = float(env.single_action_space.low[0])

if INIT_HP["CHANNELS_LAST"]:
    # Adjust dimensions for PyTorch API (C, H, W), for envs with RGB image states
    state_dim = (state_dim[2], state_dim[0], state_dim[1])


NameError: name 'make_vect_envs' is not defined

In [None]:
# Set-up the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Define the network configuration of a simple mlp with two hidden layers, each with 64 nodes
net_config = {"arch": "mlp", "hidden_size": [64, 64]}

# Define a population
pop = create_population(
    algo="TD3",  # Algorithm
    state_dim=state_dim,  # State dimension
    action_dim=action_dim,  # Action dimension
    one_hot=one_hot,  # One-hot encoding
    net_config=net_config,  # Network configuration
    INIT_HP=INIT_HP,  # Initial hyperparameters
    population_size=INIT_HP["POP_SIZE"],  # Population size
    num_envs=num_envs,
    device=device,
)