# DDPG hyperparameter search with the `Power` reward (p = 0.05)

In [1]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
from scipy.integrate import solve_ivp

import gym_gyroscope_env
import spinup
import torch
from functools import partial

from custom_functions.custom_functions import env_fn 
from custom_functions.custom_functions import create_env
from custom_functions.custom_functions import load_agent
from custom_functions.custom_functions import test_agent
from custom_functions.custom_functions import plot_test
from custom_functions.custom_functions import evaluate_control

## Baseline

In [None]:
# Environment factory: gyroscope env with the 'Power' reward (p = 0.05)
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Baseline run: the reference settings that the other cells vary one at a time.
# Results are logged under 'iter0_base'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_base', 'exp_name': 'iter0_base'},
)

## Reward tuning

In [None]:
# Environment factory: gyroscope env with the 'Power' reward (p = 0.05).
# Reward variation vs. the baseline cell: qx2 and qx4 are 0.1 instead of 0.
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0.1, qx3=1, qx4=0.1, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Train with the baseline DDPG hyperparameters; logged under 'iter0_reward01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_reward01', 'exp_name': 'iter0_reward01'},
)

In [None]:
# Environment factory: gyroscope env with the 'Power' reward (p = 0.05).
# Reward variation vs. the baseline cell: pu1 and pu2 are 0.1 instead of 0.
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0.1, pu2=0.1, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Train with the baseline DDPG hyperparameters; logged under 'iter0_reward02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_reward02', 'exp_name': 'iter0_reward02'},
)

In [None]:
# Environment factory: gyroscope env with the 'Power' reward (p = 0.05).
# Reward variation vs. the baseline cell: qx2, qx4, pu1 and pu2 are all 0.1
# instead of 0.
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0.1, qx3=1, qx4=0.1, pu1=0.1, pu2=0.1, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Train with the baseline DDPG hyperparameters; logged under 'iter0_reward03'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_reward03', 'exp_name': 'iter0_reward03'},
)

## Replay size

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Replay-size variation: replay_size = 500000 (baseline cell uses 1000000).
# Logged under 'iter0_rs01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=500_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_rs01', 'exp_name': 'iter0_rs01'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Replay-size variation: replay_size = 2000000 (baseline cell uses 1000000).
# Logged under 'iter0_rs02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=2_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_rs02', 'exp_name': 'iter0_rs02'},
)

## Discount factor gamma

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Discount-factor variation: gamma = 0.925 (baseline cell uses 0.95).
# Logged under 'iter0_gamma01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.925,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_gamma01', 'exp_name': 'iter0_gamma01'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Discount-factor variation: gamma = 0.975 (baseline cell uses 0.95).
# Logged under 'iter0_gamma02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.975,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_gamma02', 'exp_name': 'iter0_gamma02'},
)

## Polyak

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Polyak variation: polyak = 0.9 (baseline cell uses 0.995).
# Logged under 'iter0_polyak01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.9,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_polyak01', 'exp_name': 'iter0_polyak01'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Polyak variation: polyak = 0.95 (baseline cell uses 0.995).
# Logged under 'iter0_polyak02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.95,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_polyak02', 'exp_name': 'iter0_polyak02'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Polyak variation: polyak = 0.999 (baseline cell uses 0.995).
# Logged under 'iter0_polyak03'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_polyak03', 'exp_name': 'iter0_polyak03'},
)

## Learning rate 

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Learning-rate variation: pi_lr = q_lr = 0.0012 (baseline cell uses 0.0025).
# Logged under 'iter0_lr01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0012,
    q_lr=0.0012,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_lr01', 'exp_name': 'iter0_lr01'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Learning-rate variation: pi_lr = q_lr = 0.005 (baseline cell uses 0.0025).
# Logged under 'iter0_lr02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.005,
    q_lr=0.005,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_lr02', 'exp_name': 'iter0_lr02'},
)

## Batch size

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Batch-size variation: batch_size = 50 (baseline cell uses 100).
# Logged under 'iter0_bs01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=50,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_bs01', 'exp_name': 'iter0_bs01'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Batch-size variation: batch_size = 200 (baseline cell uses 100).
# Logged under 'iter0_bs02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=200,
    start_steps=10_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_bs02', 'exp_name': 'iter0_bs02'},
)

## Start steps

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Start-steps variation: start_steps = 5000 (baseline cell uses 10000).
# Logged under 'iter0_ss01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=5_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_ss01', 'exp_name': 'iter0_ss01'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Start-steps variation: start_steps = 20000 (baseline cell uses 10000).
# Logged under 'iter0_ss02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20_000,
    act_noise=0.1,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_ss02', 'exp_name': 'iter0_ss02'},
)

## Action noise

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Action-noise variation: act_noise = 0.05 (baseline cell uses 0.1).
# Logged under 'iter0_noise01'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.05,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_noise01', 'exp_name': 'iter0_noise01'},
)

In [None]:
# Environment factory: same env and reward settings as the baseline cell
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.02, ep_len=250, seed=2)
reward_func = 'Power'
reward_args = dict(qx1=1, qx2=0, qx3=1, qx4=0, pu1=0, pu2=0, p=0.05)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Action-noise variation: act_noise = 0.2 (baseline cell uses 0.1).
# Logged under 'iter0_noise02'.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=3750,
    epochs=1000,
    replay_size=1_000_000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=10_000,
    act_noise=0.2,
    max_ep_len=simu_args['ep_len'],  # 250, reused from simu_args
    logger_kwargs={'output_dir': 'iter0_noise02', 'exp_name': 'iter0_noise02'},
)