# DDPG hyperparameter search with the 'Sparse with exp' reward (e = 10)

In [2]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
from scipy.integrate import solve_ivp

import gym_gyroscope_env
import spinup
import torch
from functools import partial

from custom_functions.custom_functions import env_fn 
from custom_functions.custom_functions import create_env
from custom_functions.custom_functions import load_agent
from custom_functions.custom_functions import test_agent
from custom_functions.custom_functions import plot_test
from custom_functions.custom_functions import evaluate_control

## Baseline

In [None]:
# Baseline run (logged as 'iter1_base').
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Baseline training call; max_ep_len equals simu_args['ep_len'] (both 100).
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_base', exp_name='iter1_base'),
    her=True,
)

## Reward tuning

In [None]:
# Reward tuning, run 01 (logged as 'iter1_reward01'): pu1 = pu2 = 0.05; the baseline cell uses 0 for both.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0.05, 'pu2': 0.05,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call with the baseline agent settings; max_ep_len equals simu_args['ep_len'] (both 100).
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_reward01', exp_name='iter1_reward01'),
    her=True,
)

In [None]:
# Reward tuning, run 02 (logged as 'iter1_reward02'): qx2 = qx4 = 0.05; the baseline cell uses 0.1 for both.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.05, 'qx3': 1, 'qx4': 0.05,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call with the baseline agent settings; max_ep_len equals simu_args['ep_len'] (both 100).
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_reward02', exp_name='iter1_reward02'),
    her=True,
)

In [None]:
# Reward tuning, run 03 (logged as 'iter1_reward03'): qx2 = qx4 = 0.2; the baseline cell uses 0.1 for both.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.2, 'qx3': 1, 'qx4': 0.2,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call with the baseline agent settings; max_ep_len equals simu_args['ep_len'] (both 100).
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_reward03', exp_name='iter1_reward03'),
    her=True,
)

## Replay size

In [None]:
# Replay size, run 01 (logged as 'iter1_rs01'): replay_size = 4000000; the baseline cell uses 2000000.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only replay_size differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=4000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_rs01', exp_name='iter1_rs01'),
    her=True,
)

In [None]:
# Replay size, run 02 (logged as 'iter1_rs02'): replay_size = 8000000; the baseline cell uses 2000000.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only replay_size differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=8000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_rs02', exp_name='iter1_rs02'),
    her=True,
)

## Discount factor gamma

In [None]:
# Discount factor, run 01 (logged as 'iter1_gamma01'): gamma = 0.925; the baseline cell uses 0.95.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only gamma differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.925,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_gamma01', exp_name='iter1_gamma01'),
    her=True,
)

In [None]:
# Discount factor, run 02 (logged as 'iter1_gamma02'): gamma = 0.975; the baseline cell uses 0.95.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only gamma differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.975,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_gamma02', exp_name='iter1_gamma02'),
    her=True,
)

## Polyak

In [None]:
# Polyak, run 01 (logged as 'iter1_polyak01'): polyak = 0.998; the baseline cell uses 0.999.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only polyak differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.998,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_polyak01', exp_name='iter1_polyak01'),
    her=True,
)

In [None]:
# Polyak, run 02 (logged as 'iter1_polyak02'): polyak = 0.9995; the baseline cell uses 0.999.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only polyak differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.9995,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_polyak02', exp_name='iter1_polyak02'),
    her=True,
)

## Learning rate 

In [None]:
# Learning rate, run 01 (logged as 'iter1_lr01'): pi_lr = q_lr = 0.002; the baseline cell uses 0.0025 for both.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only pi_lr and q_lr differ from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.002,
    q_lr=0.002,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_lr01', exp_name='iter1_lr01'),
    her=True,
)

In [None]:
# Learning rate, run 02 (logged as 'iter1_lr02'): pi_lr = q_lr = 0.003; the baseline cell uses 0.0025 for both.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only pi_lr and q_lr differ from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.003,
    q_lr=0.003,
    batch_size=100,
    start_steps=20000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_lr02', exp_name='iter1_lr02'),
    her=True,
)

## Batch size

## Start steps

In [None]:
# Start steps, run 01 (logged as 'iter1_ss01'): start_steps = 15000; the baseline cell uses 20000.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only start_steps differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=15000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_ss01', exp_name='iter1_ss01'),
    her=True,
)

In [None]:
# Start steps, run 02 (logged as 'iter1_ss02'): start_steps = 25000; the baseline cell uses 20000.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only start_steps differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=25000,
    act_noise=0.05,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_ss02', exp_name='iter1_ss02'),
    her=True,
)

## Act noise

In [None]:
# Action noise, run 01 (logged as 'iter1_noise01'): act_noise = 0.025; the baseline cell uses 0.05.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only act_noise differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.025,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_noise01', exp_name='iter1_noise01'),
    her=True,
)

In [None]:
# Action noise, run 02 (logged as 'iter1_noise02'): act_noise = 0.075; the baseline cell uses 0.05.
# Parameters chosen based on the result of the 1st iteration.

# Environment factory: bind the environment id plus the simulation and reward settings to env_fn.
env_name = 'GyroscopeEnv-v1'
reward_func = 'Sparse with exp'
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}
reward_args = {
    'qx1': 1, 'qx2': 0.1, 'qx3': 1, 'qx4': 0.1,
    'pu1': 0, 'pu2': 0,
    'e': 10, 'bound': 0.001, 'reward': 10,
}
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# Training call; only act_noise differs from the baseline cell.
spinup.ddpg_mher_pytorch(
    env_fn_,
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=2000000,
    gamma=0.95,
    polyak=0.999,
    pi_lr=0.0025,
    q_lr=0.0025,
    batch_size=100,
    start_steps=20000,
    act_noise=0.075,
    max_ep_len=100,
    logger_kwargs=dict(output_dir='iter1_noise02', exp_name='iter1_noise02'),
    her=True,
)