# TD3

# Model training with quadratic, exponential, and other reward functions on GyroscopeEnv-v1 & GyroscopeEnvNew4Paper-v0

In [7]:
# trained in reward_training.py

In [8]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
from scipy.integrate import solve_ivp

import gym_gyroscope_env
import spinup
import torch
from functools import partial

from custom_functions.custom_functions import env_fn 
from custom_functions.custom_functions import create_env
from custom_functions.custom_functions import load_agent
from custom_functions.custom_functions import test_agent
from custom_functions.custom_functions import plot_test
from custom_functions.custom_functions import evaluate_control

#### PE reward

In [9]:
# ---- Environment factory ----
# Environment id handed to env_fn (the other candidate is 'GyroscopeEnv-v1').
env_name = 'GyroscopeEnvNew4Paper-v0'

# Simulation settings, forwarded to env_fn as `simu_args`.
simu_args = {'dt': 0.05, 'ep_len': 100, 'seed': 2}

# Reward selection plus its parameters, forwarded to env_fn unchanged.
# NOTE(review): the meaning of each weight is defined by the environment's
# 'PE' reward implementation, which is not visible in this notebook.
reward_func = 'PE'
reward_args = {
    'qx1': 1, 'qx2': 0.2,
    'qx3': 1, 'qx4': 0.2,
    'pu1': 0.1, 'pu2': 0.1,
    'p': 0.1,
    'e': 40,
}

# Zero-argument environment constructor, as expected by spinup.
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# ---- Baseline 0 training ----
# All TD3 hyperparameters are collected in one mapping and unpacked into the
# call, so the run configuration can be read in a single place.
td3_kwargs = dict(
    # Actor-critic network
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    # Training schedule
    steps_per_epoch=1500,
    epochs=100000,
    start_steps=10000,
    update_after=1000,
    update_every=50,
    policy_delay=2,
    # Replay buffer and optimisation
    replay_size=1000000,
    batch_size=100,
    gamma=0.95,
    polyak=0.995,
    pi_lr=1e-3,
    q_lr=1e-3,
    # Exploration / target-smoothing noise
    act_noise=0.1,
    target_noise=0.2,
    noise_clip=0.5,
    # Evaluation
    num_test_episodes=10,
    # Must not be too large: training does not converge above 100.
    max_ep_len=100,
    # Logging destination
    logger_kwargs=dict(
        output_dir='td3_pe_opt_ing_100000',
        exp_name='td3_pe_opt_ing_100000',
    ),
)
spinup.td3_pytorch(env_fn_, **td3_kwargs)

Logging data to td3_pe_opt_ing_100000/progress.txt
Saving config:

{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7f810e46a8c8>, 'GyroscopeEnvNew4Paper-v0', simu_args={'dt': 0.05, 'ep_len': 100, 'seed': 2}, reward_func='PE', reward_args={'qx1': 1, 'qx2': 0.2, 'qx3': 1, 'qx4': 0.2, 'pu1': 0.1, 'pu2': 0.1, 'p': 0.1, 'e': 40})",
    "epochs":	100000,
    "exp_name":	"td3_pe_opt_ing_100000",
    "gamma":	0.95,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f80f89a2e48>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_pe_opt_ing_100000",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"td3_pe_opt_ing_100000",
            "output_file":	{
         

KeyboardInterrupt: 