# Soft Actor-Critic (SAC)

# Model training with quadratic, exponential, and other reward functions on GyroscopeEnv-v1

In [1]:
# trained in reward_training.py

In [2]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
from scipy.integrate import solve_ivp

import gym_gyroscope_env
import spinup
import torch
from functools import partial

from custom_functions.custom_functions import env_fn 
from custom_functions.custom_functions import create_env
from custom_functions.custom_functions import load_agent
from custom_functions.custom_functions import test_agent
from custom_functions.custom_functions import plot_test
from custom_functions.custom_functions import evaluate_control

In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor rele

#### PE reward

#### Quadratic reward

In [3]:
# --- Environment factory -----------------------------------------------------
# Bind the environment id, simulation settings and reward configuration into a
# zero-argument callable, which is what spinup.sac_pytorch receives below.
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.05, ep_len=100, seed=2)
reward_func = 'Quadratic'
# Coefficients handed to the 'Quadratic' reward.
# NOTE(review): presumably qx* weight the state terms and pu* the control
# terms -- confirm against the reward implementation in the environment.
reward_args = dict(qx1=1, qx2=0.2, qx3=1, qx4=0.2, pu1=0.2, pu2=0.2)
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# --- Baseline 0 training -----------------------------------------------------
# Results are logged to the 'sac_q_opt_001' directory.
spinup.sac_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=1500,
    epochs=500,
    replay_size=1000000,
    gamma=0.995,
    polyak=0.995,
    lr=0.00125,
    # Entropy regularization coefficient (equivalent to the inverse of the
    # reward scale in the original SAC paper).
    alpha=0.1,
    batch_size=100,
    start_steps=10000,
    update_after=1000,
    update_every=50,
    num_test_episodes=10,
    # Same value as simu_args['ep_len'] above.
    max_ep_len=100,
    logger_kwargs={'output_dir': 'sac_q_opt_001', 'exp_name': 'sac_q_opt_001'},
)

[32;1mLogging data to sac_q_opt_001/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "actor_critic":	"MLPActorCritic",
    "alpha":	0.1,
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7f143751d8c8>, 'GyroscopeEnv-v1', simu_args={'dt': 0.05, 'ep_len': 100, 'seed': 2}, reward_func='Quadratic', reward_args={'qx1': 1, 'qx2': 0.2, 'qx3': 1, 'qx4': 0.2, 'pu1': 0.2, 'pu2': 0.2})",
    "epochs":	500,
    "exp_name":	"sac_q_opt_001",
    "gamma":	0.995,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f1437530940>":	{
            "epoch_dict":	{},
            "exp_name":	"sac_q_opt_001",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"sac_q_opt_001",
            "output_file":	{
                "<_io.TextIOWrapper name='sac_q_opt_001/progress.txt