# Twin Delayed DDPG (TD3)

# Model training with quadratic, exponential, and other reward functions on GyroscopeEnv-v1

In [1]:
# trained in reward_training.py

In [2]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
from scipy.integrate import solve_ivp

import gym_gyroscope_env
import spinup
import torch
from functools import partial

from custom_functions.custom_functions import env_fn 
from custom_functions.custom_functions import create_env
from custom_functions.custom_functions import load_agent
from custom_functions.custom_functions import test_agent
from custom_functions.custom_functions import plot_test
from custom_functions.custom_functions import evaluate_control

In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In /home/xiongyan/anaconda3/envs/spinningup/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor rele

#### PE reward

In [3]:
# Environment factory: GyroscopeEnv-v1 configured with the 'PE' reward.
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.05, ep_len=100, seed=2)
reward_func = 'PE'
reward_args = dict(
    qx1=1,
    qx2=0.2,
    qx3=1,
    qx4=0.2,
    pu1=0.1,
    pu2=0.1,
    p=0.1,
    e=40,
)
# Bind every environment setting now so the trainer receives a
# zero-argument callable.
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# TD3 hyperparameters, gathered in one place and forwarded as keyword
# arguments to the trainer below.
td3_kwargs = dict(
    ac_kwargs=dict(hidden_sizes=[128, 32], activation=torch.nn.ReLU),
    seed=0,
    steps_per_epoch=1500,
    epochs=2000,
    replay_size=1000000,
    gamma=0.95,
    polyak=0.995,
    pi_lr=1e-3,
    q_lr=1e-3,
    batch_size=100,
    start_steps=10000,
    update_after=1000,
    update_every=50,
    act_noise=0.1,
    target_noise=0.2,
    noise_clip=0.5,
    policy_delay=2,
    num_test_episodes=10,
    # Must not be too large: training does not converge above 100.
    max_ep_len=100,
    logger_kwargs=dict(output_dir='td3_pe_opt_ing', exp_name='td3_pe_opt_ing'),
)

# Baseline 0 training
spinup.td3_pytorch(env_fn_, **td3_kwargs)

[32;1mLogging data to td3_pe_opt_ing/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7f27817ae840>, 'GyroscopeEnv-v1', simu_args={'dt': 0.05, 'ep_len': 100, 'seed': 2}, reward_func='PE', reward_args={'qx1': 1, 'qx2': 0.2, 'qx3': 1, 'qx4': 0.2, 'pu1': 0.1, 'pu2': 0.1, 'p': 0.1, 'e': 40})",
    "epochs":	200,
    "exp_name":	"td3_pe_opt_ing",
    "gamma":	0.95,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f27817bd048>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_pe_opt_ing",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"td3_pe_opt_ing",
            "output_file":	{
                "<_io.TextIOWrapper name='td3_pe_

---------------------------------------
|             Epoch |               8 |
|      AverageEpRet |            -477 |
|          StdEpRet |            9.83 |
|          MaxEpRet |            -456 |
|          MinEpRet |            -489 |
|  AverageTestEpRet |            -484 |
|      StdTestEpRet |            18.2 |
|      MaxTestEpRet |            -443 |
|      MinTestEpRet |            -502 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |         1.2e+04 |
|     AverageQ1Vals |           -70.4 |
|         StdQ1Vals |            1.77 |
|         MaxQ1Vals |           -65.1 |
|         MinQ1Vals |             -77 |
|     AverageQ2Vals |           -70.4 |
|         StdQ2Vals |            1.77 |
|         MaxQ2Vals |           -65.1 |
|         MinQ2Vals |             -77 |
|            LossPi |            70.3 |
|             LossQ |           0.299 |
|              Time |            37.3 |
---------------------------------------


---------------------------------------
|             Epoch |              17 |
|      AverageEpRet |            -450 |
|          StdEpRet |            23.7 |
|          MaxEpRet |            -383 |
|          MinEpRet |            -491 |
|  AverageTestEpRet |            -460 |
|      StdTestEpRet |            27.4 |
|      MaxTestEpRet |            -396 |
|      MinTestEpRet |            -497 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.55e+04 |
|     AverageQ1Vals |           -92.2 |
|         StdQ1Vals |            1.73 |
|         MaxQ1Vals |           -86.1 |
|         MinQ1Vals |           -99.8 |
|     AverageQ2Vals |           -92.2 |
|         StdQ2Vals |            1.73 |
|         MaxQ2Vals |           -86.2 |
|         MinQ2Vals |            -100 |
|            LossPi |            92.1 |
|             LossQ |           0.416 |
|              Time |            93.6 |
---------------------------------------


---------------------------------------
|             Epoch |              26 |
|      AverageEpRet |            -438 |
|          StdEpRet |            25.8 |
|          MaxEpRet |            -394 |
|          MinEpRet |            -480 |
|  AverageTestEpRet |            -415 |
|      StdTestEpRet |            37.4 |
|      MaxTestEpRet |            -353 |
|      MinTestEpRet |            -479 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |         3.9e+04 |
|     AverageQ1Vals |             -94 |
|         StdQ1Vals |            2.94 |
|         MaxQ1Vals |           -85.1 |
|         MinQ1Vals |            -105 |
|     AverageQ2Vals |             -94 |
|         StdQ2Vals |            2.93 |
|         MaxQ2Vals |             -85 |
|         MinQ2Vals |            -106 |
|            LossPi |            93.9 |
|             LossQ |           0.664 |
|              Time |             143 |
---------------------------------------


---------------------------------------
|             Epoch |              35 |
|      AverageEpRet |            -419 |
|          StdEpRet |            37.9 |
|          MaxEpRet |            -330 |
|          MinEpRet |            -477 |
|  AverageTestEpRet |            -408 |
|      StdTestEpRet |            49.1 |
|      MaxTestEpRet |            -328 |
|      MinTestEpRet |            -486 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        5.25e+04 |
|     AverageQ1Vals |           -92.3 |
|         StdQ1Vals |             3.9 |
|         MaxQ1Vals |           -81.3 |
|         MinQ1Vals |            -107 |
|     AverageQ2Vals |           -92.3 |
|         StdQ2Vals |             3.9 |
|         MaxQ2Vals |           -81.3 |
|         MinQ2Vals |            -108 |
|            LossPi |            92.1 |
|             LossQ |           0.897 |
|              Time |             190 |
---------------------------------------


---------------------------------------
|             Epoch |              44 |
|      AverageEpRet |            -422 |
|          StdEpRet |            22.5 |
|          MaxEpRet |            -374 |
|          MinEpRet |            -459 |
|  AverageTestEpRet |            -377 |
|      StdTestEpRet |              47 |
|      MaxTestEpRet |            -304 |
|      MinTestEpRet |            -448 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |         6.6e+04 |
|     AverageQ1Vals |           -90.3 |
|         StdQ1Vals |            4.62 |
|         MaxQ1Vals |           -77.8 |
|         MinQ1Vals |            -108 |
|     AverageQ2Vals |           -90.3 |
|         StdQ2Vals |            4.62 |
|         MaxQ2Vals |           -77.8 |
|         MinQ2Vals |            -108 |
|            LossPi |            90.1 |
|             LossQ |            1.04 |
|              Time |             237 |
---------------------------------------


---------------------------------------
|             Epoch |              53 |
|      AverageEpRet |            -404 |
|          StdEpRet |            39.1 |
|          MaxEpRet |            -321 |
|          MinEpRet |            -460 |
|  AverageTestEpRet |            -350 |
|      StdTestEpRet |            56.7 |
|      MaxTestEpRet |            -227 |
|      MinTestEpRet |            -421 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        7.95e+04 |
|     AverageQ1Vals |           -88.3 |
|         StdQ1Vals |            5.15 |
|         MaxQ1Vals |             -76 |
|         MinQ1Vals |            -108 |
|     AverageQ2Vals |           -88.3 |
|         StdQ2Vals |            5.14 |
|         MaxQ2Vals |           -75.7 |
|         MinQ2Vals |            -108 |
|            LossPi |            88.1 |
|             LossQ |            1.15 |
|              Time |             287 |
---------------------------------------


---------------------------------------
|             Epoch |              62 |
|      AverageEpRet |            -399 |
|          StdEpRet |            29.5 |
|          MaxEpRet |            -328 |
|          MinEpRet |            -442 |
|  AverageTestEpRet |            -363 |
|      StdTestEpRet |            43.2 |
|      MaxTestEpRet |            -273 |
|      MinTestEpRet |            -429 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |         9.3e+04 |
|     AverageQ1Vals |           -86.6 |
|         StdQ1Vals |            5.47 |
|         MaxQ1Vals |           -73.7 |
|         MinQ1Vals |            -110 |
|     AverageQ2Vals |           -86.6 |
|         StdQ2Vals |            5.47 |
|         MaxQ2Vals |           -73.6 |
|         MinQ2Vals |            -110 |
|            LossPi |            86.3 |
|             LossQ |            1.11 |
|              Time |             337 |
---------------------------------------


---------------------------------------
|             Epoch |              71 |
|      AverageEpRet |            -417 |
|          StdEpRet |            33.6 |
|          MaxEpRet |            -358 |
|          MinEpRet |            -467 |
|  AverageTestEpRet |            -393 |
|      StdTestEpRet |            46.8 |
|      MaxTestEpRet |            -272 |
|      MinTestEpRet |            -465 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        1.06e+05 |
|     AverageQ1Vals |             -85 |
|         StdQ1Vals |            5.84 |
|         MaxQ1Vals |           -72.5 |
|         MinQ1Vals |            -110 |
|     AverageQ2Vals |             -85 |
|         StdQ2Vals |            5.84 |
|         MaxQ2Vals |           -72.1 |
|         MinQ2Vals |            -110 |
|            LossPi |            84.7 |
|             LossQ |            1.15 |
|              Time |             389 |
---------------------------------------


---------------------------------------
|             Epoch |              80 |
|      AverageEpRet |            -396 |
|          StdEpRet |            38.4 |
|          MaxEpRet |            -310 |
|          MinEpRet |            -448 |
|  AverageTestEpRet |            -374 |
|      StdTestEpRet |            54.6 |
|      MaxTestEpRet |            -271 |
|      MinTestEpRet |            -475 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |         1.2e+05 |
|     AverageQ1Vals |             -84 |
|         StdQ1Vals |            6.09 |
|         MaxQ1Vals |           -71.7 |
|         MinQ1Vals |            -111 |
|     AverageQ2Vals |             -84 |
|         StdQ2Vals |            6.09 |
|         MaxQ2Vals |           -71.8 |
|         MinQ2Vals |            -111 |
|            LossPi |            83.7 |
|             LossQ |            1.14 |
|              Time |             443 |
---------------------------------------


---------------------------------------
|             Epoch |              89 |
|      AverageEpRet |            -386 |
|          StdEpRet |              41 |
|          MaxEpRet |            -310 |
|          MinEpRet |            -473 |
|  AverageTestEpRet |            -381 |
|      StdTestEpRet |            58.3 |
|      MaxTestEpRet |            -300 |
|      MinTestEpRet |            -482 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        1.33e+05 |
|     AverageQ1Vals |           -83.2 |
|         StdQ1Vals |            6.27 |
|         MaxQ1Vals |             -71 |
|         MinQ1Vals |            -110 |
|     AverageQ2Vals |           -83.2 |
|         StdQ2Vals |            6.27 |
|         MaxQ2Vals |             -71 |
|         MinQ2Vals |            -111 |
|            LossPi |              83 |
|             LossQ |            1.15 |
|              Time |             493 |
---------------------------------------


---------------------------------------
|             Epoch |              98 |
|      AverageEpRet |            -394 |
|          StdEpRet |              41 |
|          MaxEpRet |            -301 |
|          MinEpRet |            -461 |
|  AverageTestEpRet |            -375 |
|      StdTestEpRet |            40.6 |
|      MaxTestEpRet |            -316 |
|      MinTestEpRet |            -446 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        1.47e+05 |
|     AverageQ1Vals |           -82.7 |
|         StdQ1Vals |            6.47 |
|         MaxQ1Vals |           -70.3 |
|         MinQ1Vals |            -112 |
|     AverageQ2Vals |           -82.7 |
|         StdQ2Vals |            6.47 |
|         MaxQ2Vals |           -69.5 |
|         MinQ2Vals |            -111 |
|            LossPi |            82.4 |
|             LossQ |            1.18 |
|              Time |             546 |
---------------------------------------


---------------------------------------
|             Epoch |             107 |
|      AverageEpRet |            -382 |
|          StdEpRet |            37.9 |
|          MaxEpRet |            -323 |
|          MinEpRet |            -453 |
|  AverageTestEpRet |            -330 |
|      StdTestEpRet |            50.8 |
|      MaxTestEpRet |            -263 |
|      MinTestEpRet |            -448 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |         1.6e+05 |
|     AverageQ1Vals |           -82.2 |
|         StdQ1Vals |            6.64 |
|         MaxQ1Vals |           -69.3 |
|         MinQ1Vals |            -112 |
|     AverageQ2Vals |           -82.2 |
|         StdQ2Vals |            6.65 |
|         MaxQ2Vals |           -68.8 |
|         MinQ2Vals |            -112 |
|            LossPi |            81.9 |
|             LossQ |            1.22 |
|              Time |             598 |
---------------------------------------


---------------------------------------
|             Epoch |             116 |
|      AverageEpRet |            -380 |
|          StdEpRet |            29.2 |
|          MaxEpRet |            -335 |
|          MinEpRet |            -443 |
|  AverageTestEpRet |            -338 |
|      StdTestEpRet |            49.8 |
|      MaxTestEpRet |            -232 |
|      MinTestEpRet |            -402 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        1.74e+05 |
|     AverageQ1Vals |           -81.8 |
|         StdQ1Vals |            6.73 |
|         MaxQ1Vals |           -69.1 |
|         MinQ1Vals |            -114 |
|     AverageQ2Vals |           -81.8 |
|         StdQ2Vals |            6.74 |
|         MaxQ2Vals |           -68.5 |
|         MinQ2Vals |            -114 |
|            LossPi |            81.5 |
|             LossQ |            1.26 |
|              Time |             649 |
---------------------------------------


---------------------------------------
|             Epoch |             125 |
|      AverageEpRet |            -381 |
|          StdEpRet |            31.2 |
|          MaxEpRet |            -332 |
|          MinEpRet |            -437 |
|  AverageTestEpRet |            -356 |
|      StdTestEpRet |            42.8 |
|      MaxTestEpRet |            -281 |
|      MinTestEpRet |            -433 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        1.87e+05 |
|     AverageQ1Vals |           -81.1 |
|         StdQ1Vals |            6.86 |
|         MaxQ1Vals |           -67.9 |
|         MinQ1Vals |            -113 |
|     AverageQ2Vals |           -81.1 |
|         StdQ2Vals |            6.87 |
|         MaxQ2Vals |           -67.2 |
|         MinQ2Vals |            -115 |
|            LossPi |            80.8 |
|             LossQ |            1.28 |
|              Time |             703 |
---------------------------------------


---------------------------------------
|             Epoch |             134 |
|      AverageEpRet |            -380 |
|          StdEpRet |            33.2 |
|          MaxEpRet |            -317 |
|          MinEpRet |            -435 |
|  AverageTestEpRet |            -355 |
|      StdTestEpRet |            64.3 |
|      MaxTestEpRet |            -217 |
|      MinTestEpRet |            -463 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.01e+05 |
|     AverageQ1Vals |           -80.3 |
|         StdQ1Vals |            7.05 |
|         MaxQ1Vals |           -66.8 |
|         MinQ1Vals |            -116 |
|     AverageQ2Vals |           -80.3 |
|         StdQ2Vals |            7.06 |
|         MaxQ2Vals |           -66.5 |
|         MinQ2Vals |            -116 |
|            LossPi |              80 |
|             LossQ |            1.29 |
|              Time |             755 |
---------------------------------------


---------------------------------------
|             Epoch |             143 |
|      AverageEpRet |            -366 |
|          StdEpRet |            55.5 |
|          MaxEpRet |            -290 |
|          MinEpRet |            -465 |
|  AverageTestEpRet |            -337 |
|      StdTestEpRet |            52.2 |
|      MaxTestEpRet |            -260 |
|      MinTestEpRet |            -408 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.14e+05 |
|     AverageQ1Vals |           -79.6 |
|         StdQ1Vals |            7.15 |
|         MaxQ1Vals |           -65.9 |
|         MinQ1Vals |            -115 |
|     AverageQ2Vals |           -79.6 |
|         StdQ2Vals |            7.16 |
|         MaxQ2Vals |           -65.7 |
|         MinQ2Vals |            -114 |
|            LossPi |            79.3 |
|             LossQ |             1.3 |
|              Time |             808 |
---------------------------------------


---------------------------------------
|             Epoch |             152 |
|      AverageEpRet |            -384 |
|          StdEpRet |            38.3 |
|          MaxEpRet |            -339 |
|          MinEpRet |            -454 |
|  AverageTestEpRet |            -336 |
|      StdTestEpRet |            47.8 |
|      MaxTestEpRet |            -251 |
|      MinTestEpRet |            -415 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.28e+05 |
|     AverageQ1Vals |           -79.2 |
|         StdQ1Vals |            7.29 |
|         MaxQ1Vals |           -65.1 |
|         MinQ1Vals |            -115 |
|     AverageQ2Vals |           -79.2 |
|         StdQ2Vals |            7.31 |
|         MaxQ2Vals |           -64.9 |
|         MinQ2Vals |            -117 |
|            LossPi |            78.9 |
|             LossQ |            1.31 |
|              Time |             860 |
---------------------------------------


---------------------------------------
|             Epoch |             161 |
|      AverageEpRet |            -377 |
|          StdEpRet |            46.7 |
|          MaxEpRet |            -304 |
|          MinEpRet |            -441 |
|  AverageTestEpRet |            -347 |
|      StdTestEpRet |            58.2 |
|      MaxTestEpRet |            -252 |
|      MinTestEpRet |            -455 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.41e+05 |
|     AverageQ1Vals |             -79 |
|         StdQ1Vals |            7.39 |
|         MaxQ1Vals |           -64.4 |
|         MinQ1Vals |            -115 |
|     AverageQ2Vals |             -79 |
|         StdQ2Vals |             7.4 |
|         MaxQ2Vals |           -64.4 |
|         MinQ2Vals |            -113 |
|            LossPi |            78.6 |
|             LossQ |            1.31 |
|              Time |             912 |
---------------------------------------


---------------------------------------
|             Epoch |             170 |
|      AverageEpRet |            -382 |
|          StdEpRet |            36.5 |
|          MaxEpRet |            -309 |
|          MinEpRet |            -444 |
|  AverageTestEpRet |            -335 |
|      StdTestEpRet |            52.8 |
|      MaxTestEpRet |            -250 |
|      MinTestEpRet |            -414 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.55e+05 |
|     AverageQ1Vals |           -79.1 |
|         StdQ1Vals |            7.56 |
|         MaxQ1Vals |           -64.5 |
|         MinQ1Vals |            -114 |
|     AverageQ2Vals |           -79.1 |
|         StdQ2Vals |            7.57 |
|         MaxQ2Vals |           -64.5 |
|         MinQ2Vals |            -115 |
|            LossPi |            78.8 |
|             LossQ |            1.31 |
|              Time |             963 |
---------------------------------------


---------------------------------------
|             Epoch |             179 |
|      AverageEpRet |            -359 |
|          StdEpRet |            34.7 |
|          MaxEpRet |            -279 |
|          MinEpRet |            -419 |
|  AverageTestEpRet |            -318 |
|      StdTestEpRet |            48.4 |
|      MaxTestEpRet |            -238 |
|      MinTestEpRet |            -421 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.68e+05 |
|     AverageQ1Vals |           -78.9 |
|         StdQ1Vals |            7.64 |
|         MaxQ1Vals |           -64.6 |
|         MinQ1Vals |            -114 |
|     AverageQ2Vals |           -78.9 |
|         StdQ2Vals |            7.65 |
|         MaxQ2Vals |           -64.2 |
|         MinQ2Vals |            -113 |
|            LossPi |            78.6 |
|             LossQ |            1.32 |
|              Time |        1.01e+03 |
---------------------------------------


---------------------------------------
|             Epoch |             188 |
|      AverageEpRet |            -374 |
|          StdEpRet |            28.2 |
|          MaxEpRet |            -339 |
|          MinEpRet |            -438 |
|  AverageTestEpRet |            -276 |
|      StdTestEpRet |              48 |
|      MaxTestEpRet |            -175 |
|      MinTestEpRet |            -325 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.82e+05 |
|     AverageQ1Vals |           -78.6 |
|         StdQ1Vals |            7.67 |
|         MaxQ1Vals |           -64.3 |
|         MinQ1Vals |            -111 |
|     AverageQ2Vals |           -78.6 |
|         StdQ2Vals |            7.68 |
|         MaxQ2Vals |           -63.9 |
|         MinQ2Vals |            -112 |
|            LossPi |            78.3 |
|             LossQ |            1.32 |
|              Time |        1.06e+03 |
---------------------------------------


---------------------------------------
|             Epoch |             197 |
|      AverageEpRet |            -362 |
|          StdEpRet |            36.7 |
|          MaxEpRet |            -290 |
|          MinEpRet |            -418 |
|  AverageTestEpRet |            -349 |
|      StdTestEpRet |            54.6 |
|      MaxTestEpRet |            -277 |
|      MinTestEpRet |            -434 |
|             EpLen |             100 |
|         TestEpLen |             100 |
| TotalEnvInteracts |        2.95e+05 |
|     AverageQ1Vals |           -78.3 |
|         StdQ1Vals |            7.67 |
|         MaxQ1Vals |           -63.7 |
|         MinQ1Vals |            -113 |
|     AverageQ2Vals |           -78.3 |
|         StdQ2Vals |            7.68 |
|         MaxQ2Vals |           -63.8 |
|         MinQ2Vals |            -112 |
|            LossPi |              78 |
|             LossQ |             1.3 |
|              Time |        1.12e+03 |
---------------------------------------
