# Janus gym environment

In [332]:
import gym
from gym import spaces
import random
import stable_baselines3
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN, DDPG
from math import sqrt
import numpy as np
import pandas as pd
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import preprocessing
from sklearn.datasets import make_regression

import math
import pickle

class Janus(gym.Env):
    metadata = {'render.modes': ['human']}
    template_filename = 'data/dataset-S_public/public/dataset_S-{}.csv'

    def __init__(self):
        super(Janus, self).__init__()
        #actions: move on the grid, by continuous value in -1,1
        #0,0 no move
        #based on 94 controlable parameters
        #"We recommend you to use a symmetric and normalized Box action space (range=[-1, 1]) ", we will multiply effect by 2
#         self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(94, ))
        #we focus on the 1 most influencal action
        nbr_actions = 4
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(nbr_actions, ))
    

        # all the observation_space
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(228, ))

        file1 = pd.read_csv(self.template_filename.format('file1'), index_col=0)
        file2 = pd.read_csv(self.template_filename.format('file2'), index_col=0)
        file3 = pd.read_csv(self.template_filename.format('file3'), index_col=0)
        vav = pd.read_csv(self.template_filename.format('VAV'), index_col=0)
        self.ti = pd.read_csv(self.template_filename.format('TI'), index_col=0)
        self.ts = pd.read_csv(self.template_filename.format('TS'), index_col=0)
        x_df = file1.copy()
        x_df = x_df.loc[:, (x_df != 0).any(axis=0)]  ## remove static columns
        x_df = x_df.fillna(x_df.mean())  ## replace nan with mean

        self.y_df = file2.copy()
        self.y_df.dropna(how='all', axis=1,
                    inplace=True)  ## drop full tempty columns
        self.y_df = self.y_df.fillna(self.y_df.mean())

        self.vav_df = vav.copy()
        # Dropping few columns
        for dataset in [self.y_df, self.vav_df, self.ti, self.ts]:
            dataset.drop(['target_1', 'target_2', 'target_3', 'target_4'],
                  axis=1,
                  inplace=True)  #to simplify with a 2-dimension target space

        print('features shape: {}, \ntargets shape: {}'.format(
            x_df.shape, self.y_df.shape))

        x_train, x_test, y_train, y_test = train_test_split(x_df,
                                                            self.y_df,
                                                            test_size=0.1,
                                                            random_state=14)
        print('\nLength of train is {}, test is {}'.format(
            len(x_train), len(x_test)))
        ## Random forest
        filename = 'data/models/janus_RF.pkl'  # janus_LinearReg, janus_RF

        # pickle.dump(ml_model, open(filename, 'wb'))

        # # load the model from disk
        self.ml_model = pickle.load(open(filename, 'rb'))
        print(f'R squared: {self.ml_model.score(x_test, y_test.values):0.04f}')

        self.full_x = file3.copy()[x_df.columns]
        self.full_x = self.full_x.fillna(x_df.mean())
        
        self.partial_x = x_train.copy()

        inferred_y = pd.DataFrame(self.ml_model.predict(self.full_x),
                                  columns=self.y_df.columns)

        # list of [min, max, step, range] for each var
        scale = 100
        decimals = 3
        
        self.list_important_actions = np.argsort(self.ml_model.feature_importances_[:94])[::-1][:nbr_actions]

        ## get limits for Rewards
        self.output_steps = [round((self.y_df[i].max() - self.y_df[i].min())/scale, decimals) \
                        for i in self.y_df.columns]
        print('Output steps: ', self.output_steps)

    def reset(self):
#         self.current_position = self.revert_to_obs_space(
#             self.full_x.sample().values.reshape(-1), self.full_x)
        random.seed(13)
        idx = random.randint(0,len(janus_env.partial_x)-1)
        idx = 47
        self.current_position = self.revert_to_obs_space(
            self.full_x.iloc[idx].values.reshape(-1), self.full_x)
        
        self.last_action = np.array([])
        self.last_effect = False
        self.global_reward = 0
        self.episode_length = 0
        #print(f'reset at position {self.current_position[:10]}...')
        return self.current_position

    def step(self, action):
#         self.current_position[0:len(action)] = action
        for index, act in enumerate(self.list_important_actions):
            self.current_position[act]=action[index]
        self.last_action = action
        self.episode_length += 1
        
        reward = self.discrete_reward_from_obs(
            self.convert_to_real_obs(self.current_position,
                                     self.full_x).values.reshape(1,-1))
        done = reward >= -0.1*self.y_df.shape[1]
        if done:
            reward = 100
        
        if self.episode_length>100:
            #print('episode too long -> reset')
            done = True
            
        if (max(abs(action))):
            # if on border, we kill episode
            done = True
            

        self.global_reward += reward
        return self.current_position, reward, done, {}

    def render(self):
        print(
            f'position {self.current_position[:10]}, action {self.last_action[:5]}, effect {self.last_effect}, done {done}, global_reward {self.global_reward}'
        )

    def convert_to_real_obs(self, observation, observation_dataset):
        '''
        to convert an observation from observation space ([-1, 1],325) to  real world
        -1 matches with min() of each column
        1 matches with max() of each column
        
        observation: instance of observation_space
        observation_dataset: the full real dataset (obfuscated in that case)
        '''
        return (observation + np.ones(self.observation_space.shape)) / 2 * (
            observation_dataset.max() -
            observation_dataset.min()) + observation_dataset.min()

    def revert_to_obs_space(self, real_observation, observation_dataset):
        '''
        to revert an observation sample (from real world) to observation space
        min() of each column will match with -1
        max() of each column will match with +1
        
        real_observation: instance of real_world
        observation_dataset: the full real dataset (obfuscated in that case)
        '''
        return np.nan_to_num(
            2 * (real_observation - observation_dataset.min()) /
            (observation_dataset.max() - observation_dataset.min()) -
            np.ones(self.observation_space.shape)).reshape(-1)

    def discrete_reward_from_obs(self, observation):
        ''' Discrete reward 
        observation if from real world not observation space
        '''

        new_y = self.ml_model.predict(observation).reshape(-1)
        return self.discrete_reward_continuous(new_y)

    def discrete_reward(self, new_y):
        ''' Discrete reward '''

        new_val = [
            sqrt((self.vav_df.iloc[:, i].values[0] - new_y[i])**2)
            for i in range(len(new_y))
        ]
        k = 10
        k1 = 1
        if new_val[0] < k * self.output_steps[0] and new_val[
                1] < k * self.output_steps[1]:
            reward = 1  #dans les 10% d'amplitude max autour de la vav
            if new_val[0] < k1 * self.output_steps[0] and new_val[
                    1] < k1 * self.output_steps[1]:
                reward = 10  #dans les 1% d'amplitude max autour de la vav
                on_target = True
#                 print('On Target : ', new_y)
        else:
            reward = -1
        return reward
    
    
    
    def discrete_reward_continuous(self, new_y):
        ''' Continuous reward '''
        final_reward = 0 

        for i in range(len(new_y)):
            reward = -9
            if ( self.ti.iloc[:,i].values[0] <=  new_y[i] <= self.ts.iloc[:,i].values[0]):
                if ( new_y[i] >= self.vav_df.iloc[:,i].values[0] ):
                    reward = 1-(new_y[i]-self.vav_df.iloc[:,i].values[0])/(self.ts.iloc[:,i].values[0]-self.vav_df.iloc[:,i].values[0])
                else:
                    reward = 1-(new_y[i]-self.ti.iloc[:,i].values[0])/(self.vav_df.iloc[:,i].values[0]-self.ti.iloc[:,i].values[0])
            reward += -1
            final_reward+=reward
    #         print(f'reward {reward} final_reward {final_reward} i {i}')

        if (final_reward>0.7*len(new_y)):
            on_target = True
    #         print('On Target : ', new_y)

        return final_reward




In [317]:
janus_env = Janus()
from stable_baselines3.common.env_checker import check_env

check_env(janus_env)

features shape: (782, 228), 
targets shape: (782, 2)

Length of train is 703, test is 79
R squared: 0.6550
Output steps:  [0.064, 0.057]


AssertionError: The `done` signal must be a boolean

# SAC training

In [127]:
from stable_baselines3 import SAC

janus_env = Janus()
check_env(janus_env)

model_janus_sac = SAC("MlpPolicy", janus_env, verbose=2,tensorboard_log="./tensorboard/")
model_janus_sac.learn(total_timesteps=100000, log_interval=4, tb_log_name="janus partial sac")

janus_env.reset()
for i in range(100):
    action, _ = model_janus_sac.predict(janus_env.current_position)
    print(f'action {action}')
    obs, rewards, done, info = janus_env.step(action)
    janus_env.render()
    if done: break
janus_env.close()

features shape: (782, 228), 
targets shape: (782, 2)

Length of train is 703, test is 79
R squared: 0.6550
Output steps:  [0.064, 0.057]
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/janus partial sac_3
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 27       |
|    time_elapsed    | 14       |
|    total timesteps | 404      |
| train/             |          |
|    actor_loss      | 0.392    |
|    critic_loss     | 0.0128   |
|    ent_coef        | 0.913    |
|    ent_coef_loss   | -0.153   |
|    learning_rate   | 0.0003   |
|    n_updates       | 303      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/   

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 60       |
|    fps             | 24       |
|    time_elapsed    | 244      |
|    total timesteps | 6060     |
| train/             |          |
|    actor_loss      | 16.6     |
|    critic_loss     | 0.957    |
|    ent_coef        | 0.168    |
|    ent_coef_loss   | -3.01    |
|    learning_rate   | 0.0003   |
|    n_updates       | 5959     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 64       |
|    fps             | 24       |
|    time_elapsed    | 260      |
|    total timesteps | 6464     |
| train/             |          |
|    actor_loss      | 17.7     |
|    critic_loss     | 3.35     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 120      |
|    fps             | 23       |
|    time_elapsed    | 517      |
|    total timesteps | 12120    |
| train/             |          |
|    actor_loss      | 30.4     |
|    critic_loss     | 17.2     |
|    ent_coef        | 0.0301   |
|    ent_coef_loss   | -2.55    |
|    learning_rate   | 0.0003   |
|    n_updates       | 12019    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 124      |
|    fps             | 23       |
|    time_elapsed    | 536      |
|    total timesteps | 12524    |
| train/             |          |
|    actor_loss      | 31       |
|    critic_loss     | 3.8      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 180      |
|    fps             | 22       |
|    time_elapsed    | 799      |
|    total timesteps | 18180    |
| train/             |          |
|    actor_loss      | 39.1     |
|    critic_loss     | 46.2     |
|    ent_coef        | 0.00898  |
|    ent_coef_loss   | -0.818   |
|    learning_rate   | 0.0003   |
|    n_updates       | 18079    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 184      |
|    fps             | 22       |
|    time_elapsed    | 817      |
|    total timesteps | 18584    |
| train/             |          |
|    actor_loss      | 39.4     |
|    critic_loss     | 17.5     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 240      |
|    fps             | 22       |
|    time_elapsed    | 1059     |
|    total timesteps | 24240    |
| train/             |          |
|    actor_loss      | 42.9     |
|    critic_loss     | 29       |
|    ent_coef        | 0.00689  |
|    ent_coef_loss   | 1.22     |
|    learning_rate   | 0.0003   |
|    n_updates       | 24139    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 244      |
|    fps             | 22       |
|    time_elapsed    | 1076     |
|    total timesteps | 24644    |
| train/             |          |
|    actor_loss      | 43.8     |
|    critic_loss     | 28.9     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 300      |
|    fps             | 23       |
|    time_elapsed    | 1304     |
|    total timesteps | 30300    |
| train/             |          |
|    actor_loss      | 46.5     |
|    critic_loss     | 24.5     |
|    ent_coef        | 0.00742  |
|    ent_coef_loss   | -0.483   |
|    learning_rate   | 0.0003   |
|    n_updates       | 30199    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 304      |
|    fps             | 23       |
|    time_elapsed    | 1320     |
|    total timesteps | 30704    |
| train/             |          |
|    actor_loss      | 46.5     |
|    critic_loss     | 57       |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 360      |
|    fps             | 23       |
|    time_elapsed    | 1547     |
|    total timesteps | 36360    |
| train/             |          |
|    actor_loss      | 48.3     |
|    critic_loss     | 17.7     |
|    ent_coef        | 0.00579  |
|    ent_coef_loss   | -2       |
|    learning_rate   | 0.0003   |
|    n_updates       | 36259    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 364      |
|    fps             | 23       |
|    time_elapsed    | 1563     |
|    total timesteps | 36764    |
| train/             |          |
|    actor_loss      | 48.4     |
|    critic_loss     | 26.6     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 420      |
|    fps             | 23       |
|    time_elapsed    | 1795     |
|    total timesteps | 42420    |
| train/             |          |
|    actor_loss      | 49.1     |
|    critic_loss     | 36.3     |
|    ent_coef        | 0.0104   |
|    ent_coef_loss   | -0.571   |
|    learning_rate   | 0.0003   |
|    n_updates       | 42319    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 424      |
|    fps             | 23       |
|    time_elapsed    | 1811     |
|    total timesteps | 42824    |
| train/             |          |
|    actor_loss      | 49.4     |
|    critic_loss     | 0.308    |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 480      |
|    fps             | 23       |
|    time_elapsed    | 2044     |
|    total timesteps | 48480    |
| train/             |          |
|    actor_loss      | 48.9     |
|    critic_loss     | 19.5     |
|    ent_coef        | 0.00886  |
|    ent_coef_loss   | 0.605    |
|    learning_rate   | 0.0003   |
|    n_updates       | 48379    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 484      |
|    fps             | 23       |
|    time_elapsed    | 2061     |
|    total timesteps | 48884    |
| train/             |          |
|    actor_loss      | 49.4     |
|    critic_loss     | 27.9     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 540      |
|    fps             | 23       |
|    time_elapsed    | 2301     |
|    total timesteps | 54540    |
| train/             |          |
|    actor_loss      | 49.9     |
|    critic_loss     | 0.438    |
|    ent_coef        | 0.0116   |
|    ent_coef_loss   | 0.546    |
|    learning_rate   | 0.0003   |
|    n_updates       | 54439    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 544      |
|    fps             | 23       |
|    time_elapsed    | 2319     |
|    total timesteps | 54944    |
| train/             |          |
|    actor_loss      | 50.1     |
|    critic_loss     | 28.3     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 600      |
|    fps             | 23       |
|    time_elapsed    | 2563     |
|    total timesteps | 60600    |
| train/             |          |
|    actor_loss      | 49.9     |
|    critic_loss     | 37.9     |
|    ent_coef        | 0.0121   |
|    ent_coef_loss   | 1.63     |
|    learning_rate   | 0.0003   |
|    n_updates       | 60499    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 604      |
|    fps             | 23       |
|    time_elapsed    | 2581     |
|    total timesteps | 61004    |
| train/             |          |
|    actor_loss      | 49.9     |
|    critic_loss     | 38       |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 660      |
|    fps             | 23       |
|    time_elapsed    | 2831     |
|    total timesteps | 66660    |
| train/             |          |
|    actor_loss      | 50.4     |
|    critic_loss     | 28.8     |
|    ent_coef        | 0.00798  |
|    ent_coef_loss   | 0.869    |
|    learning_rate   | 0.0003   |
|    n_updates       | 66559    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 664      |
|    fps             | 23       |
|    time_elapsed    | 2848     |
|    total timesteps | 67064    |
| train/             |          |
|    actor_loss      | 50.5     |
|    critic_loss     | 9.69     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 720      |
|    fps             | 23       |
|    time_elapsed    | 3104     |
|    total timesteps | 72720    |
| train/             |          |
|    actor_loss      | 50.7     |
|    critic_loss     | 9.71     |
|    ent_coef        | 0.00558  |
|    ent_coef_loss   | 1.4      |
|    learning_rate   | 0.0003   |
|    n_updates       | 72619    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 724      |
|    fps             | 23       |
|    time_elapsed    | 3123     |
|    total timesteps | 73124    |
| train/             |          |
|    actor_loss      | 50       |
|    critic_loss     | 19.5     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 780      |
|    fps             | 23       |
|    time_elapsed    | 3378     |
|    total timesteps | 78780    |
| train/             |          |
|    actor_loss      | 50.3     |
|    critic_loss     | 29       |
|    ent_coef        | 0.012    |
|    ent_coef_loss   | 1.35     |
|    learning_rate   | 0.0003   |
|    n_updates       | 78679    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 784      |
|    fps             | 23       |
|    time_elapsed    | 3397     |
|    total timesteps | 79184    |
| train/             |          |
|    actor_loss      | 50.4     |
|    critic_loss     | 38.6     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 840      |
|    fps             | 23       |
|    time_elapsed    | 3652     |
|    total timesteps | 84840    |
| train/             |          |
|    actor_loss      | 50.2     |
|    critic_loss     | 38.5     |
|    ent_coef        | 0.00897  |
|    ent_coef_loss   | 1.4      |
|    learning_rate   | 0.0003   |
|    n_updates       | 84739    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 844      |
|    fps             | 23       |
|    time_elapsed    | 3670     |
|    total timesteps | 85244    |
| train/             |          |
|    actor_loss      | 50.6     |
|    critic_loss     | 19.4     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 900      |
|    fps             | 23       |
|    time_elapsed    | 3923     |
|    total timesteps | 90900    |
| train/             |          |
|    actor_loss      | 50       |
|    critic_loss     | 19.9     |
|    ent_coef        | 0.00513  |
|    ent_coef_loss   | -1.88    |
|    learning_rate   | 0.0003   |
|    n_updates       | 90799    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 904      |
|    fps             | 23       |
|    time_elapsed    | 3941     |
|    total timesteps | 91304    |
| train/             |          |
|    actor_loss      | 50.1     |
|    critic_loss     | 9.91     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 960      |
|    fps             | 23       |
|    time_elapsed    | 4196     |
|    total timesteps | 96960    |
| train/             |          |
|    actor_loss      | 51       |
|    critic_loss     | 29.2     |
|    ent_coef        | 0.00438  |
|    ent_coef_loss   | -0.846   |
|    learning_rate   | 0.0003   |
|    n_updates       | 96859    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 101      |
|    ep_rew_mean     | -101     |
| time/              |          |
|    episodes        | 964      |
|    fps             | 23       |
|    time_elapsed    | 4214     |
|    total timesteps | 97364    |
| train/             |          |
|    actor_loss      | 50       |
|    critic_loss     | 10.2     |
|    ent_coef 

position [ 0.874066   -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.874066], effect False, done False, global_reward -24
action [0.96536136]
position [ 0.96536136 -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.96536136], effect False, done False, global_reward -25
action [0.9551518]
position [ 0.9551518  -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.9551518], effect False, done False, global_reward -26
action [0.98472524]
position [ 0.98472524 -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.98472524], effect False, done False, global_reward -27
action [0.29568815]
position [ 0.29568815 -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ],

position [ 0.9500246  -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.9500246], effect False, done False, global_reward -62
action [0.8876209]
position [ 0.88762093 -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.8876209], effect False, done False, global_reward -63
action [0.9898963]
position [ 0.9898963  -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.9898963], effect False, done False, global_reward -64
action [0.9649997]
position [ 0.96499968 -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], action [0.9649997], effect False, done False, global_reward -65
action [0.99591553]
position [ 0.99591553 -0.76       -0.67567568 -0.52941176  0.93877551 -0.98089959
 -0.63076923 -0.63076923 -0.2         0.        ], ac

# TD3 training

In [None]:
from stable_baselines3 import TD3
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

janus_env = Janus()
# check_env(janus_env)

n_actions = janus_env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))


model_janus_td3 = TD3("MlpPolicy", janus_env, action_noise=action_noise, verbose=2,tensorboard_log="./tensorboard/")
model_janus_td3.learn(total_timesteps=10000, log_interval=4, tb_log_name="janus partial td3")

janus_env.reset()
for i in range(100):
    action, _ = model_janus_td3.predict(janus_env.current_position)
    print(f'action {action}')
    obs, rewards, done, info = janus_env.step(action)
    janus_env.render()
    if done: break
janus_env.close()

features shape: (782, 228), 
targets shape: (782, 2)

Length of train is 703, test is 79
R squared: 0.6550
Output steps:  [0.064, 0.057]
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/janus partial td3_12
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 24.3     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 20       |
|    time_elapsed    | 0        |
|    total timesteps | 4        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 11.7     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 22       |
|    time_elapsed    | 0        |
|    total timesteps | 8        |
---------------------------------
---------------------------------
| rollout

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 0.0637   |
| time/              |          |
|    episodes        | 100      |
|    fps             | 22       |
|    time_elapsed    | 4        |
|    total timesteps | 100      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.946   |
| time/              |          |
|    episodes        | 104      |
|    fps             | 22       |
|    time_elapsed    | 4        |
|    total timesteps | 104      |
| train/             |          |
|    actor_loss      | -0.322   |
|    critic_loss     | 599      |
|    learning_rate   | 0.001    |
|    n_updates       | 3        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.948   |
| time/       

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 42.5     |
| time/              |          |
|    episodes        | 168      |
|    fps             | 19       |
|    time_elapsed    | 8        |
|    total timesteps | 168      |
| train/             |          |
|    actor_loss      | -26.8    |
|    critic_loss     | 4.25e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 67       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 45.5     |
| time/              |          |
|    episodes        | 172      |
|    fps             | 19       |
|    time_elapsed    | 8        |
|    total timesteps | 172      |
| train/             |          |
|    actor_loss      | -30.6    |
|    critic_loss     | 3.86e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 71       |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 236      |
|    fps             | 19       |
|    time_elapsed    | 12       |
|    total timesteps | 236      |
| train/             |          |
|    actor_loss      | -71.5    |
|    critic_loss     | 2.52e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 135      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 240      |
|    fps             | 19       |
|    time_elapsed    | 12       |
|    total timesteps | 240      |
| train/             |          |
|    actor_loss      | -65.9    |
|    critic_loss     | 2.91e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 139      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 80.8     |
| time/              |          |
|    episodes        | 304      |
|    fps             | 18       |
|    time_elapsed    | 16       |
|    total timesteps | 304      |
| train/             |          |
|    actor_loss      | -91      |
|    critic_loss     | 2.16e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 203      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 80.8     |
| time/              |          |
|    episodes        | 308      |
|    fps             | 18       |
|    time_elapsed    | 16       |
|    total timesteps | 308      |
| train/             |          |
|    actor_loss      | -68.5    |
|    critic_loss     | 2.28e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 207      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 372      |
|    fps             | 18       |
|    time_elapsed    | 20       |
|    total timesteps | 372      |
| train/             |          |
|    actor_loss      | -94.8    |
|    critic_loss     | 2.83e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 271      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 376      |
|    fps             | 18       |
|    time_elapsed    | 20       |
|    total timesteps | 376      |
| train/             |          |
|    actor_loss      | -68.5    |
|    critic_loss     | 2.63e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 275      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 440      |
|    fps             | 18       |
|    time_elapsed    | 24       |
|    total timesteps | 440      |
| train/             |          |
|    actor_loss      | -69.2    |
|    critic_loss     | 2.72e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 339      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 73.7     |
| time/              |          |
|    episodes        | 444      |
|    fps             | 18       |
|    time_elapsed    | 24       |
|    total timesteps | 444      |
| train/             |          |
|    actor_loss      | -96.3    |
|    critic_loss     | 3.91e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 343      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 508      |
|    fps             | 18       |
|    time_elapsed    | 27       |
|    total timesteps | 508      |
| train/             |          |
|    actor_loss      | -71.5    |
|    critic_loss     | 2.74e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 407      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 512      |
|    fps             | 18       |
|    time_elapsed    | 27       |
|    total timesteps | 512      |
| train/             |          |
|    actor_loss      | -83.5    |
|    critic_loss     | 1.81e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 411      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 576      |
|    fps             | 18       |
|    time_elapsed    | 31       |
|    total timesteps | 576      |
| train/             |          |
|    actor_loss      | -90.6    |
|    critic_loss     | 3.45e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 475      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 73.7     |
| time/              |          |
|    episodes        | 580      |
|    fps             | 18       |
|    time_elapsed    | 31       |
|    total timesteps | 580      |
| train/             |          |
|    actor_loss      | -73.8    |
|    critic_loss     | 2.82e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 479      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 644      |
|    fps             | 18       |
|    time_elapsed    | 35       |
|    total timesteps | 644      |
| train/             |          |
|    actor_loss      | -87.1    |
|    critic_loss     | 2.69e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 543      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 76.8     |
| time/              |          |
|    episodes        | 648      |
|    fps             | 18       |
|    time_elapsed    | 35       |
|    total timesteps | 648      |
| train/             |          |
|    actor_loss      | -79.7    |
|    critic_loss     | 2.88e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 547      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 712      |
|    fps             | 18       |
|    time_elapsed    | 39       |
|    total timesteps | 712      |
| train/             |          |
|    actor_loss      | -92.7    |
|    critic_loss     | 2.84e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 611      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 716      |
|    fps             | 18       |
|    time_elapsed    | 39       |
|    total timesteps | 716      |
| train/             |          |
|    actor_loss      | -69.6    |
|    critic_loss     | 3.22e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 615      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 62.6     |
| time/              |          |
|    episodes        | 780      |
|    fps             | 17       |
|    time_elapsed    | 43       |
|    total timesteps | 780      |
| train/             |          |
|    actor_loss      | -85.6    |
|    critic_loss     | 2.63e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 679      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 61.6     |
| time/              |          |
|    episodes        | 784      |
|    fps             | 17       |
|    time_elapsed    | 43       |
|    total timesteps | 784      |
| train/             |          |
|    actor_loss      | -83.2    |
|    critic_loss     | 2.48e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 683      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 848      |
|    fps             | 18       |
|    time_elapsed    | 47       |
|    total timesteps | 848      |
| train/             |          |
|    actor_loss      | -83.9    |
|    critic_loss     | 3.06e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 747      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 852      |
|    fps             | 17       |
|    time_elapsed    | 47       |
|    total timesteps | 852      |
| train/             |          |
|    actor_loss      | -94.2    |
|    critic_loss     | 2.91e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 751      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 916      |
|    fps             | 17       |
|    time_elapsed    | 51       |
|    total timesteps | 916      |
| train/             |          |
|    actor_loss      | -72.7    |
|    critic_loss     | 3.32e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 815      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 920      |
|    fps             | 17       |
|    time_elapsed    | 51       |
|    total timesteps | 920      |
| train/             |          |
|    actor_loss      | -100     |
|    critic_loss     | 3.67e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 819      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 64.7     |
| time/              |          |
|    episodes        | 984      |
|    fps             | 17       |
|    time_elapsed    | 54       |
|    total timesteps | 984      |
| train/             |          |
|    actor_loss      | -114     |
|    critic_loss     | 4.15e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 883      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 64.7     |
| time/              |          |
|    episodes        | 988      |
|    fps             | 17       |
|    time_elapsed    | 55       |
|    total timesteps | 988      |
| train/             |          |
|    actor_loss      | -97.8    |
|    critic_loss     | 2.43e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 887      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 65.7     |
| time/              |          |
|    episodes        | 1052     |
|    fps             | 17       |
|    time_elapsed    | 59       |
|    total timesteps | 1052     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 2.79e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 951      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 64.7     |
| time/              |          |
|    episodes        | 1056     |
|    fps             | 17       |
|    time_elapsed    | 59       |
|    total timesteps | 1056     |
| train/             |          |
|    actor_loss      | -91.2    |
|    critic_loss     | 2.04e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 955      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 63.7     |
| time/              |          |
|    episodes        | 1120     |
|    fps             | 17       |
|    time_elapsed    | 63       |
|    total timesteps | 1120     |
| train/             |          |
|    actor_loss      | -81      |
|    critic_loss     | 3.06e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1019     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 63.7     |
| time/              |          |
|    episodes        | 1124     |
|    fps             | 17       |
|    time_elapsed    | 63       |
|    total timesteps | 1124     |
| train/             |          |
|    actor_loss      | -96.1    |
|    critic_loss     | 2.32e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1023     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 54.6     |
| time/              |          |
|    episodes        | 1188     |
|    fps             | 17       |
|    time_elapsed    | 67       |
|    total timesteps | 1188     |
| train/             |          |
|    actor_loss      | -93.3    |
|    critic_loss     | 2e+03    |
|    learning_rate   | 0.001    |
|    n_updates       | 1087     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 56.6     |
| time/              |          |
|    episodes        | 1192     |
|    fps             | 17       |
|    time_elapsed    | 67       |
|    total timesteps | 1192     |
| train/             |          |
|    actor_loss      | -87.3    |
|    critic_loss     | 2.27e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1091     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 1256     |
|    fps             | 17       |
|    time_elapsed    | 71       |
|    total timesteps | 1256     |
| train/             |          |
|    actor_loss      | -102     |
|    critic_loss     | 2.85e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1155     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 1260     |
|    fps             | 17       |
|    time_elapsed    | 71       |
|    total timesteps | 1260     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 2.33e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1159     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 1324     |
|    fps             | 17       |
|    time_elapsed    | 75       |
|    total timesteps | 1324     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 3.03e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1223     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 1328     |
|    fps             | 17       |
|    time_elapsed    | 76       |
|    total timesteps | 1328     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 1.72e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1227     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 76.8     |
| time/              |          |
|    episodes        | 1392     |
|    fps             | 17       |
|    time_elapsed    | 79       |
|    total timesteps | 1392     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 2.56e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1291     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 76.8     |
| time/              |          |
|    episodes        | 1396     |
|    fps             | 17       |
|    time_elapsed    | 79       |
|    total timesteps | 1396     |
| train/             |          |
|    actor_loss      | -99.3    |
|    critic_loss     | 1.67e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1295     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 1460     |
|    fps             | 17       |
|    time_elapsed    | 83       |
|    total timesteps | 1460     |
| train/             |          |
|    actor_loss      | -111     |
|    critic_loss     | 3.07e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1359     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 1464     |
|    fps             | 17       |
|    time_elapsed    | 84       |
|    total timesteps | 1464     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 1.96e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1363     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 1528     |
|    fps             | 17       |
|    time_elapsed    | 87       |
|    total timesteps | 1528     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 2.44e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1427     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 1532     |
|    fps             | 17       |
|    time_elapsed    | 88       |
|    total timesteps | 1532     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 2.26e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1431     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 1596     |
|    fps             | 17       |
|    time_elapsed    | 91       |
|    total timesteps | 1596     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 2.08e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1495     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 1600     |
|    fps             | 17       |
|    time_elapsed    | 91       |
|    total timesteps | 1600     |
| train/             |          |
|    actor_loss      | -94.6    |
|    critic_loss     | 2.11e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1499     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 74.8     |
| time/              |          |
|    episodes        | 1664     |
|    fps             | 17       |
|    time_elapsed    | 95       |
|    total timesteps | 1664     |
| train/             |          |
|    actor_loss      | -99.9    |
|    critic_loss     | 1.69e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1563     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 1668     |
|    fps             | 17       |
|    time_elapsed    | 95       |
|    total timesteps | 1668     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 2.27e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1567     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 1732     |
|    fps             | 17       |
|    time_elapsed    | 99       |
|    total timesteps | 1732     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 1.81e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1631     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 1736     |
|    fps             | 17       |
|    time_elapsed    | 99       |
|    total timesteps | 1736     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 1.68e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1635     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 1800     |
|    fps             | 17       |
|    time_elapsed    | 103      |
|    total timesteps | 1800     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 1.58e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1699     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 1804     |
|    fps             | 17       |
|    time_elapsed    | 103      |
|    total timesteps | 1804     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 1.54e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1703     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 1868     |
|    fps             | 17       |
|    time_elapsed    | 107      |
|    total timesteps | 1868     |
| train/             |          |
|    actor_loss      | -116     |
|    critic_loss     | 1.87e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1767     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 73.7     |
| time/              |          |
|    episodes        | 1872     |
|    fps             | 17       |
|    time_elapsed    | 107      |
|    total timesteps | 1872     |
| train/             |          |
|    actor_loss      | -114     |
|    critic_loss     | 2.85e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1771     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 62.6     |
| time/              |          |
|    episodes        | 1936     |
|    fps             | 17       |
|    time_elapsed    | 111      |
|    total timesteps | 1936     |
| train/             |          |
|    actor_loss      | -98      |
|    critic_loss     | 934      |
|    learning_rate   | 0.001    |
|    n_updates       | 1835     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 63.7     |
| time/              |          |
|    episodes        | 1940     |
|    fps             | 17       |
|    time_elapsed    | 112      |
|    total timesteps | 1940     |
| train/             |          |
|    actor_loss      | -96.5    |
|    critic_loss     | 1.88e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1839     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 65.7     |
| time/              |          |
|    episodes        | 2004     |
|    fps             | 17       |
|    time_elapsed    | 116      |
|    total timesteps | 2004     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 1.54e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1903     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 2008     |
|    fps             | 17       |
|    time_elapsed    | 116      |
|    total timesteps | 2008     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 1.66e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1907     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 2072     |
|    fps             | 17       |
|    time_elapsed    | 120      |
|    total timesteps | 2072     |
| train/             |          |
|    actor_loss      | -102     |
|    critic_loss     | 1.71e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1971     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 2076     |
|    fps             | 17       |
|    time_elapsed    | 121      |
|    total timesteps | 2076     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 1.51e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 1975     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 2140     |
|    fps             | 17       |
|    time_elapsed    | 124      |
|    total timesteps | 2140     |
| train/             |          |
|    actor_loss      | -114     |
|    critic_loss     | 2.4e+03  |
|    learning_rate   | 0.001    |
|    n_updates       | 2039     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 2144     |
|    fps             | 17       |
|    time_elapsed    | 125      |
|    total timesteps | 2144     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 1.22e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2043     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 2208     |
|    fps             | 17       |
|    time_elapsed    | 128      |
|    total timesteps | 2208     |
| train/             |          |
|    actor_loss      | -112     |
|    critic_loss     | 1.16e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2107     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 2212     |
|    fps             | 17       |
|    time_elapsed    | 129      |
|    total timesteps | 2212     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 1.1e+03  |
|    learning_rate   | 0.001    |
|    n_updates       | 2111     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 2276     |
|    fps             | 17       |
|    time_elapsed    | 132      |
|    total timesteps | 2276     |
| train/             |          |
|    actor_loss      | -91.5    |
|    critic_loss     | 1.4e+03  |
|    learning_rate   | 0.001    |
|    n_updates       | 2175     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 2280     |
|    fps             | 17       |
|    time_elapsed    | 132      |
|    total timesteps | 2280     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 1.09e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2179     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 73.7     |
| time/              |          |
|    episodes        | 2344     |
|    fps             | 17       |
|    time_elapsed    | 136      |
|    total timesteps | 2344     |
| train/             |          |
|    actor_loss      | -98.2    |
|    critic_loss     | 1.56e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2243     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 75.8     |
| time/              |          |
|    episodes        | 2348     |
|    fps             | 17       |
|    time_elapsed    | 136      |
|    total timesteps | 2348     |
| train/             |          |
|    actor_loss      | -99.3    |
|    critic_loss     | 1.02e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2247     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 2412     |
|    fps             | 17       |
|    time_elapsed    | 141      |
|    total timesteps | 2412     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 1.1e+03  |
|    learning_rate   | 0.001    |
|    n_updates       | 2311     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 2416     |
|    fps             | 17       |
|    time_elapsed    | 141      |
|    total timesteps | 2416     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 1.02e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2315     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 2480     |
|    fps             | 17       |
|    time_elapsed    | 144      |
|    total timesteps | 2480     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 1.05e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2379     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 2484     |
|    fps             | 17       |
|    time_elapsed    | 145      |
|    total timesteps | 2484     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 717      |
|    learning_rate   | 0.001    |
|    n_updates       | 2383     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 2548     |
|    fps             | 17       |
|    time_elapsed    | 148      |
|    total timesteps | 2548     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 1.28e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2447     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 2552     |
|    fps             | 17       |
|    time_elapsed    | 149      |
|    total timesteps | 2552     |
| train/             |          |
|    actor_loss      | -109     |
|    critic_loss     | 1.13e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2451     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 2616     |
|    fps             | 17       |
|    time_elapsed    | 152      |
|    total timesteps | 2616     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 914      |
|    learning_rate   | 0.001    |
|    n_updates       | 2515     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 2620     |
|    fps             | 17       |
|    time_elapsed    | 153      |
|    total timesteps | 2620     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 843      |
|    learning_rate   | 0.001    |
|    n_updates       | 2519     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 77.8     |
| time/              |          |
|    episodes        | 2684     |
|    fps             | 17       |
|    time_elapsed    | 156      |
|    total timesteps | 2684     |
| train/             |          |
|    actor_loss      | -113     |
|    critic_loss     | 1.04e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2583     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 76.8     |
| time/              |          |
|    episodes        | 2688     |
|    fps             | 17       |
|    time_elapsed    | 157      |
|    total timesteps | 2688     |
| train/             |          |
|    actor_loss      | -100     |
|    critic_loss     | 717      |
|    learning_rate   | 0.001    |
|    n_updates       | 2587     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 2752     |
|    fps             | 17       |
|    time_elapsed    | 161      |
|    total timesteps | 2752     |
| train/             |          |
|    actor_loss      | -94.9    |
|    critic_loss     | 1.56e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2651     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 2756     |
|    fps             | 17       |
|    time_elapsed    | 161      |
|    total timesteps | 2756     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 875      |
|    learning_rate   | 0.001    |
|    n_updates       | 2655     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 2820     |
|    fps             | 17       |
|    time_elapsed    | 164      |
|    total timesteps | 2820     |
| train/             |          |
|    actor_loss      | -115     |
|    critic_loss     | 1.56e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2719     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 2824     |
|    fps             | 17       |
|    time_elapsed    | 165      |
|    total timesteps | 2824     |
| train/             |          |
|    actor_loss      | -99.7    |
|    critic_loss     | 775      |
|    learning_rate   | 0.001    |
|    n_updates       | 2723     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 60.6     |
| time/              |          |
|    episodes        | 2888     |
|    fps             | 17       |
|    time_elapsed    | 169      |
|    total timesteps | 2888     |
| train/             |          |
|    actor_loss      | -102     |
|    critic_loss     | 906      |
|    learning_rate   | 0.001    |
|    n_updates       | 2787     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 61.6     |
| time/              |          |
|    episodes        | 2892     |
|    fps             | 17       |
|    time_elapsed    | 169      |
|    total timesteps | 2892     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 1.04e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2791     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 2956     |
|    fps             | 17       |
|    time_elapsed    | 173      |
|    total timesteps | 2956     |
| train/             |          |
|    actor_loss      | -112     |
|    critic_loss     | 1.47e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 2855     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 64.7     |
| time/              |          |
|    episodes        | 2960     |
|    fps             | 17       |
|    time_elapsed    | 173      |
|    total timesteps | 2960     |
| train/             |          |
|    actor_loss      | -108     |
|    critic_loss     | 923      |
|    learning_rate   | 0.001    |
|    n_updates       | 2859     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 3024     |
|    fps             | 17       |
|    time_elapsed    | 177      |
|    total timesteps | 3024     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 782      |
|    learning_rate   | 0.001    |
|    n_updates       | 2923     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 3028     |
|    fps             | 17       |
|    time_elapsed    | 177      |
|    total timesteps | 3028     |
| train/             |          |
|    actor_loss      | -99.2    |
|    critic_loss     | 771      |
|    learning_rate   | 0.001    |
|    n_updates       | 2927     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 3092     |
|    fps             | 17       |
|    time_elapsed    | 181      |
|    total timesteps | 3092     |
| train/             |          |
|    actor_loss      | -111     |
|    critic_loss     | 984      |
|    learning_rate   | 0.001    |
|    n_updates       | 2991     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 3096     |
|    fps             | 17       |
|    time_elapsed    | 181      |
|    total timesteps | 3096     |
| train/             |          |
|    actor_loss      | -107     |
|    critic_loss     | 644      |
|    learning_rate   | 0.001    |
|    n_updates       | 2995     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 63.7     |
| time/              |          |
|    episodes        | 3160     |
|    fps             | 17       |
|    time_elapsed    | 185      |
|    total timesteps | 3160     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 948      |
|    learning_rate   | 0.001    |
|    n_updates       | 3059     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 62.6     |
| time/              |          |
|    episodes        | 3164     |
|    fps             | 17       |
|    time_elapsed    | 185      |
|    total timesteps | 3164     |
| train/             |          |
|    actor_loss      | -102     |
|    critic_loss     | 633      |
|    learning_rate   | 0.001    |
|    n_updates       | 3063     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 3228     |
|    fps             | 17       |
|    time_elapsed    | 189      |
|    total timesteps | 3228     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 1.01e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3127     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 3232     |
|    fps             | 17       |
|    time_elapsed    | 189      |
|    total timesteps | 3232     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 1.02e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3131     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 3296     |
|    fps             | 17       |
|    time_elapsed    | 193      |
|    total timesteps | 3296     |
| train/             |          |
|    actor_loss      | -109     |
|    critic_loss     | 841      |
|    learning_rate   | 0.001    |
|    n_updates       | 3195     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 3300     |
|    fps             | 17       |
|    time_elapsed    | 193      |
|    total timesteps | 3300     |
| train/             |          |
|    actor_loss      | -114     |
|    critic_loss     | 731      |
|    learning_rate   | 0.001    |
|    n_updates       | 3199     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 3364     |
|    fps             | 17       |
|    time_elapsed    | 197      |
|    total timesteps | 3364     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 667      |
|    learning_rate   | 0.001    |
|    n_updates       | 3263     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 3368     |
|    fps             | 17       |
|    time_elapsed    | 197      |
|    total timesteps | 3368     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 977      |
|    learning_rate   | 0.001    |
|    n_updates       | 3267     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 3432     |
|    fps             | 17       |
|    time_elapsed    | 201      |
|    total timesteps | 3432     |
| train/             |          |
|    actor_loss      | -99.4    |
|    critic_loss     | 1.04e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3331     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 3436     |
|    fps             | 17       |
|    time_elapsed    | 202      |
|    total timesteps | 3436     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 653      |
|    learning_rate   | 0.001    |
|    n_updates       | 3335     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 65.7     |
| time/              |          |
|    episodes        | 3500     |
|    fps             | 17       |
|    time_elapsed    | 205      |
|    total timesteps | 3500     |
| train/             |          |
|    actor_loss      | -93.9    |
|    critic_loss     | 1.07e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3399     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 3504     |
|    fps             | 17       |
|    time_elapsed    | 206      |
|    total timesteps | 3504     |
| train/             |          |
|    actor_loss      | -83.3    |
|    critic_loss     | 1.09e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3403     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 3568     |
|    fps             | 16       |
|    time_elapsed    | 209      |
|    total timesteps | 3568     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 1.04e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3467     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 3572     |
|    fps             | 17       |
|    time_elapsed    | 210      |
|    total timesteps | 3572     |
| train/             |          |
|    actor_loss      | -96.2    |
|    critic_loss     | 1.09e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3471     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 76.8     |
| time/              |          |
|    episodes        | 3636     |
|    fps             | 17       |
|    time_elapsed    | 213      |
|    total timesteps | 3636     |
| train/             |          |
|    actor_loss      | -98      |
|    critic_loss     | 1.37e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3535     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 77.8     |
| time/              |          |
|    episodes        | 3640     |
|    fps             | 17       |
|    time_elapsed    | 213      |
|    total timesteps | 3640     |
| train/             |          |
|    actor_loss      | -97.4    |
|    critic_loss     | 1.24e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3539     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 3704     |
|    fps             | 17       |
|    time_elapsed    | 217      |
|    total timesteps | 3704     |
| train/             |          |
|    actor_loss      | -97.9    |
|    critic_loss     | 982      |
|    learning_rate   | 0.001    |
|    n_updates       | 3603     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 3708     |
|    fps             | 17       |
|    time_elapsed    | 217      |
|    total timesteps | 3708     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 535      |
|    learning_rate   | 0.001    |
|    n_updates       | 3607     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 64.7     |
| time/              |          |
|    episodes        | 3772     |
|    fps             | 17       |
|    time_elapsed    | 221      |
|    total timesteps | 3772     |
| train/             |          |
|    actor_loss      | -108     |
|    critic_loss     | 1.06e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3671     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 65.7     |
| time/              |          |
|    episodes        | 3776     |
|    fps             | 17       |
|    time_elapsed    | 221      |
|    total timesteps | 3776     |
| train/             |          |
|    actor_loss      | -111     |
|    critic_loss     | 1.27e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3675     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 3840     |
|    fps             | 17       |
|    time_elapsed    | 225      |
|    total timesteps | 3840     |
| train/             |          |
|    actor_loss      | -95      |
|    critic_loss     | 1.24e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3739     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 3844     |
|    fps             | 17       |
|    time_elapsed    | 225      |
|    total timesteps | 3844     |
| train/             |          |
|    actor_loss      | -93.6    |
|    critic_loss     | 1.14e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3743     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 3908     |
|    fps             | 17       |
|    time_elapsed    | 229      |
|    total timesteps | 3908     |
| train/             |          |
|    actor_loss      | -96.3    |
|    critic_loss     | 682      |
|    learning_rate   | 0.001    |
|    n_updates       | 3807     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 3912     |
|    fps             | 17       |
|    time_elapsed    | 229      |
|    total timesteps | 3912     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 619      |
|    learning_rate   | 0.001    |
|    n_updates       | 3811     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 62.6     |
| time/              |          |
|    episodes        | 3976     |
|    fps             | 17       |
|    time_elapsed    | 233      |
|    total timesteps | 3976     |
| train/             |          |
|    actor_loss      | -88.1    |
|    critic_loss     | 1.88e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3875     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 61.6     |
| time/              |          |
|    episodes        | 3980     |
|    fps             | 17       |
|    time_elapsed    | 233      |
|    total timesteps | 3980     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 1.08e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 3879     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 4044     |
|    fps             | 17       |
|    time_elapsed    | 237      |
|    total timesteps | 4044     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 537      |
|    learning_rate   | 0.001    |
|    n_updates       | 3943     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 4048     |
|    fps             | 17       |
|    time_elapsed    | 237      |
|    total timesteps | 4048     |
| train/             |          |
|    actor_loss      | -100     |
|    critic_loss     | 558      |
|    learning_rate   | 0.001    |
|    n_updates       | 3947     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 4112     |
|    fps             | 17       |
|    time_elapsed    | 241      |
|    total timesteps | 4112     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 818      |
|    learning_rate   | 0.001    |
|    n_updates       | 4011     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 4116     |
|    fps             | 17       |
|    time_elapsed    | 241      |
|    total timesteps | 4116     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 541      |
|    learning_rate   | 0.001    |
|    n_updates       | 4015     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 4180     |
|    fps             | 17       |
|    time_elapsed    | 245      |
|    total timesteps | 4180     |
| train/             |          |
|    actor_loss      | -111     |
|    critic_loss     | 943      |
|    learning_rate   | 0.001    |
|    n_updates       | 4079     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 4184     |
|    fps             | 17       |
|    time_elapsed    | 245      |
|    total timesteps | 4184     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 445      |
|    learning_rate   | 0.001    |
|    n_updates       | 4083     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 4248     |
|    fps             | 17       |
|    time_elapsed    | 249      |
|    total timesteps | 4248     |
| train/             |          |
|    actor_loss      | -99.1    |
|    critic_loss     | 485      |
|    learning_rate   | 0.001    |
|    n_updates       | 4147     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 69.7     |
| time/              |          |
|    episodes        | 4252     |
|    fps             | 17       |
|    time_elapsed    | 249      |
|    total timesteps | 4252     |
| train/             |          |
|    actor_loss      | -98.7    |
|    critic_loss     | 623      |
|    learning_rate   | 0.001    |
|    n_updates       | 4151     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 4316     |
|    fps             | 16       |
|    time_elapsed    | 253      |
|    total timesteps | 4316     |
| train/             |          |
|    actor_loss      | -111     |
|    critic_loss     | 2.19e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 4215     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 64.7     |
| time/              |          |
|    episodes        | 4320     |
|    fps             | 16       |
|    time_elapsed    | 254      |
|    total timesteps | 4320     |
| train/             |          |
|    actor_loss      | -78.9    |
|    critic_loss     | 1.49e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 4219     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 4384     |
|    fps             | 16       |
|    time_elapsed    | 258      |
|    total timesteps | 4384     |
| train/             |          |
|    actor_loss      | -102     |
|    critic_loss     | 675      |
|    learning_rate   | 0.001    |
|    n_updates       | 4283     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 4388     |
|    fps             | 16       |
|    time_elapsed    | 258      |
|    total timesteps | 4388     |
| train/             |          |
|    actor_loss      | -100     |
|    critic_loss     | 807      |
|    learning_rate   | 0.001    |
|    n_updates       | 4287     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 4452     |
|    fps             | 16       |
|    time_elapsed    | 262      |
|    total timesteps | 4452     |
| train/             |          |
|    actor_loss      | -105     |
|    critic_loss     | 666      |
|    learning_rate   | 0.001    |
|    n_updates       | 4351     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 4456     |
|    fps             | 16       |
|    time_elapsed    | 262      |
|    total timesteps | 4456     |
| train/             |          |
|    actor_loss      | -103     |
|    critic_loss     | 652      |
|    learning_rate   | 0.001    |
|    n_updates       | 4355     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 4520     |
|    fps             | 16       |
|    time_elapsed    | 267      |
|    total timesteps | 4520     |
| train/             |          |
|    actor_loss      | -108     |
|    critic_loss     | 1.31e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 4419     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 4524     |
|    fps             | 16       |
|    time_elapsed    | 267      |
|    total timesteps | 4524     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 914      |
|    learning_rate   | 0.001    |
|    n_updates       | 4423     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 4588     |
|    fps             | 16       |
|    time_elapsed    | 271      |
|    total timesteps | 4588     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 618      |
|    learning_rate   | 0.001    |
|    n_updates       | 4487     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 70.7     |
| time/              |          |
|    episodes        | 4592     |
|    fps             | 16       |
|    time_elapsed    | 271      |
|    total timesteps | 4592     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 598      |
|    learning_rate   | 0.001    |
|    n_updates       | 4491     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 71.7     |
| time/              |          |
|    episodes        | 4656     |
|    fps             | 16       |
|    time_elapsed    | 275      |
|    total timesteps | 4656     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 390      |
|    learning_rate   | 0.001    |
|    n_updates       | 4555     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 72.7     |
| time/              |          |
|    episodes        | 4660     |
|    fps             | 16       |
|    time_elapsed    | 276      |
|    total timesteps | 4660     |
| train/             |          |
|    actor_loss      | -106     |
|    critic_loss     | 703      |
|    learning_rate   | 0.001    |
|    n_updates       | 4559     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 61.6     |
| time/              |          |
|    episodes        | 4724     |
|    fps             | 16       |
|    time_elapsed    | 279      |
|    total timesteps | 4724     |
| train/             |          |
|    actor_loss      | -98.7    |
|    critic_loss     | 539      |
|    learning_rate   | 0.001    |
|    n_updates       | 4623     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 61.6     |
| time/              |          |
|    episodes        | 4728     |
|    fps             | 16       |
|    time_elapsed    | 280      |
|    total timesteps | 4728     |
| train/             |          |
|    actor_loss      | -95      |
|    critic_loss     | 739      |
|    learning_rate   | 0.001    |
|    n_updates       | 4627     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 67.7     |
| time/              |          |
|    episodes        | 4792     |
|    fps             | 16       |
|    time_elapsed    | 284      |
|    total timesteps | 4792     |
| train/             |          |
|    actor_loss      | -108     |
|    critic_loss     | 553      |
|    learning_rate   | 0.001    |
|    n_updates       | 4691     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 4796     |
|    fps             | 16       |
|    time_elapsed    | 284      |
|    total timesteps | 4796     |
| train/             |          |
|    actor_loss      | -102     |
|    critic_loss     | 202      |
|    learning_rate   | 0.001    |
|    n_updates       | 4695     |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 68.7     |
| time/              |          |
|    episodes        | 4860     |
|    fps             | 16       |
|    time_elapsed    | 288      |
|    total timesteps | 4860     |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 616      |
|    learning_rate   | 0.001    |
|    n_updates       | 4759     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 66.7     |
| time/              |          |
|    episodes        | 4864     |
|    fps             | 16       |
|    time_elapsed    | 289      |
|    total timesteps | 4864     |
| train/             |          |
|    actor_loss      | -101     |
|    critic_loss     | 368      |
|    learning_rate   | 0.001    |
|    n_updates       | 4763     |
--------------