# Distribution of influences

To gauge how much randomness influences the results, we run the same model many times and compare the outcomes.

In [1]:
# For Colab: install the required packages from GitHub
# (repositories: benefits, econogym, lifecycle-rl).
# Uncomment the lines below to run the installs.
#!pip install -q git+https://github.com/ajtanskanen/benefits.git
#!pip install -q git+https://github.com/ajtanskanen/econogym.git
#!pip install -q git+https://github.com/ajtanskanen/lifecycle-rl.git

# ...and then restart the kernel.

# To install specific versions of TensorFlow and Stable Baselines:
#!pip install tensorflow==1.15
#!pip install stable-baselines==2.8

# Restart the kernel after running the pip commands.

Then load all modules and set parameters for simulations.

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from lifecycle_rl import Lifecycle

# Render matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): %pylab additionally populates the interactive namespace with
# numpy and matplotlib names (see the cell output), on top of the explicit
# imports above. Confirm that later cells need it before removing it.
%pylab inline

# Hide warnings (Stable Baselines is not yet TensorFlow 2.0 compatible, and TensorFlow 1.15 complains a lot).
# NOTE: hiding the warnings does not seem to work.
import warnings
warnings.filterwarnings('ignore')

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Populating the interactive namespace from numpy and matplotlib


In [7]:
# Parameters for the simulation and training runs.
# Episode length: 51 / 205 timesteps (with a 1-year / 3-month timestep).

# --- population and training lengths ---
pop_size = 10_000   # size of the population to be simulated
size1 = 10_000_000  # number of timesteps in phase 1 training (callback not used)
size2 = 100         # number of timesteps in phase 2 training (callback is used to save the best results)
size3 = 100         # number of timesteps in phase 1 training (callback not used) for policy changes

# --- minibatch sizes, given as number of episodes ---
batch1 = 1          # size of minibatch in phase 1
batch2 = 900        # size of minibatch, presumably in phase 2 (the original comment said phase 1) -- TODO confirm
callback_minsteps = batch2  # how many episodes the callback needs

# --- model switches ---
deterministic = False  # use deterministic prediction (True) or probabilistic prediction (False)
mortality = False      # include mortality in computations
randomness = True      # include externally given, random state-transitions (parental leaves, disability, lay-offs)
pinkslip = True        # include lay-offs at 5 percent level each year

# --- algorithm and model files ---
rlmodel = 'acktr'      # use ACKTR algorithm
twostage = False       # whether to run the training in two stages or not
perusmalli = 'best/malli_perus3'  # path of the baseline model ("perusmalli" is Finnish for "base model")

# Baseline

Compute the employment rates in the current (baseline) model.

In [9]:
# Baseline: set up the lifecycle model on the 'unemployment-v1' environment,
# using the switches defined in the parameter cell.
cc1 = Lifecycle(
    env='unemployment-v1',
    minimal=False,
    mortality=mortality,
    perustulo=False,  # "perustulo" is Finnish for basic income
    randomness=randomness,
    pinkslip=pinkslip,
    plotdebug=False,
)

# Print the parameters of the configured model.
cc1.explain()

# Train and simulate repeatedly to see how much the results vary between runs
# (presumably n is the number of repetitions -- confirm against lifecycle_rl).
# NOTE(review): `save` and `start_from` point at the same path while cont=True;
# verify that repeated runs do not overwrite the model they all start from.
cc1.run_distrib(
    n=30,
    debug=False,
    steps1=size1,
    steps2=size2,
    pop=pop_size,
    deterministic=deterministic,
    train=True,
    predict=True,
    batch1=batch1,
    batch2=batch2,
    save=perusmalli,
    plot=True,
    cont=True,
    start_from=perusmalli,
    results='results/distrib_base',
    callback_minsteps=callback_minsteps,
    rlmodel=rlmodel,
    twostage=twostage,
)

No mortality included
Parameters of lifecycle:
timestep 0.25
gamma 0.9793703613355593 (0.9200000000000003 per anno)
min_age 20
max_age 70
min_retirementage 63.5
max_retirementage 68.5
ansiopvraha_kesto300 None
ansiopvraha_kesto400 None
ansiopvraha_toe None
perustulo False
karenssi_kesto 0.25
mortality False
randomness True
include_putki None
include_pinkslip True
step 0.25

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...

---------------------------------
| explained_variance | 0.942    |
| fps                | 1487     |
| nupdates           | 1        |
| policy_entropy     | 0.84     |
| policy_loss        | -0.175   |
| total_timesteps    | 0        |
| value_loss         | 0.931    |
---------------------------------


---------------------------------
| explained_variance | 0.942    |
| fps                | 2555     |
| nupdates           | 10       |
| policy_entropy     | 0.784    |
| policy_loss        | 0.206    |
| total_timesteps    | 21825    |
| valu

---------------------------------
| explained_variance | 0.973    |
| fps                | 1998     |
| nupdates           | 240      |
| policy_entropy     | 0.846    |
| policy_loss        | 0.13     |
| total_timesteps    | 579575   |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 1998     |
| nupdates           | 250      |
| policy_entropy     | 0.889    |
| policy_loss        | 0.0634   |
| total_timesteps    | 603825   |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 1997     |
| nupdates           | 260      |
| policy_entropy     | 0.953    |
| policy_loss        | -0.029   |
| total_timesteps    | 628075   |
| value_loss         | 0.367    |
---------------------------------
---------------------------------
| explained_variance | 0.946    |
| fps         

---------------------------------
| explained_variance | 0.968    |
| fps                | 2006     |
| nupdates           | 510      |
| policy_entropy     | 0.773    |
| policy_loss        | -0.0727  |
| total_timesteps    | 1234325  |
| value_loss         | 0.447    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2007     |
| nupdates           | 520      |
| policy_entropy     | 0.839    |
| policy_loss        | -0.126   |
| total_timesteps    | 1258575  |
| value_loss         | 0.568    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2007     |
| nupdates           | 530      |
| policy_entropy     | 0.824    |
| policy_loss        | -0.00129 |
| total_timesteps    | 1282825  |
| value_loss         | 0.257    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.975    |
| fps                | 2017     |
| nupdates           | 780      |
| policy_entropy     | 0.89     |
| policy_loss        | -0.0497  |
| total_timesteps    | 1889075  |
| value_loss         | 0.344    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2018     |
| nupdates           | 790      |
| policy_entropy     | 0.775    |
| policy_loss        | 0.111    |
| total_timesteps    | 1913325  |
| value_loss         | 0.155    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2018     |
| nupdates           | 800      |
| policy_entropy     | 0.867    |
| policy_loss        | 0.0216   |
| total_timesteps    | 1937575  |
| value_loss         | 0.349    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.942    |
| fps                | 2026     |
| nupdates           | 1050     |
| policy_entropy     | 0.899    |
| policy_loss        | -0.0671  |
| total_timesteps    | 2543825  |
| value_loss         | 0.434    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2026     |
| nupdates           | 1060     |
| policy_entropy     | 0.826    |
| policy_loss        | 0.0884   |
| total_timesteps    | 2568075  |
| value_loss         | 0.154    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2027     |
| nupdates           | 1070     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.1     |
| total_timesteps    | 2592325  |
| value_loss         | 0.307    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.959    |
| fps                | 2023     |
| nupdates           | 1320     |
| policy_entropy     | 0.87     |
| policy_loss        | -0.0822  |
| total_timesteps    | 3198575  |
| value_loss         | 0.452    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2023     |
| nupdates           | 1330     |
| policy_entropy     | 0.965    |
| policy_loss        | -0.164   |
| total_timesteps    | 3222825  |
| value_loss         | 0.402    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2023     |
| nupdates           | 1340     |
| policy_entropy     | 0.908    |
| policy_loss        | 0.00426  |
| total_timesteps    | 3247075  |
| value_loss         | 0.542    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps         

---------------------------------
| explained_variance | 0.972    |
| fps                | 2028     |
| nupdates           | 1590     |
| policy_entropy     | 0.922    |
| policy_loss        | -0.0432  |
| total_timesteps    | 3853325  |
| value_loss         | 0.381    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2028     |
| nupdates           | 1600     |
| policy_entropy     | 0.939    |
| policy_loss        | 0.0871   |
| total_timesteps    | 3877575  |
| value_loss         | 0.231    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2028     |
| nupdates           | 1610     |
| policy_entropy     | 0.913    |
| policy_loss        | 0.0452   |
| total_timesteps    | 3901825  |
| value_loss         | 0.259    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.965    |
| fps                | 2034     |
| nupdates           | 1860     |
| policy_entropy     | 0.926    |
| policy_loss        | -0.0868  |
| total_timesteps    | 4508075  |
| value_loss         | 0.622    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2034     |
| nupdates           | 1870     |
| policy_entropy     | 0.886    |
| policy_loss        | 0.134    |
| total_timesteps    | 4532325  |
| value_loss         | 0.25     |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2035     |
| nupdates           | 1880     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.0212  |
| total_timesteps    | 4556575  |
| value_loss         | 0.277    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2040     |
| nupdates           | 2130     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.0874   |
| total_timesteps    | 5162825  |
| value_loss         | 0.245    |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps                | 2040     |
| nupdates           | 2140     |
| policy_entropy     | 0.815    |
| policy_loss        | -0.176   |
| total_timesteps    | 5187075  |
| value_loss         | 0.588    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2041     |
| nupdates           | 2150     |
| policy_entropy     | 0.907    |
| policy_loss        | 0.0269   |
| total_timesteps    | 5211325  |
| value_loss         | 0.138    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2045     |
| nupdates           | 2400     |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0886  |
| total_timesteps    | 5817575  |
| value_loss         | 0.418    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2045     |
| nupdates           | 2410     |
| policy_entropy     | 0.991    |
| policy_loss        | -0.0761  |
| total_timesteps    | 5841825  |
| value_loss         | 0.433    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2046     |
| nupdates           | 2420     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.166   |
| total_timesteps    | 5866075  |
| value_loss         | 0.531    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps         

---------------------------------
| explained_variance | 0.95     |
| fps                | 2049     |
| nupdates           | 2670     |
| policy_entropy     | 0.894    |
| policy_loss        | -0.145   |
| total_timesteps    | 6472325  |
| value_loss         | 0.682    |
---------------------------------
----------------------------------
| explained_variance | 0.979     |
| fps                | 2049      |
| nupdates           | 2680      |
| policy_entropy     | 0.933     |
| policy_loss        | -8.41e-05 |
| total_timesteps    | 6496575   |
| value_loss         | 0.343     |
----------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps                | 2049     |
| nupdates           | 2690     |
| policy_entropy     | 0.899    |
| policy_loss        | 0.0905   |
| total_timesteps    | 6520825  |
| value_loss         | 0.174    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps

---------------------------------
| explained_variance | 0.984    |
| fps                | 2051     |
| nupdates           | 2940     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0081  |
| total_timesteps    | 7127075  |
| value_loss         | 0.218    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2051     |
| nupdates           | 2950     |
| policy_entropy     | 0.865    |
| policy_loss        | 0.0381   |
| total_timesteps    | 7151325  |
| value_loss         | 0.166    |
---------------------------------
---------------------------------
| explained_variance | 0.943    |
| fps                | 2051     |
| nupdates           | 2960     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.213   |
| total_timesteps    | 7175575  |
| value_loss         | 0.746    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.952    |
| fps                | 2051     |
| nupdates           | 3210     |
| policy_entropy     | 0.941    |
| policy_loss        | -0.171   |
| total_timesteps    | 7781825  |
| value_loss         | 0.572    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2051     |
| nupdates           | 3220     |
| policy_entropy     | 0.931    |
| policy_loss        | -0.0175  |
| total_timesteps    | 7806075  |
| value_loss         | 0.259    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2051     |
| nupdates           | 3230     |
| policy_entropy     | 0.923    |
| policy_loss        | 0.108    |
| total_timesteps    | 7830325  |
| value_loss         | 0.171    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps         

---------------------------------
| explained_variance | 0.933    |
| fps                | 2049     |
| nupdates           | 3480     |
| policy_entropy     | 0.93     |
| policy_loss        | -0.267   |
| total_timesteps    | 8436575  |
| value_loss         | 0.818    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2049     |
| nupdates           | 3490     |
| policy_entropy     | 0.858    |
| policy_loss        | 0.0441   |
| total_timesteps    | 8460825  |
| value_loss         | 0.421    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2049     |
| nupdates           | 3500     |
| policy_entropy     | 0.881    |
| policy_loss        | -0.0856  |
| total_timesteps    | 8485075  |
| value_loss         | 0.554    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2044     |
| nupdates           | 3750     |
| policy_entropy     | 0.876    |
| policy_loss        | -0.104   |
| total_timesteps    | 9091325  |
| value_loss         | 0.343    |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps                | 2044     |
| nupdates           | 3760     |
| policy_entropy     | 0.874    |
| policy_loss        | 0.109    |
| total_timesteps    | 9115575  |
| value_loss         | 0.124    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2043     |
| nupdates           | 3770     |
| policy_entropy     | 0.925    |
| policy_loss        | 0.00451  |
| total_timesteps    | 9139825  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.96     |
| fps                | 2034     |
| nupdates           | 4020     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.101   |
| total_timesteps    | 9746075  |
| value_loss         | 0.497    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2034     |
| nupdates           | 4030     |
| policy_entropy     | 0.899    |
| policy_loss        | -0.122   |
| total_timesteps    | 9770325  |
| value_loss         | 0.435    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2033     |
| nupdates           | 4040     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.00546 |
| total_timesteps    | 9794575  |
| value_loss         | 0.548    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.956    |
| fps                | 1536     |
| nupdates           | 1        |
| policy_entropy     | 0.89     |
| policy_loss        | -0.103   |
| total_timesteps    | 0        |
| value_loss         | 0.521    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2572     |
| nupdates           | 10       |
| policy_entropy     | 0.759    |
| policy_loss        | 0.543    |
| total_timesteps    | 21825    |
| value_loss         | 1.09     |
---------------------------------
---------------------------------
| explained_variance | 0.932    |
| fps                | 2238     |
| nupdates           | 20       |
| policy_entropy     | 0.851    |
| policy_loss        | -1.15    |
| total_timesteps    | 46075    |
| value_loss         | 2.7      |
---------------------------------
------------

---------------------------------
| explained_variance | 0.972    |
| fps                | 2024     |
| nupdates           | 270      |
| policy_entropy     | 0.861    |
| policy_loss        | -0.00567 |
| total_timesteps    | 652325   |
| value_loss         | 0.341    |
---------------------------------
---------------------------------
| explained_variance | 0.942    |
| fps                | 2024     |
| nupdates           | 280      |
| policy_entropy     | 0.889    |
| policy_loss        | -0.145   |
| total_timesteps    | 676575   |
| value_loss         | 0.599    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2024     |
| nupdates           | 290      |
| policy_entropy     | 0.861    |
| policy_loss        | -0.0584  |
| total_timesteps    | 700825   |
| value_loss         | 0.386    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.968    |
| fps                | 2023     |
| nupdates           | 540      |
| policy_entropy     | 0.854    |
| policy_loss        | -0.0496  |
| total_timesteps    | 1307075  |
| value_loss         | 0.353    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2023     |
| nupdates           | 550      |
| policy_entropy     | 0.843    |
| policy_loss        | 0.0288   |
| total_timesteps    | 1331325  |
| value_loss         | 0.333    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2023     |
| nupdates           | 560      |
| policy_entropy     | 0.813    |
| policy_loss        | -0.0753  |
| total_timesteps    | 1355575  |
| value_loss         | 0.488    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.966    |
| fps                | 2026     |
| nupdates           | 810      |
| policy_entropy     | 0.915    |
| policy_loss        | -0.0098  |
| total_timesteps    | 1961825  |
| value_loss         | 0.539    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2026     |
| nupdates           | 820      |
| policy_entropy     | 0.871    |
| policy_loss        | -0.0173  |
| total_timesteps    | 1986075  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps                | 2026     |
| nupdates           | 830      |
| policy_entropy     | 0.943    |
| policy_loss        | -0.0765  |
| total_timesteps    | 2010325  |
| value_loss         | 0.34     |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.95     |
| fps                | 2029     |
| nupdates           | 1080     |
| policy_entropy     | 0.807    |
| policy_loss        | -0.0112  |
| total_timesteps    | 2616575  |
| value_loss         | 0.816    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2029     |
| nupdates           | 1090     |
| policy_entropy     | 0.829    |
| policy_loss        | -0.0536  |
| total_timesteps    | 2640825  |
| value_loss         | 0.322    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2029     |
| nupdates           | 1100     |
| policy_entropy     | 0.921    |
| policy_loss        | -0.0415  |
| total_timesteps    | 2665075  |
| value_loss         | 0.313    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2033     |
| nupdates           | 1350     |
| policy_entropy     | 0.857    |
| policy_loss        | 0.0148   |
| total_timesteps    | 3271325  |
| value_loss         | 0.2      |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps                | 2033     |
| nupdates           | 1360     |
| policy_entropy     | 0.977    |
| policy_loss        | -0.102   |
| total_timesteps    | 3295575  |
| value_loss         | 0.568    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2033     |
| nupdates           | 1370     |
| policy_entropy     | 0.91     |
| policy_loss        | -0.0606  |
| total_timesteps    | 3319825  |
| value_loss         | 0.494    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.975    |
| fps                | 2036     |
| nupdates           | 1620     |
| policy_entropy     | 0.852    |
| policy_loss        | -0.157   |
| total_timesteps    | 3926075  |
| value_loss         | 0.432    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2037     |
| nupdates           | 1630     |
| policy_entropy     | 0.985    |
| policy_loss        | -0.0269  |
| total_timesteps    | 3950325  |
| value_loss         | 0.412    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2037     |
| nupdates           | 1640     |
| policy_entropy     | 0.866    |
| policy_loss        | 0.2      |
| total_timesteps    | 3974575  |
| value_loss         | 0.39     |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.962    |
| fps                | 2040     |
| nupdates           | 1890     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.0526  |
| total_timesteps    | 4580825  |
| value_loss         | 0.454    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2040     |
| nupdates           | 1900     |
| policy_entropy     | 0.912    |
| policy_loss        | -0.0873  |
| total_timesteps    | 4605075  |
| value_loss         | 0.315    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2040     |
| nupdates           | 1910     |
| policy_entropy     | 0.885    |
| policy_loss        | 0.139    |
| total_timesteps    | 4629325  |
| value_loss         | 0.132    |
---------------------------------
---------------------------------
| explained_variance | 0.942    |
| fps         

---------------------------------
| explained_variance | 0.967    |
| fps                | 2043     |
| nupdates           | 2160     |
| policy_entropy     | 0.824    |
| policy_loss        | 0.000496 |
| total_timesteps    | 5235575  |
| value_loss         | 0.396    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2043     |
| nupdates           | 2170     |
| policy_entropy     | 0.896    |
| policy_loss        | -0.0917  |
| total_timesteps    | 5259825  |
| value_loss         | 0.341    |
---------------------------------
---------------------------------
| explained_variance | 0.952    |
| fps                | 2044     |
| nupdates           | 2180     |
| policy_entropy     | 0.817    |
| policy_loss        | -0.172   |
| total_timesteps    | 5284075  |
| value_loss         | 0.524    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.955    |
| fps                | 2047     |
| nupdates           | 2430     |
| policy_entropy     | 0.947    |
| policy_loss        | -0.141   |
| total_timesteps    | 5890325  |
| value_loss         | 0.72     |
---------------------------------
---------------------------------
| explained_variance | 0.949    |
| fps                | 2047     |
| nupdates           | 2440     |
| policy_entropy     | 0.94     |
| policy_loss        | -0.168   |
| total_timesteps    | 5914575  |
| value_loss         | 0.593    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2047     |
| nupdates           | 2450     |
| policy_entropy     | 0.892    |
| policy_loss        | 0.0278   |
| total_timesteps    | 5938825  |
| value_loss         | 0.25     |
---------------------------------
---------------------------------
| explained_variance | 0.939    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2049     |
| nupdates           | 2700     |
| policy_entropy     | 0.808    |
| policy_loss        | 0.0234   |
| total_timesteps    | 6545075  |
| value_loss         | 0.215    |
---------------------------------
---------------------------------
| explained_variance | 0.95     |
| fps                | 2049     |
| nupdates           | 2710     |
| policy_entropy     | 0.895    |
| policy_loss        | -0.166   |
| total_timesteps    | 6569325  |
| value_loss         | 0.601    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2049     |
| nupdates           | 2720     |
| policy_entropy     | 0.892    |
| policy_loss        | 0.037    |
| total_timesteps    | 6593575  |
| value_loss         | 0.386    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2050     |
| nupdates           | 2970     |
| policy_entropy     | 0.857    |
| policy_loss        | 0.0432   |
| total_timesteps    | 7199825  |
| value_loss         | 0.275    |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps                | 2050     |
| nupdates           | 2980     |
| policy_entropy     | 0.937    |
| policy_loss        | -0.141   |
| total_timesteps    | 7224075  |
| value_loss         | 0.487    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2050     |
| nupdates           | 2990     |
| policy_entropy     | 0.937    |
| policy_loss        | -0.00543 |
| total_timesteps    | 7248325  |
| value_loss         | 0.381    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.967    |
| fps                | 2050     |
| nupdates           | 3240     |
| policy_entropy     | 0.97     |
| policy_loss        | -0.144   |
| total_timesteps    | 7854575  |
| value_loss         | 0.48     |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2049     |
| nupdates           | 3250     |
| policy_entropy     | 0.918    |
| policy_loss        | 0.0256   |
| total_timesteps    | 7878825  |
| value_loss         | 0.215    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2049     |
| nupdates           | 3260     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0116   |
| total_timesteps    | 7903075  |
| value_loss         | 0.277    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.992    |
| fps                | 2047     |
| nupdates           | 3510     |
| policy_entropy     | 0.845    |
| policy_loss        | 0.0753   |
| total_timesteps    | 8509325  |
| value_loss         | 0.143    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2047     |
| nupdates           | 3520     |
| policy_entropy     | 0.84     |
| policy_loss        | 0.0297   |
| total_timesteps    | 8533575  |
| value_loss         | 0.3      |
---------------------------------
---------------------------------
| explained_variance | 0.949    |
| fps                | 2047     |
| nupdates           | 3530     |
| policy_entropy     | 0.856    |
| policy_loss        | -0.187   |
| total_timesteps    | 8557825  |
| value_loss         | 0.646    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2043     |
| nupdates           | 3780     |
| policy_entropy     | 0.84     |
| policy_loss        | 0.0566   |
| total_timesteps    | 9164075  |
| value_loss         | 0.194    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2043     |
| nupdates           | 3790     |
| policy_entropy     | 0.918    |
| policy_loss        | -0.0159  |
| total_timesteps    | 9188325  |
| value_loss         | 0.448    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2043     |
| nupdates           | 3800     |
| policy_entropy     | 0.931    |
| policy_loss        | 0.0558   |
| total_timesteps    | 9212575  |
| value_loss         | 0.154    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2036     |
| nupdates           | 4050     |
| policy_entropy     | 0.914    |
| policy_loss        | -0.0108  |
| total_timesteps    | 9818825  |
| value_loss         | 0.348    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2036     |
| nupdates           | 4060     |
| policy_entropy     | 0.878    |
| policy_loss        | -0.0325  |
| total_timesteps    | 9843075  |
| value_loss         | 0.389    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2035     |
| nupdates           | 4070     |
| policy_entropy     | 0.859    |
| policy_loss        | 0.0673   |
| total_timesteps    | 9867325  |
| value_loss         | 0.241    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.982    |
| fps                | 1555     |
| nupdates           | 1        |
| policy_entropy     | 0.952    |
| policy_loss        | -0.0784  |
| total_timesteps    | 0        |
| value_loss         | 0.316    |
---------------------------------
---------------------------------
| explained_variance | 0.944    |
| fps                | 2567     |
| nupdates           | 10       |
| policy_entropy     | 0.946    |
| policy_loss        | -2       |
| total_timesteps    | 21825    |
| value_loss         | 5.62     |
---------------------------------
---------------------------------
| explained_variance | 0.891    |
| fps                | 2250     |
| nupdates           | 20       |
| policy_entropy     | 0.86     |
| policy_loss        | 2.47     |
| total_timesteps    | 46075    |
| value_loss         | 9.77     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.959    |
| fps                | 2030     |
| nupdates           | 270      |
| policy_entropy     | 0.932    |
| policy_loss        | -0.108   |
| total_timesteps    | 652325   |
| value_loss         | 0.478    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2029     |
| nupdates           | 280      |
| policy_entropy     | 0.91     |
| policy_loss        | 0.0752   |
| total_timesteps    | 676575   |
| value_loss         | 0.209    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2029     |
| nupdates           | 290      |
| policy_entropy     | 0.868    |
| policy_loss        | 0.00875  |
| total_timesteps    | 700825   |
| value_loss         | 0.378    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2030     |
| nupdates           | 540      |
| policy_entropy     | 0.894    |
| policy_loss        | 0.0695   |
| total_timesteps    | 1307075  |
| value_loss         | 0.112    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2030     |
| nupdates           | 550      |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0542  |
| total_timesteps    | 1331325  |
| value_loss         | 0.389    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2030     |
| nupdates           | 560      |
| policy_entropy     | 0.93     |
| policy_loss        | -0.0181  |
| total_timesteps    | 1355575  |
| value_loss         | 0.383    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.975    |
| fps                | 2033     |
| nupdates           | 810      |
| policy_entropy     | 0.914    |
| policy_loss        | 0.0694   |
| total_timesteps    | 1961825  |
| value_loss         | 0.294    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2033     |
| nupdates           | 820      |
| policy_entropy     | 0.944    |
| policy_loss        | -0.192   |
| total_timesteps    | 1986075  |
| value_loss         | 0.362    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2034     |
| nupdates           | 830      |
| policy_entropy     | 0.944    |
| policy_loss        | -0.0221  |
| total_timesteps    | 2010325  |
| value_loss         | 0.291    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.991    |
| fps                | 2037     |
| nupdates           | 1080     |
| policy_entropy     | 0.899    |
| policy_loss        | 0.0313   |
| total_timesteps    | 2616575  |
| value_loss         | 0.173    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2037     |
| nupdates           | 1090     |
| policy_entropy     | 0.857    |
| policy_loss        | -0.0198  |
| total_timesteps    | 2640825  |
| value_loss         | 0.24     |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2038     |
| nupdates           | 1100     |
| policy_entropy     | 0.903    |
| policy_loss        | 0.0162   |
| total_timesteps    | 2665075  |
| value_loss         | 0.404    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.995    |
| fps                | 2041     |
| nupdates           | 1350     |
| policy_entropy     | 0.92     |
| policy_loss        | 0.0307   |
| total_timesteps    | 3271325  |
| value_loss         | 0.0853   |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2041     |
| nupdates           | 1360     |
| policy_entropy     | 0.91     |
| policy_loss        | -0.00907 |
| total_timesteps    | 3295575  |
| value_loss         | 0.301    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2041     |
| nupdates           | 1370     |
| policy_entropy     | 0.908    |
| policy_loss        | 0.0675   |
| total_timesteps    | 3319825  |
| value_loss         | 0.153    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.969    |
| fps                | 2045     |
| nupdates           | 1620     |
| policy_entropy     | 0.89     |
| policy_loss        | 0.083    |
| total_timesteps    | 3926075  |
| value_loss         | 0.408    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2045     |
| nupdates           | 1630     |
| policy_entropy     | 0.953    |
| policy_loss        | -0.0206  |
| total_timesteps    | 3950325  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2045     |
| nupdates           | 1640     |
| policy_entropy     | 0.892    |
| policy_loss        | 0.046    |
| total_timesteps    | 3974575  |
| value_loss         | 0.563    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2048     |
| nupdates           | 1890     |
| policy_entropy     | 0.925    |
| policy_loss        | 0.00825  |
| total_timesteps    | 4580825  |
| value_loss         | 0.224    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2048     |
| nupdates           | 1900     |
| policy_entropy     | 0.893    |
| policy_loss        | -0.162   |
| total_timesteps    | 4605075  |
| value_loss         | 0.534    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2048     |
| nupdates           | 1910     |
| policy_entropy     | 0.953    |
| policy_loss        | -0.0213  |
| total_timesteps    | 4629325  |
| value_loss         | 0.269    |
---------------------------------
---------------------------------
| explained_variance | 0.916    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2051     |
| nupdates           | 2160     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0261  |
| total_timesteps    | 5235575  |
| value_loss         | 0.262    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2052     |
| nupdates           | 2170     |
| policy_entropy     | 0.864    |
| policy_loss        | 0.0714   |
| total_timesteps    | 5259825  |
| value_loss         | 0.142    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2052     |
| nupdates           | 2180     |
| policy_entropy     | 0.92     |
| policy_loss        | -0.022   |
| total_timesteps    | 5284075  |
| value_loss         | 0.325    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2054     |
| nupdates           | 2430     |
| policy_entropy     | 0.841    |
| policy_loss        | 0.0154   |
| total_timesteps    | 5890325  |
| value_loss         | 0.246    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps                | 2054     |
| nupdates           | 2440     |
| policy_entropy     | 0.927    |
| policy_loss        | -0.0593  |
| total_timesteps    | 5914575  |
| value_loss         | 0.516    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2054     |
| nupdates           | 2450     |
| policy_entropy     | 0.869    |
| policy_loss        | -0.0512  |
| total_timesteps    | 5938825  |
| value_loss         | 0.348    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.968    |
| fps                | 2055     |
| nupdates           | 2700     |
| policy_entropy     | 0.839    |
| policy_loss        | -0.0422  |
| total_timesteps    | 6545075  |
| value_loss         | 0.381    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2055     |
| nupdates           | 2710     |
| policy_entropy     | 0.849    |
| policy_loss        | 0.0694   |
| total_timesteps    | 6569325  |
| value_loss         | 0.5      |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2055     |
| nupdates           | 2720     |
| policy_entropy     | 0.834    |
| policy_loss        | 0.0516   |
| total_timesteps    | 6593575  |
| value_loss         | 0.28     |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2055     |
| nupdates           | 2970     |
| policy_entropy     | 0.898    |
| policy_loss        | -0.0397  |
| total_timesteps    | 7199825  |
| value_loss         | 0.265    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2054     |
| nupdates           | 2980     |
| policy_entropy     | 0.942    |
| policy_loss        | 0.0992   |
| total_timesteps    | 7224075  |
| value_loss         | 0.214    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2054     |
| nupdates           | 2990     |
| policy_entropy     | 0.917    |
| policy_loss        | -0.201   |
| total_timesteps    | 7248325  |
| value_loss         | 0.531    |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2052     |
| nupdates           | 3240     |
| policy_entropy     | 0.863    |
| policy_loss        | -0.0559  |
| total_timesteps    | 7854575  |
| value_loss         | 0.3      |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2052     |
| nupdates           | 3250     |
| policy_entropy     | 0.886    |
| policy_loss        | 0.0276   |
| total_timesteps    | 7878825  |
| value_loss         | 0.41     |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2052     |
| nupdates           | 3260     |
| policy_entropy     | 0.886    |
| policy_loss        | 0.0754   |
| total_timesteps    | 7903075  |
| value_loss         | 0.293    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2048     |
| nupdates           | 3510     |
| policy_entropy     | 0.856    |
| policy_loss        | 0.0614   |
| total_timesteps    | 8509325  |
| value_loss         | 0.151    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2047     |
| nupdates           | 3520     |
| policy_entropy     | 0.921    |
| policy_loss        | -0.0127  |
| total_timesteps    | 8533575  |
| value_loss         | 0.333    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2047     |
| nupdates           | 3530     |
| policy_entropy     | 0.946    |
| policy_loss        | -0.0851  |
| total_timesteps    | 8557825  |
| value_loss         | 0.261    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2039     |
| nupdates           | 3780     |
| policy_entropy     | 0.925    |
| policy_loss        | -0.075   |
| total_timesteps    | 9164075  |
| value_loss         | 0.413    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2039     |
| nupdates           | 3790     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0275  |
| total_timesteps    | 9188325  |
| value_loss         | 0.245    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2038     |
| nupdates           | 3800     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.08    |
| total_timesteps    | 9212575  |
| value_loss         | 0.3      |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2027     |
| nupdates           | 4050     |
| policy_entropy     | 0.853    |
| policy_loss        | 0.0604   |
| total_timesteps    | 9818825  |
| value_loss         | 0.293    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2026     |
| nupdates           | 4060     |
| policy_entropy     | 0.906    |
| policy_loss        | -0.0167  |
| total_timesteps    | 9843075  |
| value_loss         | 0.271    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2026     |
| nupdates           | 4070     |
| policy_entropy     | 0.906    |
| policy_loss        | 0.017    |
| total_timesteps    | 9867325  |
| value_loss         | 0.25     |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.933    |
| fps                | 1568     |
| nupdates           | 1        |
| policy_entropy     | 0.918    |
| policy_loss        | -0.07    |
| total_timesteps    | 0        |
| value_loss         | 0.563    |
---------------------------------
---------------------------------
| explained_variance | 0.922    |
| fps                | 2593     |
| nupdates           | 10       |
| policy_entropy     | 0.898    |
| policy_loss        | 1.69     |
| total_timesteps    | 21825    |
| value_loss         | 4.35     |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2256     |
| nupdates           | 20       |
| policy_entropy     | 0.811    |
| policy_loss        | -0.401   |
| total_timesteps    | 46075    |
| value_loss         | 0.959    |
---------------------------------
------------

---------------------------------
| explained_variance | 0.972    |
| fps                | 2034     |
| nupdates           | 270      |
| policy_entropy     | 0.806    |
| policy_loss        | 0.0182   |
| total_timesteps    | 652325   |
| value_loss         | 0.316    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2034     |
| nupdates           | 280      |
| policy_entropy     | 0.862    |
| policy_loss        | -0.0406  |
| total_timesteps    | 676575   |
| value_loss         | 0.523    |
---------------------------------
---------------------------------
| explained_variance | 0.952    |
| fps                | 2034     |
| nupdates           | 290      |
| policy_entropy     | 0.859    |
| policy_loss        | 0.0233   |
| total_timesteps    | 700825   |
| value_loss         | 0.505    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.957    |
| fps                | 2034     |
| nupdates           | 540      |
| policy_entropy     | 0.892    |
| policy_loss        | -0.0165  |
| total_timesteps    | 1307075  |
| value_loss         | 0.383    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2034     |
| nupdates           | 550      |
| policy_entropy     | 0.951    |
| policy_loss        | 0.037    |
| total_timesteps    | 1331325  |
| value_loss         | 0.229    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps                | 2034     |
| nupdates           | 560      |
| policy_entropy     | 0.893    |
| policy_loss        | -0.00997 |
| total_timesteps    | 1355575  |
| value_loss         | 0.553    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2036     |
| nupdates           | 810      |
| policy_entropy     | 0.88     |
| policy_loss        | 0.0494   |
| total_timesteps    | 1961825  |
| value_loss         | 0.263    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2036     |
| nupdates           | 820      |
| policy_entropy     | 0.867    |
| policy_loss        | 0.0878   |
| total_timesteps    | 1986075  |
| value_loss         | 0.433    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2036     |
| nupdates           | 830      |
| policy_entropy     | 0.867    |
| policy_loss        | -0.0296  |
| total_timesteps    | 2010325  |
| value_loss         | 0.615    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2039     |
| nupdates           | 1080     |
| policy_entropy     | 0.931    |
| policy_loss        | -0.0135  |
| total_timesteps    | 2616575  |
| value_loss         | 0.243    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2039     |
| nupdates           | 1090     |
| policy_entropy     | 0.906    |
| policy_loss        | -0.00826 |
| total_timesteps    | 2640825  |
| value_loss         | 0.386    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2040     |
| nupdates           | 1100     |
| policy_entropy     | 0.929    |
| policy_loss        | 0.0427   |
| total_timesteps    | 2665075  |
| value_loss         | 0.374    |
---------------------------------
---------------------------------
| explained_variance | 0.935    |
| fps         

---------------------------------
| explained_variance | 0.966    |
| fps                | 2043     |
| nupdates           | 1350     |
| policy_entropy     | 0.899    |
| policy_loss        | -0.0469  |
| total_timesteps    | 3271325  |
| value_loss         | 0.331    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2043     |
| nupdates           | 1360     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.049    |
| total_timesteps    | 3295575  |
| value_loss         | 0.359    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2043     |
| nupdates           | 1370     |
| policy_entropy     | 0.999    |
| policy_loss        | -0.0662  |
| total_timesteps    | 3319825  |
| value_loss         | 0.324    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.961    |
| fps                | 2046     |
| nupdates           | 1620     |
| policy_entropy     | 0.947    |
| policy_loss        | -0.0225  |
| total_timesteps    | 3926075  |
| value_loss         | 0.43     |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2047     |
| nupdates           | 1630     |
| policy_entropy     | 0.911    |
| policy_loss        | 0.00247  |
| total_timesteps    | 3950325  |
| value_loss         | 0.237    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2047     |
| nupdates           | 1640     |
| policy_entropy     | 0.948    |
| policy_loss        | -0.139   |
| total_timesteps    | 3974575  |
| value_loss         | 0.522    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.972    |
| fps                | 2049     |
| nupdates           | 1890     |
| policy_entropy     | 0.846    |
| policy_loss        | -0.0698  |
| total_timesteps    | 4580825  |
| value_loss         | 0.389    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2049     |
| nupdates           | 1900     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.00474  |
| total_timesteps    | 4605075  |
| value_loss         | 0.316    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2049     |
| nupdates           | 1910     |
| policy_entropy     | 0.94     |
| policy_loss        | -0.0556  |
| total_timesteps    | 4629325  |
| value_loss         | 0.308    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2051     |
| nupdates           | 2160     |
| policy_entropy     | 0.862    |
| policy_loss        | -0.0122  |
| total_timesteps    | 5235575  |
| value_loss         | 0.364    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2051     |
| nupdates           | 2170     |
| policy_entropy     | 0.968    |
| policy_loss        | 0.011    |
| total_timesteps    | 5259825  |
| value_loss         | 0.174    |
---------------------------------
---------------------------------
| explained_variance | 0.933    |
| fps                | 2051     |
| nupdates           | 2180     |
| policy_entropy     | 0.89     |
| policy_loss        | -0.148   |
| total_timesteps    | 5284075  |
| value_loss         | 0.744    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.959    |
| fps                | 2053     |
| nupdates           | 2430     |
| policy_entropy     | 1        |
| policy_loss        | -0.111   |
| total_timesteps    | 5890325  |
| value_loss         | 0.479    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2053     |
| nupdates           | 2440     |
| policy_entropy     | 0.951    |
| policy_loss        | -0.209   |
| total_timesteps    | 5914575  |
| value_loss         | 0.62     |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps                | 2053     |
| nupdates           | 2450     |
| policy_entropy     | 0.845    |
| policy_loss        | -0.105   |
| total_timesteps    | 5938825  |
| value_loss         | 0.563    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.969    |
| fps                | 2055     |
| nupdates           | 2700     |
| policy_entropy     | 0.905    |
| policy_loss        | -0.0165  |
| total_timesteps    | 6545075  |
| value_loss         | 0.267    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2055     |
| nupdates           | 2710     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.175   |
| total_timesteps    | 6569325  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2055     |
| nupdates           | 2720     |
| policy_entropy     | 0.917    |
| policy_loss        | -0.0193  |
| total_timesteps    | 6593575  |
| value_loss         | 0.433    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2055     |
| nupdates           | 2970     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0233   |
| total_timesteps    | 7199825  |
| value_loss         | 0.325    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2055     |
| nupdates           | 2980     |
| policy_entropy     | 0.943    |
| policy_loss        | -0.122   |
| total_timesteps    | 7224075  |
| value_loss         | 0.377    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2055     |
| nupdates           | 2990     |
| policy_entropy     | 0.98     |
| policy_loss        | -0.0647  |
| total_timesteps    | 7248325  |
| value_loss         | 0.275    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2053     |
| nupdates           | 3240     |
| policy_entropy     | 0.914    |
| policy_loss        | -0.0577  |
| total_timesteps    | 7854575  |
| value_loss         | 0.405    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2053     |
| nupdates           | 3250     |
| policy_entropy     | 1.02     |
| policy_loss        | -0.0351  |
| total_timesteps    | 7878825  |
| value_loss         | 0.235    |
---------------------------------
---------------------------------
| explained_variance | 0.997    |
| fps                | 2053     |
| nupdates           | 3260     |
| policy_entropy     | 0.842    |
| policy_loss        | 0.114    |
| total_timesteps    | 7903075  |
| value_loss         | 0.0959   |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2049     |
| nupdates           | 3510     |
| policy_entropy     | 0.841    |
| policy_loss        | 0.0296   |
| total_timesteps    | 8509325  |
| value_loss         | 0.277    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2049     |
| nupdates           | 3520     |
| policy_entropy     | 0.854    |
| policy_loss        | 0.0396   |
| total_timesteps    | 8533575  |
| value_loss         | 0.251    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2048     |
| nupdates           | 3530     |
| policy_entropy     | 0.88     |
| policy_loss        | -0.0197  |
| total_timesteps    | 8557825  |
| value_loss         | 0.28     |
---------------------------------
---------------------------------
| explained_variance | 0.913    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2041     |
| nupdates           | 3780     |
| policy_entropy     | 0.898    |
| policy_loss        | 0.0264   |
| total_timesteps    | 9164075  |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2040     |
| nupdates           | 3790     |
| policy_entropy     | 0.878    |
| policy_loss        | 0.0699   |
| total_timesteps    | 9188325  |
| value_loss         | 0.236    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2040     |
| nupdates           | 3800     |
| policy_entropy     | 0.875    |
| policy_loss        | 0.00749  |
| total_timesteps    | 9212575  |
| value_loss         | 0.269    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2029     |
| nupdates           | 4050     |
| policy_entropy     | 0.856    |
| policy_loss        | 0.0647   |
| total_timesteps    | 9818825  |
| value_loss         | 0.178    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2028     |
| nupdates           | 4060     |
| policy_entropy     | 0.889    |
| policy_loss        | 0.0315   |
| total_timesteps    | 9843075  |
| value_loss         | 0.293    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2027     |
| nupdates           | 4070     |
| policy_entropy     | 0.878    |
| policy_loss        | 0.0721   |
| total_timesteps    | 9867325  |
| value_loss         | 0.212    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.978    |
| fps                | 1534     |
| nupdates           | 1        |
| policy_entropy     | 0.864    |
| policy_loss        | -0.0337  |
| total_timesteps    | 0        |
| value_loss         | 0.358    |
---------------------------------
---------------------------------
| explained_variance | 0.837    |
| fps                | 2575     |
| nupdates           | 10       |
| policy_entropy     | 0.835    |
| policy_loss        | 2.05     |
| total_timesteps    | 21825    |
| value_loss         | 8.12     |
---------------------------------
---------------------------------
| explained_variance | 0.937    |
| fps                | 2250     |
| nupdates           | 20       |
| policy_entropy     | 0.842    |
| policy_loss        | -0.109   |
| total_timesteps    | 46075    |
| value_loss         | 0.994    |
---------------------------------
------------

---------------------------------
| explained_variance | 0.978    |
| fps                | 2035     |
| nupdates           | 270      |
| policy_entropy     | 0.934    |
| policy_loss        | -0.1     |
| total_timesteps    | 652325   |
| value_loss         | 0.358    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2035     |
| nupdates           | 280      |
| policy_entropy     | 0.87     |
| policy_loss        | 0.00749  |
| total_timesteps    | 676575   |
| value_loss         | 0.131    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2034     |
| nupdates           | 290      |
| policy_entropy     | 0.851    |
| policy_loss        | 0.13     |
| total_timesteps    | 700825   |
| value_loss         | 0.116    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2036     |
| nupdates           | 540      |
| policy_entropy     | 0.84     |
| policy_loss        | 0.0386   |
| total_timesteps    | 1307075  |
| value_loss         | 0.151    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2036     |
| nupdates           | 550      |
| policy_entropy     | 0.782    |
| policy_loss        | 0.00119  |
| total_timesteps    | 1331325  |
| value_loss         | 0.309    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2036     |
| nupdates           | 560      |
| policy_entropy     | 0.77     |
| policy_loss        | 0.0165   |
| total_timesteps    | 1355575  |
| value_loss         | 0.125    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2039     |
| nupdates           | 810      |
| policy_entropy     | 0.855    |
| policy_loss        | 0.0837   |
| total_timesteps    | 1961825  |
| value_loss         | 0.224    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2039     |
| nupdates           | 820      |
| policy_entropy     | 0.884    |
| policy_loss        | -0.0337  |
| total_timesteps    | 1986075  |
| value_loss         | 0.47     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2039     |
| nupdates           | 830      |
| policy_entropy     | 0.899    |
| policy_loss        | -0.0477  |
| total_timesteps    | 2010325  |
| value_loss         | 0.303    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.957    |
| fps                | 2042     |
| nupdates           | 1080     |
| policy_entropy     | 0.932    |
| policy_loss        | -0.221   |
| total_timesteps    | 2616575  |
| value_loss         | 0.72     |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2043     |
| nupdates           | 1090     |
| policy_entropy     | 0.91     |
| policy_loss        | -0.0189  |
| total_timesteps    | 2640825  |
| value_loss         | 0.286    |
---------------------------------
---------------------------------
| explained_variance | 0.94     |
| fps                | 2043     |
| nupdates           | 1100     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.0457  |
| total_timesteps    | 2665075  |
| value_loss         | 0.397    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2046     |
| nupdates           | 1350     |
| policy_entropy     | 0.922    |
| policy_loss        | -0.134   |
| total_timesteps    | 3271325  |
| value_loss         | 0.429    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2047     |
| nupdates           | 1360     |
| policy_entropy     | 0.94     |
| policy_loss        | 0.00288  |
| total_timesteps    | 3295575  |
| value_loss         | 0.274    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2047     |
| nupdates           | 1370     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.000104 |
| total_timesteps    | 3319825  |
| value_loss         | 0.254    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2050     |
| nupdates           | 1620     |
| policy_entropy     | 0.95     |
| policy_loss        | -0.0446  |
| total_timesteps    | 3926075  |
| value_loss         | 0.162    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2050     |
| nupdates           | 1630     |
| policy_entropy     | 0.966    |
| policy_loss        | 0.137    |
| total_timesteps    | 3950325  |
| value_loss         | 0.339    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2050     |
| nupdates           | 1640     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0925  |
| total_timesteps    | 3974575  |
| value_loss         | 0.444    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2053     |
| nupdates           | 1890     |
| policy_entropy     | 0.929    |
| policy_loss        | -0.0437  |
| total_timesteps    | 4580825  |
| value_loss         | 0.377    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2053     |
| nupdates           | 1900     |
| policy_entropy     | 0.865    |
| policy_loss        | 0.0143   |
| total_timesteps    | 4605075  |
| value_loss         | 0.199    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2053     |
| nupdates           | 1910     |
| policy_entropy     | 0.924    |
| policy_loss        | 0.0207   |
| total_timesteps    | 4629325  |
| value_loss         | 0.205    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.996    |
| fps                | 2056     |
| nupdates           | 2160     |
| policy_entropy     | 0.867    |
| policy_loss        | 0.101    |
| total_timesteps    | 5235575  |
| value_loss         | 0.0838   |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2056     |
| nupdates           | 2170     |
| policy_entropy     | 0.902    |
| policy_loss        | -0.122   |
| total_timesteps    | 5259825  |
| value_loss         | 0.314    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2056     |
| nupdates           | 2180     |
| policy_entropy     | 0.893    |
| policy_loss        | -0.0677  |
| total_timesteps    | 5284075  |
| value_loss         | 0.304    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2059     |
| nupdates           | 2430     |
| policy_entropy     | 0.822    |
| policy_loss        | 0.0369   |
| total_timesteps    | 5890325  |
| value_loss         | 0.389    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2059     |
| nupdates           | 2440     |
| policy_entropy     | 0.875    |
| policy_loss        | -0.0703  |
| total_timesteps    | 5914575  |
| value_loss         | 0.351    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2059     |
| nupdates           | 2450     |
| policy_entropy     | 0.938    |
| policy_loss        | -0.0641  |
| total_timesteps    | 5938825  |
| value_loss         | 0.51     |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2061     |
| nupdates           | 2700     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0161   |
| total_timesteps    | 6545075  |
| value_loss         | 0.179    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2061     |
| nupdates           | 2710     |
| policy_entropy     | 0.839    |
| policy_loss        | 0.117    |
| total_timesteps    | 6569325  |
| value_loss         | 0.191    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2061     |
| nupdates           | 2720     |
| policy_entropy     | 0.834    |
| policy_loss        | -0.079   |
| total_timesteps    | 6593575  |
| value_loss         | 0.486    |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps         

---------------------------------
| explained_variance | 0.953    |
| fps                | 2062     |
| nupdates           | 2970     |
| policy_entropy     | 0.893    |
| policy_loss        | -0.126   |
| total_timesteps    | 7199825  |
| value_loss         | 0.484    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2062     |
| nupdates           | 2980     |
| policy_entropy     | 0.852    |
| policy_loss        | 0.00134  |
| total_timesteps    | 7224075  |
| value_loss         | 0.172    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2062     |
| nupdates           | 2990     |
| policy_entropy     | 0.911    |
| policy_loss        | -0.0749  |
| total_timesteps    | 7248325  |
| value_loss         | 0.278    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2062     |
| nupdates           | 3240     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.0554   |
| total_timesteps    | 7854575  |
| value_loss         | 0.286    |
---------------------------------
---------------------------------
| explained_variance | 0.952    |
| fps                | 2062     |
| nupdates           | 3250     |
| policy_entropy     | 0.873    |
| policy_loss        | 0.000611 |
| total_timesteps    | 7878825  |
| value_loss         | 0.469    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2062     |
| nupdates           | 3260     |
| policy_entropy     | 0.956    |
| policy_loss        | -0.115   |
| total_timesteps    | 7903075  |
| value_loss         | 0.544    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2060     |
| nupdates           | 3510     |
| policy_entropy     | 0.878    |
| policy_loss        | 0.0916   |
| total_timesteps    | 8509325  |
| value_loss         | 0.141    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2060     |
| nupdates           | 3520     |
| policy_entropy     | 0.907    |
| policy_loss        | 0.00092  |
| total_timesteps    | 8533575  |
| value_loss         | 0.258    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2060     |
| nupdates           | 3530     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.0101  |
| total_timesteps    | 8557825  |
| value_loss         | 0.232    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2055     |
| nupdates           | 3780     |
| policy_entropy     | 0.856    |
| policy_loss        | -0.0735  |
| total_timesteps    | 9164075  |
| value_loss         | 0.336    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2055     |
| nupdates           | 3790     |
| policy_entropy     | 0.901    |
| policy_loss        | -0.0738  |
| total_timesteps    | 9188325  |
| value_loss         | 0.426    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2055     |
| nupdates           | 3800     |
| policy_entropy     | 0.84     |
| policy_loss        | -0.0566  |
| total_timesteps    | 9212575  |
| value_loss         | 0.508    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2046     |
| nupdates           | 4050     |
| policy_entropy     | 0.962    |
| policy_loss        | -0.0759  |
| total_timesteps    | 9818825  |
| value_loss         | 0.419    |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 2046     |
| nupdates           | 4060     |
| policy_entropy     | 0.853    |
| policy_loss        | -0.0479  |
| total_timesteps    | 9843075  |
| value_loss         | 0.503    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2045     |
| nupdates           | 4070     |
| policy_entropy     | 0.916    |
| policy_loss        | -0.00484 |
| total_timesteps    | 9867325  |
| value_loss         | 0.275    |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.946    |
| fps                | 1564     |
| nupdates           | 1        |
| policy_entropy     | 0.861    |
| policy_loss        | -0.238   |
| total_timesteps    | 0        |
| value_loss         | 0.796    |
---------------------------------
---------------------------------
| explained_variance | 0.951    |
| fps                | 2584     |
| nupdates           | 10       |
| policy_entropy     | 0.846    |
| policy_loss        | 0.468    |
| total_timesteps    | 21825    |
| value_loss         | 1.34     |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2252     |
| nupdates           | 20       |
| policy_entropy     | 0.848    |
| policy_loss        | -0.977   |
| total_timesteps    | 46075    |
| value_loss         | 2.13     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.967    |
| fps                | 2034     |
| nupdates           | 270      |
| policy_entropy     | 0.842    |
| policy_loss        | -0.0581  |
| total_timesteps    | 652325   |
| value_loss         | 0.436    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2034     |
| nupdates           | 280      |
| policy_entropy     | 0.835    |
| policy_loss        | 0.147    |
| total_timesteps    | 676575   |
| value_loss         | 0.348    |
---------------------------------
---------------------------------
| explained_variance | 0.953    |
| fps                | 2034     |
| nupdates           | 290      |
| policy_entropy     | 0.846    |
| policy_loss        | -0.0477  |
| total_timesteps    | 700825   |
| value_loss         | 0.507    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.955    |
| fps                | 2035     |
| nupdates           | 540      |
| policy_entropy     | 0.857    |
| policy_loss        | -0.155   |
| total_timesteps    | 1307075  |
| value_loss         | 0.708    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2035     |
| nupdates           | 550      |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0255  |
| total_timesteps    | 1331325  |
| value_loss         | 0.356    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2035     |
| nupdates           | 560      |
| policy_entropy     | 0.85     |
| policy_loss        | -0.00176 |
| total_timesteps    | 1355575  |
| value_loss         | 0.19     |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.972    |
| fps                | 2038     |
| nupdates           | 810      |
| policy_entropy     | 0.941    |
| policy_loss        | -0.129   |
| total_timesteps    | 1961825  |
| value_loss         | 0.398    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2038     |
| nupdates           | 820      |
| policy_entropy     | 0.871    |
| policy_loss        | -0.00495 |
| total_timesteps    | 1986075  |
| value_loss         | 0.364    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2038     |
| nupdates           | 830      |
| policy_entropy     | 0.865    |
| policy_loss        | 0.0538   |
| total_timesteps    | 2010325  |
| value_loss         | 0.289    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.924    |
| fps                | 2043     |
| nupdates           | 1080     |
| policy_entropy     | 0.849    |
| policy_loss        | 0.0385   |
| total_timesteps    | 2616575  |
| value_loss         | 0.689    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2043     |
| nupdates           | 1090     |
| policy_entropy     | 0.936    |
| policy_loss        | -0.105   |
| total_timesteps    | 2640825  |
| value_loss         | 0.22     |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps                | 2043     |
| nupdates           | 1100     |
| policy_entropy     | 0.947    |
| policy_loss        | -0.0589  |
| total_timesteps    | 2665075  |
| value_loss         | 0.459    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2047     |
| nupdates           | 1350     |
| policy_entropy     | 0.86     |
| policy_loss        | 0.00566  |
| total_timesteps    | 3271325  |
| value_loss         | 0.182    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2048     |
| nupdates           | 1360     |
| policy_entropy     | 0.88     |
| policy_loss        | -0.0869  |
| total_timesteps    | 3295575  |
| value_loss         | 0.45     |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2048     |
| nupdates           | 1370     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.0399  |
| total_timesteps    | 3319825  |
| value_loss         | 0.45     |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.94     |
| fps                | 2050     |
| nupdates           | 1620     |
| policy_entropy     | 0.864    |
| policy_loss        | -0.0953  |
| total_timesteps    | 3926075  |
| value_loss         | 0.622    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2050     |
| nupdates           | 1630     |
| policy_entropy     | 0.842    |
| policy_loss        | 0.0543   |
| total_timesteps    | 3950325  |
| value_loss         | 0.215    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2050     |
| nupdates           | 1640     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.113    |
| total_timesteps    | 3974575  |
| value_loss         | 0.211    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.958    |
| fps                | 2052     |
| nupdates           | 1890     |
| policy_entropy     | 0.92     |
| policy_loss        | -0.135   |
| total_timesteps    | 4580825  |
| value_loss         | 0.484    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2052     |
| nupdates           | 1900     |
| policy_entropy     | 0.921    |
| policy_loss        | 0.0879   |
| total_timesteps    | 4605075  |
| value_loss         | 0.231    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2052     |
| nupdates           | 1910     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.023   |
| total_timesteps    | 4629325  |
| value_loss         | 0.346    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

---------------------------------
| explained_variance | 0.956    |
| fps                | 2054     |
| nupdates           | 2160     |
| policy_entropy     | 0.928    |
| policy_loss        | -0.186   |
| total_timesteps    | 5235575  |
| value_loss         | 0.633    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2054     |
| nupdates           | 2170     |
| policy_entropy     | 0.906    |
| policy_loss        | -0.0116  |
| total_timesteps    | 5259825  |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2054     |
| nupdates           | 2180     |
| policy_entropy     | 0.856    |
| policy_loss        | 0.0162   |
| total_timesteps    | 5284075  |
| value_loss         | 0.219    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2056     |
| nupdates           | 2430     |
| policy_entropy     | 0.876    |
| policy_loss        | 0.205    |
| total_timesteps    | 5890325  |
| value_loss         | 0.311    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2056     |
| nupdates           | 2440     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.0135   |
| total_timesteps    | 5914575  |
| value_loss         | 0.421    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2056     |
| nupdates           | 2450     |
| policy_entropy     | 0.888    |
| policy_loss        | 0.0303   |
| total_timesteps    | 5938825  |
| value_loss         | 0.293    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.966    |
| fps                | 2058     |
| nupdates           | 2700     |
| policy_entropy     | 0.956    |
| policy_loss        | -0.0366  |
| total_timesteps    | 6545075  |
| value_loss         | 0.481    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2058     |
| nupdates           | 2710     |
| policy_entropy     | 0.974    |
| policy_loss        | 0.0428   |
| total_timesteps    | 6569325  |
| value_loss         | 0.348    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2058     |
| nupdates           | 2720     |
| policy_entropy     | 0.931    |
| policy_loss        | 0.107    |
| total_timesteps    | 6593575  |
| value_loss         | 0.161    |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps         

---------------------------------
| explained_variance | 0.967    |
| fps                | 2058     |
| nupdates           | 2970     |
| policy_entropy     | 0.865    |
| policy_loss        | -0.0783  |
| total_timesteps    | 7199825  |
| value_loss         | 0.407    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2058     |
| nupdates           | 2980     |
| policy_entropy     | 0.915    |
| policy_loss        | 0.0362   |
| total_timesteps    | 7224075  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2058     |
| nupdates           | 2990     |
| policy_entropy     | 0.894    |
| policy_loss        | -0.17    |
| total_timesteps    | 7248325  |
| value_loss         | 0.359    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2056     |
| nupdates           | 3240     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0738  |
| total_timesteps    | 7854575  |
| value_loss         | 0.385    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2056     |
| nupdates           | 3250     |
| policy_entropy     | 0.892    |
| policy_loss        | 0.0773   |
| total_timesteps    | 7878825  |
| value_loss         | 0.148    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2056     |
| nupdates           | 3260     |
| policy_entropy     | 0.967    |
| policy_loss        | -0.072   |
| total_timesteps    | 7903075  |
| value_loss         | 0.433    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2053     |
| nupdates           | 3510     |
| policy_entropy     | 0.927    |
| policy_loss        | 0.000159 |
| total_timesteps    | 8509325  |
| value_loss         | 0.286    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2053     |
| nupdates           | 3520     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.00805 |
| total_timesteps    | 8533575  |
| value_loss         | 0.284    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2053     |
| nupdates           | 3530     |
| policy_entropy     | 0.888    |
| policy_loss        | 0.109    |
| total_timesteps    | 8557825  |
| value_loss         | 0.117    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2048     |
| nupdates           | 3780     |
| policy_entropy     | 0.89     |
| policy_loss        | -0.181   |
| total_timesteps    | 9164075  |
| value_loss         | 0.568    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2047     |
| nupdates           | 3790     |
| policy_entropy     | 0.948    |
| policy_loss        | -0.0761  |
| total_timesteps    | 9188325  |
| value_loss         | 0.358    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2047     |
| nupdates           | 3800     |
| policy_entropy     | 0.873    |
| policy_loss        | -0.0267  |
| total_timesteps    | 9212575  |
| value_loss         | 0.248    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.961    |
| fps                | 2039     |
| nupdates           | 4050     |
| policy_entropy     | 0.957    |
| policy_loss        | -0.0319  |
| total_timesteps    | 9818825  |
| value_loss         | 0.331    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2039     |
| nupdates           | 4060     |
| policy_entropy     | 0.882    |
| policy_loss        | 0.147    |
| total_timesteps    | 9843075  |
| value_loss         | 0.167    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2039     |
| nupdates           | 4070     |
| policy_entropy     | 0.927    |
| policy_loss        | 0.0649   |
| total_timesteps    | 9867325  |
| value_loss         | 0.231    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.976    |
| fps                | 1565     |
| nupdates           | 1        |
| policy_entropy     | 0.947    |
| policy_loss        | -0.0272  |
| total_timesteps    | 0        |
| value_loss         | 0.237    |
---------------------------------
---------------------------------
| explained_variance | 0.909    |
| fps                | 2583     |
| nupdates           | 10       |
| policy_entropy     | 0.449    |
| policy_loss        | -0.651   |
| total_timesteps    | 21825    |
| value_loss         | 3.43     |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2249     |
| nupdates           | 20       |
| policy_entropy     | 0.431    |
| policy_loss        | -0.0299  |
| total_timesteps    | 46075    |
| value_loss         | 0.485    |
---------------------------------
------------

---------------------------------
| explained_variance | 0.972    |
| fps                | 2026     |
| nupdates           | 270      |
| policy_entropy     | 0.795    |
| policy_loss        | 0.00906  |
| total_timesteps    | 652325   |
| value_loss         | 0.426    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2026     |
| nupdates           | 280      |
| policy_entropy     | 0.871    |
| policy_loss        | -0.019   |
| total_timesteps    | 676575   |
| value_loss         | 0.335    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2025     |
| nupdates           | 290      |
| policy_entropy     | 0.829    |
| policy_loss        | 0.0128   |
| total_timesteps    | 700825   |
| value_loss         | 0.408    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2029     |
| nupdates           | 540      |
| policy_entropy     | 0.88     |
| policy_loss        | -0.019   |
| total_timesteps    | 1307075  |
| value_loss         | 0.222    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2029     |
| nupdates           | 550      |
| policy_entropy     | 0.886    |
| policy_loss        | 0.00371  |
| total_timesteps    | 1331325  |
| value_loss         | 0.272    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2029     |
| nupdates           | 560      |
| policy_entropy     | 0.951    |
| policy_loss        | -0.0679  |
| total_timesteps    | 1355575  |
| value_loss         | 0.345    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2033     |
| nupdates           | 810      |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0744  |
| total_timesteps    | 1961825  |
| value_loss         | 0.284    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2033     |
| nupdates           | 820      |
| policy_entropy     | 0.846    |
| policy_loss        | 0.104    |
| total_timesteps    | 1986075  |
| value_loss         | 0.29     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2034     |
| nupdates           | 830      |
| policy_entropy     | 0.945    |
| policy_loss        | -0.0634  |
| total_timesteps    | 2010325  |
| value_loss         | 0.267    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2037     |
| nupdates           | 1080     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.0514  |
| total_timesteps    | 2616575  |
| value_loss         | 0.289    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2038     |
| nupdates           | 1090     |
| policy_entropy     | 0.83     |
| policy_loss        | -0.0165  |
| total_timesteps    | 2640825  |
| value_loss         | 0.286    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2038     |
| nupdates           | 1100     |
| policy_entropy     | 0.834    |
| policy_loss        | -0.0121  |
| total_timesteps    | 2665075  |
| value_loss         | 0.252    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.967    |
| fps                | 2041     |
| nupdates           | 1350     |
| policy_entropy     | 0.912    |
| policy_loss        | 0.0331   |
| total_timesteps    | 3271325  |
| value_loss         | 0.307    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps                | 2041     |
| nupdates           | 1360     |
| policy_entropy     | 0.881    |
| policy_loss        | -0.169   |
| total_timesteps    | 3295575  |
| value_loss         | 0.78     |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2041     |
| nupdates           | 1370     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.178   |
| total_timesteps    | 3319825  |
| value_loss         | 0.513    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2044     |
| nupdates           | 1620     |
| policy_entropy     | 0.887    |
| policy_loss        | 0.0328   |
| total_timesteps    | 3926075  |
| value_loss         | 0.247    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2044     |
| nupdates           | 1630     |
| policy_entropy     | 0.877    |
| policy_loss        | -0.136   |
| total_timesteps    | 3950325  |
| value_loss         | 0.442    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2044     |
| nupdates           | 1640     |
| policy_entropy     | 0.925    |
| policy_loss        | 0.0951   |
| total_timesteps    | 3974575  |
| value_loss         | 0.252    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps         

---------------------------------
| explained_variance | 0.965    |
| fps                | 2047     |
| nupdates           | 1890     |
| policy_entropy     | 0.889    |
| policy_loss        | 0.0103   |
| total_timesteps    | 4580825  |
| value_loss         | 0.36     |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2047     |
| nupdates           | 1900     |
| policy_entropy     | 0.828    |
| policy_loss        | -0.0155  |
| total_timesteps    | 4605075  |
| value_loss         | 0.188    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2047     |
| nupdates           | 1910     |
| policy_entropy     | 0.855    |
| policy_loss        | -0.101   |
| total_timesteps    | 4629325  |
| value_loss         | 0.469    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2049     |
| nupdates           | 2160     |
| policy_entropy     | 0.894    |
| policy_loss        | 0.00607  |
| total_timesteps    | 5235575  |
| value_loss         | 0.33     |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2049     |
| nupdates           | 2170     |
| policy_entropy     | 0.951    |
| policy_loss        | -0.0316  |
| total_timesteps    | 5259825  |
| value_loss         | 0.254    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2050     |
| nupdates           | 2180     |
| policy_entropy     | 0.891    |
| policy_loss        | -0.052   |
| total_timesteps    | 5284075  |
| value_loss         | 0.42     |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2051     |
| nupdates           | 2430     |
| policy_entropy     | 0.829    |
| policy_loss        | 0.0152   |
| total_timesteps    | 5890325  |
| value_loss         | 0.286    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2051     |
| nupdates           | 2440     |
| policy_entropy     | 0.87     |
| policy_loss        | 0.0877   |
| total_timesteps    | 5914575  |
| value_loss         | 0.235    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2051     |
| nupdates           | 2450     |
| policy_entropy     | 0.889    |
| policy_loss        | 0.0703   |
| total_timesteps    | 5938825  |
| value_loss         | 0.351    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.961    |
| fps                | 2053     |
| nupdates           | 2700     |
| policy_entropy     | 0.886    |
| policy_loss        | 0.0365   |
| total_timesteps    | 6545075  |
| value_loss         | 0.369    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2053     |
| nupdates           | 2710     |
| policy_entropy     | 0.851    |
| policy_loss        | -0.19    |
| total_timesteps    | 6569325  |
| value_loss         | 0.548    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2053     |
| nupdates           | 2720     |
| policy_entropy     | 0.89     |
| policy_loss        | 0.0807   |
| total_timesteps    | 6593575  |
| value_loss         | 0.191    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2054     |
| nupdates           | 2970     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.145   |
| total_timesteps    | 7199825  |
| value_loss         | 0.388    |
---------------------------------
----------------------------------
| explained_variance | 0.99      |
| fps                | 2054      |
| nupdates           | 2980      |
| policy_entropy     | 0.834     |
| policy_loss        | -0.000122 |
| total_timesteps    | 7224075   |
| value_loss         | 0.145     |
----------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2054     |
| nupdates           | 2990     |
| policy_entropy     | 0.922    |
| policy_loss        | -0.0115  |
| total_timesteps    | 7248325  |
| value_loss         | 0.22     |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps

---------------------------------
| explained_variance | 0.97     |
| fps                | 2053     |
| nupdates           | 3240     |
| policy_entropy     | 0.928    |
| policy_loss        | -0.00314 |
| total_timesteps    | 7854575  |
| value_loss         | 0.289    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2053     |
| nupdates           | 3250     |
| policy_entropy     | 0.905    |
| policy_loss        | -0.0609  |
| total_timesteps    | 7878825  |
| value_loss         | 0.509    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2053     |
| nupdates           | 3260     |
| policy_entropy     | 0.933    |
| policy_loss        | -0.143   |
| total_timesteps    | 7903075  |
| value_loss         | 0.477    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2050     |
| nupdates           | 3510     |
| policy_entropy     | 0.891    |
| policy_loss        | 0.0542   |
| total_timesteps    | 8509325  |
| value_loss         | 0.303    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2049     |
| nupdates           | 3520     |
| policy_entropy     | 0.853    |
| policy_loss        | 0.03     |
| total_timesteps    | 8533575  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2049     |
| nupdates           | 3530     |
| policy_entropy     | 0.813    |
| policy_loss        | 0.042    |
| total_timesteps    | 8557825  |
| value_loss         | 0.245    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.948    |
| fps                | 2043     |
| nupdates           | 3780     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.112   |
| total_timesteps    | 9164075  |
| value_loss         | 0.59     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2043     |
| nupdates           | 3790     |
| policy_entropy     | 0.839    |
| policy_loss        | 0.0405   |
| total_timesteps    | 9188325  |
| value_loss         | 0.253    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2042     |
| nupdates           | 3800     |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0725  |
| total_timesteps    | 9212575  |
| value_loss         | 0.274    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2034     |
| nupdates           | 4050     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.02    |
| total_timesteps    | 9818825  |
| value_loss         | 0.369    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2033     |
| nupdates           | 4060     |
| policy_entropy     | 0.987    |
| policy_loss        | -0.0811  |
| total_timesteps    | 9843075  |
| value_loss         | 0.353    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2033     |
| nupdates           | 4070     |
| policy_entropy     | 0.927    |
| policy_loss        | 0.082    |
| total_timesteps    | 9867325  |
| value_loss         | 0.264    |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.934    |
| fps                | 1561     |
| nupdates           | 1        |
| policy_entropy     | 0.871    |
| policy_loss        | -0.124   |
| total_timesteps    | 0        |
| value_loss         | 0.627    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2588     |
| nupdates           | 10       |
| policy_entropy     | 0.823    |
| policy_loss        | 0.83     |
| total_timesteps    | 21825    |
| value_loss         | 1.76     |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps                | 2258     |
| nupdates           | 20       |
| policy_entropy     | 0.816    |
| policy_loss        | -1.08    |
| total_timesteps    | 46075    |
| value_loss         | 2.2      |
---------------------------------
------------

---------------------------------
| explained_variance | 0.985    |
| fps                | 2034     |
| nupdates           | 270      |
| policy_entropy     | 0.879    |
| policy_loss        | 0.05     |
| total_timesteps    | 652325   |
| value_loss         | 0.279    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2034     |
| nupdates           | 280      |
| policy_entropy     | 0.883    |
| policy_loss        | -0.0129  |
| total_timesteps    | 676575   |
| value_loss         | 0.253    |
---------------------------------
---------------------------------
| explained_variance | 0.936    |
| fps                | 2033     |
| nupdates           | 290      |
| policy_entropy     | 0.902    |
| policy_loss        | -0.0617  |
| total_timesteps    | 700825   |
| value_loss         | 0.479    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2032     |
| nupdates           | 540      |
| policy_entropy     | 0.934    |
| policy_loss        | 0.058    |
| total_timesteps    | 1307075  |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2032     |
| nupdates           | 550      |
| policy_entropy     | 0.834    |
| policy_loss        | -0.0583  |
| total_timesteps    | 1331325  |
| value_loss         | 0.457    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2032     |
| nupdates           | 560      |
| policy_entropy     | 0.788    |
| policy_loss        | 0.071    |
| total_timesteps    | 1355575  |
| value_loss         | 0.153    |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps         

---------------------------------
| explained_variance | 0.97     |
| fps                | 2034     |
| nupdates           | 810      |
| policy_entropy     | 0.889    |
| policy_loss        | 0.119    |
| total_timesteps    | 1961825  |
| value_loss         | 0.373    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2035     |
| nupdates           | 820      |
| policy_entropy     | 0.871    |
| policy_loss        | -0.119   |
| total_timesteps    | 1986075  |
| value_loss         | 0.264    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2035     |
| nupdates           | 830      |
| policy_entropy     | 0.875    |
| policy_loss        | 0.00485  |
| total_timesteps    | 2010325  |
| value_loss         | 0.456    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2038     |
| nupdates           | 1080     |
| policy_entropy     | 0.829    |
| policy_loss        | 0.0185   |
| total_timesteps    | 2616575  |
| value_loss         | 0.308    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2038     |
| nupdates           | 1090     |
| policy_entropy     | 0.837    |
| policy_loss        | 0.0413   |
| total_timesteps    | 2640825  |
| value_loss         | 0.279    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2038     |
| nupdates           | 1100     |
| policy_entropy     | 0.865    |
| policy_loss        | -0.0316  |
| total_timesteps    | 2665075  |
| value_loss         | 0.388    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2043     |
| nupdates           | 1350     |
| policy_entropy     | 0.882    |
| policy_loss        | 0.0164   |
| total_timesteps    | 3271325  |
| value_loss         | 0.173    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2043     |
| nupdates           | 1360     |
| policy_entropy     | 0.894    |
| policy_loss        | -0.0519  |
| total_timesteps    | 3295575  |
| value_loss         | 0.288    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2043     |
| nupdates           | 1370     |
| policy_entropy     | 0.892    |
| policy_loss        | -0.00137 |
| total_timesteps    | 3319825  |
| value_loss         | 0.446    |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2048     |
| nupdates           | 1620     |
| policy_entropy     | 0.92     |
| policy_loss        | 0.0241   |
| total_timesteps    | 3926075  |
| value_loss         | 0.236    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2048     |
| nupdates           | 1630     |
| policy_entropy     | 0.931    |
| policy_loss        | -0.117   |
| total_timesteps    | 3950325  |
| value_loss         | 0.663    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2048     |
| nupdates           | 1640     |
| policy_entropy     | 0.915    |
| policy_loss        | -0.121   |
| total_timesteps    | 3974575  |
| value_loss         | 0.316    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2052     |
| nupdates           | 1890     |
| policy_entropy     | 0.87     |
| policy_loss        | 0.199    |
| total_timesteps    | 4580825  |
| value_loss         | 0.407    |
---------------------------------
---------------------------------
| explained_variance | 0.998    |
| fps                | 2052     |
| nupdates           | 1900     |
| policy_entropy     | 0.823    |
| policy_loss        | 0.00385  |
| total_timesteps    | 4605075  |
| value_loss         | 0.0478   |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2052     |
| nupdates           | 1910     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0162  |
| total_timesteps    | 4629325  |
| value_loss         | 0.224    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2055     |
| nupdates           | 2160     |
| policy_entropy     | 0.861    |
| policy_loss        | 0.00715  |
| total_timesteps    | 5235575  |
| value_loss         | 0.204    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2055     |
| nupdates           | 2170     |
| policy_entropy     | 0.893    |
| policy_loss        | 0.0309   |
| total_timesteps    | 5259825  |
| value_loss         | 0.273    |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps                | 2055     |
| nupdates           | 2180     |
| policy_entropy     | 0.848    |
| policy_loss        | 0.0875   |
| total_timesteps    | 5284075  |
| value_loss         | 0.434    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2058     |
| nupdates           | 2430     |
| policy_entropy     | 0.93     |
| policy_loss        | -0.0644  |
| total_timesteps    | 5890325  |
| value_loss         | 0.378    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2058     |
| nupdates           | 2440     |
| policy_entropy     | 0.912    |
| policy_loss        | 0.107    |
| total_timesteps    | 5914575  |
| value_loss         | 0.375    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2058     |
| nupdates           | 2450     |
| policy_entropy     | 0.881    |
| policy_loss        | -0.0616  |
| total_timesteps    | 5938825  |
| value_loss         | 0.362    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2060     |
| nupdates           | 2700     |
| policy_entropy     | 0.968    |
| policy_loss        | -0.0874  |
| total_timesteps    | 6545075  |
| value_loss         | 0.262    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2060     |
| nupdates           | 2710     |
| policy_entropy     | 1        |
| policy_loss        | -0.105   |
| total_timesteps    | 6569325  |
| value_loss         | 0.348    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2060     |
| nupdates           | 2720     |
| policy_entropy     | 0.974    |
| policy_loss        | -0.0526  |
| total_timesteps    | 6593575  |
| value_loss         | 0.334    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2061     |
| nupdates           | 2970     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0415  |
| total_timesteps    | 7199825  |
| value_loss         | 0.36     |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2061     |
| nupdates           | 2980     |
| policy_entropy     | 0.892    |
| policy_loss        | -0.0436  |
| total_timesteps    | 7224075  |
| value_loss         | 0.37     |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2061     |
| nupdates           | 2990     |
| policy_entropy     | 0.92     |
| policy_loss        | -0.0388  |
| total_timesteps    | 7248325  |
| value_loss         | 0.18     |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.941    |
| fps                | 2061     |
| nupdates           | 3240     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0323  |
| total_timesteps    | 7854575  |
| value_loss         | 0.408    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2061     |
| nupdates           | 3250     |
| policy_entropy     | 0.847    |
| policy_loss        | 0.0265   |
| total_timesteps    | 7878825  |
| value_loss         | 0.229    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2061     |
| nupdates           | 3260     |
| policy_entropy     | 0.863    |
| policy_loss        | -0.052   |
| total_timesteps    | 7903075  |
| value_loss         | 0.223    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2059     |
| nupdates           | 3510     |
| policy_entropy     | 0.908    |
| policy_loss        | 0.0404   |
| total_timesteps    | 8509325  |
| value_loss         | 0.338    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2059     |
| nupdates           | 3520     |
| policy_entropy     | 0.9      |
| policy_loss        | 0.0107   |
| total_timesteps    | 8533575  |
| value_loss         | 0.266    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2059     |
| nupdates           | 3530     |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0751  |
| total_timesteps    | 8557825  |
| value_loss         | 0.421    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.893    |
| fps                | 2055     |
| nupdates           | 3780     |
| policy_entropy     | 0.93     |
| policy_loss        | -0.2     |
| total_timesteps    | 9164075  |
| value_loss         | 0.8      |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2054     |
| nupdates           | 3790     |
| policy_entropy     | 0.888    |
| policy_loss        | -0.0614  |
| total_timesteps    | 9188325  |
| value_loss         | 0.335    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2054     |
| nupdates           | 3800     |
| policy_entropy     | 0.88     |
| policy_loss        | 0.0619   |
| total_timesteps    | 9212575  |
| value_loss         | 0.268    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.953    |
| fps                | 2046     |
| nupdates           | 4050     |
| policy_entropy     | 0.899    |
| policy_loss        | 0.0137   |
| total_timesteps    | 9818825  |
| value_loss         | 0.428    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2046     |
| nupdates           | 4060     |
| policy_entropy     | 0.819    |
| policy_loss        | 0.0105   |
| total_timesteps    | 9843075  |
| value_loss         | 0.319    |
---------------------------------
---------------------------------
| explained_variance | 0.95     |
| fps                | 2046     |
| nupdates           | 4070     |
| policy_entropy     | 0.94     |
| policy_loss        | -0.123   |
| total_timesteps    | 9867325  |
| value_loss         | 0.547    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.986    |
| fps                | 1565     |
| nupdates           | 1        |
| policy_entropy     | 0.891    |
| policy_loss        | 0.0671   |
| total_timesteps    | 0        |
| value_loss         | 0.228    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2587     |
| nupdates           | 10       |
| policy_entropy     | 0.817    |
| policy_loss        | -0.956   |
| total_timesteps    | 21825    |
| value_loss         | 1.97     |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2257     |
| nupdates           | 20       |
| policy_entropy     | 0.807    |
| policy_loss        | 1.1      |
| total_timesteps    | 46075    |
| value_loss         | 2.24     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.955    |
| fps                | 2036     |
| nupdates           | 270      |
| policy_entropy     | 0.86     |
| policy_loss        | -0.0658  |
| total_timesteps    | 652325   |
| value_loss         | 0.645    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2036     |
| nupdates           | 280      |
| policy_entropy     | 0.874    |
| policy_loss        | -0.0964  |
| total_timesteps    | 676575   |
| value_loss         | 0.592    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2036     |
| nupdates           | 290      |
| policy_entropy     | 0.877    |
| policy_loss        | -0.0141  |
| total_timesteps    | 700825   |
| value_loss         | 0.219    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2036     |
| nupdates           | 540      |
| policy_entropy     | 0.836    |
| policy_loss        | 0.0635   |
| total_timesteps    | 1307075  |
| value_loss         | 0.184    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2036     |
| nupdates           | 550      |
| policy_entropy     | 0.813    |
| policy_loss        | 0.112    |
| total_timesteps    | 1331325  |
| value_loss         | 0.14     |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2036     |
| nupdates           | 560      |
| policy_entropy     | 0.819    |
| policy_loss        | 0.0419   |
| total_timesteps    | 1355575  |
| value_loss         | 0.353    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2040     |
| nupdates           | 810      |
| policy_entropy     | 0.862    |
| policy_loss        | 0.0969   |
| total_timesteps    | 1961825  |
| value_loss         | 0.235    |
---------------------------------
---------------------------------
| explained_variance | 0.937    |
| fps                | 2040     |
| nupdates           | 820      |
| policy_entropy     | 0.909    |
| policy_loss        | -0.0974  |
| total_timesteps    | 1986075  |
| value_loss         | 0.806    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2040     |
| nupdates           | 830      |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0974  |
| total_timesteps    | 2010325  |
| value_loss         | 0.384    |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps         

---------------------------------
| explained_variance | 0.945    |
| fps                | 2044     |
| nupdates           | 1080     |
| policy_entropy     | 0.867    |
| policy_loss        | -0.146   |
| total_timesteps    | 2616575  |
| value_loss         | 0.671    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2044     |
| nupdates           | 1090     |
| policy_entropy     | 0.856    |
| policy_loss        | 0.0709   |
| total_timesteps    | 2640825  |
| value_loss         | 0.182    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2044     |
| nupdates           | 1100     |
| policy_entropy     | 0.908    |
| policy_loss        | -0.0271  |
| total_timesteps    | 2665075  |
| value_loss         | 0.365    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.965    |
| fps                | 2047     |
| nupdates           | 1350     |
| policy_entropy     | 0.934    |
| policy_loss        | -0.0758  |
| total_timesteps    | 3271325  |
| value_loss         | 0.493    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2047     |
| nupdates           | 1360     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.113   |
| total_timesteps    | 3295575  |
| value_loss         | 0.353    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2048     |
| nupdates           | 1370     |
| policy_entropy     | 0.903    |
| policy_loss        | 0.192    |
| total_timesteps    | 3319825  |
| value_loss         | 0.155    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.958    |
| fps                | 2051     |
| nupdates           | 1620     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.26    |
| total_timesteps    | 3926075  |
| value_loss         | 0.653    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2051     |
| nupdates           | 1630     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.0405  |
| total_timesteps    | 3950325  |
| value_loss         | 0.315    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2051     |
| nupdates           | 1640     |
| policy_entropy     | 0.913    |
| policy_loss        | 0.0508   |
| total_timesteps    | 3974575  |
| value_loss         | 0.164    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.995    |
| fps                | 2054     |
| nupdates           | 1890     |
| policy_entropy     | 0.809    |
| policy_loss        | 0.0648   |
| total_timesteps    | 4580825  |
| value_loss         | 0.068    |
---------------------------------
---------------------------------
| explained_variance | 0.953    |
| fps                | 2054     |
| nupdates           | 1900     |
| policy_entropy     | 0.853    |
| policy_loss        | 0.0123   |
| total_timesteps    | 4605075  |
| value_loss         | 0.49     |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2054     |
| nupdates           | 1910     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.0627  |
| total_timesteps    | 4629325  |
| value_loss         | 0.402    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2056     |
| nupdates           | 2160     |
| policy_entropy     | 0.894    |
| policy_loss        | -0.0971  |
| total_timesteps    | 5235575  |
| value_loss         | 0.484    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2056     |
| nupdates           | 2170     |
| policy_entropy     | 0.846    |
| policy_loss        | 0.114    |
| total_timesteps    | 5259825  |
| value_loss         | 0.212    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2056     |
| nupdates           | 2180     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.0415  |
| total_timesteps    | 5284075  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.991    |
| fps                | 2058     |
| nupdates           | 2430     |
| policy_entropy     | 0.862    |
| policy_loss        | 0.115    |
| total_timesteps    | 5890325  |
| value_loss         | 0.17     |
---------------------------------
---------------------------------
| explained_variance | 0.996    |
| fps                | 2058     |
| nupdates           | 2440     |
| policy_entropy     | 0.901    |
| policy_loss        | 0.109    |
| total_timesteps    | 5914575  |
| value_loss         | 0.0817   |
---------------------------------
----------------------------------
| explained_variance | 0.972     |
| fps                | 2059      |
| nupdates           | 2450      |
| policy_entropy     | 0.998     |
| policy_loss        | -0.000686 |
| total_timesteps    | 5938825   |
| value_loss         | 0.385     |
----------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps

---------------------------------
| explained_variance | 0.932    |
| fps                | 2060     |
| nupdates           | 2700     |
| policy_entropy     | 0.973    |
| policy_loss        | -0.18    |
| total_timesteps    | 6545075  |
| value_loss         | 0.573    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2060     |
| nupdates           | 2710     |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0246  |
| total_timesteps    | 6569325  |
| value_loss         | 0.331    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2060     |
| nupdates           | 2720     |
| policy_entropy     | 0.939    |
| policy_loss        | 0.0289   |
| total_timesteps    | 6593575  |
| value_loss         | 0.0881   |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2060     |
| nupdates           | 2970     |
| policy_entropy     | 0.856    |
| policy_loss        | -0.0034  |
| total_timesteps    | 7199825  |
| value_loss         | 0.243    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2060     |
| nupdates           | 2980     |
| policy_entropy     | 0.84     |
| policy_loss        | 0.0542   |
| total_timesteps    | 7224075  |
| value_loss         | 0.22     |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2060     |
| nupdates           | 2990     |
| policy_entropy     | 0.812    |
| policy_loss        | -0.0367  |
| total_timesteps    | 7248325  |
| value_loss         | 0.248    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps         

---------------------------------
| explained_variance | 0.941    |
| fps                | 2059     |
| nupdates           | 3240     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.108   |
| total_timesteps    | 7854575  |
| value_loss         | 0.844    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2059     |
| nupdates           | 3250     |
| policy_entropy     | 0.918    |
| policy_loss        | 0.127    |
| total_timesteps    | 7878825  |
| value_loss         | 0.189    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2059     |
| nupdates           | 3260     |
| policy_entropy     | 0.862    |
| policy_loss        | -0.0252  |
| total_timesteps    | 7903075  |
| value_loss         | 0.464    |
---------------------------------
---------------------------------
| explained_variance | 0.995    |
| fps         

---------------------------------
| explained_variance | 0.966    |
| fps                | 2056     |
| nupdates           | 3510     |
| policy_entropy     | 0.891    |
| policy_loss        | -0.0693  |
| total_timesteps    | 8509325  |
| value_loss         | 0.494    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2056     |
| nupdates           | 3520     |
| policy_entropy     | 0.89     |
| policy_loss        | -0.0289  |
| total_timesteps    | 8533575  |
| value_loss         | 0.277    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2055     |
| nupdates           | 3530     |
| policy_entropy     | 0.911    |
| policy_loss        | 0.0727   |
| total_timesteps    | 8557825  |
| value_loss         | 0.171    |
---------------------------------
---------------------------------
| explained_variance | 0.909    |
| fps         

---------------------------------
| explained_variance | 0.972    |
| fps                | 2051     |
| nupdates           | 3780     |
| policy_entropy     | 0.889    |
| policy_loss        | -0.108   |
| total_timesteps    | 9164075  |
| value_loss         | 0.485    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2051     |
| nupdates           | 3790     |
| policy_entropy     | 0.937    |
| policy_loss        | -0.115   |
| total_timesteps    | 9188325  |
| value_loss         | 0.558    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2051     |
| nupdates           | 3800     |
| policy_entropy     | 0.93     |
| policy_loss        | -0.0357  |
| total_timesteps    | 9212575  |
| value_loss         | 0.24     |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2045     |
| nupdates           | 4050     |
| policy_entropy     | 0.946    |
| policy_loss        | -0.022   |
| total_timesteps    | 9818825  |
| value_loss         | 0.349    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2044     |
| nupdates           | 4060     |
| policy_entropy     | 0.927    |
| policy_loss        | -0.00537 |
| total_timesteps    | 9843075  |
| value_loss         | 0.322    |
---------------------------------
---------------------------------
| explained_variance | 0.945    |
| fps                | 2044     |
| nupdates           | 4070     |
| policy_entropy     | 0.893    |
| policy_loss        | -0.208   |
| total_timesteps    | 9867325  |
| value_loss         | 1.05     |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.965    |
| fps                | 1551     |
| nupdates           | 1        |
| policy_entropy     | 0.941    |
| policy_loss        | 0.0286   |
| total_timesteps    | 0        |
| value_loss         | 0.417    |
---------------------------------
---------------------------------
| explained_variance | 0.924    |
| fps                | 2550     |
| nupdates           | 10       |
| policy_entropy     | 0.912    |
| policy_loss        | 0.351    |
| total_timesteps    | 21825    |
| value_loss         | 1.2      |
---------------------------------
---------------------------------
| explained_variance | 0.939    |
| fps                | 2223     |
| nupdates           | 20       |
| policy_entropy     | 0.797    |
| policy_loss        | 0.397    |
| total_timesteps    | 46075    |
| value_loss         | 1.02     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.982    |
| fps                | 2022     |
| nupdates           | 270      |
| policy_entropy     | 0.88     |
| policy_loss        | 0.0197   |
| total_timesteps    | 652325   |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2023     |
| nupdates           | 280      |
| policy_entropy     | 0.898    |
| policy_loss        | -0.0244  |
| total_timesteps    | 676575   |
| value_loss         | 0.402    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2022     |
| nupdates           | 290      |
| policy_entropy     | 0.865    |
| policy_loss        | 0.235    |
| total_timesteps    | 700825   |
| value_loss         | 0.189    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2025     |
| nupdates           | 540      |
| policy_entropy     | 0.891    |
| policy_loss        | -0.0489  |
| total_timesteps    | 1307075  |
| value_loss         | 0.151    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2025     |
| nupdates           | 550      |
| policy_entropy     | 0.898    |
| policy_loss        | 0.0714   |
| total_timesteps    | 1331325  |
| value_loss         | 0.252    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2025     |
| nupdates           | 560      |
| policy_entropy     | 0.863    |
| policy_loss        | 0.108    |
| total_timesteps    | 1355575  |
| value_loss         | 0.214    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2030     |
| nupdates           | 810      |
| policy_entropy     | 0.837    |
| policy_loss        | 0.016    |
| total_timesteps    | 1961825  |
| value_loss         | 0.396    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2030     |
| nupdates           | 820      |
| policy_entropy     | 0.868    |
| policy_loss        | -0.0815  |
| total_timesteps    | 1986075  |
| value_loss         | 0.39     |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2030     |
| nupdates           | 830      |
| policy_entropy     | 0.912    |
| policy_loss        | -0.0293  |
| total_timesteps    | 2010325  |
| value_loss         | 0.284    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2034     |
| nupdates           | 1080     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.0459  |
| total_timesteps    | 2616575  |
| value_loss         | 0.289    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2034     |
| nupdates           | 1090     |
| policy_entropy     | 0.909    |
| policy_loss        | 0.00117  |
| total_timesteps    | 2640825  |
| value_loss         | 0.119    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2035     |
| nupdates           | 1100     |
| policy_entropy     | 0.885    |
| policy_loss        | 0.0973   |
| total_timesteps    | 2665075  |
| value_loss         | 0.157    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2038     |
| nupdates           | 1350     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.105   |
| total_timesteps    | 3271325  |
| value_loss         | 0.6      |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2039     |
| nupdates           | 1360     |
| policy_entropy     | 0.941    |
| policy_loss        | -0.152   |
| total_timesteps    | 3295575  |
| value_loss         | 0.518    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2039     |
| nupdates           | 1370     |
| policy_entropy     | 0.844    |
| policy_loss        | 0.0543   |
| total_timesteps    | 3319825  |
| value_loss         | 0.157    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.944    |
| fps                | 2042     |
| nupdates           | 1620     |
| policy_entropy     | 0.937    |
| policy_loss        | -0.186   |
| total_timesteps    | 3926075  |
| value_loss         | 0.634    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2042     |
| nupdates           | 1630     |
| policy_entropy     | 0.924    |
| policy_loss        | 0.0162   |
| total_timesteps    | 3950325  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2042     |
| nupdates           | 1640     |
| policy_entropy     | 0.918    |
| policy_loss        | -0.0727  |
| total_timesteps    | 3974575  |
| value_loss         | 0.453    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2045     |
| nupdates           | 1890     |
| policy_entropy     | 0.87     |
| policy_loss        | 0.0538   |
| total_timesteps    | 4580825  |
| value_loss         | 0.187    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2045     |
| nupdates           | 1900     |
| policy_entropy     | 0.861    |
| policy_loss        | -0.0512  |
| total_timesteps    | 4605075  |
| value_loss         | 0.501    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2045     |
| nupdates           | 1910     |
| policy_entropy     | 0.861    |
| policy_loss        | 0.0482   |
| total_timesteps    | 4629325  |
| value_loss         | 0.26     |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2048     |
| nupdates           | 2160     |
| policy_entropy     | 0.913    |
| policy_loss        | 0.0498   |
| total_timesteps    | 5235575  |
| value_loss         | 0.189    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2048     |
| nupdates           | 2170     |
| policy_entropy     | 0.85     |
| policy_loss        | -0.00399 |
| total_timesteps    | 5259825  |
| value_loss         | 0.241    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2048     |
| nupdates           | 2180     |
| policy_entropy     | 0.839    |
| policy_loss        | -0.0771  |
| total_timesteps    | 5284075  |
| value_loss         | 0.246    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2049     |
| nupdates           | 2430     |
| policy_entropy     | 0.941    |
| policy_loss        | -0.209   |
| total_timesteps    | 5890325  |
| value_loss         | 0.56     |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2049     |
| nupdates           | 2440     |
| policy_entropy     | 0.907    |
| policy_loss        | 0.017    |
| total_timesteps    | 5914575  |
| value_loss         | 0.33     |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2049     |
| nupdates           | 2450     |
| policy_entropy     | 0.823    |
| policy_loss        | -0.0621  |
| total_timesteps    | 5938825  |
| value_loss         | 0.316    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2051     |
| nupdates           | 2700     |
| policy_entropy     | 0.867    |
| policy_loss        | 0.0553   |
| total_timesteps    | 6545075  |
| value_loss         | 0.254    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2051     |
| nupdates           | 2710     |
| policy_entropy     | 0.878    |
| policy_loss        | 0.0344   |
| total_timesteps    | 6569325  |
| value_loss         | 0.224    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2051     |
| nupdates           | 2720     |
| policy_entropy     | 0.903    |
| policy_loss        | 0.0105   |
| total_timesteps    | 6593575  |
| value_loss         | 0.271    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2052     |
| nupdates           | 2970     |
| policy_entropy     | 0.941    |
| policy_loss        | -0.0363  |
| total_timesteps    | 7199825  |
| value_loss         | 0.258    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2052     |
| nupdates           | 2980     |
| policy_entropy     | 0.891    |
| policy_loss        | 0.038    |
| total_timesteps    | 7224075  |
| value_loss         | 0.213    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2052     |
| nupdates           | 2990     |
| policy_entropy     | 0.955    |
| policy_loss        | -0.0894  |
| total_timesteps    | 7248325  |
| value_loss         | 0.285    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2051     |
| nupdates           | 3240     |
| policy_entropy     | 0.899    |
| policy_loss        | 0.0635   |
| total_timesteps    | 7854575  |
| value_loss         | 0.244    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2051     |
| nupdates           | 3250     |
| policy_entropy     | 0.883    |
| policy_loss        | 0.0315   |
| total_timesteps    | 7878825  |
| value_loss         | 0.208    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2051     |
| nupdates           | 3260     |
| policy_entropy     | 0.908    |
| policy_loss        | -0.109   |
| total_timesteps    | 7903075  |
| value_loss         | 0.469    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2048     |
| nupdates           | 3510     |
| policy_entropy     | 0.935    |
| policy_loss        | -0.0426  |
| total_timesteps    | 8509325  |
| value_loss         | 0.454    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2048     |
| nupdates           | 3520     |
| policy_entropy     | 0.952    |
| policy_loss        | -0.0268  |
| total_timesteps    | 8533575  |
| value_loss         | 0.36     |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2048     |
| nupdates           | 3530     |
| policy_entropy     | 0.88     |
| policy_loss        | 0.0371   |
| total_timesteps    | 8557825  |
| value_loss         | 0.184    |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2042     |
| nupdates           | 3780     |
| policy_entropy     | 0.871    |
| policy_loss        | 0.00308  |
| total_timesteps    | 9164075  |
| value_loss         | 0.241    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2041     |
| nupdates           | 3790     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.0345  |
| total_timesteps    | 9188325  |
| value_loss         | 0.441    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2041     |
| nupdates           | 3800     |
| policy_entropy     | 0.817    |
| policy_loss        | 0.125    |
| total_timesteps    | 9212575  |
| value_loss         | 0.22     |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2031     |
| nupdates           | 4050     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.00137 |
| total_timesteps    | 9818825  |
| value_loss         | 0.224    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2031     |
| nupdates           | 4060     |
| policy_entropy     | 0.909    |
| policy_loss        | -0.0871  |
| total_timesteps    | 9843075  |
| value_loss         | 0.44     |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2030     |
| nupdates           | 4070     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.013   |
| total_timesteps    | 9867325  |
| value_loss         | 0.262    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.954    |
| fps                | 1538     |
| nupdates           | 1        |
| policy_entropy     | 0.886    |
| policy_loss        | -0.147   |
| total_timesteps    | 0        |
| value_loss         | 0.423    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2567     |
| nupdates           | 10       |
| policy_entropy     | 0.836    |
| policy_loss        | 0.261    |
| total_timesteps    | 21825    |
| value_loss         | 0.433    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2243     |
| nupdates           | 20       |
| policy_entropy     | 0.829    |
| policy_loss        | -1.32    |
| total_timesteps    | 46075    |
| value_loss         | 3.14     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.977    |
| fps                | 2026     |
| nupdates           | 270      |
| policy_entropy     | 0.891    |
| policy_loss        | -0.00171 |
| total_timesteps    | 652325   |
| value_loss         | 0.333    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2026     |
| nupdates           | 280      |
| policy_entropy     | 0.811    |
| policy_loss        | 0.00318  |
| total_timesteps    | 676575   |
| value_loss         | 0.36     |
---------------------------------
---------------------------------
| explained_variance | 0.943    |
| fps                | 2026     |
| nupdates           | 290      |
| policy_entropy     | 0.916    |
| policy_loss        | -0.148   |
| total_timesteps    | 700825   |
| value_loss         | 0.848    |
---------------------------------
---------------------------------
| explained_variance | 0.941    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2026     |
| nupdates           | 540      |
| policy_entropy     | 0.959    |
| policy_loss        | -0.0815  |
| total_timesteps    | 1307075  |
| value_loss         | 0.257    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2026     |
| nupdates           | 550      |
| policy_entropy     | 0.852    |
| policy_loss        | 0.101    |
| total_timesteps    | 1331325  |
| value_loss         | 0.0926   |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2027     |
| nupdates           | 560      |
| policy_entropy     | 0.901    |
| policy_loss        | -0.00802 |
| total_timesteps    | 1355575  |
| value_loss         | 0.196    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2030     |
| nupdates           | 810      |
| policy_entropy     | 0.916    |
| policy_loss        | 0.131    |
| total_timesteps    | 1961825  |
| value_loss         | 0.353    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2030     |
| nupdates           | 820      |
| policy_entropy     | 0.942    |
| policy_loss        | -0.236   |
| total_timesteps    | 1986075  |
| value_loss         | 0.647    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2030     |
| nupdates           | 830      |
| policy_entropy     | 0.83     |
| policy_loss        | -0.125   |
| total_timesteps    | 2010325  |
| value_loss         | 0.775    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.957    |
| fps                | 2034     |
| nupdates           | 1080     |
| policy_entropy     | 0.885    |
| policy_loss        | -0.0691  |
| total_timesteps    | 2616575  |
| value_loss         | 0.622    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2034     |
| nupdates           | 1090     |
| policy_entropy     | 0.824    |
| policy_loss        | 0.108    |
| total_timesteps    | 2640825  |
| value_loss         | 0.287    |
---------------------------------
---------------------------------
| explained_variance | 0.943    |
| fps                | 2034     |
| nupdates           | 1100     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0966  |
| total_timesteps    | 2665075  |
| value_loss         | 0.603    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps         

---------------------------------
| explained_variance | 0.924    |
| fps                | 2038     |
| nupdates           | 1350     |
| policy_entropy     | 0.86     |
| policy_loss        | -0.124   |
| total_timesteps    | 3271325  |
| value_loss         | 0.671    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2038     |
| nupdates           | 1360     |
| policy_entropy     | 0.891    |
| policy_loss        | -0.118   |
| total_timesteps    | 3295575  |
| value_loss         | 0.584    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2038     |
| nupdates           | 1370     |
| policy_entropy     | 0.882    |
| policy_loss        | 0.0748   |
| total_timesteps    | 3319825  |
| value_loss         | 0.238    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.972    |
| fps                | 2042     |
| nupdates           | 1620     |
| policy_entropy     | 0.885    |
| policy_loss        | -0.0858  |
| total_timesteps    | 3926075  |
| value_loss         | 0.332    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2042     |
| nupdates           | 1630     |
| policy_entropy     | 0.926    |
| policy_loss        | -0.0962  |
| total_timesteps    | 3950325  |
| value_loss         | 0.309    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2042     |
| nupdates           | 1640     |
| policy_entropy     | 0.951    |
| policy_loss        | 0.0442   |
| total_timesteps    | 3974575  |
| value_loss         | 0.435    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.993    |
| fps                | 2045     |
| nupdates           | 1890     |
| policy_entropy     | 0.873    |
| policy_loss        | 0.0772   |
| total_timesteps    | 4580825  |
| value_loss         | 0.14     |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2045     |
| nupdates           | 1900     |
| policy_entropy     | 0.873    |
| policy_loss        | 0.0699   |
| total_timesteps    | 4605075  |
| value_loss         | 0.23     |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps                | 2045     |
| nupdates           | 1910     |
| policy_entropy     | 0.911    |
| policy_loss        | 0.169    |
| total_timesteps    | 4629325  |
| value_loss         | 0.191    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2047     |
| nupdates           | 2160     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.00567 |
| total_timesteps    | 5235575  |
| value_loss         | 0.31     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2048     |
| nupdates           | 2170     |
| policy_entropy     | 0.893    |
| policy_loss        | 0.0683   |
| total_timesteps    | 5259825  |
| value_loss         | 0.206    |
---------------------------------
---------------------------------
| explained_variance | 0.996    |
| fps                | 2048     |
| nupdates           | 2180     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0901   |
| total_timesteps    | 5284075  |
| value_loss         | 0.087    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2049     |
| nupdates           | 2430     |
| policy_entropy     | 0.854    |
| policy_loss        | -0.0448  |
| total_timesteps    | 5890325  |
| value_loss         | 0.374    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2049     |
| nupdates           | 2440     |
| policy_entropy     | 0.926    |
| policy_loss        | 0.0376   |
| total_timesteps    | 5914575  |
| value_loss         | 0.29     |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2049     |
| nupdates           | 2450     |
| policy_entropy     | 0.906    |
| policy_loss        | -0.043   |
| total_timesteps    | 5938825  |
| value_loss         | 0.309    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2050     |
| nupdates           | 2700     |
| policy_entropy     | 0.841    |
| policy_loss        | -0.072   |
| total_timesteps    | 6545075  |
| value_loss         | 0.241    |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps                | 2050     |
| nupdates           | 2710     |
| policy_entropy     | 0.829    |
| policy_loss        | 0.0183   |
| total_timesteps    | 6569325  |
| value_loss         | 0.567    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2050     |
| nupdates           | 2720     |
| policy_entropy     | 0.846    |
| policy_loss        | -0.0228  |
| total_timesteps    | 6593575  |
| value_loss         | 0.465    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2050     |
| nupdates           | 2970     |
| policy_entropy     | 0.826    |
| policy_loss        | 0.0362   |
| total_timesteps    | 7199825  |
| value_loss         | 0.293    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2050     |
| nupdates           | 2980     |
| policy_entropy     | 0.877    |
| policy_loss        | -0.0207  |
| total_timesteps    | 7224075  |
| value_loss         | 0.287    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2050     |
| nupdates           | 2990     |
| policy_entropy     | 0.837    |
| policy_loss        | 0.0389   |
| total_timesteps    | 7248325  |
| value_loss         | 0.117    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.927    |
| fps                | 2049     |
| nupdates           | 3240     |
| policy_entropy     | 0.898    |
| policy_loss        | -0.169   |
| total_timesteps    | 7854575  |
| value_loss         | 0.824    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2049     |
| nupdates           | 3250     |
| policy_entropy     | 0.867    |
| policy_loss        | 0.143    |
| total_timesteps    | 7878825  |
| value_loss         | 0.223    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2049     |
| nupdates           | 3260     |
| policy_entropy     | 0.841    |
| policy_loss        | 0.00252  |
| total_timesteps    | 7903075  |
| value_loss         | 0.177    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2046     |
| nupdates           | 3510     |
| policy_entropy     | 0.932    |
| policy_loss        | 0.0244   |
| total_timesteps    | 8509325  |
| value_loss         | 0.174    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2045     |
| nupdates           | 3520     |
| policy_entropy     | 0.924    |
| policy_loss        | 0.054    |
| total_timesteps    | 8533575  |
| value_loss         | 0.241    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2045     |
| nupdates           | 3530     |
| policy_entropy     | 0.899    |
| policy_loss        | 0.087    |
| total_timesteps    | 8557825  |
| value_loss         | 0.203    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps         

---------------------------------
| explained_variance | 0.972    |
| fps                | 2040     |
| nupdates           | 3780     |
| policy_entropy     | 0.895    |
| policy_loss        | 0.0238   |
| total_timesteps    | 9164075  |
| value_loss         | 0.414    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2039     |
| nupdates           | 3790     |
| policy_entropy     | 0.924    |
| policy_loss        | -0.0033  |
| total_timesteps    | 9188325  |
| value_loss         | 0.281    |
---------------------------------
----------------------------------
| explained_variance | 0.971     |
| fps                | 2039      |
| nupdates           | 3800      |
| policy_entropy     | 0.935     |
| policy_loss        | -0.000767 |
| total_timesteps    | 9212575   |
| value_loss         | 0.35      |
----------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps

---------------------------------
| explained_variance | 0.959    |
| fps                | 2031     |
| nupdates           | 4050     |
| policy_entropy     | 0.889    |
| policy_loss        | -0.185   |
| total_timesteps    | 9818825  |
| value_loss         | 0.622    |
---------------------------------
---------------------------------
| explained_variance | 0.95     |
| fps                | 2030     |
| nupdates           | 4060     |
| policy_entropy     | 0.864    |
| policy_loss        | -0.0311  |
| total_timesteps    | 9843075  |
| value_loss         | 0.461    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2030     |
| nupdates           | 4070     |
| policy_entropy     | 0.961    |
| policy_loss        | 0.113    |
| total_timesteps    | 9867325  |
| value_loss         | 0.11     |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.966    |
| fps                | 1541     |
| nupdates           | 1        |
| policy_entropy     | 0.913    |
| policy_loss        | -0.00244 |
| total_timesteps    | 0        |
| value_loss         | 0.418    |
---------------------------------
---------------------------------
| explained_variance | 0.94     |
| fps                | 2572     |
| nupdates           | 10       |
| policy_entropy     | 0.719    |
| policy_loss        | -0.231   |
| total_timesteps    | 21825    |
| value_loss         | 1.13     |
---------------------------------
---------------------------------
| explained_variance | 0.899    |
| fps                | 2250     |
| nupdates           | 20       |
| policy_entropy     | 0.699    |
| policy_loss        | 2.39     |
| total_timesteps    | 46075    |
| value_loss         | 14.9     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.98     |
| fps                | 2030     |
| nupdates           | 270      |
| policy_entropy     | 0.829    |
| policy_loss        | 0.0419   |
| total_timesteps    | 652325   |
| value_loss         | 0.33     |
---------------------------------
---------------------------------
| explained_variance | 0.932    |
| fps                | 2030     |
| nupdates           | 280      |
| policy_entropy     | 0.866    |
| policy_loss        | -0.0724  |
| total_timesteps    | 676575   |
| value_loss         | 0.762    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2029     |
| nupdates           | 290      |
| policy_entropy     | 0.857    |
| policy_loss        | 0.0496   |
| total_timesteps    | 700825   |
| value_loss         | 0.139    |
---------------------------------
----------------------------------
| explained_variance | 0.975     |
| fps       

---------------------------------
| explained_variance | 0.981    |
| fps                | 2029     |
| nupdates           | 540      |
| policy_entropy     | 0.897    |
| policy_loss        | 0.0582   |
| total_timesteps    | 1307075  |
| value_loss         | 0.201    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2029     |
| nupdates           | 550      |
| policy_entropy     | 0.856    |
| policy_loss        | 0.0213   |
| total_timesteps    | 1331325  |
| value_loss         | 0.21     |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2029     |
| nupdates           | 560      |
| policy_entropy     | 0.873    |
| policy_loss        | -0.0207  |
| total_timesteps    | 1355575  |
| value_loss         | 0.192    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2032     |
| nupdates           | 810      |
| policy_entropy     | 0.836    |
| policy_loss        | 0.0628   |
| total_timesteps    | 1961825  |
| value_loss         | 0.293    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2032     |
| nupdates           | 820      |
| policy_entropy     | 0.813    |
| policy_loss        | -0.0086  |
| total_timesteps    | 1986075  |
| value_loss         | 0.384    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2033     |
| nupdates           | 830      |
| policy_entropy     | 0.803    |
| policy_loss        | 0.101    |
| total_timesteps    | 2010325  |
| value_loss         | 0.307    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2036     |
| nupdates           | 1080     |
| policy_entropy     | 0.876    |
| policy_loss        | -0.129   |
| total_timesteps    | 2616575  |
| value_loss         | 0.475    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2036     |
| nupdates           | 1090     |
| policy_entropy     | 0.942    |
| policy_loss        | -0.0521  |
| total_timesteps    | 2640825  |
| value_loss         | 0.256    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2036     |
| nupdates           | 1100     |
| policy_entropy     | 0.912    |
| policy_loss        | -0.00308 |
| total_timesteps    | 2665075  |
| value_loss         | 0.343    |
---------------------------------
---------------------------------
| explained_variance | 0.945    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2040     |
| nupdates           | 1350     |
| policy_entropy     | 0.804    |
| policy_loss        | 0.0104   |
| total_timesteps    | 3271325  |
| value_loss         | 0.312    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2040     |
| nupdates           | 1360     |
| policy_entropy     | 0.831    |
| policy_loss        | 0.0747   |
| total_timesteps    | 3295575  |
| value_loss         | 0.274    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2040     |
| nupdates           | 1370     |
| policy_entropy     | 0.84     |
| policy_loss        | 0.0273   |
| total_timesteps    | 3319825  |
| value_loss         | 0.164    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2044     |
| nupdates           | 1620     |
| policy_entropy     | 0.837    |
| policy_loss        | 0.18     |
| total_timesteps    | 3926075  |
| value_loss         | 0.172    |
---------------------------------
---------------------------------
| explained_variance | 0.944    |
| fps                | 2044     |
| nupdates           | 1630     |
| policy_entropy     | 0.881    |
| policy_loss        | -0.132   |
| total_timesteps    | 3950325  |
| value_loss         | 0.772    |
---------------------------------
---------------------------------
| explained_variance | 0.936    |
| fps                | 2044     |
| nupdates           | 1640     |
| policy_entropy     | 0.892    |
| policy_loss        | -0.2     |
| total_timesteps    | 3974575  |
| value_loss         | 0.744    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2047     |
| nupdates           | 1890     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.017    |
| total_timesteps    | 4580825  |
| value_loss         | 0.384    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2047     |
| nupdates           | 1900     |
| policy_entropy     | 0.892    |
| policy_loss        | -0.0554  |
| total_timesteps    | 4605075  |
| value_loss         | 0.347    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2047     |
| nupdates           | 1910     |
| policy_entropy     | 0.89     |
| policy_loss        | -0.0382  |
| total_timesteps    | 4629325  |
| value_loss         | 0.308    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.962    |
| fps                | 2049     |
| nupdates           | 2160     |
| policy_entropy     | 0.847    |
| policy_loss        | -0.0606  |
| total_timesteps    | 5235575  |
| value_loss         | 0.587    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2049     |
| nupdates           | 2170     |
| policy_entropy     | 0.954    |
| policy_loss        | -0.0886  |
| total_timesteps    | 5259825  |
| value_loss         | 0.334    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2050     |
| nupdates           | 2180     |
| policy_entropy     | 0.815    |
| policy_loss        | 0.111    |
| total_timesteps    | 5284075  |
| value_loss         | 0.126    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2051     |
| nupdates           | 2430     |
| policy_entropy     | 0.837    |
| policy_loss        | 0.08     |
| total_timesteps    | 5890325  |
| value_loss         | 0.348    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2051     |
| nupdates           | 2440     |
| policy_entropy     | 0.832    |
| policy_loss        | 0.0106   |
| total_timesteps    | 5914575  |
| value_loss         | 0.205    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2051     |
| nupdates           | 2450     |
| policy_entropy     | 0.843    |
| policy_loss        | -0.0605  |
| total_timesteps    | 5938825  |
| value_loss         | 0.392    |
---------------------------------
---------------------------------
| explained_variance | 0.912    |
| fps         

---------------------------------
| explained_variance | 0.956    |
| fps                | 2051     |
| nupdates           | 2700     |
| policy_entropy     | 0.826    |
| policy_loss        | -0.0786  |
| total_timesteps    | 6545075  |
| value_loss         | 0.48     |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2051     |
| nupdates           | 2710     |
| policy_entropy     | 0.852    |
| policy_loss        | 0.0086   |
| total_timesteps    | 6569325  |
| value_loss         | 0.266    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2051     |
| nupdates           | 2720     |
| policy_entropy     | 0.881    |
| policy_loss        | -0.0108  |
| total_timesteps    | 6593575  |
| value_loss         | 0.326    |
---------------------------------
---------------------------------
| explained_variance | 0.944    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2050     |
| nupdates           | 2970     |
| policy_entropy     | 0.855    |
| policy_loss        | 0.0504   |
| total_timesteps    | 7199825  |
| value_loss         | 0.238    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2050     |
| nupdates           | 2980     |
| policy_entropy     | 0.892    |
| policy_loss        | 0.0324   |
| total_timesteps    | 7224075  |
| value_loss         | 0.15     |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2049     |
| nupdates           | 2990     |
| policy_entropy     | 0.856    |
| policy_loss        | -0.137   |
| total_timesteps    | 7248325  |
| value_loss         | 0.453    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2045     |
| nupdates           | 3240     |
| policy_entropy     | 0.864    |
| policy_loss        | 0.127    |
| total_timesteps    | 7854575  |
| value_loss         | 0.165    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2045     |
| nupdates           | 3250     |
| policy_entropy     | 0.917    |
| policy_loss        | 0.174    |
| total_timesteps    | 7878825  |
| value_loss         | 0.359    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2045     |
| nupdates           | 3260     |
| policy_entropy     | 0.978    |
| policy_loss        | 0.00495  |
| total_timesteps    | 7903075  |
| value_loss         | 0.166    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2038     |
| nupdates           | 3510     |
| policy_entropy     | 0.931    |
| policy_loss        | 0.0224   |
| total_timesteps    | 8509325  |
| value_loss         | 0.274    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2037     |
| nupdates           | 3520     |
| policy_entropy     | 0.959    |
| policy_loss        | -0.177   |
| total_timesteps    | 8533575  |
| value_loss         | 0.493    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2037     |
| nupdates           | 3530     |
| policy_entropy     | 0.894    |
| policy_loss        | -0.0963  |
| total_timesteps    | 8557825  |
| value_loss         | 0.542    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2026     |
| nupdates           | 3780     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.0605  |
| total_timesteps    | 9164075  |
| value_loss         | 0.331    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2026     |
| nupdates           | 3790     |
| policy_entropy     | 1.02     |
| policy_loss        | -0.0347  |
| total_timesteps    | 9188325  |
| value_loss         | 0.238    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2025     |
| nupdates           | 3800     |
| policy_entropy     | 0.874    |
| policy_loss        | 0.0824   |
| total_timesteps    | 9212575  |
| value_loss         | 0.168    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.963    |
| fps                | 2011     |
| nupdates           | 4050     |
| policy_entropy     | 0.875    |
| policy_loss        | -0.0244  |
| total_timesteps    | 9818825  |
| value_loss         | 0.309    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2010     |
| nupdates           | 4060     |
| policy_entropy     | 0.823    |
| policy_loss        | 0.0742   |
| total_timesteps    | 9843075  |
| value_loss         | 0.18     |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2010     |
| nupdates           | 4070     |
| policy_entropy     | 0.846    |
| policy_loss        | 0.0939   |
| total_timesteps    | 9867325  |
| value_loss         | 0.271    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
----------------------------------
| explained_variance | 0.978     |
| fps                | 1557      |
| nupdates           | 1         |
| policy_entropy     | 0.927     |
| policy_loss        | -0.000273 |
| total_timesteps    | 0         |
| value_loss         | 0.291     |
----------------------------------
---------------------------------
| explained_variance | 0.951    |
| fps                | 2580     |
| nupdates           | 10       |
| policy_entropy     | 0.623    |
| policy_loss        | -0.572   |
| total_timesteps    | 21825    |
| value_loss         | 1.97     |
---------------------------------
---------------------------------
| explained_variance | 0.953    |
| fps                | 2246     |
| nupdates           | 20       |
| policy_entropy     | 0.558    |
| policy_loss        | 0.769    |
| total_timesteps    | 46075    |
| value_loss         | 2.66     |
---------------------------------
---

---------------------------------
| explained_variance | 0.99     |
| fps                | 2020     |
| nupdates           | 270      |
| policy_entropy     | 0.831    |
| policy_loss        | 0.0186   |
| total_timesteps    | 652325   |
| value_loss         | 0.174    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2020     |
| nupdates           | 280      |
| policy_entropy     | 0.722    |
| policy_loss        | 0.0942   |
| total_timesteps    | 676575   |
| value_loss         | 0.2      |
---------------------------------
---------------------------------
| explained_variance | 0.954    |
| fps                | 2020     |
| nupdates           | 290      |
| policy_entropy     | 0.873    |
| policy_loss        | -0.167   |
| total_timesteps    | 700825   |
| value_loss         | 0.657    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.945    |
| fps                | 2021     |
| nupdates           | 540      |
| policy_entropy     | 0.848    |
| policy_loss        | -0.152   |
| total_timesteps    | 1307075  |
| value_loss         | 0.851    |
---------------------------------
---------------------------------
| explained_variance | 0.917    |
| fps                | 2021     |
| nupdates           | 550      |
| policy_entropy     | 0.814    |
| policy_loss        | -0.0469  |
| total_timesteps    | 1331325  |
| value_loss         | 0.761    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2021     |
| nupdates           | 560      |
| policy_entropy     | 0.818    |
| policy_loss        | -0.0851  |
| total_timesteps    | 1355575  |
| value_loss         | 0.278    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.952    |
| fps                | 2027     |
| nupdates           | 810      |
| policy_entropy     | 0.845    |
| policy_loss        | 0.0324   |
| total_timesteps    | 1961825  |
| value_loss         | 0.601    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2027     |
| nupdates           | 820      |
| policy_entropy     | 0.809    |
| policy_loss        | 0.0222   |
| total_timesteps    | 1986075  |
| value_loss         | 0.183    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2027     |
| nupdates           | 830      |
| policy_entropy     | 0.794    |
| policy_loss        | -0.0942  |
| total_timesteps    | 2010325  |
| value_loss         | 0.293    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2031     |
| nupdates           | 1080     |
| policy_entropy     | 0.851    |
| policy_loss        | 0.0386   |
| total_timesteps    | 2616575  |
| value_loss         | 0.255    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2031     |
| nupdates           | 1090     |
| policy_entropy     | 0.838    |
| policy_loss        | 0.0459   |
| total_timesteps    | 2640825  |
| value_loss         | 0.378    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2032     |
| nupdates           | 1100     |
| policy_entropy     | 0.827    |
| policy_loss        | -0.0212  |
| total_timesteps    | 2665075  |
| value_loss         | 0.375    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2035     |
| nupdates           | 1350     |
| policy_entropy     | 0.818    |
| policy_loss        | 0.0201   |
| total_timesteps    | 3271325  |
| value_loss         | 0.434    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2035     |
| nupdates           | 1360     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.0132   |
| total_timesteps    | 3295575  |
| value_loss         | 0.253    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2035     |
| nupdates           | 1370     |
| policy_entropy     | 0.803    |
| policy_loss        | 0.0253   |
| total_timesteps    | 3319825  |
| value_loss         | 0.201    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.942    |
| fps                | 2039     |
| nupdates           | 1620     |
| policy_entropy     | 0.952    |
| policy_loss        | -0.0243  |
| total_timesteps    | 3926075  |
| value_loss         | 0.608    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2039     |
| nupdates           | 1630     |
| policy_entropy     | 0.868    |
| policy_loss        | -0.114   |
| total_timesteps    | 3950325  |
| value_loss         | 0.407    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2039     |
| nupdates           | 1640     |
| policy_entropy     | 0.877    |
| policy_loss        | 0.0576   |
| total_timesteps    | 3974575  |
| value_loss         | 0.281    |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps         

---------------------------------
| explained_variance | 0.968    |
| fps                | 2041     |
| nupdates           | 1890     |
| policy_entropy     | 0.898    |
| policy_loss        | -0.109   |
| total_timesteps    | 4580825  |
| value_loss         | 0.588    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2041     |
| nupdates           | 1900     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.0104  |
| total_timesteps    | 4605075  |
| value_loss         | 0.46     |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2041     |
| nupdates           | 1910     |
| policy_entropy     | 0.888    |
| policy_loss        | -0.0364  |
| total_timesteps    | 4629325  |
| value_loss         | 0.355    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps         

---------------------------------
| explained_variance | 0.958    |
| fps                | 2043     |
| nupdates           | 2160     |
| policy_entropy     | 0.997    |
| policy_loss        | -0.153   |
| total_timesteps    | 5235575  |
| value_loss         | 0.62     |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps                | 2044     |
| nupdates           | 2170     |
| policy_entropy     | 0.87     |
| policy_loss        | -0.0167  |
| total_timesteps    | 5259825  |
| value_loss         | 0.124    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2044     |
| nupdates           | 2180     |
| policy_entropy     | 0.845    |
| policy_loss        | 0.0403   |
| total_timesteps    | 5284075  |
| value_loss         | 0.421    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2045     |
| nupdates           | 2430     |
| policy_entropy     | 0.902    |
| policy_loss        | -0.0255  |
| total_timesteps    | 5890325  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2045     |
| nupdates           | 2440     |
| policy_entropy     | 0.951    |
| policy_loss        | 0.0485   |
| total_timesteps    | 5914575  |
| value_loss         | 0.282    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2046     |
| nupdates           | 2450     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.13    |
| total_timesteps    | 5938825  |
| value_loss         | 0.335    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps         

---------------------------------
| explained_variance | 0.962    |
| fps                | 2046     |
| nupdates           | 2700     |
| policy_entropy     | 0.888    |
| policy_loss        | -0.18    |
| total_timesteps    | 6545075  |
| value_loss         | 0.644    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2046     |
| nupdates           | 2710     |
| policy_entropy     | 0.921    |
| policy_loss        | 0.0521   |
| total_timesteps    | 6569325  |
| value_loss         | 0.243    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2046     |
| nupdates           | 2720     |
| policy_entropy     | 0.88     |
| policy_loss        | 0.016    |
| total_timesteps    | 6593575  |
| value_loss         | 0.239    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2046     |
| nupdates           | 2970     |
| policy_entropy     | 0.993    |
| policy_loss        | -0.0277  |
| total_timesteps    | 7199825  |
| value_loss         | 0.288    |
---------------------------------
---------------------------------
| explained_variance | 0.955    |
| fps                | 2046     |
| nupdates           | 2980     |
| policy_entropy     | 0.94     |
| policy_loss        | 0.0478   |
| total_timesteps    | 7224075  |
| value_loss         | 0.531    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2046     |
| nupdates           | 2990     |
| policy_entropy     | 0.918    |
| policy_loss        | -0.0362  |
| total_timesteps    | 7248325  |
| value_loss         | 0.258    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2043     |
| nupdates           | 3240     |
| policy_entropy     | 0.902    |
| policy_loss        | 0.0521   |
| total_timesteps    | 7854575  |
| value_loss         | 0.296    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2043     |
| nupdates           | 3250     |
| policy_entropy     | 0.998    |
| policy_loss        | -0.0714  |
| total_timesteps    | 7878825  |
| value_loss         | 0.27     |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2043     |
| nupdates           | 3260     |
| policy_entropy     | 0.904    |
| policy_loss        | -0.0685  |
| total_timesteps    | 7903075  |
| value_loss         | 0.377    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2038     |
| nupdates           | 3510     |
| policy_entropy     | 0.969    |
| policy_loss        | -0.0929  |
| total_timesteps    | 8509325  |
| value_loss         | 0.362    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2037     |
| nupdates           | 3520     |
| policy_entropy     | 0.948    |
| policy_loss        | 0.0894   |
| total_timesteps    | 8533575  |
| value_loss         | 0.168    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2037     |
| nupdates           | 3530     |
| policy_entropy     | 0.927    |
| policy_loss        | 0.0204   |
| total_timesteps    | 8557825  |
| value_loss         | 0.191    |
---------------------------------
---------------------------------
| explained_variance | 0.944    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2029     |
| nupdates           | 3780     |
| policy_entropy     | 0.965    |
| policy_loss        | 0.0227   |
| total_timesteps    | 9164075  |
| value_loss         | 0.266    |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 2028     |
| nupdates           | 3790     |
| policy_entropy     | 0.896    |
| policy_loss        | -0.131   |
| total_timesteps    | 9188325  |
| value_loss         | 0.455    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2028     |
| nupdates           | 3800     |
| policy_entropy     | 0.902    |
| policy_loss        | 0.115    |
| total_timesteps    | 9212575  |
| value_loss         | 0.193    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.993    |
| fps                | 2015     |
| nupdates           | 4050     |
| policy_entropy     | 0.894    |
| policy_loss        | 0.103    |
| total_timesteps    | 9818825  |
| value_loss         | 0.14     |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2015     |
| nupdates           | 4060     |
| policy_entropy     | 0.904    |
| policy_loss        | 0.0614   |
| total_timesteps    | 9843075  |
| value_loss         | 0.251    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2014     |
| nupdates           | 4070     |
| policy_entropy     | 0.998    |
| policy_loss        | -0.11    |
| total_timesteps    | 9867325  |
| value_loss         | 0.283    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.971    |
| fps                | 1548     |
| nupdates           | 1        |
| policy_entropy     | 0.866    |
| policy_loss        | 0.0206   |
| total_timesteps    | 0        |
| value_loss         | 0.295    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2591     |
| nupdates           | 10       |
| policy_entropy     | 0.746    |
| policy_loss        | -0.443   |
| total_timesteps    | 21825    |
| value_loss         | 0.8      |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2246     |
| nupdates           | 20       |
| policy_entropy     | 0.773    |
| policy_loss        | 1.13     |
| total_timesteps    | 46075    |
| value_loss         | 3.07     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.969    |
| fps                | 2037     |
| nupdates           | 270      |
| policy_entropy     | 0.874    |
| policy_loss        | -0.0518  |
| total_timesteps    | 652325   |
| value_loss         | 0.375    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2037     |
| nupdates           | 280      |
| policy_entropy     | 0.867    |
| policy_loss        | -0.0163  |
| total_timesteps    | 676575   |
| value_loss         | 0.343    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2037     |
| nupdates           | 290      |
| policy_entropy     | 0.842    |
| policy_loss        | -0.0337  |
| total_timesteps    | 700825   |
| value_loss         | 0.372    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

---------------------------------
| explained_variance | 0.993    |
| fps                | 2040     |
| nupdates           | 540      |
| policy_entropy     | 0.751    |
| policy_loss        | 0.0618   |
| total_timesteps    | 1307075  |
| value_loss         | 0.145    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2040     |
| nupdates           | 550      |
| policy_entropy     | 0.772    |
| policy_loss        | 9.57e-05 |
| total_timesteps    | 1331325  |
| value_loss         | 0.424    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2040     |
| nupdates           | 560      |
| policy_entropy     | 0.81     |
| policy_loss        | -0.0313  |
| total_timesteps    | 1355575  |
| value_loss         | 0.307    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.964    |
| fps                | 2043     |
| nupdates           | 810      |
| policy_entropy     | 0.939    |
| policy_loss        | -0.12    |
| total_timesteps    | 1961825  |
| value_loss         | 0.515    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2043     |
| nupdates           | 820      |
| policy_entropy     | 0.863    |
| policy_loss        | -0.0288  |
| total_timesteps    | 1986075  |
| value_loss         | 0.406    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2043     |
| nupdates           | 830      |
| policy_entropy     | 0.865    |
| policy_loss        | 0.052    |
| total_timesteps    | 2010325  |
| value_loss         | 0.239    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2046     |
| nupdates           | 1080     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0948  |
| total_timesteps    | 2616575  |
| value_loss         | 0.406    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2046     |
| nupdates           | 1090     |
| policy_entropy     | 0.876    |
| policy_loss        | -0.009   |
| total_timesteps    | 2640825  |
| value_loss         | 0.294    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2046     |
| nupdates           | 1100     |
| policy_entropy     | 0.888    |
| policy_loss        | 0.00396  |
| total_timesteps    | 2665075  |
| value_loss         | 0.319    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2049     |
| nupdates           | 1350     |
| policy_entropy     | 0.89     |
| policy_loss        | 0.0789   |
| total_timesteps    | 3271325  |
| value_loss         | 0.152    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2049     |
| nupdates           | 1360     |
| policy_entropy     | 0.849    |
| policy_loss        | -0.0413  |
| total_timesteps    | 3295575  |
| value_loss         | 0.539    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2049     |
| nupdates           | 1370     |
| policy_entropy     | 0.915    |
| policy_loss        | -0.0793  |
| total_timesteps    | 3319825  |
| value_loss         | 0.362    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

---------------------------------
| explained_variance | 0.937    |
| fps                | 2052     |
| nupdates           | 1620     |
| policy_entropy     | 0.88     |
| policy_loss        | -0.2     |
| total_timesteps    | 3926075  |
| value_loss         | 0.716    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps                | 2052     |
| nupdates           | 1630     |
| policy_entropy     | 0.979    |
| policy_loss        | -0.111   |
| total_timesteps    | 3950325  |
| value_loss         | 0.367    |
---------------------------------
----------------------------------
| explained_variance | 0.975     |
| fps                | 2052      |
| nupdates           | 1640      |
| policy_entropy     | 0.913     |
| policy_loss        | -0.000992 |
| total_timesteps    | 3974575   |
| value_loss         | 0.258     |
----------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps

---------------------------------
| explained_variance | 0.982    |
| fps                | 2054     |
| nupdates           | 1890     |
| policy_entropy     | 0.951    |
| policy_loss        | -0.0313  |
| total_timesteps    | 4580825  |
| value_loss         | 0.282    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2054     |
| nupdates           | 1900     |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0398  |
| total_timesteps    | 4605075  |
| value_loss         | 0.249    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2054     |
| nupdates           | 1910     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.046   |
| total_timesteps    | 4629325  |
| value_loss         | 0.38     |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps         

---------------------------------
| explained_variance | 0.992    |
| fps                | 2057     |
| nupdates           | 2160     |
| policy_entropy     | 0.841    |
| policy_loss        | 0.0999   |
| total_timesteps    | 5235575  |
| value_loss         | 0.136    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2057     |
| nupdates           | 2170     |
| policy_entropy     | 0.848    |
| policy_loss        | 0.0258   |
| total_timesteps    | 5259825  |
| value_loss         | 0.17     |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps                | 2057     |
| nupdates           | 2180     |
| policy_entropy     | 0.834    |
| policy_loss        | 0.0109   |
| total_timesteps    | 5284075  |
| value_loss         | 0.566    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2059     |
| nupdates           | 2430     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0309  |
| total_timesteps    | 5890325  |
| value_loss         | 0.246    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2059     |
| nupdates           | 2440     |
| policy_entropy     | 0.831    |
| policy_loss        | 0.0643   |
| total_timesteps    | 5914575  |
| value_loss         | 0.243    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2059     |
| nupdates           | 2450     |
| policy_entropy     | 0.792    |
| policy_loss        | 0.0963   |
| total_timesteps    | 5938825  |
| value_loss         | 0.34     |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2061     |
| nupdates           | 2700     |
| policy_entropy     | 0.858    |
| policy_loss        | -0.0296  |
| total_timesteps    | 6545075  |
| value_loss         | 0.344    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2061     |
| nupdates           | 2710     |
| policy_entropy     | 0.859    |
| policy_loss        | -0.115   |
| total_timesteps    | 6569325  |
| value_loss         | 0.409    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2061     |
| nupdates           | 2720     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.0614  |
| total_timesteps    | 6593575  |
| value_loss         | 0.524    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2062     |
| nupdates           | 2970     |
| policy_entropy     | 0.932    |
| policy_loss        | -0.0164  |
| total_timesteps    | 7199825  |
| value_loss         | 0.196    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2062     |
| nupdates           | 2980     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0577   |
| total_timesteps    | 7224075  |
| value_loss         | 0.321    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2062     |
| nupdates           | 2990     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.103   |
| total_timesteps    | 7248325  |
| value_loss         | 0.339    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2061     |
| nupdates           | 3240     |
| policy_entropy     | 0.908    |
| policy_loss        | -0.00992 |
| total_timesteps    | 7854575  |
| value_loss         | 0.328    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2061     |
| nupdates           | 3250     |
| policy_entropy     | 0.896    |
| policy_loss        | -0.0579  |
| total_timesteps    | 7878825  |
| value_loss         | 0.476    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2061     |
| nupdates           | 3260     |
| policy_entropy     | 0.914    |
| policy_loss        | -0.00171 |
| total_timesteps    | 7903075  |
| value_loss         | 0.248    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps         

---------------------------------
| explained_variance | 0.957    |
| fps                | 2058     |
| nupdates           | 3510     |
| policy_entropy     | 0.847    |
| policy_loss        | -0.0702  |
| total_timesteps    | 8509325  |
| value_loss         | 0.443    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2058     |
| nupdates           | 3520     |
| policy_entropy     | 0.897    |
| policy_loss        | -0.0287  |
| total_timesteps    | 8533575  |
| value_loss         | 0.279    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2058     |
| nupdates           | 3530     |
| policy_entropy     | 0.908    |
| policy_loss        | -0.0161  |
| total_timesteps    | 8557825  |
| value_loss         | 0.302    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.952    |
| fps                | 2053     |
| nupdates           | 3780     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.183   |
| total_timesteps    | 9164075  |
| value_loss         | 0.783    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2052     |
| nupdates           | 3790     |
| policy_entropy     | 0.895    |
| policy_loss        | 0.0605   |
| total_timesteps    | 9188325  |
| value_loss         | 0.221    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2052     |
| nupdates           | 3800     |
| policy_entropy     | 0.866    |
| policy_loss        | 0.114    |
| total_timesteps    | 9212575  |
| value_loss         | 0.22     |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2043     |
| nupdates           | 4050     |
| policy_entropy     | 0.871    |
| policy_loss        | -0.0308  |
| total_timesteps    | 9818825  |
| value_loss         | 0.269    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2042     |
| nupdates           | 4060     |
| policy_entropy     | 0.877    |
| policy_loss        | 0.0736   |
| total_timesteps    | 9843075  |
| value_loss         | 0.232    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2042     |
| nupdates           | 4070     |
| policy_entropy     | 0.939    |
| policy_loss        | -0.0201  |
| total_timesteps    | 9867325  |
| value_loss         | 0.272    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.96     |
| fps                | 1551     |
| nupdates           | 1        |
| policy_entropy     | 0.864    |
| policy_loss        | -0.0382  |
| total_timesteps    | 0        |
| value_loss         | 0.503    |
---------------------------------
---------------------------------
| explained_variance | 0.777    |
| fps                | 2591     |
| nupdates           | 10       |
| policy_entropy     | 0.25     |
| policy_loss        | 0.0574   |
| total_timesteps    | 21825    |
| value_loss         | 2.54     |
---------------------------------
---------------------------------
| explained_variance | 0.918    |
| fps                | 2252     |
| nupdates           | 20       |
| policy_entropy     | 0.249    |
| policy_loss        | -0.388   |
| total_timesteps    | 46075    |
| value_loss         | 2.78     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.974    |
| fps                | 2027     |
| nupdates           | 270      |
| policy_entropy     | 0.584    |
| policy_loss        | -0.0277  |
| total_timesteps    | 652325   |
| value_loss         | 0.362    |
---------------------------------
---------------------------------
| explained_variance | 0.944    |
| fps                | 2027     |
| nupdates           | 280      |
| policy_entropy     | 0.677    |
| policy_loss        | 0.0219   |
| total_timesteps    | 676575   |
| value_loss         | 0.632    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2026     |
| nupdates           | 290      |
| policy_entropy     | 0.698    |
| policy_loss        | -0.00398 |
| total_timesteps    | 700825   |
| value_loss         | 0.449    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.931    |
| fps                | 2028     |
| nupdates           | 540      |
| policy_entropy     | 0.935    |
| policy_loss        | 0.0152   |
| total_timesteps    | 1307075  |
| value_loss         | 0.466    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2028     |
| nupdates           | 550      |
| policy_entropy     | 0.906    |
| policy_loss        | 0.0902   |
| total_timesteps    | 1331325  |
| value_loss         | 0.166    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2028     |
| nupdates           | 560      |
| policy_entropy     | 0.872    |
| policy_loss        | -0.0281  |
| total_timesteps    | 1355575  |
| value_loss         | 0.227    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2033     |
| nupdates           | 810      |
| policy_entropy     | 0.83     |
| policy_loss        | 0.0503   |
| total_timesteps    | 1961825  |
| value_loss         | 0.339    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2033     |
| nupdates           | 820      |
| policy_entropy     | 0.912    |
| policy_loss        | -0.162   |
| total_timesteps    | 1986075  |
| value_loss         | 0.537    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2033     |
| nupdates           | 830      |
| policy_entropy     | 1        |
| policy_loss        | -0.127   |
| total_timesteps    | 2010325  |
| value_loss         | 0.393    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.951    |
| fps                | 2037     |
| nupdates           | 1080     |
| policy_entropy     | 0.858    |
| policy_loss        | -0.124   |
| total_timesteps    | 2616575  |
| value_loss         | 0.456    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2037     |
| nupdates           | 1090     |
| policy_entropy     | 0.884    |
| policy_loss        | 0.0291   |
| total_timesteps    | 2640825  |
| value_loss         | 0.35     |
---------------------------------
---------------------------------
| explained_variance | 0.942    |
| fps                | 2037     |
| nupdates           | 1100     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0749  |
| total_timesteps    | 2665075  |
| value_loss         | 0.564    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2041     |
| nupdates           | 1350     |
| policy_entropy     | 0.911    |
| policy_loss        | 0.161    |
| total_timesteps    | 3271325  |
| value_loss         | 0.17     |
---------------------------------
---------------------------------
| explained_variance | 0.995    |
| fps                | 2041     |
| nupdates           | 1360     |
| policy_entropy     | 0.899    |
| policy_loss        | 0.0531   |
| total_timesteps    | 3295575  |
| value_loss         | 0.115    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2041     |
| nupdates           | 1370     |
| policy_entropy     | 0.95     |
| policy_loss        | -0.0568  |
| total_timesteps    | 3319825  |
| value_loss         | 0.136    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2045     |
| nupdates           | 1620     |
| policy_entropy     | 0.862    |
| policy_loss        | 0.0765   |
| total_timesteps    | 3926075  |
| value_loss         | 0.193    |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps                | 2045     |
| nupdates           | 1630     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.0871   |
| total_timesteps    | 3950325  |
| value_loss         | 0.416    |
---------------------------------
---------------------------------
| explained_variance | 0.945    |
| fps                | 2045     |
| nupdates           | 1640     |
| policy_entropy     | 0.846    |
| policy_loss        | -0.131   |
| total_timesteps    | 3974575  |
| value_loss         | 0.75     |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.941    |
| fps                | 2048     |
| nupdates           | 1890     |
| policy_entropy     | 0.934    |
| policy_loss        | -0.246   |
| total_timesteps    | 4580825  |
| value_loss         | 0.845    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps                | 2049     |
| nupdates           | 1900     |
| policy_entropy     | 0.873    |
| policy_loss        | -0.0671  |
| total_timesteps    | 4605075  |
| value_loss         | 0.35     |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2049     |
| nupdates           | 1910     |
| policy_entropy     | 0.881    |
| policy_loss        | 0.0878   |
| total_timesteps    | 4629325  |
| value_loss         | 0.123    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2051     |
| nupdates           | 2160     |
| policy_entropy     | 0.925    |
| policy_loss        | 0.0395   |
| total_timesteps    | 5235575  |
| value_loss         | 0.152    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2051     |
| nupdates           | 2170     |
| policy_entropy     | 0.847    |
| policy_loss        | -0.0089  |
| total_timesteps    | 5259825  |
| value_loss         | 0.143    |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps                | 2051     |
| nupdates           | 2180     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.126   |
| total_timesteps    | 5284075  |
| value_loss         | 0.478    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.958    |
| fps                | 2053     |
| nupdates           | 2430     |
| policy_entropy     | 0.915    |
| policy_loss        | -0.0283  |
| total_timesteps    | 5890325  |
| value_loss         | 0.444    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2053     |
| nupdates           | 2440     |
| policy_entropy     | 0.795    |
| policy_loss        | -0.0247  |
| total_timesteps    | 5914575  |
| value_loss         | 0.415    |
---------------------------------
---------------------------------
| explained_variance | 0.935    |
| fps                | 2053     |
| nupdates           | 2450     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0433  |
| total_timesteps    | 5938825  |
| value_loss         | 0.616    |
---------------------------------
---------------------------------
| explained_variance | 0.928    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2055     |
| nupdates           | 2700     |
| policy_entropy     | 0.883    |
| policy_loss        | 0.0354   |
| total_timesteps    | 6545075  |
| value_loss         | 0.241    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2055     |
| nupdates           | 2710     |
| policy_entropy     | 0.956    |
| policy_loss        | -0.0833  |
| total_timesteps    | 6569325  |
| value_loss         | 0.402    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2055     |
| nupdates           | 2720     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.0706  |
| total_timesteps    | 6593575  |
| value_loss         | 0.477    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.993    |
| fps                | 2055     |
| nupdates           | 2970     |
| policy_entropy     | 0.885    |
| policy_loss        | 0.111    |
| total_timesteps    | 7199825  |
| value_loss         | 0.0986   |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2055     |
| nupdates           | 2980     |
| policy_entropy     | 0.97     |
| policy_loss        | 0.0366   |
| total_timesteps    | 7224075  |
| value_loss         | 0.173    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2055     |
| nupdates           | 2990     |
| policy_entropy     | 0.872    |
| policy_loss        | -0.0214  |
| total_timesteps    | 7248325  |
| value_loss         | 0.384    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.937    |
| fps                | 2054     |
| nupdates           | 3240     |
| policy_entropy     | 0.881    |
| policy_loss        | 0.0723   |
| total_timesteps    | 7854575  |
| value_loss         | 0.583    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2054     |
| nupdates           | 3250     |
| policy_entropy     | 0.841    |
| policy_loss        | 0.0253   |
| total_timesteps    | 7878825  |
| value_loss         | 0.38     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2054     |
| nupdates           | 3260     |
| policy_entropy     | 0.863    |
| policy_loss        | 0.0409   |
| total_timesteps    | 7903075  |
| value_loss         | 0.308    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2051     |
| nupdates           | 3510     |
| policy_entropy     | 0.892    |
| policy_loss        | -0.0347  |
| total_timesteps    | 8509325  |
| value_loss         | 0.19     |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2050     |
| nupdates           | 3520     |
| policy_entropy     | 0.95     |
| policy_loss        | 0.0359   |
| total_timesteps    | 8533575  |
| value_loss         | 0.381    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2050     |
| nupdates           | 3530     |
| policy_entropy     | 0.914    |
| policy_loss        | -0.0701  |
| total_timesteps    | 8557825  |
| value_loss         | 0.391    |
---------------------------------
---------------------------------
| explained_variance | 0.946    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2044     |
| nupdates           | 3780     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.0616  |
| total_timesteps    | 9164075  |
| value_loss         | 0.375    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2043     |
| nupdates           | 3790     |
| policy_entropy     | 0.936    |
| policy_loss        | 0.021    |
| total_timesteps    | 9188325  |
| value_loss         | 0.259    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2043     |
| nupdates           | 3800     |
| policy_entropy     | 0.857    |
| policy_loss        | -0.0971  |
| total_timesteps    | 9212575  |
| value_loss         | 0.54     |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.963    |
| fps                | 2033     |
| nupdates           | 4050     |
| policy_entropy     | 0.902    |
| policy_loss        | -0.111   |
| total_timesteps    | 9818825  |
| value_loss         | 0.473    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2033     |
| nupdates           | 4060     |
| policy_entropy     | 0.922    |
| policy_loss        | 0.038    |
| total_timesteps    | 9843075  |
| value_loss         | 0.289    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps                | 2032     |
| nupdates           | 4070     |
| policy_entropy     | 1        |
| policy_loss        | -0.202   |
| total_timesteps    | 9867325  |
| value_loss         | 0.557    |
---------------------------------
---------------------------------
| explained_variance | 0.953    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.96     |
| fps                | 1558     |
| nupdates           | 1        |
| policy_entropy     | 0.828    |
| policy_loss        | -0.013   |
| total_timesteps    | 0        |
| value_loss         | 0.621    |
---------------------------------
---------------------------------
| explained_variance | 0.915    |
| fps                | 2595     |
| nupdates           | 10       |
| policy_entropy     | 0.847    |
| policy_loss        | 3.01     |
| total_timesteps    | 21825    |
| value_loss         | 14       |
---------------------------------
---------------------------------
| explained_variance | 0.873    |
| fps                | 2259     |
| nupdates           | 20       |
| policy_entropy     | 0.779    |
| policy_loss        | 2.14     |
| total_timesteps    | 46075    |
| value_loss         | 9.47     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.974    |
| fps                | 2029     |
| nupdates           | 270      |
| policy_entropy     | 0.893    |
| policy_loss        | -0.0882  |
| total_timesteps    | 652325   |
| value_loss         | 0.281    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2029     |
| nupdates           | 280      |
| policy_entropy     | 0.896    |
| policy_loss        | -0.0952  |
| total_timesteps    | 676575   |
| value_loss         | 0.283    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2029     |
| nupdates           | 290      |
| policy_entropy     | 0.825    |
| policy_loss        | 0.0526   |
| total_timesteps    | 700825   |
| value_loss         | 0.312    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2031     |
| nupdates           | 540      |
| policy_entropy     | 0.868    |
| policy_loss        | 0.00939  |
| total_timesteps    | 1307075  |
| value_loss         | 0.179    |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps                | 2031     |
| nupdates           | 550      |
| policy_entropy     | 0.915    |
| policy_loss        | -0.00187 |
| total_timesteps    | 1331325  |
| value_loss         | 0.129    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2031     |
| nupdates           | 560      |
| policy_entropy     | 0.888    |
| policy_loss        | 0.0429   |
| total_timesteps    | 1355575  |
| value_loss         | 0.245    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

---------------------------------
| explained_variance | 0.967    |
| fps                | 2034     |
| nupdates           | 810      |
| policy_entropy     | 0.813    |
| policy_loss        | 0.11     |
| total_timesteps    | 1961825  |
| value_loss         | 0.476    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2034     |
| nupdates           | 820      |
| policy_entropy     | 0.914    |
| policy_loss        | 0.0282   |
| total_timesteps    | 1986075  |
| value_loss         | 0.161    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2034     |
| nupdates           | 830      |
| policy_entropy     | 0.842    |
| policy_loss        | 0.0428   |
| total_timesteps    | 2010325  |
| value_loss         | 0.153    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.962    |
| fps                | 2037     |
| nupdates           | 1080     |
| policy_entropy     | 0.89     |
| policy_loss        | -0.0156  |
| total_timesteps    | 2616575  |
| value_loss         | 0.396    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2037     |
| nupdates           | 1090     |
| policy_entropy     | 0.969    |
| policy_loss        | -0.0807  |
| total_timesteps    | 2640825  |
| value_loss         | 0.442    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2037     |
| nupdates           | 1100     |
| policy_entropy     | 0.906    |
| policy_loss        | -0.142   |
| total_timesteps    | 2665075  |
| value_loss         | 0.518    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

---------------------------------
| explained_variance | 0.991    |
| fps                | 2041     |
| nupdates           | 1350     |
| policy_entropy     | 0.834    |
| policy_loss        | 0.0521   |
| total_timesteps    | 3271325  |
| value_loss         | 0.14     |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2042     |
| nupdates           | 1360     |
| policy_entropy     | 0.85     |
| policy_loss        | 0.0639   |
| total_timesteps    | 3295575  |
| value_loss         | 0.149    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2042     |
| nupdates           | 1370     |
| policy_entropy     | 0.847    |
| policy_loss        | -0.00121 |
| total_timesteps    | 3319825  |
| value_loss         | 0.288    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.966    |
| fps                | 2045     |
| nupdates           | 1620     |
| policy_entropy     | 0.949    |
| policy_loss        | -0.218   |
| total_timesteps    | 3926075  |
| value_loss         | 0.426    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2045     |
| nupdates           | 1630     |
| policy_entropy     | 0.925    |
| policy_loss        | -0.0574  |
| total_timesteps    | 3950325  |
| value_loss         | 0.306    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2045     |
| nupdates           | 1640     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.0401  |
| total_timesteps    | 3974575  |
| value_loss         | 0.527    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2047     |
| nupdates           | 1890     |
| policy_entropy     | 0.918    |
| policy_loss        | 0.0646   |
| total_timesteps    | 4580825  |
| value_loss         | 0.217    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2047     |
| nupdates           | 1900     |
| policy_entropy     | 0.947    |
| policy_loss        | -0.081   |
| total_timesteps    | 4605075  |
| value_loss         | 0.563    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2048     |
| nupdates           | 1910     |
| policy_entropy     | 0.965    |
| policy_loss        | -0.0559  |
| total_timesteps    | 4629325  |
| value_loss         | 0.384    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2050     |
| nupdates           | 2160     |
| policy_entropy     | 0.848    |
| policy_loss        | 0.00287  |
| total_timesteps    | 5235575  |
| value_loss         | 0.182    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2050     |
| nupdates           | 2170     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.00249 |
| total_timesteps    | 5259825  |
| value_loss         | 0.252    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2050     |
| nupdates           | 2180     |
| policy_entropy     | 0.93     |
| policy_loss        | -0.114   |
| total_timesteps    | 5284075  |
| value_loss         | 0.295    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2052     |
| nupdates           | 2430     |
| policy_entropy     | 0.844    |
| policy_loss        | 0.0105   |
| total_timesteps    | 5890325  |
| value_loss         | 0.36     |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2052     |
| nupdates           | 2440     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.00327 |
| total_timesteps    | 5914575  |
| value_loss         | 0.392    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2052     |
| nupdates           | 2450     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.0848  |
| total_timesteps    | 5938825  |
| value_loss         | 0.388    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.965    |
| fps                | 2054     |
| nupdates           | 2700     |
| policy_entropy     | 0.937    |
| policy_loss        | -0.00964 |
| total_timesteps    | 6545075  |
| value_loss         | 0.409    |
---------------------------------
---------------------------------
| explained_variance | 0.95     |
| fps                | 2054     |
| nupdates           | 2710     |
| policy_entropy     | 0.818    |
| policy_loss        | 0.000933 |
| total_timesteps    | 6569325  |
| value_loss         | 0.538    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps                | 2054     |
| nupdates           | 2720     |
| policy_entropy     | 0.84     |
| policy_loss        | -0.0357  |
| total_timesteps    | 6593575  |
| value_loss         | 0.548    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2054     |
| nupdates           | 2970     |
| policy_entropy     | 0.861    |
| policy_loss        | 0.0559   |
| total_timesteps    | 7199825  |
| value_loss         | 0.22     |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2054     |
| nupdates           | 2980     |
| policy_entropy     | 0.818    |
| policy_loss        | 0.134    |
| total_timesteps    | 7224075  |
| value_loss         | 0.136    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2054     |
| nupdates           | 2990     |
| policy_entropy     | 0.838    |
| policy_loss        | 0.139    |
| total_timesteps    | 7248325  |
| value_loss         | 0.288    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2053     |
| nupdates           | 3240     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0312  |
| total_timesteps    | 7854575  |
| value_loss         | 0.35     |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 2053     |
| nupdates           | 3250     |
| policy_entropy     | 0.93     |
| policy_loss        | -0.206   |
| total_timesteps    | 7878825  |
| value_loss         | 0.691    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2053     |
| nupdates           | 3260     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.039   |
| total_timesteps    | 7903075  |
| value_loss         | 0.42     |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.935    |
| fps                | 2051     |
| nupdates           | 3510     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.106   |
| total_timesteps    | 8509325  |
| value_loss         | 0.558    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2050     |
| nupdates           | 3520     |
| policy_entropy     | 0.929    |
| policy_loss        | -0.104   |
| total_timesteps    | 8533575  |
| value_loss         | 0.432    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2050     |
| nupdates           | 3530     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.0634   |
| total_timesteps    | 8557825  |
| value_loss         | 0.265    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps         

---------------------------------
| explained_variance | 0.956    |
| fps                | 2045     |
| nupdates           | 3780     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0507  |
| total_timesteps    | 9164075  |
| value_loss         | 0.559    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2045     |
| nupdates           | 3790     |
| policy_entropy     | 0.866    |
| policy_loss        | 0.00918  |
| total_timesteps    | 9188325  |
| value_loss         | 0.335    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2045     |
| nupdates           | 3800     |
| policy_entropy     | 0.908    |
| policy_loss        | 0.000559 |
| total_timesteps    | 9212575  |
| value_loss         | 0.332    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2036     |
| nupdates           | 4050     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.015    |
| total_timesteps    | 9818825  |
| value_loss         | 0.315    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2036     |
| nupdates           | 4060     |
| policy_entropy     | 0.942    |
| policy_loss        | -0.0265  |
| total_timesteps    | 9843075  |
| value_loss         | 0.25     |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2035     |
| nupdates           | 4070     |
| policy_entropy     | 0.905    |
| policy_loss        | 0.0127   |
| total_timesteps    | 9867325  |
| value_loss         | 0.239    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.961    |
| fps                | 1572     |
| nupdates           | 1        |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0775  |
| total_timesteps    | 0        |
| value_loss         | 0.511    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2601     |
| nupdates           | 10       |
| policy_entropy     | 0.812    |
| policy_loss        | 0.534    |
| total_timesteps    | 21825    |
| value_loss         | 0.905    |
---------------------------------
---------------------------------
| explained_variance | 0.943    |
| fps                | 2264     |
| nupdates           | 20       |
| policy_entropy     | 0.814    |
| policy_loss        | -1.39    |
| total_timesteps    | 46075    |
| value_loss         | 4.01     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.983    |
| fps                | 2044     |
| nupdates           | 270      |
| policy_entropy     | 0.838    |
| policy_loss        | 0.00343  |
| total_timesteps    | 652325   |
| value_loss         | 0.298    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2044     |
| nupdates           | 280      |
| policy_entropy     | 0.827    |
| policy_loss        | 0.0458   |
| total_timesteps    | 676575   |
| value_loss         | 0.432    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2044     |
| nupdates           | 290      |
| policy_entropy     | 0.812    |
| policy_loss        | 0.072    |
| total_timesteps    | 700825   |
| value_loss         | 0.436    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.95     |
| fps                | 2043     |
| nupdates           | 540      |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0919  |
| total_timesteps    | 1307075  |
| value_loss         | 0.503    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2043     |
| nupdates           | 550      |
| policy_entropy     | 0.897    |
| policy_loss        | -0.0482  |
| total_timesteps    | 1331325  |
| value_loss         | 0.44     |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2043     |
| nupdates           | 560      |
| policy_entropy     | 0.845    |
| policy_loss        | 0.0662   |
| total_timesteps    | 1355575  |
| value_loss         | 0.37     |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2049     |
| nupdates           | 810      |
| policy_entropy     | 0.919    |
| policy_loss        | 0.0487   |
| total_timesteps    | 1961825  |
| value_loss         | 0.257    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2049     |
| nupdates           | 820      |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0579  |
| total_timesteps    | 1986075  |
| value_loss         | 0.338    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2049     |
| nupdates           | 830      |
| policy_entropy     | 0.894    |
| policy_loss        | 0.0819   |
| total_timesteps    | 2010325  |
| value_loss         | 0.249    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

---------------------------------
| explained_variance | 0.963    |
| fps                | 2052     |
| nupdates           | 1080     |
| policy_entropy     | 0.869    |
| policy_loss        | -0.0741  |
| total_timesteps    | 2616575  |
| value_loss         | 0.374    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2053     |
| nupdates           | 1090     |
| policy_entropy     | 0.875    |
| policy_loss        | -0.0647  |
| total_timesteps    | 2640825  |
| value_loss         | 0.349    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2053     |
| nupdates           | 1100     |
| policy_entropy     | 0.812    |
| policy_loss        | -0.0347  |
| total_timesteps    | 2665075  |
| value_loss         | 0.226    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2055     |
| nupdates           | 1350     |
| policy_entropy     | 0.818    |
| policy_loss        | -0.0286  |
| total_timesteps    | 3271325  |
| value_loss         | 0.218    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2056     |
| nupdates           | 1360     |
| policy_entropy     | 0.797    |
| policy_loss        | 0.0181   |
| total_timesteps    | 3295575  |
| value_loss         | 0.365    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2056     |
| nupdates           | 1370     |
| policy_entropy     | 0.778    |
| policy_loss        | -0.047   |
| total_timesteps    | 3319825  |
| value_loss         | 0.182    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2055     |
| nupdates           | 1620     |
| policy_entropy     | 0.833    |
| policy_loss        | 0.122    |
| total_timesteps    | 3926075  |
| value_loss         | 0.164    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2055     |
| nupdates           | 1630     |
| policy_entropy     | 0.823    |
| policy_loss        | 0.0233   |
| total_timesteps    | 3950325  |
| value_loss         | 0.404    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2055     |
| nupdates           | 1640     |
| policy_entropy     | 0.868    |
| policy_loss        | 0.0277   |
| total_timesteps    | 3974575  |
| value_loss         | 0.347    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2058     |
| nupdates           | 1890     |
| policy_entropy     | 0.958    |
| policy_loss        | 0.0313   |
| total_timesteps    | 4580825  |
| value_loss         | 0.15     |
---------------------------------
---------------------------------
| explained_variance | 0.911    |
| fps                | 2058     |
| nupdates           | 1900     |
| policy_entropy     | 0.969    |
| policy_loss        | -0.0889  |
| total_timesteps    | 4605075  |
| value_loss         | 0.501    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2058     |
| nupdates           | 1910     |
| policy_entropy     | 0.906    |
| policy_loss        | -0.0261  |
| total_timesteps    | 4629325  |
| value_loss         | 0.288    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2061     |
| nupdates           | 2160     |
| policy_entropy     | 0.916    |
| policy_loss        | 0.0317   |
| total_timesteps    | 5235575  |
| value_loss         | 0.182    |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps                | 2061     |
| nupdates           | 2170     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0495  |
| total_timesteps    | 5259825  |
| value_loss         | 0.438    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2061     |
| nupdates           | 2180     |
| policy_entropy     | 0.899    |
| policy_loss        | -0.127   |
| total_timesteps    | 5284075  |
| value_loss         | 0.356    |
---------------------------------
---------------------------------
| explained_variance | 0.952    |
| fps         

---------------------------------
| explained_variance | 0.958    |
| fps                | 2064     |
| nupdates           | 2430     |
| policy_entropy     | 0.932    |
| policy_loss        | -0.0929  |
| total_timesteps    | 5890325  |
| value_loss         | 0.586    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2064     |
| nupdates           | 2440     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0914  |
| total_timesteps    | 5914575  |
| value_loss         | 0.403    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2064     |
| nupdates           | 2450     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.0117   |
| total_timesteps    | 5938825  |
| value_loss         | 0.177    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.957    |
| fps                | 2065     |
| nupdates           | 2700     |
| policy_entropy     | 0.925    |
| policy_loss        | -0.102   |
| total_timesteps    | 6545075  |
| value_loss         | 0.532    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2065     |
| nupdates           | 2710     |
| policy_entropy     | 0.863    |
| policy_loss        | -0.132   |
| total_timesteps    | 6569325  |
| value_loss         | 0.618    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2065     |
| nupdates           | 2720     |
| policy_entropy     | 0.887    |
| policy_loss        | 0.0551   |
| total_timesteps    | 6593575  |
| value_loss         | 0.333    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2063     |
| nupdates           | 2970     |
| policy_entropy     | 0.814    |
| policy_loss        | -0.00889 |
| total_timesteps    | 7199825  |
| value_loss         | 0.238    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2063     |
| nupdates           | 2980     |
| policy_entropy     | 0.939    |
| policy_loss        | -0.081   |
| total_timesteps    | 7224075  |
| value_loss         | 0.378    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2063     |
| nupdates           | 2990     |
| policy_entropy     | 0.907    |
| policy_loss        | -0.00916 |
| total_timesteps    | 7248325  |
| value_loss         | 0.299    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2058     |
| nupdates           | 3240     |
| policy_entropy     | 0.949    |
| policy_loss        | -0.0319  |
| total_timesteps    | 7854575  |
| value_loss         | 0.184    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2058     |
| nupdates           | 3250     |
| policy_entropy     | 0.926    |
| policy_loss        | 0.0382   |
| total_timesteps    | 7878825  |
| value_loss         | 0.385    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2057     |
| nupdates           | 3260     |
| policy_entropy     | 0.89     |
| policy_loss        | 0.077    |
| total_timesteps    | 7903075  |
| value_loss         | 0.25     |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.975    |
| fps                | 2049     |
| nupdates           | 3510     |
| policy_entropy     | 0.864    |
| policy_loss        | 0.117    |
| total_timesteps    | 8509325  |
| value_loss         | 0.366    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2048     |
| nupdates           | 3520     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.00896 |
| total_timesteps    | 8533575  |
| value_loss         | 0.317    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2048     |
| nupdates           | 3530     |
| policy_entropy     | 0.945    |
| policy_loss        | -0.0306  |
| total_timesteps    | 8557825  |
| value_loss         | 0.272    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2037     |
| nupdates           | 3780     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.0919   |
| total_timesteps    | 9164075  |
| value_loss         | 0.255    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2036     |
| nupdates           | 3790     |
| policy_entropy     | 0.888    |
| policy_loss        | 0.00302  |
| total_timesteps    | 9188325  |
| value_loss         | 0.218    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2036     |
| nupdates           | 3800     |
| policy_entropy     | 0.964    |
| policy_loss        | -0.0371  |
| total_timesteps    | 9212575  |
| value_loss         | 0.263    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2022     |
| nupdates           | 4050     |
| policy_entropy     | 0.894    |
| policy_loss        | 0.00945  |
| total_timesteps    | 9818825  |
| value_loss         | 0.322    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2022     |
| nupdates           | 4060     |
| policy_entropy     | 0.895    |
| policy_loss        | -0.0322  |
| total_timesteps    | 9843075  |
| value_loss         | 0.332    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2021     |
| nupdates           | 4070     |
| policy_entropy     | 0.943    |
| policy_loss        | 0.00709  |
| total_timesteps    | 9867325  |
| value_loss         | 0.356    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.985    |
| fps                | 1566     |
| nupdates           | 1        |
| policy_entropy     | 0.83     |
| policy_loss        | 0.00119  |
| total_timesteps    | 0        |
| value_loss         | 0.27     |
---------------------------------
---------------------------------
| explained_variance | 0.871    |
| fps                | 2584     |
| nupdates           | 10       |
| policy_entropy     | 0.777    |
| policy_loss        | -0.0977  |
| total_timesteps    | 21825    |
| value_loss         | 1.64     |
---------------------------------
---------------------------------
| explained_variance | 0.709    |
| fps                | 2257     |
| nupdates           | 20       |
| policy_entropy     | 0.747    |
| policy_loss        | 4.26     |
| total_timesteps    | 46075    |
| value_loss         | 39.4     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.967    |
| fps                | 2031     |
| nupdates           | 270      |
| policy_entropy     | 0.709    |
| policy_loss        | 0.0032   |
| total_timesteps    | 652325   |
| value_loss         | 0.492    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps                | 2030     |
| nupdates           | 280      |
| policy_entropy     | 0.786    |
| policy_loss        | -0.0108  |
| total_timesteps    | 676575   |
| value_loss         | 0.323    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2030     |
| nupdates           | 290      |
| policy_entropy     | 0.794    |
| policy_loss        | -0.0662  |
| total_timesteps    | 700825   |
| value_loss         | 0.285    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2026     |
| nupdates           | 540      |
| policy_entropy     | 0.869    |
| policy_loss        | -0.0586  |
| total_timesteps    | 1307075  |
| value_loss         | 0.298    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2026     |
| nupdates           | 550      |
| policy_entropy     | 0.818    |
| policy_loss        | 0.0502   |
| total_timesteps    | 1331325  |
| value_loss         | 0.202    |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps                | 2026     |
| nupdates           | 560      |
| policy_entropy     | 0.811    |
| policy_loss        | 0.126    |
| total_timesteps    | 1355575  |
| value_loss         | 0.142    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2027     |
| nupdates           | 810      |
| policy_entropy     | 0.873    |
| policy_loss        | 0.0331   |
| total_timesteps    | 1961825  |
| value_loss         | 0.423    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2027     |
| nupdates           | 820      |
| policy_entropy     | 0.873    |
| policy_loss        | 0.0836   |
| total_timesteps    | 1986075  |
| value_loss         | 0.245    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2027     |
| nupdates           | 830      |
| policy_entropy     | 0.875    |
| policy_loss        | -0.0779  |
| total_timesteps    | 2010325  |
| value_loss         | 0.452    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2031     |
| nupdates           | 1080     |
| policy_entropy     | 0.936    |
| policy_loss        | -0.0167  |
| total_timesteps    | 2616575  |
| value_loss         | 0.25     |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2031     |
| nupdates           | 1090     |
| policy_entropy     | 0.91     |
| policy_loss        | -0.0805  |
| total_timesteps    | 2640825  |
| value_loss         | 0.377    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2032     |
| nupdates           | 1100     |
| policy_entropy     | 0.929    |
| policy_loss        | -0.0492  |
| total_timesteps    | 2665075  |
| value_loss         | 0.51     |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.95     |
| fps                | 2035     |
| nupdates           | 1350     |
| policy_entropy     | 0.876    |
| policy_loss        | -0.178   |
| total_timesteps    | 3271325  |
| value_loss         | 0.55     |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2035     |
| nupdates           | 1360     |
| policy_entropy     | 0.894    |
| policy_loss        | 0.0146   |
| total_timesteps    | 3295575  |
| value_loss         | 0.404    |
---------------------------------
---------------------------------
| explained_variance | 0.946    |
| fps                | 2035     |
| nupdates           | 1370     |
| policy_entropy     | 0.826    |
| policy_loss        | -0.0641  |
| total_timesteps    | 3319825  |
| value_loss         | 0.478    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.991    |
| fps                | 2038     |
| nupdates           | 1620     |
| policy_entropy     | 0.821    |
| policy_loss        | 0.0187   |
| total_timesteps    | 3926075  |
| value_loss         | 0.165    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2038     |
| nupdates           | 1630     |
| policy_entropy     | 0.934    |
| policy_loss        | 0.0348   |
| total_timesteps    | 3950325  |
| value_loss         | 0.469    |
---------------------------------
---------------------------------
| explained_variance | 0.931    |
| fps                | 2038     |
| nupdates           | 1640     |
| policy_entropy     | 0.839    |
| policy_loss        | 0.0157   |
| total_timesteps    | 3974575  |
| value_loss         | 0.633    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.934    |
| fps                | 2041     |
| nupdates           | 1890     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.141   |
| total_timesteps    | 4580825  |
| value_loss         | 0.715    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2041     |
| nupdates           | 1900     |
| policy_entropy     | 0.87     |
| policy_loss        | -0.0946  |
| total_timesteps    | 4605075  |
| value_loss         | 0.507    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2041     |
| nupdates           | 1910     |
| policy_entropy     | 0.94     |
| policy_loss        | 0.00943  |
| total_timesteps    | 4629325  |
| value_loss         | 0.302    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps         

---------------------------------
| explained_variance | 0.952    |
| fps                | 2044     |
| nupdates           | 2160     |
| policy_entropy     | 0.835    |
| policy_loss        | -0.0141  |
| total_timesteps    | 5235575  |
| value_loss         | 0.588    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2044     |
| nupdates           | 2170     |
| policy_entropy     | 0.954    |
| policy_loss        | 0.0104   |
| total_timesteps    | 5259825  |
| value_loss         | 0.171    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2044     |
| nupdates           | 2180     |
| policy_entropy     | 0.862    |
| policy_loss        | -0.0116  |
| total_timesteps    | 5284075  |
| value_loss         | 0.0803   |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2047     |
| nupdates           | 2430     |
| policy_entropy     | 0.898    |
| policy_loss        | 0.0264   |
| total_timesteps    | 5890325  |
| value_loss         | 0.245    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2047     |
| nupdates           | 2440     |
| policy_entropy     | 0.889    |
| policy_loss        | -0.0978  |
| total_timesteps    | 5914575  |
| value_loss         | 0.43     |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2047     |
| nupdates           | 2450     |
| policy_entropy     | 0.843    |
| policy_loss        | 0.044    |
| total_timesteps    | 5938825  |
| value_loss         | 0.428    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps         

---------------------------------
| explained_variance | 0.985    |
| fps                | 2050     |
| nupdates           | 2700     |
| policy_entropy     | 0.909    |
| policy_loss        | 0.112    |
| total_timesteps    | 6545075  |
| value_loss         | 0.2      |
---------------------------------
----------------------------------
| explained_variance | 0.985     |
| fps                | 2050      |
| nupdates           | 2710      |
| policy_entropy     | 0.827     |
| policy_loss        | -0.000486 |
| total_timesteps    | 6569325   |
| value_loss         | 0.243     |
----------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2050     |
| nupdates           | 2720     |
| policy_entropy     | 0.931    |
| policy_loss        | -0.0813  |
| total_timesteps    | 6593575  |
| value_loss         | 0.389    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps

---------------------------------
| explained_variance | 0.962    |
| fps                | 2051     |
| nupdates           | 2970     |
| policy_entropy     | 0.826    |
| policy_loss        | -0.0521  |
| total_timesteps    | 7199825  |
| value_loss         | 0.45     |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2051     |
| nupdates           | 2980     |
| policy_entropy     | 0.859    |
| policy_loss        | -0.0209  |
| total_timesteps    | 7224075  |
| value_loss         | 0.165    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2051     |
| nupdates           | 2990     |
| policy_entropy     | 0.886    |
| policy_loss        | 0.0141   |
| total_timesteps    | 7248325  |
| value_loss         | 0.321    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2051     |
| nupdates           | 3240     |
| policy_entropy     | 0.863    |
| policy_loss        | -0.0275  |
| total_timesteps    | 7854575  |
| value_loss         | 0.373    |
---------------------------------
---------------------------------
| explained_variance | 0.94     |
| fps                | 2051     |
| nupdates           | 3250     |
| policy_entropy     | 0.969    |
| policy_loss        | -0.136   |
| total_timesteps    | 7878825  |
| value_loss         | 0.621    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2051     |
| nupdates           | 3260     |
| policy_entropy     | 0.869    |
| policy_loss        | 0.0551   |
| total_timesteps    | 7903075  |
| value_loss         | 0.396    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2050     |
| nupdates           | 3510     |
| policy_entropy     | 0.877    |
| policy_loss        | 0.0463   |
| total_timesteps    | 8509325  |
| value_loss         | 0.225    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2049     |
| nupdates           | 3520     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.116   |
| total_timesteps    | 8533575  |
| value_loss         | 0.63     |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2049     |
| nupdates           | 3530     |
| policy_entropy     | 0.855    |
| policy_loss        | -0.049   |
| total_timesteps    | 8557825  |
| value_loss         | 0.353    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps         

---------------------------------
| explained_variance | 0.968    |
| fps                | 2045     |
| nupdates           | 3780     |
| policy_entropy     | 0.913    |
| policy_loss        | 0.0051   |
| total_timesteps    | 9164075  |
| value_loss         | 0.358    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2045     |
| nupdates           | 3790     |
| policy_entropy     | 1.01     |
| policy_loss        | 0.0251   |
| total_timesteps    | 9188325  |
| value_loss         | 0.28     |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2045     |
| nupdates           | 3800     |
| policy_entropy     | 0.987    |
| policy_loss        | -0.0522  |
| total_timesteps    | 9212575  |
| value_loss         | 0.34     |
---------------------------------
---------------------------------
| explained_variance | 0.941    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2037     |
| nupdates           | 4050     |
| policy_entropy     | 0.849    |
| policy_loss        | 0.117    |
| total_timesteps    | 9818825  |
| value_loss         | 0.171    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2037     |
| nupdates           | 4060     |
| policy_entropy     | 0.926    |
| policy_loss        | -0.129   |
| total_timesteps    | 9843075  |
| value_loss         | 0.498    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2036     |
| nupdates           | 4070     |
| policy_entropy     | 0.938    |
| policy_loss        | -0.00553 |
| total_timesteps    | 9867325  |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.958    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.982    |
| fps                | 1576     |
| nupdates           | 1        |
| policy_entropy     | 0.839    |
| policy_loss        | 0.0289   |
| total_timesteps    | 0        |
| value_loss         | 0.259    |
---------------------------------
---------------------------------
| explained_variance | 0.917    |
| fps                | 2599     |
| nupdates           | 10       |
| policy_entropy     | 0.602    |
| policy_loss        | 0.744    |
| total_timesteps    | 21825    |
| value_loss         | 2.27     |
---------------------------------
---------------------------------
| explained_variance | 0.848    |
| fps                | 2260     |
| nupdates           | 20       |
| policy_entropy     | 0.728    |
| policy_loss        | -1.04    |
| total_timesteps    | 46075    |
| value_loss         | 4.86     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.992    |
| fps                | 2028     |
| nupdates           | 270      |
| policy_entropy     | 0.829    |
| policy_loss        | 0.271    |
| total_timesteps    | 652325   |
| value_loss         | 0.297    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2028     |
| nupdates           | 280      |
| policy_entropy     | 0.957    |
| policy_loss        | -0.215   |
| total_timesteps    | 676575   |
| value_loss         | 0.459    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2028     |
| nupdates           | 290      |
| policy_entropy     | 0.871    |
| policy_loss        | -0.0816  |
| total_timesteps    | 700825   |
| value_loss         | 0.488    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

---------------------------------
| explained_variance | 0.992    |
| fps                | 2027     |
| nupdates           | 540      |
| policy_entropy     | 0.836    |
| policy_loss        | 0.0559   |
| total_timesteps    | 1307075  |
| value_loss         | 0.103    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2027     |
| nupdates           | 550      |
| policy_entropy     | 0.85     |
| policy_loss        | 0.0582   |
| total_timesteps    | 1331325  |
| value_loss         | 0.198    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2027     |
| nupdates           | 560      |
| policy_entropy     | 0.86     |
| policy_loss        | -0.0206  |
| total_timesteps    | 1355575  |
| value_loss         | 0.188    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.949    |
| fps                | 2029     |
| nupdates           | 810      |
| policy_entropy     | 0.891    |
| policy_loss        | -0.166   |
| total_timesteps    | 1961825  |
| value_loss         | 0.611    |
---------------------------------
---------------------------------
| explained_variance | 0.941    |
| fps                | 2029     |
| nupdates           | 820      |
| policy_entropy     | 0.924    |
| policy_loss        | -0.226   |
| total_timesteps    | 1986075  |
| value_loss         | 0.971    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2030     |
| nupdates           | 830      |
| policy_entropy     | 0.821    |
| policy_loss        | -0.00802 |
| total_timesteps    | 2010325  |
| value_loss         | 0.468    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.975    |
| fps                | 2034     |
| nupdates           | 1080     |
| policy_entropy     | 0.904    |
| policy_loss        | 0.0257   |
| total_timesteps    | 2616575  |
| value_loss         | 0.326    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2034     |
| nupdates           | 1090     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.0646  |
| total_timesteps    | 2640825  |
| value_loss         | 0.498    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2034     |
| nupdates           | 1100     |
| policy_entropy     | 0.921    |
| policy_loss        | -0.0112  |
| total_timesteps    | 2665075  |
| value_loss         | 0.361    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2038     |
| nupdates           | 1350     |
| policy_entropy     | 0.895    |
| policy_loss        | 0.0556   |
| total_timesteps    | 3271325  |
| value_loss         | 0.11     |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2039     |
| nupdates           | 1360     |
| policy_entropy     | 0.94     |
| policy_loss        | 0.0393   |
| total_timesteps    | 3295575  |
| value_loss         | 0.21     |
---------------------------------
----------------------------------
| explained_variance | 0.992     |
| fps                | 2039      |
| nupdates           | 1370      |
| policy_entropy     | 0.914     |
| policy_loss        | -3.71e-06 |
| total_timesteps    | 3319825   |
| value_loss         | 0.137     |
----------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps

---------------------------------
| explained_variance | 0.977    |
| fps                | 2043     |
| nupdates           | 1620     |
| policy_entropy     | 0.876    |
| policy_loss        | 0.0798   |
| total_timesteps    | 3926075  |
| value_loss         | 0.333    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2043     |
| nupdates           | 1630     |
| policy_entropy     | 0.889    |
| policy_loss        | 0.02     |
| total_timesteps    | 3950325  |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2043     |
| nupdates           | 1640     |
| policy_entropy     | 0.841    |
| policy_loss        | 0.0621   |
| total_timesteps    | 3974575  |
| value_loss         | 0.299    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2047     |
| nupdates           | 1890     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.0326  |
| total_timesteps    | 4580825  |
| value_loss         | 0.279    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2047     |
| nupdates           | 1900     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.0655   |
| total_timesteps    | 4605075  |
| value_loss         | 0.302    |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 2047     |
| nupdates           | 1910     |
| policy_entropy     | 0.891    |
| policy_loss        | -0.141   |
| total_timesteps    | 4629325  |
| value_loss         | 0.424    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.958    |
| fps                | 2049     |
| nupdates           | 2160     |
| policy_entropy     | 0.955    |
| policy_loss        | -0.0653  |
| total_timesteps    | 5235575  |
| value_loss         | 0.582    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2050     |
| nupdates           | 2170     |
| policy_entropy     | 0.952    |
| policy_loss        | -0.0715  |
| total_timesteps    | 5259825  |
| value_loss         | 0.137    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2050     |
| nupdates           | 2180     |
| policy_entropy     | 0.952    |
| policy_loss        | -0.0627  |
| total_timesteps    | 5284075  |
| value_loss         | 0.351    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

---------------------------------
| explained_variance | 0.975    |
| fps                | 2052     |
| nupdates           | 2430     |
| policy_entropy     | 0.998    |
| policy_loss        | -0.0475  |
| total_timesteps    | 5890325  |
| value_loss         | 0.445    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2052     |
| nupdates           | 2440     |
| policy_entropy     | 0.937    |
| policy_loss        | 0.0412   |
| total_timesteps    | 5914575  |
| value_loss         | 0.197    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2052     |
| nupdates           | 2450     |
| policy_entropy     | 0.847    |
| policy_loss        | 0.00759  |
| total_timesteps    | 5938825  |
| value_loss         | 0.278    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2053     |
| nupdates           | 2700     |
| policy_entropy     | 0.915    |
| policy_loss        | -0.0102  |
| total_timesteps    | 6545075  |
| value_loss         | 0.369    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2053     |
| nupdates           | 2710     |
| policy_entropy     | 0.869    |
| policy_loss        | -0.0126  |
| total_timesteps    | 6569325  |
| value_loss         | 0.319    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2053     |
| nupdates           | 2720     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0245   |
| total_timesteps    | 6593575  |
| value_loss         | 0.204    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2054     |
| nupdates           | 2970     |
| policy_entropy     | 0.77     |
| policy_loss        | 0.107    |
| total_timesteps    | 7199825  |
| value_loss         | 0.16     |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2054     |
| nupdates           | 2980     |
| policy_entropy     | 0.852    |
| policy_loss        | -0.127   |
| total_timesteps    | 7224075  |
| value_loss         | 0.363    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2054     |
| nupdates           | 2990     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.0673  |
| total_timesteps    | 7248325  |
| value_loss         | 0.296    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2053     |
| nupdates           | 3240     |
| policy_entropy     | 0.846    |
| policy_loss        | 0.136    |
| total_timesteps    | 7854575  |
| value_loss         | 0.229    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2052     |
| nupdates           | 3250     |
| policy_entropy     | 0.866    |
| policy_loss        | -0.0246  |
| total_timesteps    | 7878825  |
| value_loss         | 0.434    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2052     |
| nupdates           | 3260     |
| policy_entropy     | 0.8      |
| policy_loss        | 0.111    |
| total_timesteps    | 7903075  |
| value_loss         | 0.296    |
---------------------------------
---------------------------------
| explained_variance | 0.924    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2049     |
| nupdates           | 3510     |
| policy_entropy     | 0.939    |
| policy_loss        | -0.0477  |
| total_timesteps    | 8509325  |
| value_loss         | 0.277    |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 2049     |
| nupdates           | 3520     |
| policy_entropy     | 0.853    |
| policy_loss        | -0.0668  |
| total_timesteps    | 8533575  |
| value_loss         | 0.585    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2049     |
| nupdates           | 3530     |
| policy_entropy     | 0.831    |
| policy_loss        | -0.0179  |
| total_timesteps    | 8557825  |
| value_loss         | 0.391    |
---------------------------------
---------------------------------
| explained_variance | 0.93     |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2042     |
| nupdates           | 3780     |
| policy_entropy     | 0.857    |
| policy_loss        | 0.0423   |
| total_timesteps    | 9164075  |
| value_loss         | 0.174    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2042     |
| nupdates           | 3790     |
| policy_entropy     | 0.798    |
| policy_loss        | 0.0997   |
| total_timesteps    | 9188325  |
| value_loss         | 0.162    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2041     |
| nupdates           | 3800     |
| policy_entropy     | 0.812    |
| policy_loss        | 0.0711   |
| total_timesteps    | 9212575  |
| value_loss         | 0.148    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.97     |
| fps                | 2030     |
| nupdates           | 4050     |
| policy_entropy     | 0.937    |
| policy_loss        | 0.00126  |
| total_timesteps    | 9818825  |
| value_loss         | 0.382    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2030     |
| nupdates           | 4060     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0786  |
| total_timesteps    | 9843075  |
| value_loss         | 0.432    |
---------------------------------
---------------------------------
| explained_variance | 0.995    |
| fps                | 2029     |
| nupdates           | 4070     |
| policy_entropy     | 0.79     |
| policy_loss        | 0.154    |
| total_timesteps    | 9867325  |
| value_loss         | 0.123    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.969    |
| fps                | 1583     |
| nupdates           | 1        |
| policy_entropy     | 0.901    |
| policy_loss        | -0.00902 |
| total_timesteps    | 0        |
| value_loss         | 0.428    |
---------------------------------
---------------------------------
| explained_variance | 0.935    |
| fps                | 2620     |
| nupdates           | 10       |
| policy_entropy     | 0.281    |
| policy_loss        | 0.18     |
| total_timesteps    | 21825    |
| value_loss         | 0.99     |
---------------------------------
---------------------------------
| explained_variance | 0.932    |
| fps                | 2265     |
| nupdates           | 20       |
| policy_entropy     | 0.236    |
| policy_loss        | -0.297   |
| total_timesteps    | 46075    |
| value_loss         | 1.22     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.98     |
| fps                | 2041     |
| nupdates           | 270      |
| policy_entropy     | 0.539    |
| policy_loss        | 0.00207  |
| total_timesteps    | 652325   |
| value_loss         | 0.231    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2041     |
| nupdates           | 280      |
| policy_entropy     | 0.653    |
| policy_loss        | 0.000423 |
| total_timesteps    | 676575   |
| value_loss         | 0.26     |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2041     |
| nupdates           | 290      |
| policy_entropy     | 0.677    |
| policy_loss        | 0.0382   |
| total_timesteps    | 700825   |
| value_loss         | 0.317    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2041     |
| nupdates           | 540      |
| policy_entropy     | 0.867    |
| policy_loss        | -0.0798  |
| total_timesteps    | 1307075  |
| value_loss         | 0.35     |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2041     |
| nupdates           | 550      |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0506  |
| total_timesteps    | 1331325  |
| value_loss         | 0.296    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2041     |
| nupdates           | 560      |
| policy_entropy     | 0.887    |
| policy_loss        | -0.0167  |
| total_timesteps    | 1355575  |
| value_loss         | 0.261    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.984    |
| fps                | 2044     |
| nupdates           | 810      |
| policy_entropy     | 0.954    |
| policy_loss        | 0.0757   |
| total_timesteps    | 1961825  |
| value_loss         | 0.145    |
---------------------------------
---------------------------------
| explained_variance | 0.942    |
| fps                | 2044     |
| nupdates           | 820      |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0559  |
| total_timesteps    | 1986075  |
| value_loss         | 0.469    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2044     |
| nupdates           | 830      |
| policy_entropy     | 0.849    |
| policy_loss        | 0.0699   |
| total_timesteps    | 2010325  |
| value_loss         | 0.248    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2048     |
| nupdates           | 1080     |
| policy_entropy     | 0.92     |
| policy_loss        | -0.0242  |
| total_timesteps    | 2616575  |
| value_loss         | 0.318    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2048     |
| nupdates           | 1090     |
| policy_entropy     | 0.871    |
| policy_loss        | -0.0648  |
| total_timesteps    | 2640825  |
| value_loss         | 0.349    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2048     |
| nupdates           | 1100     |
| policy_entropy     | 0.826    |
| policy_loss        | -0.0629  |
| total_timesteps    | 2665075  |
| value_loss         | 0.413    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2051     |
| nupdates           | 1350     |
| policy_entropy     | 0.91     |
| policy_loss        | 0.0293   |
| total_timesteps    | 3271325  |
| value_loss         | 0.346    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2051     |
| nupdates           | 1360     |
| policy_entropy     | 0.85     |
| policy_loss        | 0.0995   |
| total_timesteps    | 3295575  |
| value_loss         | 0.279    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2052     |
| nupdates           | 1370     |
| policy_entropy     | 0.898    |
| policy_loss        | 0.0177   |
| total_timesteps    | 3319825  |
| value_loss         | 0.142    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.965    |
| fps                | 2055     |
| nupdates           | 1620     |
| policy_entropy     | 0.757    |
| policy_loss        | -0.118   |
| total_timesteps    | 3926075  |
| value_loss         | 0.462    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2055     |
| nupdates           | 1630     |
| policy_entropy     | 0.776    |
| policy_loss        | 0.118    |
| total_timesteps    | 3950325  |
| value_loss         | 0.158    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2055     |
| nupdates           | 1640     |
| policy_entropy     | 0.851    |
| policy_loss        | 0.0184   |
| total_timesteps    | 3974575  |
| value_loss         | 0.304    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2058     |
| nupdates           | 1890     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0374   |
| total_timesteps    | 4580825  |
| value_loss         | 0.224    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2058     |
| nupdates           | 1900     |
| policy_entropy     | 0.874    |
| policy_loss        | 0.11     |
| total_timesteps    | 4605075  |
| value_loss         | 0.235    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2058     |
| nupdates           | 1910     |
| policy_entropy     | 0.919    |
| policy_loss        | 0.0039   |
| total_timesteps    | 4629325  |
| value_loss         | 0.168    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2060     |
| nupdates           | 2160     |
| policy_entropy     | 0.888    |
| policy_loss        | -0.0306  |
| total_timesteps    | 5235575  |
| value_loss         | 0.306    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2060     |
| nupdates           | 2170     |
| policy_entropy     | 0.909    |
| policy_loss        | -0.082   |
| total_timesteps    | 5259825  |
| value_loss         | 0.331    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2060     |
| nupdates           | 2180     |
| policy_entropy     | 0.897    |
| policy_loss        | 0.0129   |
| total_timesteps    | 5284075  |
| value_loss         | 0.217    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2062     |
| nupdates           | 2430     |
| policy_entropy     | 0.878    |
| policy_loss        | -0.05    |
| total_timesteps    | 5890325  |
| value_loss         | 0.298    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2062     |
| nupdates           | 2440     |
| policy_entropy     | 0.876    |
| policy_loss        | 0.0416   |
| total_timesteps    | 5914575  |
| value_loss         | 0.216    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2062     |
| nupdates           | 2450     |
| policy_entropy     | 0.856    |
| policy_loss        | -0.0418  |
| total_timesteps    | 5938825  |
| value_loss         | 0.285    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.996    |
| fps                | 2062     |
| nupdates           | 2700     |
| policy_entropy     | 0.846    |
| policy_loss        | 0.127    |
| total_timesteps    | 6545075  |
| value_loss         | 0.125    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2062     |
| nupdates           | 2710     |
| policy_entropy     | 0.827    |
| policy_loss        | -0.0111  |
| total_timesteps    | 6569325  |
| value_loss         | 0.214    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2062     |
| nupdates           | 2720     |
| policy_entropy     | 0.862    |
| policy_loss        | -0.0108  |
| total_timesteps    | 6593575  |
| value_loss         | 0.545    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2061     |
| nupdates           | 2970     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.123   |
| total_timesteps    | 7199825  |
| value_loss         | 0.528    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2061     |
| nupdates           | 2980     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.0401  |
| total_timesteps    | 7224075  |
| value_loss         | 0.373    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2061     |
| nupdates           | 2990     |
| policy_entropy     | 0.885    |
| policy_loss        | 0.024    |
| total_timesteps    | 7248325  |
| value_loss         | 0.328    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps         

---------------------------------
| explained_variance | 0.987    |
| fps                | 2056     |
| nupdates           | 3240     |
| policy_entropy     | 0.906    |
| policy_loss        | -0.00439 |
| total_timesteps    | 7854575  |
| value_loss         | 0.218    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2056     |
| nupdates           | 3250     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.0376  |
| total_timesteps    | 7878825  |
| value_loss         | 0.466    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2055     |
| nupdates           | 3260     |
| policy_entropy     | 0.958    |
| policy_loss        | -0.203   |
| total_timesteps    | 7903075  |
| value_loss         | 0.523    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2047     |
| nupdates           | 3510     |
| policy_entropy     | 0.862    |
| policy_loss        | 0.0774   |
| total_timesteps    | 8509325  |
| value_loss         | 0.231    |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps                | 2046     |
| nupdates           | 3520     |
| policy_entropy     | 0.87     |
| policy_loss        | -0.284   |
| total_timesteps    | 8533575  |
| value_loss         | 0.986    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2046     |
| nupdates           | 3530     |
| policy_entropy     | 0.867    |
| policy_loss        | 0.0414   |
| total_timesteps    | 8557825  |
| value_loss         | 0.197    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2032     |
| nupdates           | 3780     |
| policy_entropy     | 0.858    |
| policy_loss        | 0.0172   |
| total_timesteps    | 9164075  |
| value_loss         | 0.204    |
---------------------------------
----------------------------------
| explained_variance | 0.984     |
| fps                | 2031      |
| nupdates           | 3790      |
| policy_entropy     | 0.852     |
| policy_loss        | -0.000334 |
| total_timesteps    | 9188325   |
| value_loss         | 0.246     |
----------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2031     |
| nupdates           | 3800     |
| policy_entropy     | 0.915    |
| policy_loss        | 0.0201   |
| total_timesteps    | 9212575  |
| value_loss         | 0.286    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps

---------------------------------
| explained_variance | 0.963    |
| fps                | 2012     |
| nupdates           | 4050     |
| policy_entropy     | 0.906    |
| policy_loss        | 0.0353   |
| total_timesteps    | 9818825  |
| value_loss         | 0.414    |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps                | 2011     |
| nupdates           | 4060     |
| policy_entropy     | 0.912    |
| policy_loss        | -0.171   |
| total_timesteps    | 9843075  |
| value_loss         | 0.711    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2010     |
| nupdates           | 4070     |
| policy_entropy     | 0.944    |
| policy_loss        | 0.066    |
| total_timesteps    | 9867325  |
| value_loss         | 0.272    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.954    |
| fps                | 1580     |
| nupdates           | 1        |
| policy_entropy     | 0.841    |
| policy_loss        | -0.0699  |
| total_timesteps    | 0        |
| value_loss         | 0.514    |
---------------------------------
---------------------------------
| explained_variance | 0.917    |
| fps                | 2618     |
| nupdates           | 10       |
| policy_entropy     | 0.822    |
| policy_loss        | 1.17     |
| total_timesteps    | 21825    |
| value_loss         | 3.09     |
---------------------------------
---------------------------------
| explained_variance | 0.897    |
| fps                | 2275     |
| nupdates           | 20       |
| policy_entropy     | 0.887    |
| policy_loss        | -0.656   |
| total_timesteps    | 46075    |
| value_loss         | 1.89     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.991    |
| fps                | 2036     |
| nupdates           | 270      |
| policy_entropy     | 0.878    |
| policy_loss        | 0.0234   |
| total_timesteps    | 652325   |
| value_loss         | 0.123    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2036     |
| nupdates           | 280      |
| policy_entropy     | 0.87     |
| policy_loss        | 0.0404   |
| total_timesteps    | 676575   |
| value_loss         | 0.188    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2036     |
| nupdates           | 290      |
| policy_entropy     | 0.925    |
| policy_loss        | 0.0339   |
| total_timesteps    | 700825   |
| value_loss         | 0.496    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2036     |
| nupdates           | 540      |
| policy_entropy     | 0.86     |
| policy_loss        | 0.00946  |
| total_timesteps    | 1307075  |
| value_loss         | 0.232    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2036     |
| nupdates           | 550      |
| policy_entropy     | 0.842    |
| policy_loss        | -0.00423 |
| total_timesteps    | 1331325  |
| value_loss         | 0.257    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2036     |
| nupdates           | 560      |
| policy_entropy     | 0.787    |
| policy_loss        | -0.0188  |
| total_timesteps    | 1355575  |
| value_loss         | 0.286    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2039     |
| nupdates           | 810      |
| policy_entropy     | 0.869    |
| policy_loss        | -0.0751  |
| total_timesteps    | 1961825  |
| value_loss         | 0.518    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2039     |
| nupdates           | 820      |
| policy_entropy     | 0.828    |
| policy_loss        | 0.143    |
| total_timesteps    | 1986075  |
| value_loss         | 0.31     |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2039     |
| nupdates           | 830      |
| policy_entropy     | 0.878    |
| policy_loss        | -0.132   |
| total_timesteps    | 2010325  |
| value_loss         | 0.544    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2043     |
| nupdates           | 1080     |
| policy_entropy     | 0.831    |
| policy_loss        | 0.0138   |
| total_timesteps    | 2616575  |
| value_loss         | 0.278    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2043     |
| nupdates           | 1090     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.0494  |
| total_timesteps    | 2640825  |
| value_loss         | 0.3      |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2043     |
| nupdates           | 1100     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0247  |
| total_timesteps    | 2665075  |
| value_loss         | 0.465    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps         

---------------------------------
| explained_variance | 0.994    |
| fps                | 2046     |
| nupdates           | 1350     |
| policy_entropy     | 0.883    |
| policy_loss        | 0.167    |
| total_timesteps    | 3271325  |
| value_loss         | 0.111    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2046     |
| nupdates           | 1360     |
| policy_entropy     | 0.845    |
| policy_loss        | 0.0418   |
| total_timesteps    | 3295575  |
| value_loss         | 0.102    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2046     |
| nupdates           | 1370     |
| policy_entropy     | 0.919    |
| policy_loss        | 0.0264   |
| total_timesteps    | 3319825  |
| value_loss         | 0.215    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

---------------------------------
| explained_variance | 0.97     |
| fps                | 2048     |
| nupdates           | 1620     |
| policy_entropy     | 0.95     |
| policy_loss        | -0.0904  |
| total_timesteps    | 3926075  |
| value_loss         | 0.504    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2048     |
| nupdates           | 1630     |
| policy_entropy     | 0.962    |
| policy_loss        | 0.0525   |
| total_timesteps    | 3950325  |
| value_loss         | 0.395    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2048     |
| nupdates           | 1640     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.0843  |
| total_timesteps    | 3974575  |
| value_loss         | 0.527    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2052     |
| nupdates           | 1890     |
| policy_entropy     | 0.898    |
| policy_loss        | 0.00707  |
| total_timesteps    | 4580825  |
| value_loss         | 0.28     |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2052     |
| nupdates           | 1900     |
| policy_entropy     | 0.903    |
| policy_loss        | 0.0739   |
| total_timesteps    | 4605075  |
| value_loss         | 0.16     |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2052     |
| nupdates           | 1910     |
| policy_entropy     | 0.916    |
| policy_loss        | -0.102   |
| total_timesteps    | 4629325  |
| value_loss         | 0.359    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps         

---------------------------------
| explained_variance | 0.915    |
| fps                | 2055     |
| nupdates           | 2160     |
| policy_entropy     | 0.914    |
| policy_loss        | -0.111   |
| total_timesteps    | 5235575  |
| value_loss         | 0.693    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2055     |
| nupdates           | 2170     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.168    |
| total_timesteps    | 5259825  |
| value_loss         | 0.221    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2055     |
| nupdates           | 2180     |
| policy_entropy     | 0.864    |
| policy_loss        | 0.00871  |
| total_timesteps    | 5284075  |
| value_loss         | 0.212    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps         

---------------------------------
| explained_variance | 0.957    |
| fps                | 2056     |
| nupdates           | 2430     |
| policy_entropy     | 0.964    |
| policy_loss        | -0.161   |
| total_timesteps    | 5890325  |
| value_loss         | 0.502    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2056     |
| nupdates           | 2440     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.107   |
| total_timesteps    | 5914575  |
| value_loss         | 0.443    |
---------------------------------
---------------------------------
| explained_variance | 0.95     |
| fps                | 2056     |
| nupdates           | 2450     |
| policy_entropy     | 0.851    |
| policy_loss        | 0.0915   |
| total_timesteps    | 5938825  |
| value_loss         | 0.609    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps         

---------------------------------
| explained_variance | 0.938    |
| fps                | 2057     |
| nupdates           | 2700     |
| policy_entropy     | 0.881    |
| policy_loss        | -0.155   |
| total_timesteps    | 6545075  |
| value_loss         | 0.615    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2057     |
| nupdates           | 2710     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0774  |
| total_timesteps    | 6569325  |
| value_loss         | 0.378    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2058     |
| nupdates           | 2720     |
| policy_entropy     | 0.871    |
| policy_loss        | 0.107    |
| total_timesteps    | 6593575  |
| value_loss         | 0.144    |
---------------------------------
---------------------------------
| explained_variance | 0.932    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2058     |
| nupdates           | 2970     |
| policy_entropy     | 0.854    |
| policy_loss        | 0.0518   |
| total_timesteps    | 7199825  |
| value_loss         | 0.231    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2058     |
| nupdates           | 2980     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0373  |
| total_timesteps    | 7224075  |
| value_loss         | 0.318    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2058     |
| nupdates           | 2990     |
| policy_entropy     | 0.9      |
| policy_loss        | -0.0166  |
| total_timesteps    | 7248325  |
| value_loss         | 0.24     |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2058     |
| nupdates           | 3240     |
| policy_entropy     | 0.924    |
| policy_loss        | 0.00464  |
| total_timesteps    | 7854575  |
| value_loss         | 0.282    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2058     |
| nupdates           | 3250     |
| policy_entropy     | 0.909    |
| policy_loss        | 0.00656  |
| total_timesteps    | 7878825  |
| value_loss         | 0.306    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2058     |
| nupdates           | 3260     |
| policy_entropy     | 0.847    |
| policy_loss        | -0.0875  |
| total_timesteps    | 7903075  |
| value_loss         | 0.403    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2056     |
| nupdates           | 3510     |
| policy_entropy     | 0.941    |
| policy_loss        | 0.0162   |
| total_timesteps    | 8509325  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2056     |
| nupdates           | 3520     |
| policy_entropy     | 0.841    |
| policy_loss        | 0.0515   |
| total_timesteps    | 8533575  |
| value_loss         | 0.324    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2056     |
| nupdates           | 3530     |
| policy_entropy     | 0.956    |
| policy_loss        | 0.0159   |
| total_timesteps    | 8557825  |
| value_loss         | 0.144    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.959    |
| fps                | 2052     |
| nupdates           | 3780     |
| policy_entropy     | 0.964    |
| policy_loss        | -0.144   |
| total_timesteps    | 9164075  |
| value_loss         | 0.482    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2052     |
| nupdates           | 3790     |
| policy_entropy     | 0.934    |
| policy_loss        | -0.103   |
| total_timesteps    | 9188325  |
| value_loss         | 0.559    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2052     |
| nupdates           | 3800     |
| policy_entropy     | 0.881    |
| policy_loss        | -0.0418  |
| total_timesteps    | 9212575  |
| value_loss         | 0.292    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps         

---------------------------------
| explained_variance | 0.955    |
| fps                | 2045     |
| nupdates           | 4050     |
| policy_entropy     | 0.9      |
| policy_loss        | 0.00839  |
| total_timesteps    | 9818825  |
| value_loss         | 0.497    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2045     |
| nupdates           | 4060     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.119   |
| total_timesteps    | 9843075  |
| value_loss         | 0.463    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2045     |
| nupdates           | 4070     |
| policy_entropy     | 0.91     |
| policy_loss        | -0.0846  |
| total_timesteps    | 9867325  |
| value_loss         | 0.355    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.949    |
| fps                | 1565     |
| nupdates           | 1        |
| policy_entropy     | 0.884    |
| policy_loss        | -0.185   |
| total_timesteps    | 0        |
| value_loss         | 0.624    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps                | 2592     |
| nupdates           | 10       |
| policy_entropy     | 0.832    |
| policy_loss        | 0.502    |
| total_timesteps    | 21825    |
| value_loss         | 0.879    |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps                | 2258     |
| nupdates           | 20       |
| policy_entropy     | 0.823    |
| policy_loss        | -1.3     |
| total_timesteps    | 46075    |
| value_loss         | 3.41     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.971    |
| fps                | 2034     |
| nupdates           | 270      |
| policy_entropy     | 0.855    |
| policy_loss        | 0.0504   |
| total_timesteps    | 652325   |
| value_loss         | 0.409    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2033     |
| nupdates           | 280      |
| policy_entropy     | 0.854    |
| policy_loss        | -0.00855 |
| total_timesteps    | 676575   |
| value_loss         | 0.336    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2033     |
| nupdates           | 290      |
| policy_entropy     | 0.84     |
| policy_loss        | -0.0177  |
| total_timesteps    | 700825   |
| value_loss         | 0.247    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2031     |
| nupdates           | 540      |
| policy_entropy     | 0.871    |
| policy_loss        | 0.00891  |
| total_timesteps    | 1307075  |
| value_loss         | 0.16     |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2031     |
| nupdates           | 550      |
| policy_entropy     | 0.852    |
| policy_loss        | -0.117   |
| total_timesteps    | 1331325  |
| value_loss         | 0.4      |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2031     |
| nupdates           | 560      |
| policy_entropy     | 0.873    |
| policy_loss        | 0.0303   |
| total_timesteps    | 1355575  |
| value_loss         | 0.326    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2033     |
| nupdates           | 810      |
| policy_entropy     | 0.896    |
| policy_loss        | 0.032    |
| total_timesteps    | 1961825  |
| value_loss         | 0.358    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2033     |
| nupdates           | 820      |
| policy_entropy     | 0.986    |
| policy_loss        | -0.12    |
| total_timesteps    | 1986075  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2033     |
| nupdates           | 830      |
| policy_entropy     | 0.921    |
| policy_loss        | -0.0236  |
| total_timesteps    | 2010325  |
| value_loss         | 0.483    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2036     |
| nupdates           | 1080     |
| policy_entropy     | 0.865    |
| policy_loss        | 0.0156   |
| total_timesteps    | 2616575  |
| value_loss         | 0.262    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2036     |
| nupdates           | 1090     |
| policy_entropy     | 0.917    |
| policy_loss        | 0.027    |
| total_timesteps    | 2640825  |
| value_loss         | 0.202    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2036     |
| nupdates           | 1100     |
| policy_entropy     | 0.894    |
| policy_loss        | 0.14     |
| total_timesteps    | 2665075  |
| value_loss         | 0.237    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2040     |
| nupdates           | 1350     |
| policy_entropy     | 0.902    |
| policy_loss        | -0.044   |
| total_timesteps    | 3271325  |
| value_loss         | 0.313    |
---------------------------------
---------------------------------
| explained_variance | 0.943    |
| fps                | 2040     |
| nupdates           | 1360     |
| policy_entropy     | 0.873    |
| policy_loss        | -0.117   |
| total_timesteps    | 3295575  |
| value_loss         | 0.688    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2040     |
| nupdates           | 1370     |
| policy_entropy     | 0.864    |
| policy_loss        | 0.00283  |
| total_timesteps    | 3319825  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.965    |
| fps                | 2043     |
| nupdates           | 1620     |
| policy_entropy     | 1.08     |
| policy_loss        | -0.119   |
| total_timesteps    | 3926075  |
| value_loss         | 0.345    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2043     |
| nupdates           | 1630     |
| policy_entropy     | 0.979    |
| policy_loss        | 0.0144   |
| total_timesteps    | 3950325  |
| value_loss         | 0.304    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2043     |
| nupdates           | 1640     |
| policy_entropy     | 0.962    |
| policy_loss        | -0.107   |
| total_timesteps    | 3974575  |
| value_loss         | 0.388    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

---------------------------------
| explained_variance | 0.969    |
| fps                | 2046     |
| nupdates           | 1890     |
| policy_entropy     | 0.942    |
| policy_loss        | -0.00542 |
| total_timesteps    | 4580825  |
| value_loss         | 0.513    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2046     |
| nupdates           | 1900     |
| policy_entropy     | 0.938    |
| policy_loss        | 0.0279   |
| total_timesteps    | 4605075  |
| value_loss         | 0.273    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2046     |
| nupdates           | 1910     |
| policy_entropy     | 0.936    |
| policy_loss        | -0.088   |
| total_timesteps    | 4629325  |
| value_loss         | 0.454    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2049     |
| nupdates           | 2160     |
| policy_entropy     | 0.898    |
| policy_loss        | 0.0274   |
| total_timesteps    | 5235575  |
| value_loss         | 0.387    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2049     |
| nupdates           | 2170     |
| policy_entropy     | 0.808    |
| policy_loss        | 0.0602   |
| total_timesteps    | 5259825  |
| value_loss         | 0.316    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2049     |
| nupdates           | 2180     |
| policy_entropy     | 0.905    |
| policy_loss        | 0.0145   |
| total_timesteps    | 5284075  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.97     |
| fps                | 2051     |
| nupdates           | 2430     |
| policy_entropy     | 0.897    |
| policy_loss        | -0.0522  |
| total_timesteps    | 5890325  |
| value_loss         | 0.461    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2051     |
| nupdates           | 2440     |
| policy_entropy     | 0.92     |
| policy_loss        | -0.0833  |
| total_timesteps    | 5914575  |
| value_loss         | 0.236    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2051     |
| nupdates           | 2450     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.0555  |
| total_timesteps    | 5938825  |
| value_loss         | 0.337    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2052     |
| nupdates           | 2700     |
| policy_entropy     | 0.871    |
| policy_loss        | -0.00999 |
| total_timesteps    | 6545075  |
| value_loss         | 0.232    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2052     |
| nupdates           | 2710     |
| policy_entropy     | 0.865    |
| policy_loss        | 0.104    |
| total_timesteps    | 6569325  |
| value_loss         | 0.207    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2052     |
| nupdates           | 2720     |
| policy_entropy     | 0.883    |
| policy_loss        | 0.0437   |
| total_timesteps    | 6593575  |
| value_loss         | 0.254    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2052     |
| nupdates           | 2970     |
| policy_entropy     | 0.958    |
| policy_loss        | -0.0685  |
| total_timesteps    | 7199825  |
| value_loss         | 0.25     |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2052     |
| nupdates           | 2980     |
| policy_entropy     | 0.911    |
| policy_loss        | 0.0353   |
| total_timesteps    | 7224075  |
| value_loss         | 0.389    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2052     |
| nupdates           | 2990     |
| policy_entropy     | 0.927    |
| policy_loss        | -0.0109  |
| total_timesteps    | 7248325  |
| value_loss         | 0.258    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2051     |
| nupdates           | 3240     |
| policy_entropy     | 0.828    |
| policy_loss        | 0.000249 |
| total_timesteps    | 7854575  |
| value_loss         | 0.411    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2051     |
| nupdates           | 3250     |
| policy_entropy     | 0.873    |
| policy_loss        | -0.101   |
| total_timesteps    | 7878825  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2051     |
| nupdates           | 3260     |
| policy_entropy     | 0.847    |
| policy_loss        | 0.045    |
| total_timesteps    | 7903075  |
| value_loss         | 0.156    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps         

---------------------------------
| explained_variance | 0.96     |
| fps                | 2046     |
| nupdates           | 3510     |
| policy_entropy     | 0.848    |
| policy_loss        | -0.0482  |
| total_timesteps    | 8509325  |
| value_loss         | 0.43     |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2046     |
| nupdates           | 3520     |
| policy_entropy     | 0.901    |
| policy_loss        | -0.0151  |
| total_timesteps    | 8533575  |
| value_loss         | 0.329    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2046     |
| nupdates           | 3530     |
| policy_entropy     | 0.866    |
| policy_loss        | 0.0251   |
| total_timesteps    | 8557825  |
| value_loss         | 0.272    |
---------------------------------
---------------------------------
| explained_variance | 0.942    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2039     |
| nupdates           | 3780     |
| policy_entropy     | 0.866    |
| policy_loss        | 0.0285   |
| total_timesteps    | 9164075  |
| value_loss         | 0.18     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2038     |
| nupdates           | 3790     |
| policy_entropy     | 0.844    |
| policy_loss        | 0.0905   |
| total_timesteps    | 9188325  |
| value_loss         | 0.201    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2038     |
| nupdates           | 3800     |
| policy_entropy     | 0.981    |
| policy_loss        | -0.278   |
| total_timesteps    | 9212575  |
| value_loss         | 0.478    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2027     |
| nupdates           | 4050     |
| policy_entropy     | 0.846    |
| policy_loss        | 0.0589   |
| total_timesteps    | 9818825  |
| value_loss         | 0.298    |
---------------------------------
---------------------------------
| explained_variance | 0.951    |
| fps                | 2027     |
| nupdates           | 4060     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.149   |
| total_timesteps    | 9843075  |
| value_loss         | 0.658    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2026     |
| nupdates           | 4070     |
| policy_entropy     | 0.911    |
| policy_loss        | 0.00901  |
| total_timesteps    | 9867325  |
| value_loss         | 0.311    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.983    |
| fps                | 1558     |
| nupdates           | 1        |
| policy_entropy     | 0.862    |
| policy_loss        | -0.024   |
| total_timesteps    | 0        |
| value_loss         | 0.23     |
---------------------------------
---------------------------------
| explained_variance | 0.87     |
| fps                | 2601     |
| nupdates           | 10       |
| policy_entropy     | 0.583    |
| policy_loss        | 1.19     |
| total_timesteps    | 21825    |
| value_loss         | 4.64     |
---------------------------------
---------------------------------
| explained_variance | 0.907    |
| fps                | 2263     |
| nupdates           | 20       |
| policy_entropy     | 0.553    |
| policy_loss        | -0.0518  |
| total_timesteps    | 46075    |
| value_loss         | 1.48     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.956    |
| fps                | 2032     |
| nupdates           | 270      |
| policy_entropy     | 0.89     |
| policy_loss        | -0.162   |
| total_timesteps    | 652325   |
| value_loss         | 0.503    |
---------------------------------
----------------------------------
| explained_variance | 0.985     |
| fps                | 2032      |
| nupdates           | 280       |
| policy_entropy     | 0.872     |
| policy_loss        | -0.000656 |
| total_timesteps    | 676575    |
| value_loss         | 0.186     |
----------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps                | 2031     |
| nupdates           | 290      |
| policy_entropy     | 0.87     |
| policy_loss        | 0.0143   |
| total_timesteps    | 700825   |
| value_loss         | 0.166    |
---------------------------------
---------------------------------
| explained_variance | 0.954    |
| fps

---------------------------------
| explained_variance | 0.988    |
| fps                | 2031     |
| nupdates           | 540      |
| policy_entropy     | 0.826    |
| policy_loss        | 0.0847   |
| total_timesteps    | 1307075  |
| value_loss         | 0.212    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2032     |
| nupdates           | 550      |
| policy_entropy     | 0.848    |
| policy_loss        | -0.0572  |
| total_timesteps    | 1331325  |
| value_loss         | 0.199    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2032     |
| nupdates           | 560      |
| policy_entropy     | 0.878    |
| policy_loss        | 0.0213   |
| total_timesteps    | 1355575  |
| value_loss         | 0.249    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.924    |
| fps                | 2035     |
| nupdates           | 810      |
| policy_entropy     | 0.875    |
| policy_loss        | -0.0254  |
| total_timesteps    | 1961825  |
| value_loss         | 0.575    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2035     |
| nupdates           | 820      |
| policy_entropy     | 0.876    |
| policy_loss        | -0.0454  |
| total_timesteps    | 1986075  |
| value_loss         | 0.246    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2035     |
| nupdates           | 830      |
| policy_entropy     | 0.929    |
| policy_loss        | 0.0356   |
| total_timesteps    | 2010325  |
| value_loss         | 0.413    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2039     |
| nupdates           | 1080     |
| policy_entropy     | 0.904    |
| policy_loss        | -0.0913  |
| total_timesteps    | 2616575  |
| value_loss         | 0.424    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 2039     |
| nupdates           | 1090     |
| policy_entropy     | 0.909    |
| policy_loss        | 0.00628  |
| total_timesteps    | 2640825  |
| value_loss         | 0.323    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2039     |
| nupdates           | 1100     |
| policy_entropy     | 0.878    |
| policy_loss        | -0.0487  |
| total_timesteps    | 2665075  |
| value_loss         | 0.292    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps         

---------------------------------
| explained_variance | 0.989    |
| fps                | 2042     |
| nupdates           | 1350     |
| policy_entropy     | 0.825    |
| policy_loss        | 0.187    |
| total_timesteps    | 3271325  |
| value_loss         | 0.166    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2042     |
| nupdates           | 1360     |
| policy_entropy     | 0.849    |
| policy_loss        | -0.0516  |
| total_timesteps    | 3295575  |
| value_loss         | 0.26     |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2042     |
| nupdates           | 1370     |
| policy_entropy     | 0.872    |
| policy_loss        | 0.0506   |
| total_timesteps    | 3319825  |
| value_loss         | 0.299    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2045     |
| nupdates           | 1620     |
| policy_entropy     | 0.895    |
| policy_loss        | 0.16     |
| total_timesteps    | 3926075  |
| value_loss         | 0.318    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2045     |
| nupdates           | 1630     |
| policy_entropy     | 0.916    |
| policy_loss        | -0.0316  |
| total_timesteps    | 3950325  |
| value_loss         | 0.27     |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2045     |
| nupdates           | 1640     |
| policy_entropy     | 0.929    |
| policy_loss        | 0.0294   |
| total_timesteps    | 3974575  |
| value_loss         | 0.334    |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps         

---------------------------------
| explained_variance | 0.967    |
| fps                | 2047     |
| nupdates           | 1890     |
| policy_entropy     | 0.915    |
| policy_loss        | -0.146   |
| total_timesteps    | 4580825  |
| value_loss         | 0.448    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2047     |
| nupdates           | 1900     |
| policy_entropy     | 0.883    |
| policy_loss        | 0.0145   |
| total_timesteps    | 4605075  |
| value_loss         | 0.301    |
---------------------------------
---------------------------------
| explained_variance | 0.959    |
| fps                | 2047     |
| nupdates           | 1910     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0727  |
| total_timesteps    | 4629325  |
| value_loss         | 0.449    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2049     |
| nupdates           | 2160     |
| policy_entropy     | 0.916    |
| policy_loss        | -0.00119 |
| total_timesteps    | 5235575  |
| value_loss         | 0.284    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps                | 2049     |
| nupdates           | 2170     |
| policy_entropy     | 0.944    |
| policy_loss        | -0.0572  |
| total_timesteps    | 5259825  |
| value_loss         | 0.175    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 2049     |
| nupdates           | 2180     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.125   |
| total_timesteps    | 5284075  |
| value_loss         | 0.523    |
---------------------------------
---------------------------------
| explained_variance | 0.939    |
| fps         

---------------------------------
| explained_variance | 0.958    |
| fps                | 2050     |
| nupdates           | 2430     |
| policy_entropy     | 0.961    |
| policy_loss        | -0.0217  |
| total_timesteps    | 5890325  |
| value_loss         | 0.505    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2050     |
| nupdates           | 2440     |
| policy_entropy     | 0.953    |
| policy_loss        | -0.0284  |
| total_timesteps    | 5914575  |
| value_loss         | 0.459    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2050     |
| nupdates           | 2450     |
| policy_entropy     | 0.883    |
| policy_loss        | -0.121   |
| total_timesteps    | 5938825  |
| value_loss         | 0.483    |
---------------------------------
---------------------------------
| explained_variance | 0.99     |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2050     |
| nupdates           | 2700     |
| policy_entropy     | 0.813    |
| policy_loss        | 0.075    |
| total_timesteps    | 6545075  |
| value_loss         | 0.34     |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps                | 2050     |
| nupdates           | 2710     |
| policy_entropy     | 0.83     |
| policy_loss        | 0.0362   |
| total_timesteps    | 6569325  |
| value_loss         | 0.299    |
---------------------------------
---------------------------------
| explained_variance | 0.967    |
| fps                | 2050     |
| nupdates           | 2720     |
| policy_entropy     | 0.879    |
| policy_loss        | -0.116   |
| total_timesteps    | 6593575  |
| value_loss         | 0.489    |
---------------------------------
---------------------------------
| explained_variance | 0.935    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2048     |
| nupdates           | 2970     |
| policy_entropy     | 0.925    |
| policy_loss        | -0.02    |
| total_timesteps    | 7199825  |
| value_loss         | 0.312    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2048     |
| nupdates           | 2980     |
| policy_entropy     | 0.909    |
| policy_loss        | 0.00825  |
| total_timesteps    | 7224075  |
| value_loss         | 0.218    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2047     |
| nupdates           | 2990     |
| policy_entropy     | 0.858    |
| policy_loss        | -0.0497  |
| total_timesteps    | 7248325  |
| value_loss         | 0.309    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2042     |
| nupdates           | 3240     |
| policy_entropy     | 0.892    |
| policy_loss        | -0.0804  |
| total_timesteps    | 7854575  |
| value_loss         | 0.328    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2041     |
| nupdates           | 3250     |
| policy_entropy     | 0.945    |
| policy_loss        | 0.0182   |
| total_timesteps    | 7878825  |
| value_loss         | 0.283    |
---------------------------------
---------------------------------
| explained_variance | 0.92     |
| fps                | 2041     |
| nupdates           | 3260     |
| policy_entropy     | 0.832    |
| policy_loss        | -0.12    |
| total_timesteps    | 7903075  |
| value_loss         | 0.889    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2030     |
| nupdates           | 3510     |
| policy_entropy     | 0.98     |
| policy_loss        | -0.035   |
| total_timesteps    | 8509325  |
| value_loss         | 0.366    |
---------------------------------
---------------------------------
| explained_variance | 0.909    |
| fps                | 2029     |
| nupdates           | 3520     |
| policy_entropy     | 0.897    |
| policy_loss        | -0.133   |
| total_timesteps    | 8533575  |
| value_loss         | 0.874    |
---------------------------------
---------------------------------
| explained_variance | 0.909    |
| fps                | 2029     |
| nupdates           | 3530     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.0898  |
| total_timesteps    | 8557825  |
| value_loss         | 0.917    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

---------------------------------
| explained_variance | 0.986    |
| fps                | 2013     |
| nupdates           | 3780     |
| policy_entropy     | 0.955    |
| policy_loss        | -0.00945 |
| total_timesteps    | 9164075  |
| value_loss         | 0.245    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps                | 2012     |
| nupdates           | 3790     |
| policy_entropy     | 0.846    |
| policy_loss        | 0.105    |
| total_timesteps    | 9188325  |
| value_loss         | 0.179    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2012     |
| nupdates           | 3800     |
| policy_entropy     | 0.847    |
| policy_loss        | -0.103   |
| total_timesteps    | 9212575  |
| value_loss         | 0.434    |
---------------------------------
---------------------------------
| explained_variance | 0.937    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 1991     |
| nupdates           | 4050     |
| policy_entropy     | 0.853    |
| policy_loss        | 0.00773  |
| total_timesteps    | 9818825  |
| value_loss         | 0.27     |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 1990     |
| nupdates           | 4060     |
| policy_entropy     | 0.938    |
| policy_loss        | -0.131   |
| total_timesteps    | 9843075  |
| value_loss         | 0.456    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps                | 1989     |
| nupdates           | 4070     |
| policy_entropy     | 0.886    |
| policy_loss        | -0.0692  |
| total_timesteps    | 9867325  |
| value_loss         | 0.472    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.969    |
| fps                | 1576     |
| nupdates           | 1        |
| policy_entropy     | 0.899    |
| policy_loss        | -0.131   |
| total_timesteps    | 0        |
| value_loss         | 0.473    |
---------------------------------
---------------------------------
| explained_variance | 0.939    |
| fps                | 2607     |
| nupdates           | 10       |
| policy_entropy     | 0.881    |
| policy_loss        | 0.326    |
| total_timesteps    | 21825    |
| value_loss         | 0.928    |
---------------------------------
---------------------------------
| explained_variance | 0.937    |
| fps                | 2269     |
| nupdates           | 20       |
| policy_entropy     | 0.859    |
| policy_loss        | -0.565   |
| total_timesteps    | 46075    |
| value_loss         | 1.13     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.985    |
| fps                | 2031     |
| nupdates           | 270      |
| policy_entropy     | 0.869    |
| policy_loss        | 0.0808   |
| total_timesteps    | 652325   |
| value_loss         | 0.21     |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2031     |
| nupdates           | 280      |
| policy_entropy     | 0.866    |
| policy_loss        | 0.0234   |
| total_timesteps    | 676575   |
| value_loss         | 0.267    |
---------------------------------
---------------------------------
| explained_variance | 0.969    |
| fps                | 2030     |
| nupdates           | 290      |
| policy_entropy     | 0.883    |
| policy_loss        | -0.067   |
| total_timesteps    | 700825   |
| value_loss         | 0.422    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.975    |
| fps                | 2026     |
| nupdates           | 540      |
| policy_entropy     | 0.866    |
| policy_loss        | -0.00392 |
| total_timesteps    | 1307075  |
| value_loss         | 0.278    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2027     |
| nupdates           | 550      |
| policy_entropy     | 0.805    |
| policy_loss        | -0.0703  |
| total_timesteps    | 1331325  |
| value_loss         | 0.277    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2027     |
| nupdates           | 560      |
| policy_entropy     | 0.889    |
| policy_loss        | 0.033    |
| total_timesteps    | 1355575  |
| value_loss         | 0.422    |
---------------------------------
---------------------------------
| explained_variance | 0.943    |
| fps         

---------------------------------
| explained_variance | 0.933    |
| fps                | 2029     |
| nupdates           | 810      |
| policy_entropy     | 0.849    |
| policy_loss        | -0.0592  |
| total_timesteps    | 1961825  |
| value_loss         | 0.677    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2030     |
| nupdates           | 820      |
| policy_entropy     | 0.834    |
| policy_loss        | -0.0317  |
| total_timesteps    | 1986075  |
| value_loss         | 0.302    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2030     |
| nupdates           | 830      |
| policy_entropy     | 0.895    |
| policy_loss        | -0.0163  |
| total_timesteps    | 2010325  |
| value_loss         | 0.348    |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps         

---------------------------------
| explained_variance | 0.972    |
| fps                | 2033     |
| nupdates           | 1080     |
| policy_entropy     | 0.828    |
| policy_loss        | -0.0684  |
| total_timesteps    | 2616575  |
| value_loss         | 0.489    |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 2033     |
| nupdates           | 1090     |
| policy_entropy     | 0.908    |
| policy_loss        | -0.0475  |
| total_timesteps    | 2640825  |
| value_loss         | 0.508    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2034     |
| nupdates           | 1100     |
| policy_entropy     | 0.867    |
| policy_loss        | 0.147    |
| total_timesteps    | 2665075  |
| value_loss         | 0.18     |
---------------------------------
---------------------------------
| explained_variance | 0.933    |
| fps         

---------------------------------
| explained_variance | 0.988    |
| fps                | 2037     |
| nupdates           | 1350     |
| policy_entropy     | 0.897    |
| policy_loss        | 0.00417  |
| total_timesteps    | 3271325  |
| value_loss         | 0.131    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2037     |
| nupdates           | 1360     |
| policy_entropy     | 0.846    |
| policy_loss        | 0.0169   |
| total_timesteps    | 3295575  |
| value_loss         | 0.28     |
---------------------------------
---------------------------------
| explained_variance | 0.993    |
| fps                | 2037     |
| nupdates           | 1370     |
| policy_entropy     | 0.829    |
| policy_loss        | 0.0634   |
| total_timesteps    | 3319825  |
| value_loss         | 0.132    |
---------------------------------
---------------------------------
| explained_variance | 0.952    |
| fps         

---------------------------------
| explained_variance | 0.971    |
| fps                | 2041     |
| nupdates           | 1620     |
| policy_entropy     | 0.922    |
| policy_loss        | -0.0867  |
| total_timesteps    | 3926075  |
| value_loss         | 0.358    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2041     |
| nupdates           | 1630     |
| policy_entropy     | 0.92     |
| policy_loss        | -0.0234  |
| total_timesteps    | 3950325  |
| value_loss         | 0.402    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2041     |
| nupdates           | 1640     |
| policy_entropy     | 0.775    |
| policy_loss        | 0.0861   |
| total_timesteps    | 3974575  |
| value_loss         | 0.269    |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2045     |
| nupdates           | 1890     |
| policy_entropy     | 0.863    |
| policy_loss        | 0.0496   |
| total_timesteps    | 4580825  |
| value_loss         | 0.396    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2045     |
| nupdates           | 1900     |
| policy_entropy     | 0.848    |
| policy_loss        | -0.059   |
| total_timesteps    | 4605075  |
| value_loss         | 0.391    |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2045     |
| nupdates           | 1910     |
| policy_entropy     | 0.888    |
| policy_loss        | -0.0172  |
| total_timesteps    | 4629325  |
| value_loss         | 0.369    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.978    |
| fps                | 2047     |
| nupdates           | 2160     |
| policy_entropy     | 0.907    |
| policy_loss        | 0.0311   |
| total_timesteps    | 5235575  |
| value_loss         | 0.336    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2047     |
| nupdates           | 2170     |
| policy_entropy     | 0.862    |
| policy_loss        | 0.0243   |
| total_timesteps    | 5259825  |
| value_loss         | 0.275    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps                | 2048     |
| nupdates           | 2180     |
| policy_entropy     | 0.9      |
| policy_loss        | 0.0339   |
| total_timesteps    | 5284075  |
| value_loss         | 0.28     |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2049     |
| nupdates           | 2430     |
| policy_entropy     | 0.969    |
| policy_loss        | 0.00744  |
| total_timesteps    | 5890325  |
| value_loss         | 0.249    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2049     |
| nupdates           | 2440     |
| policy_entropy     | 0.883    |
| policy_loss        | 0.0154   |
| total_timesteps    | 5914575  |
| value_loss         | 0.339    |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps                | 2050     |
| nupdates           | 2450     |
| policy_entropy     | 0.872    |
| policy_loss        | -0.0628  |
| total_timesteps    | 5938825  |
| value_loss         | 0.596    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2051     |
| nupdates           | 2700     |
| policy_entropy     | 0.889    |
| policy_loss        | -0.129   |
| total_timesteps    | 6545075  |
| value_loss         | 0.48     |
---------------------------------
---------------------------------
| explained_variance | 0.956    |
| fps                | 2051     |
| nupdates           | 2710     |
| policy_entropy     | 0.885    |
| policy_loss        | -0.135   |
| total_timesteps    | 6569325  |
| value_loss         | 0.619    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2051     |
| nupdates           | 2720     |
| policy_entropy     | 0.839    |
| policy_loss        | 0.164    |
| total_timesteps    | 6593575  |
| value_loss         | 0.151    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2052     |
| nupdates           | 2970     |
| policy_entropy     | 0.849    |
| policy_loss        | -0.0152  |
| total_timesteps    | 7199825  |
| value_loss         | 0.356    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps                | 2052     |
| nupdates           | 2980     |
| policy_entropy     | 0.838    |
| policy_loss        | 0.105    |
| total_timesteps    | 7224075  |
| value_loss         | 0.147    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2052     |
| nupdates           | 2990     |
| policy_entropy     | 0.896    |
| policy_loss        | 0.0308   |
| total_timesteps    | 7248325  |
| value_loss         | 0.353    |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps         

---------------------------------
| explained_variance | 0.976    |
| fps                | 2051     |
| nupdates           | 3240     |
| policy_entropy     | 0.992    |
| policy_loss        | -0.088   |
| total_timesteps    | 7854575  |
| value_loss         | 0.39     |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 2051     |
| nupdates           | 3250     |
| policy_entropy     | 0.894    |
| policy_loss        | 0.0507   |
| total_timesteps    | 7878825  |
| value_loss         | 0.275    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2051     |
| nupdates           | 3260     |
| policy_entropy     | 0.864    |
| policy_loss        | 0.0835   |
| total_timesteps    | 7903075  |
| value_loss         | 0.216    |
---------------------------------
---------------------------------
| explained_variance | 0.983    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 2049     |
| nupdates           | 3510     |
| policy_entropy     | 0.893    |
| policy_loss        | 0.0402   |
| total_timesteps    | 8509325  |
| value_loss         | 0.346    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps                | 2049     |
| nupdates           | 3520     |
| policy_entropy     | 0.902    |
| policy_loss        | -0.0499  |
| total_timesteps    | 8533575  |
| value_loss         | 0.365    |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps                | 2048     |
| nupdates           | 3530     |
| policy_entropy     | 0.919    |
| policy_loss        | -0.0271  |
| total_timesteps    | 8557825  |
| value_loss         | 0.478    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2043     |
| nupdates           | 3780     |
| policy_entropy     | 0.836    |
| policy_loss        | 0.0622   |
| total_timesteps    | 9164075  |
| value_loss         | 0.317    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2043     |
| nupdates           | 3790     |
| policy_entropy     | 0.847    |
| policy_loss        | -0.0545  |
| total_timesteps    | 9188325  |
| value_loss         | 0.328    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2043     |
| nupdates           | 3800     |
| policy_entropy     | 0.853    |
| policy_loss        | 0.0893   |
| total_timesteps    | 9212575  |
| value_loss         | 0.302    |
---------------------------------
---------------------------------
| explained_variance | 0.966    |
| fps         

---------------------------------
| explained_variance | 0.969    |
| fps                | 2033     |
| nupdates           | 4050     |
| policy_entropy     | 0.924    |
| policy_loss        | -0.0557  |
| total_timesteps    | 9818825  |
| value_loss         | 0.369    |
---------------------------------
---------------------------------
| explained_variance | 0.93     |
| fps                | 2033     |
| nupdates           | 4060     |
| policy_entropy     | 0.859    |
| policy_loss        | -0.206   |
| total_timesteps    | 9843075  |
| value_loss         | 0.82     |
---------------------------------
---------------------------------
| explained_variance | 0.986    |
| fps                | 2033     |
| nupdates           | 4070     |
| policy_entropy     | 0.992    |
| policy_loss        | -0.00431 |
| total_timesteps    | 9867325  |
| value_loss         | 0.15     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.984    |
| fps                | 1564     |
| nupdates           | 1        |
| policy_entropy     | 0.873    |
| policy_loss        | 0.00886  |
| total_timesteps    | 0        |
| value_loss         | 0.271    |
---------------------------------
---------------------------------
| explained_variance | 0.916    |
| fps                | 2611     |
| nupdates           | 10       |
| policy_entropy     | 0.883    |
| policy_loss        | -1.49    |
| total_timesteps    | 21825    |
| value_loss         | 3.75     |
---------------------------------
---------------------------------
| explained_variance | 0.926    |
| fps                | 2269     |
| nupdates           | 20       |
| policy_entropy     | 0.793    |
| policy_loss        | 1.43     |
| total_timesteps    | 46075    |
| value_loss         | 4.75     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.981    |
| fps                | 2035     |
| nupdates           | 270      |
| policy_entropy     | 0.848    |
| policy_loss        | -0.0273  |
| total_timesteps    | 652325   |
| value_loss         | 0.35     |
---------------------------------
---------------------------------
| explained_variance | 0.987    |
| fps                | 2034     |
| nupdates           | 280      |
| policy_entropy     | 0.848    |
| policy_loss        | -0.0629  |
| total_timesteps    | 676575   |
| value_loss         | 0.219    |
---------------------------------
---------------------------------
| explained_variance | 0.928    |
| fps                | 2034     |
| nupdates           | 290      |
| policy_entropy     | 0.854    |
| policy_loss        | -0.0525  |
| total_timesteps    | 700825   |
| value_loss         | 0.612    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps         

---------------------------------
| explained_variance | 0.952    |
| fps                | 2015     |
| nupdates           | 540      |
| policy_entropy     | 0.859    |
| policy_loss        | -0.0473  |
| total_timesteps    | 1307075  |
| value_loss         | 0.476    |
---------------------------------
---------------------------------
| explained_variance | 0.97     |
| fps                | 2014     |
| nupdates           | 550      |
| policy_entropy     | 0.947    |
| policy_loss        | -0.117   |
| total_timesteps    | 1331325  |
| value_loss         | 0.377    |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps                | 2014     |
| nupdates           | 560      |
| policy_entropy     | 0.91     |
| policy_loss        | -0.0855  |
| total_timesteps    | 1355575  |
| value_loss         | 0.664    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.983    |
| fps                | 2011     |
| nupdates           | 810      |
| policy_entropy     | 0.852    |
| policy_loss        | 0.149    |
| total_timesteps    | 1961825  |
| value_loss         | 0.27     |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2011     |
| nupdates           | 820      |
| policy_entropy     | 0.813    |
| policy_loss        | 0.131    |
| total_timesteps    | 1986075  |
| value_loss         | 0.301    |
---------------------------------
---------------------------------
| explained_variance | 0.984    |
| fps                | 2011     |
| nupdates           | 830      |
| policy_entropy     | 0.874    |
| policy_loss        | -0.112   |
| total_timesteps    | 2010325  |
| value_loss         | 0.288    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.963    |
| fps                | 2017     |
| nupdates           | 1080     |
| policy_entropy     | 0.85     |
| policy_loss        | -0.0992  |
| total_timesteps    | 2616575  |
| value_loss         | 0.666    |
---------------------------------
---------------------------------
| explained_variance | 0.939    |
| fps                | 2017     |
| nupdates           | 1090     |
| policy_entropy     | 0.866    |
| policy_loss        | -0.16    |
| total_timesteps    | 2640825  |
| value_loss         | 0.714    |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps                | 2017     |
| nupdates           | 1100     |
| policy_entropy     | 0.892    |
| policy_loss        | -0.0707  |
| total_timesteps    | 2665075  |
| value_loss         | 0.557    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.967    |
| fps                | 2023     |
| nupdates           | 1350     |
| policy_entropy     | 0.874    |
| policy_loss        | -0.203   |
| total_timesteps    | 3271325  |
| value_loss         | 0.683    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 2023     |
| nupdates           | 1360     |
| policy_entropy     | 0.911    |
| policy_loss        | 0.129    |
| total_timesteps    | 3295575  |
| value_loss         | 0.118    |
---------------------------------
---------------------------------
| explained_variance | 0.995    |
| fps                | 2023     |
| nupdates           | 1370     |
| policy_entropy     | 0.824    |
| policy_loss        | 0.076    |
| total_timesteps    | 3319825  |
| value_loss         | 0.0697   |
---------------------------------
---------------------------------
| explained_variance | 0.973    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2029     |
| nupdates           | 1620     |
| policy_entropy     | 0.853    |
| policy_loss        | 0.113    |
| total_timesteps    | 3926075  |
| value_loss         | 0.321    |
---------------------------------
---------------------------------
| explained_variance | 0.982    |
| fps                | 2029     |
| nupdates           | 1630     |
| policy_entropy     | 0.867    |
| policy_loss        | -0.078   |
| total_timesteps    | 3950325  |
| value_loss         | 0.327    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2029     |
| nupdates           | 1640     |
| policy_entropy     | 0.866    |
| policy_loss        | 0.0149   |
| total_timesteps    | 3974575  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 2033     |
| nupdates           | 1890     |
| policy_entropy     | 0.998    |
| policy_loss        | 0.106    |
| total_timesteps    | 4580825  |
| value_loss         | 0.179    |
---------------------------------
---------------------------------
| explained_variance | 0.96     |
| fps                | 2034     |
| nupdates           | 1900     |
| policy_entropy     | 0.882    |
| policy_loss        | -0.0217  |
| total_timesteps    | 4605075  |
| value_loss         | 0.582    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 2034     |
| nupdates           | 1910     |
| policy_entropy     | 0.887    |
| policy_loss        | -0.0351  |
| total_timesteps    | 4629325  |
| value_loss         | 0.313    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps         

---------------------------------
| explained_variance | 0.977    |
| fps                | 2037     |
| nupdates           | 2160     |
| policy_entropy     | 0.842    |
| policy_loss        | -0.0282  |
| total_timesteps    | 5235575  |
| value_loss         | 0.34     |
---------------------------------
---------------------------------
| explained_variance | 0.943    |
| fps                | 2037     |
| nupdates           | 2170     |
| policy_entropy     | 0.971    |
| policy_loss        | -0.381   |
| total_timesteps    | 5259825  |
| value_loss         | 1.22     |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps                | 2037     |
| nupdates           | 2180     |
| policy_entropy     | 0.925    |
| policy_loss        | -0.125   |
| total_timesteps    | 5284075  |
| value_loss         | 0.354    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps         

---------------------------------
| explained_variance | 0.973    |
| fps                | 2040     |
| nupdates           | 2430     |
| policy_entropy     | 0.936    |
| policy_loss        | 0.0293   |
| total_timesteps    | 5890325  |
| value_loss         | 0.386    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 2040     |
| nupdates           | 2440     |
| policy_entropy     | 0.87     |
| policy_loss        | -0.0265  |
| total_timesteps    | 5914575  |
| value_loss         | 0.318    |
---------------------------------
---------------------------------
| explained_variance | 0.951    |
| fps                | 2040     |
| nupdates           | 2450     |
| policy_entropy     | 0.888    |
| policy_loss        | -0.146   |
| total_timesteps    | 5938825  |
| value_loss         | 0.699    |
---------------------------------
---------------------------------
| explained_variance | 0.988    |
| fps         

---------------------------------
| explained_variance | 0.974    |
| fps                | 2042     |
| nupdates           | 2700     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.0472  |
| total_timesteps    | 6545075  |
| value_loss         | 0.29     |
---------------------------------
---------------------------------
| explained_variance | 0.968    |
| fps                | 2042     |
| nupdates           | 2710     |
| policy_entropy     | 0.964    |
| policy_loss        | -0.101   |
| total_timesteps    | 6569325  |
| value_loss         | 0.44     |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 2042     |
| nupdates           | 2720     |
| policy_entropy     | 0.933    |
| policy_loss        | -0.0226  |
| total_timesteps    | 6593575  |
| value_loss         | 0.365    |
---------------------------------
---------------------------------
| explained_variance | 0.941    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2038     |
| nupdates           | 2970     |
| policy_entropy     | 0.935    |
| policy_loss        | 0.00714  |
| total_timesteps    | 7199825  |
| value_loss         | 0.327    |
---------------------------------
---------------------------------
| explained_variance | 0.975    |
| fps                | 2038     |
| nupdates           | 2980     |
| policy_entropy     | 0.879    |
| policy_loss        | 0.0132   |
| total_timesteps    | 7224075  |
| value_loss         | 0.392    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2038     |
| nupdates           | 2990     |
| policy_entropy     | 0.925    |
| policy_loss        | 0.0602   |
| total_timesteps    | 7248325  |
| value_loss         | 0.351    |
---------------------------------
---------------------------------
| explained_variance | 0.978    |
| fps         

---------------------------------
| explained_variance | 0.979    |
| fps                | 2034     |
| nupdates           | 3240     |
| policy_entropy     | 0.999    |
| policy_loss        | -0.0149  |
| total_timesteps    | 7854575  |
| value_loss         | 0.272    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 2034     |
| nupdates           | 3250     |
| policy_entropy     | 0.893    |
| policy_loss        | 0.0703   |
| total_timesteps    | 7878825  |
| value_loss         | 0.234    |
---------------------------------
---------------------------------
| explained_variance | 0.992    |
| fps                | 2034     |
| nupdates           | 3260     |
| policy_entropy     | 0.856    |
| policy_loss        | 0.0393   |
| total_timesteps    | 7903075  |
| value_loss         | 0.137    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 2029     |
| nupdates           | 3510     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0587  |
| total_timesteps    | 8509325  |
| value_loss         | 0.356    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps                | 2029     |
| nupdates           | 3520     |
| policy_entropy     | 0.87     |
| policy_loss        | -0.00456 |
| total_timesteps    | 8533575  |
| value_loss         | 0.279    |
---------------------------------
---------------------------------
| explained_variance | 0.98     |
| fps                | 2029     |
| nupdates           | 3530     |
| policy_entropy     | 0.901    |
| policy_loss        | -0.0307  |
| total_timesteps    | 8557825  |
| value_loss         | 0.267    |
---------------------------------
---------------------------------
| explained_variance | 0.962    |
| fps         

---------------------------------
| explained_variance | 0.981    |
| fps                | 2022     |
| nupdates           | 3780     |
| policy_entropy     | 0.872    |
| policy_loss        | -0.0221  |
| total_timesteps    | 9164075  |
| value_loss         | 0.188    |
---------------------------------
---------------------------------
| explained_variance | 0.964    |
| fps                | 2022     |
| nupdates           | 3790     |
| policy_entropy     | 0.939    |
| policy_loss        | -0.035   |
| total_timesteps    | 9188325  |
| value_loss         | 0.381    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 2021     |
| nupdates           | 3800     |
| policy_entropy     | 0.934    |
| policy_loss        | -0.128   |
| total_timesteps    | 9212575  |
| value_loss         | 0.46     |
---------------------------------
---------------------------------
| explained_variance | 0.974    |
| fps         

---------------------------------
| explained_variance | 0.968    |
| fps                | 2012     |
| nupdates           | 4050     |
| policy_entropy     | 0.929    |
| policy_loss        | -0.0704  |
| total_timesteps    | 9818825  |
| value_loss         | 0.345    |
---------------------------------
---------------------------------
| explained_variance | 0.961    |
| fps                | 2012     |
| nupdates           | 4060     |
| policy_entropy     | 0.913    |
| policy_loss        | -0.0446  |
| total_timesteps    | 9843075  |
| value_loss         | 0.578    |
---------------------------------
---------------------------------
| explained_variance | 0.947    |
| fps                | 2011     |
| nupdates           | 4070     |
| policy_entropy     | 0.986    |
| policy_loss        | -0.155   |
| total_timesteps    | 9867325  |
| value_loss         | 0.615    |
---------------------------------
---------------------------------
| explained_variance | 0.979    |
| fps         

HBox(children=(IntProgress(value=0, description='Population', max=10000), HTML(value='')))

train...
phase 1
batch 1 learning rate 0.125 scaled 0.125
training...
---------------------------------
| explained_variance | 0.974    |
| fps                | 1451     |
| nupdates           | 1        |
| policy_entropy     | 0.902    |
| policy_loss        | -0.0117  |
| total_timesteps    | 0        |
| value_loss         | 0.296    |
---------------------------------
---------------------------------
| explained_variance | 0.814    |
| fps                | 2471     |
| nupdates           | 10       |
| policy_entropy     | 0.781    |
| policy_loss        | 2.52     |
| total_timesteps    | 21825    |
| value_loss         | 14.2     |
---------------------------------
---------------------------------
| explained_variance | 0.916    |
| fps                | 2172     |
| nupdates           | 20       |
| policy_entropy     | 0.737    |
| policy_loss        | -0.974   |
| total_timesteps    | 46075    |
| value_loss         | 2.97     |
---------------------------------
------------

---------------------------------
| explained_variance | 0.984    |
| fps                | 1977     |
| nupdates           | 270      |
| policy_entropy     | 0.828    |
| policy_loss        | 0.12     |
| total_timesteps    | 652325   |
| value_loss         | 0.234    |
---------------------------------
---------------------------------
| explained_variance | 0.977    |
| fps                | 1976     |
| nupdates           | 280      |
| policy_entropy     | 0.867    |
| policy_loss        | 0.0701   |
| total_timesteps    | 676575   |
| value_loss         | 0.328    |
---------------------------------
---------------------------------
| explained_variance | 0.938    |
| fps                | 1976     |
| nupdates           | 290      |
| policy_entropy     | 0.869    |
| policy_loss        | -0.145   |
| total_timesteps    | 700825   |
| value_loss         | 0.841    |
---------------------------------
---------------------------------
| explained_variance | 0.972    |
| fps         

---------------------------------
| explained_variance | 0.98     |
| fps                | 1974     |
| nupdates           | 540      |
| policy_entropy     | 0.88     |
| policy_loss        | -0.052   |
| total_timesteps    | 1307075  |
| value_loss         | 0.201    |
---------------------------------
---------------------------------
| explained_variance | 0.893    |
| fps                | 1974     |
| nupdates           | 550      |
| policy_entropy     | 0.865    |
| policy_loss        | -0.127   |
| total_timesteps    | 1331325  |
| value_loss         | 0.631    |
---------------------------------
---------------------------------
| explained_variance | 0.971    |
| fps                | 1974     |
| nupdates           | 560      |
| policy_entropy     | 0.913    |
| policy_loss        | 0.0325   |
| total_timesteps    | 1355575  |
| value_loss         | 0.404    |
---------------------------------
---------------------------------
| explained_variance | 0.989    |
| fps         

---------------------------------
| explained_variance | 0.943    |
| fps                | 1979     |
| nupdates           | 810      |
| policy_entropy     | 0.895    |
| policy_loss        | -0.119   |
| total_timesteps    | 1961825  |
| value_loss         | 0.493    |
---------------------------------
---------------------------------
| explained_variance | 0.957    |
| fps                | 1979     |
| nupdates           | 820      |
| policy_entropy     | 0.866    |
| policy_loss        | -0.0831  |
| total_timesteps    | 1986075  |
| value_loss         | 0.596    |
---------------------------------
---------------------------------
| explained_variance | 0.985    |
| fps                | 1979     |
| nupdates           | 830      |
| policy_entropy     | 0.926    |
| policy_loss        | -0.00811 |
| total_timesteps    | 2010325  |
| value_loss         | 0.2      |
---------------------------------
---------------------------------
| explained_variance | 0.948    |
| fps         

---------------------------------
| explained_variance | 0.982    |
| fps                | 1979     |
| nupdates           | 1080     |
| policy_entropy     | 0.903    |
| policy_loss        | -0.005   |
| total_timesteps    | 2616575  |
| value_loss         | 0.231    |
---------------------------------
---------------------------------
| explained_variance | 0.976    |
| fps                | 1979     |
| nupdates           | 1090     |
| policy_entropy     | 0.864    |
| policy_loss        | -0.0488  |
| total_timesteps    | 2640825  |
| value_loss         | 0.344    |
---------------------------------
---------------------------------
| explained_variance | 0.994    |
| fps                | 1979     |
| nupdates           | 1100     |
| policy_entropy     | 0.892    |
| policy_loss        | 0.0556   |
| total_timesteps    | 2665075  |
| value_loss         | 0.0928   |
---------------------------------
---------------------------------
| explained_variance | 0.963    |
| fps         

---------------------------------
| explained_variance | 0.99     |
| fps                | 1981     |
| nupdates           | 1350     |
| policy_entropy     | 0.822    |
| policy_loss        | 0.0832   |
| total_timesteps    | 3271325  |
| value_loss         | 0.123    |
---------------------------------
---------------------------------
| explained_variance | 0.981    |
| fps                | 1981     |
| nupdates           | 1360     |
| policy_entropy     | 0.839    |
| policy_loss        | -0.0309  |
| total_timesteps    | 3295575  |
| value_loss         | 0.301    |
---------------------------------
---------------------------------
| explained_variance | 0.965    |
| fps                | 1981     |
| nupdates           | 1370     |
| policy_entropy     | 0.885    |
| policy_loss        | -0.0287  |
| total_timesteps    | 3319825  |
| value_loss         | 0.336    |
---------------------------------
---------------------------------
| explained_variance | 0.991    |
| fps         

KeyboardInterrupt: 

In [None]:
# Baseline model object: unemployment-v1 environment with the notebook-level
# mortality / randomness / pinkslip settings and debug plotting switched off.
cc2=Lifecycle(
    env='unemployment-v1',
    minimal=False,
    perustulo=False,
    mortality=mortality,
    randomness=randomness,
    pinkslip=pinkslip,
    plotdebug=False,
)
# Render the distribution over n=30 runs loaded via the 'results/distrib_base_v' prefix;
# 'results/distrib_stats' is passed as the statistics file (stats_results).
cc2.render_distrib(
    stats_results='results/distrib_stats',
    load='results/distrib_base_v',
    n=30,
)


# Työttömyysputken poisto

Työttömyysputkelle meneminen on usein hyvin suosittua elinkaarimalleissa. Tarkastellaan, millainen työllisyysvaikutus putken poistamisella on.

In [None]:
# Policy experiment: same environment as the baseline model (cc2) but with include_putki=False.
# pinkslip is passed explicitly, exactly as for cc2, so that the two models differ only in
# include_putki and the comparison is not confounded by a different pinkslip setting.
cc1_putki=Lifecycle(env='unemployment-v1',minimal=False,include_putki=False,mortality=mortality,
                    perustulo=False,randomness=randomness,pinkslip=pinkslip)
# Training continues from the baseline model (cont=True, start_from=perusmalli) but the result is
# saved under its own name: saving to perusmalli itself would replace the very baseline that
# start_from reads.
# NOTE(review): assumes perusmalli is a path string, so the new model lands next to it - confirm.
putki_malli=perusmalli+'_putki_poisto'
cc1_putki.run_distrib(n=5,debug=False,steps1=size1,steps2=size2,pop=pop_size,deterministic=deterministic,
                train=True,predict=True,batch1=batch1,batch2=batch2,
                save=putki_malli,plot=True,cont=True,start_from=perusmalli,results='results/distrib_poisto',
                callback_minsteps=callback_minsteps,rlmodel=rlmodel,twostage=twostage)


In [None]:
# Render the runs produced by cc1_putki.run_distrib (results='results/distrib_poisto', n=5)
# and keep their statistics in a separate file so they can be compared with the baseline.
# NOTE(review): assumes render_distrib's load prefix must equal run_distrib's results
# argument - confirm against the library before running.
putki_stats='results/distrib_poisto_stats'
cc1_putki.render_distrib(load='results/distrib_poisto',n=5,stats_results=putki_stats)
# Compare with the baseline statistics file named in cc2.render_distrib. The previous call
# passed 'results/putki_results' for both arguments, i.e. compared a file with itself.
cc1_putki.compare_simstats('results/distrib_stats',putki_stats)