In [1]:
import numpy as np
import math
import gym
from gym import spaces
import time
import random
from stable_baselines3 import A2C
import torch
import matplotlib.pyplot as plt
import heapq
from stable_baselines3.common.env_checker import check_env
from mpl_toolkits.mplot3d import Axes3D

In [2]:
torch.__version__

'1.13.1'

In [3]:
gym.__version__

'0.21.0'

In [4]:
%matplotlib inline

## Finite Element Model of the Space Frame Element

In [5]:
def PlaneTrussElementLength(x1,y1,z1,x2,y2,z2):
    """Return the Euclidean distance between nodes (x1, y1, z1) and (x2, y2, z2)."""
    delta_x = x2 - x1
    delta_y = y2 - y1
    delta_z = z2 - z1
    squared_length = delta_x*delta_x + delta_y*delta_y + delta_z*delta_z
    return math.sqrt(squared_length)

In [6]:
def SpaceFrameElementStiffness(E,G,A,Iy,Iz,J,x1,y1,z1,x2,y2,z2):
    """Return the 12x12 stiffness matrix of a space frame element in global axes.

    The local matrix is assembled from the axial (E*A), bending (E*Iz, E*Iy)
    and torsional (G*J) terms and then rotated as ``R.T @ k_local @ R``, where
    ``R`` is block-diagonal with four copies of the 3x3 direction-cosine
    matrix of the element.

    Parameters
    ----------
    E, G : material moduli used in the axial/bending and torsional terms.
    A, Iy, Iz, J : cross-section area, the two second moments and the torsion constant.
    x1, y1, z1 : coordinates of the first node.
    x2, y2, z2 : coordinates of the second node.

    Returns
    -------
    numpy.ndarray of shape (12, 12).

    Raises
    ------
    ZeroDivisionError if both nodes coincide (zero element length).
    """
    length = PlaneTrussElementLength(x1,y1,z1,x2,y2,z2)
    length_sq = length*length
    length_cu = length*length*length

    axial = E*A/length
    # Bending terms built from Iz.
    bz12 = 12*E*Iz/length_cu
    bz6 = 6*E*Iz/length_sq
    bz4 = 4*E*Iz/length
    bz2 = 2*E*Iz/length
    # Bending terms built from Iy.
    by12 = 12*E*Iy/length_cu
    by6 = 6*E*Iy/length_sq
    by4 = 4*E*Iy/length
    by2 = 2*E*Iy/length
    torsion = G*J/length

    kprime = np.array([
        [axial, 0, 0, 0, 0, 0, -axial, 0, 0, 0, 0, 0],
        [0, bz12, 0, 0, 0, bz6, 0, -bz12, 0, 0, 0, bz6],
        [0, 0, by12, 0, -by6, 0, 0, 0, -by12, 0, -by6, 0],
        [0, 0, 0, torsion, 0, 0, 0, 0, 0, -torsion, 0, 0],
        [0, 0, -by6, 0, by4, 0, 0, 0, by6, 0, by2, 0],
        [0, bz6, 0, 0, 0, bz4, 0, -bz6, 0, 0, 0, bz2],
        [-axial, 0, 0, 0, 0, 0, axial, 0, 0, 0, 0, 0],
        [0, -bz12, 0, 0, 0, -bz6, 0, bz12, 0, 0, 0, -bz6],
        [0, 0, -by12, 0, by6, 0, 0, 0, by12, 0, by6, 0],
        [0, 0, 0, -torsion, 0, 0, 0, 0, 0, torsion, 0, 0],
        [0, 0, -by6, 0, by2, 0, 0, 0, by6, 0, by4, 0],
        [0, bz6, 0, 0, 0, bz2, 0, -bz6, 0, 0, 0, bz4],
    ])

    parallel_to_z = (x1 == x2 and y1 == y2)
    if parallel_to_z:
        # The general formula below divides by the x-y projection of the
        # element axis, which is zero here, so fixed matrices are used.
        if z2 > z1:
            Lambda = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]])
        else:
            Lambda = np.array([[0, 0, -1], [0, 1, 0], [1, 0, 0]])
    else:
        # Direction cosines of the element axis.
        cos_x = (x2-x1)/length
        cos_y = (y2-y1)/length
        cos_z = (z2-z1)/length
        planar = math.sqrt(cos_x*cos_x + cos_y*cos_y)
        Lambda = np.array([
            [cos_x, cos_y, cos_z],
            [-cos_y/planar, cos_x/planar, 0],
            [-cos_x*cos_z/planar, -cos_y*cos_z/planar, planar],
        ])

    # Block-diagonal rotation: one copy of Lambda per group of three DOFs.
    R = np.zeros((12, 12))
    for block in range(4):
        first = 3*block
        R[first:first+3, first:first+3] = Lambda

    rotated = np.dot(R.T, kprime)
    return np.dot(rotated, R)

In [7]:
def SpaceFrameAssemble(K,k,i,j):
    """Add a 12x12 element stiffness matrix into the global stiffness matrix.

    Node ``i`` owns global DOFs ``6*i .. 6*i+5`` and node ``j`` owns
    ``6*j .. 6*j+5``. The four 6x6 quadrants of ``k`` are accumulated into the
    matching (i, i), (i, j), (j, i) and (j, j) blocks of ``K``.

    Parameters
    ----------
    K : global stiffness matrix (2-D numpy array); modified in place.
    k : element stiffness matrix in global axes, shape (12, 12).
    i, j : zero-based indices of the element's first and second node.

    Returns
    -------
    The same ``K`` object, after accumulation.
    """
    local_dofs = np.arange(6)
    node_dofs = (6*i + local_dofs, 6*j + local_dofs)

    # Blocks are visited in the order (i,i), (i,j), (j,i), (j,j) and each one
    # is applied with a separate read-add-write, so the result also matches
    # the entry-by-entry form when i == j and all four blocks overlap.
    for row_block, rows in enumerate(node_dofs):
        for col_block, cols in enumerate(node_dofs):
            target = np.ix_(rows, cols)
            K[target] = K[target] + k[6*row_block:6*row_block+6,
                                      6*col_block:6*col_block+6]

    return K

In [8]:
def FEA_u(coord, elcon, bc_node, bc_val, global_force, 
          E=210e6, G=84e6, A=2e-2, Iy=10e-5, Iz=20e-5, J=5e-5):
    """Solve a space frame for its nodal displacement vector.

    Parameters
    ----------
    coord : sequence of [x, y, z] node coordinates, indexed by node number.
    elcon : sequence of [first_node, second_node] pairs (zero-based).
    bc_node : indices of the prescribed global degrees of freedom.
    bc_val : prescribed value for each entry of ``bc_node``.
    global_force : load vector with six entries per node.
    E, G, A, Iy, Iz, J : section/material properties applied to every element.

    Returns
    -------
    1-D numpy array of length ``6 * (max node index in elcon + 1)`` holding
    the prescribed values at ``bc_node`` and the solved values elsewhere.
    """
    nodes = np.array(coord)
    elements = np.array(elcon)

    # The system size is taken from the highest node index used by an element.
    n_dof = 6*(np.max(elements)+1)
    K = np.zeros(shape=(n_dof, n_dof))
    for element in elements:
        first, second = element[0], element[1]
        k_element = SpaceFrameElementStiffness(
            E, G, A, Iy, Iz, J,
            nodes[first][0], nodes[first][1], nodes[first][2],
            nodes[second][0], nodes[second][1], nodes[second][2])
        K = SpaceFrameAssemble(K, k_element, first, second)

    loads = np.array(global_force)

    # Partitioned solve following CALFEM's solveq:
    # https://github.com/CALFEM/calfem-matlab/blob/master/fem/solveq.m
    constraints = np.array([bc_node, 
                            bc_val]).T
    prescribed = constraints[:,0].astype(int)
    prescribed_values = constraints[:,1]

    all_dofs = np.arange(K.shape[1])
    free = np.delete(all_dofs, prescribed, 0)

    free_rows = K[free,:]
    stiffness_ff = free_rows[:,free]
    stiffness_fp = free_rows[:,prescribed]
    rhs = (loads[free].T - np.dot(stiffness_fp, prescribed_values.T)).T

    # Least squares rather than a direct solve, so a singular reduced
    # stiffness matrix still returns a solution instead of raising.
    solved = np.linalg.lstsq(stiffness_ff, rhs, rcond=None)[0]

    displacements = np.zeros(shape=(len(all_dofs),))
    displacements[prescribed] = prescribed_values
    displacements[free] = solved.reshape(-1,)
    return displacements

In [9]:
# 10 The Space Frame Element - verification
# Four nodes: node 0 at the origin is connected by one element each to
# nodes 1, 2 and 3. DOFs 6..23 (all six DOFs of nodes 1-3) are fixed at zero,
# and node 0 is loaded with -10 on its first DOF and 20 on its third DOF.
# NOTE(review): "10" presumably refers to a textbook chapter - confirm the source.
d=FEA_u(np.array([0,0,0,
                  3,0,0,
                  0,0,-3,
                  0,-4,0]).reshape(4,3),
        elcon=np.array([[0, 1],
                      [0, 2],
                      [0, 3]]),
        bc_node=list(range(6,24)), 
        bc_val=[0]*18,
        global_force=[-10,0,20,0,0,0,
                                0,0,0,0,0,0,
                                0,0,0,0,0,0,
                                0,0,0,0,0,0])

In [10]:
d

array([-7.05147750e-06, -6.65367100e-08,  1.41769582e-05,  1.44778793e-06,
        1.74858422e-06,  1.13605431e-06,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00])

## Utils

In [11]:
def total_length(coord,elcon):
    """Return the summed length of all elements.

    ``coord`` holds [x, y, z] per node and ``elcon`` holds
    [first_node, second_node] per element; an empty ``elcon`` gives 0.
    """
    nodes=np.array(coord)
    elements=np.array(elcon)
    accumulated=0
    for element in elements:
        start=nodes[element[0]]
        end=nodes[element[1]]
        accumulated+=PlaneTrussElementLength(start[0],start[1],start[2],
                                             end[0],end[1],end[2])
    return accumulated

In [12]:
def possible_lines_dic(n,m,dx,dy):
    """Enumerate every line between neighbouring points of an n-by-m grid.

    Grid points sit at ``(col*dx, row*dy)`` for ``row < n`` and ``col < m``.
    Each point is linked to its eight neighbours; a line is stored once, under
    the orientation in which it is first met, as ``(x, y, x1, y1) -> index``
    with indices counting up from 0 in discovery order.
    """
    neighbour_offsets=[(-dx,0),(-dx,dy),(0,dy),(dx,dy),(dx,0),(dx,-dy),(0,-dy),(-dx,-dy)]
    x_limit=(m-1)*dx
    y_limit=(n-1)*dy
    lines={}
    next_index=0
    for row in range(n):
        y=row*dy
        for col in range(m):
            x=col*dx
            for offset_x,offset_y in neighbour_offsets:
                x1=x+offset_x
                y1=y+offset_y
                if not (0<=x1<=x_limit and 0<=y1<=y_limit):
                    continue  # neighbour lies outside the grid
                if (x1,y1,x,y) in lines:
                    continue  # already stored in the opposite orientation
                lines[(x,y,x1,y1)]=next_index
                next_index+=1
    return lines

In [13]:
# Observation length: one traversal counter per possible grid line of the
# 5x5 unit grid, plus the x and y position and the action code.
DIM = len(possible_lines_dic(n=5,m=5,dx=1,dy=1)) + 3 # +2 for x and y, +1 for action

## Model

In [14]:
class Model:
    """Frame structure grown step by step on an n-by-m grid.

    A walker starts at node (2, 2) and moves to one of its eight neighbours
    per step. Every traversed line becomes a frame element and every newly
    reached grid point becomes a node. The four points (1, 1), (m-2, 1),
    (m-2, n-2) and (1, n-2) act as supports: all six DOFs are fixed and no
    load is applied. Every other node (including the start node) carries
    ``force`` on its second DOF.

    NOTE(review): the start node and the support coordinates are written in
    absolute units, which presumably assumes dx == dy == 1 and a grid of at
    least 5x5 - confirm before using other grid parameters.

    Attributes
    ----------
    dic_lines : mapping ``(x, y, x1, y1) -> index`` of every possible line.
    line_list : traversal counter per possible line (same indexing).
    coord, elcon : node coordinates and element connectivity for the solver.
    el_dic : mapping ``(x, y) -> node index``.
    bc_node, bc_val : prescribed DOF indices and their values.
    global_force : load vector, six entries per node.
    visit_list : number of arrivals at each of the four supports.
    break_flag : set to True when the episode must end.
    old_weight, old_strength : best values seen so far; not cleared by reset().
    """

    def __init__(self,n=5,m=5,dx=1,dy=1, force=-500,
                 E=210e6, G=84e6, A=2e-2, Iy=10e-5, Iz=20e-5, J=5e-5, break_flag=False):
        # n,m,dx,dy - grid parameters
        self.E=E
        self.G=G
        self.A=A
        self.Iy=Iy
        self.Iz=Iz
        self.J=J
        self.n=n
        self.m=m
        self.dx=dx
        self.dy=dy
        self.old_weight=float("inf")
        self.old_strength=-float("inf")
        # All per-episode state is initialised in one place.
        self.reset(break_flag,force)

    def reset(self,break_flag,force):
        """Restore the single-node starting structure for a new episode.

        ``old_weight`` and ``old_strength`` are deliberately left untouched so
        the best values carry over between episodes.
        """
        self.dic_lines=possible_lines_dic(self.n, self.m, self.dx, self.dy)
        self.line_list=len(self.dic_lines)*[0]
        self.break_flag=break_flag
        self.coord=[[2,2,0]]
        self.elcon=[]
        self.el_dic={(2,2):0}
        self.max_el=0
        self.force=force
        self.bc_node = []
        self.bc_val = []
        self.global_force = [0, self.force, 0, 0, 0, 0]
        self.visit_list = [0,0,0,0] # number of checkpoints is 4

    def FEA(self):
        """Run the frame solver on the current structure and return its displacement vector."""
        # The instance's own section/material properties are forwarded so
        # non-default constructor arguments are honoured by the solver.
        return FEA_u(self.coord,
                     self.elcon,
                     self.bc_node,
                     self.bc_val,
                     self.global_force,
                     E=self.E, G=self.G, A=self.A,
                     Iy=self.Iy, Iz=self.Iz, J=self.J)

    def max_u(self, FEA_output_arr):
        """Return the smallest value among entries 1, 7, 13, ... of the input.

        These are the second DOF of every node, i.e. the DOF ``self.force``
        acts on. Raises ValueError when the input has fewer than two entries.
        """
        return min(FEA_output_arr[1::6])

    def length(self):
        """Return the total length of all elements of the current structure."""
        return total_length(self.coord,self.elcon)

    def _support_nodes(self):
        """Return the four support coordinates, in ``visit_list`` order."""
        return ((1,1),
                (self.m-2,1),
                (self.m-2,self.n-2),
                (1,self.n-2))

    def _move(self,x,y,step_x,step_y):
        """Move from (x, y) by (step_x, step_y) and update the structure.

        If the target is outside the grid, or some line has already been
        traversed three times, only ``break_flag`` is set. Otherwise the line
        counter is incremented, the target node is registered if new (with its
        load or support conditions), the element is added if not present yet,
        and the support visit counter is updated.

        Returns the target coordinates ``(x_new, y_new)`` in both cases.
        """
        x_new=x+step_x
        y_new=y+step_y
        outside=(x_new<0 or x_new>(self.m-1)*self.dx or
                 y_new<0 or y_new>(self.n-1)*self.dy)
        if outside or 3 in self.line_list:
            self.break_flag=True
            return x_new, y_new

        # Lines are stored under one orientation only; try both.
        try:
            line_index=self.dic_lines[(x,y,x_new,y_new)]
        except KeyError:
            line_index=self.dic_lines[(x_new,y_new,x,y)]
        self.line_list[line_index]+=1

        node=(x_new,y_new)
        supports=self._support_nodes()

        if node not in self.el_dic:
            self.max_el+=1
            self.el_dic[node]=self.max_el
            self.coord.append([x_new,y_new,0])
            if node in supports:
                # Node k owns DOFs 6k .. 6k+5 in the solver's numbering, so
                # these are the DOFs that must be fixed for this support.
                first_dof=6*self.max_el
                support_dofs=list(range(first_dof,first_dof+6))
                self.bc_node += support_dofs
                self.bc_val += [0]*len(support_dofs)
                self.global_force += [0,0,0,0,0,0]
            else:
                self.global_force += [0,self.force,0,0,0,0]

        start_index=self.el_dic[(x,y)]
        end_index=self.el_dic[node]
        # elcon stores lists, so the membership test must use lists as well;
        # a tuple never compares equal to a list.
        if [start_index,end_index] not in self.elcon and \
           [end_index,start_index] not in self.elcon:
            self.elcon.append([start_index,end_index])

        if node in supports:
            # index() returns the first match, like the original if/elif chain.
            self.visit_list[supports.index(node)]+=1

        return x_new, y_new

    def move_w(self,x,y):
        """Step one cell in -x from the current location (x, y)."""
        return self._move(x,y,-self.dx,0)

    def move_nw(self,x,y):
        """Step one cell in -x and +y from the current location (x, y)."""
        return self._move(x,y,-self.dx,self.dy)

    def move_n(self,x,y):
        """Step one cell in +y from the current location (x, y)."""
        return self._move(x,y,0,self.dy)

    def move_ne(self,x,y):
        """Step one cell in +x and +y from the current location (x, y)."""
        return self._move(x,y,self.dx,self.dy)

    def move_e(self,x,y):
        """Step one cell in +x from the current location (x, y)."""
        return self._move(x,y,self.dx,0)

    def move_se(self,x,y):
        """Step one cell in +x and -y from the current location (x, y)."""
        return self._move(x,y,self.dx,-self.dy)

    def move_s(self,x,y):
        """Step one cell in -y from the current location (x, y)."""
        return self._move(x,y,0,-self.dy)

    def move_sw(self,x,y):
        """Step one cell in -x and -y from the current location (x, y)."""
        return self._move(x,y,-self.dx,-self.dy)

    def action_space(self,action,x0,y0):
        """Apply action code 0..7 (w, nw, n, ne, e, se, s, sw) from (x0, y0).

        Returns the new coordinates, or None for any other action code.
        """
        moves=(self.move_w,self.move_nw,self.move_n,self.move_ne,
               self.move_e,self.move_se,self.move_s,self.move_sw)
        for code,move in enumerate(moves):
            if action==code:
                return move(x0,y0)
        return None

    def nn_input(self,x,y,action):
        """Return the observation: line counters followed by x, y and the action code."""
        return self.line_list+[x,y]+[action]

    def reward_(self,x_new,y_new,n_steps):
        """Return the reward for the current step.

        The base reward is ``2*n_steps``. Once every support has been visited
        at least once, the structure is analysed, bonuses are added (10000 for
        completion, 50000 if the total length does not exceed the best so far,
        100000000 if ``max_u`` is not below the best so far) and
        ``break_flag`` is set to end the episode. ``x_new`` and ``y_new`` are
        accepted for interface compatibility but not used.
        """
        reward=2*n_steps
        if not all(visits>=1 for visits in self.visit_list):
            return reward

        reward+=10000
        weight=self.length()
        strength=self.max_u(self.FEA())
        if weight<=self.old_weight:
            reward+=50000
            self.old_weight=weight
        if strength>=self.old_strength:
            reward+=100000000
            self.old_strength=strength
        self.break_flag=True
        return reward

    def draw(self,color):
        """Plot every element of the current structure in the given colour."""
        coord=np.array(self.coord)
        elements=np.array(self.elcon)
        fig=plt.figure(figsize=(13,5))
        # One 3-D axes for the whole figure. Passing ``projection`` to
        # ``fig.gca`` is rejected by newer matplotlib releases.
        ax=fig.add_subplot(projection='3d')
        for element in elements:
            start=coord[element[0]]
            end=coord[element[1]]
            ax.plot([start[0],end[0]],
                    [start[1],end[1]],
                    [start[2],end[2]],
                    color=color)
        ax.view_init(-90,90)
        # Axis limits follow the grid size; this is [0, 5] for the default grid.
        ax.set_xlim([0, self.m*self.dx])
        ax.set_ylim([0, self.n*self.dy])
        plt.show()

## Reinforcement learning model

In [15]:
# One action per move direction; Model.action_space handles codes 0..7.
N_DISCRETE_ACTIONS=8

In [16]:
class BionicEnv(gym.Env):
    """Gym environment that lets an agent move a cursor over a ``Model``.

    Each step forwards the discrete action to ``Model.action_space`` to get
    the new cursor position, builds the observation with ``Model.nn_input``
    and takes the reward from ``Model.reward_``. The episode ends on the
    first step after which the model's ``break_flag`` is set.
    """

    metadata = {"render.modes": ["human"]}

    # Cursor position at the start of every episode.
    START_X = 2
    START_Y = 2

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        self.M = Model()
        self._begin_episode()
        # Unbounded float64 vector of length DIM.
        self.observation_space = spaces.Box(low=np.full(DIM, -np.inf),
                                            high=np.full(DIM, np.inf),
                                            shape=(DIM,),
                                            dtype=np.float64)
        # A freshly constructed env must be reset() before the first step().
        self.needs_reset = True

    def _begin_episode(self):
        """Put the model, cursor, observation and step counter in their start state."""
        # NOTE(review): the meaning of the two Model.reset arguments is not
        # visible from here; they are passed through exactly as before.
        self.M.reset(False, -500)
        self.x0 = self.START_X
        self.y0 = self.START_Y
        self.obs = self.M.nn_input(self.x0, self.y0, 0)
        self.step_ = 0

    def _observation(self):
        """Return ``self.obs`` as an array in the dtype declared by ``observation_space``."""
        return np.array(self.obs, dtype=np.float64)

    def step(self, action):
        """Advance the environment by one action.

        Returns the usual gym 4-tuple ``(observation, reward, done, info)``;
        ``info`` is always an empty dict.

        Raises
        ------
        RuntimeError
            If the environment has not been reset since construction or since
            the last finished episode.
        """
        # Check up front, before anything is mutated: a rejected call must not
        # move the cursor, bump the step counter or trigger the model's reward
        # side effects.
        if self.needs_reset:
            raise RuntimeError("Tried to step environment that needs reset")

        x_new, y_new = self.M.action_space(action, self.x0, self.y0)
        self.obs = self.M.nn_input(x_new, y_new, action)

        self.step_ += 1
        reward = self.M.reward_(x_new, y_new, self.step_)
        self.x0, self.y0 = x_new, y_new

        done = False
        if self.M.break_flag:
            # The step that ends the episode has 10000 subtracted from its reward.
            reward -= 10000
            done = True
            self.needs_reset = True

        return self._observation(), reward, done, {}

    def reset(self):
        """Start a new episode and return its first observation."""
        self._begin_episode()
        self.needs_reset = False
        return self._observation()

    def render(self, mode="human"):
        """Draw the current model in blue; ``mode`` is accepted but not used."""
        self.M.draw('blue')

    def close(self):
        """Nothing to release."""
        pass

In [17]:
# Build the environment and run Stable-Baselines3's environment checker on it
# before using it for training.
env = BionicEnv()
check_env(env)

In [18]:
start=time.time()
# Bind the agent before training starts: `learn` returns the same object, but
# with the one-liner `model` only exists once all 4M steps have finished, so
# interrupting the run would lose the partially trained agent.
model = A2C("MlpPolicy", env, verbose=2)
model.learn(total_timesteps=4_000_000)
end=time.time()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 7.56      |
|    ep_rew_mean        | 1.51e+06  |
| time/                 |           |
|    fps                | 646       |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -2.03     |
|    explained_variance | 8.73e-05  |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | -1.16e+04 |
|    value_loss         | 5.85e+07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 8.06     |
|    ep_rew_mean        | 9.91e+05 |
| time/                 |          |
|    fps                | 585      |
|    iterations         | 200      |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 16.1      |
|    ep_rew_mean        | -6.98e+03 |
| time/                 |           |
|    fps                | 622       |
|    iterations         | 1400      |
|    time_elapsed       | 11        |
|    total_timesteps    | 7000      |
| train/                |           |
|    entropy_loss       | -1.2      |
|    explained_variance | 0.000719  |
|    learning_rate      | 0.0007    |
|    n_updates          | 1399      |
|    policy_loss        | 34.2      |
|    value_loss         | 4.56e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 16.4      |
|    ep_rew_mean        | -6.07e+03 |
| time/                 |           |
|    fps                | 623       |
|    iterations         | 1500      |
|    time_elapsed       | 12        |
|    total_timesteps    | 7500      |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 18.6      |
|    ep_rew_mean        | -7.79e+03 |
| time/                 |           |
|    fps                | 612       |
|    iterations         | 2700      |
|    time_elapsed       | 22        |
|    total_timesteps    | 13500     |
| train/                |           |
|    entropy_loss       | -0.669    |
|    explained_variance | 0.000309  |
|    learning_rate      | 0.0007    |
|    n_updates          | 2699      |
|    policy_loss        | 30.2      |
|    value_loss         | 6.31e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 19.5      |
|    ep_rew_mean        | -7.96e+03 |
| time/                 |           |
|    fps                | 605       |
|    iterations         | 2800      |
|    time_elapsed       | 23        |
|    total_timesteps    | 14000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 20.3      |
|    ep_rew_mean        | -5.53e+03 |
| time/                 |           |
|    fps                | 578       |
|    iterations         | 4000      |
|    time_elapsed       | 34        |
|    total_timesteps    | 20000     |
| train/                |           |
|    entropy_loss       | -0.644    |
|    explained_variance | -1.29e-05 |
|    learning_rate      | 0.0007    |
|    n_updates          | 3999      |
|    policy_loss        | 10.6      |
|    value_loss         | 1.53e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 20.4      |
|    ep_rew_mean        | -6.23e+03 |
| time/                 |           |
|    fps                | 580       |
|    iterations         | 4100      |
|    time_elapsed       | 35        |
|    total_timesteps    | 20500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 20.8      |
|    ep_rew_mean        | -9.22e+03 |
| time/                 |           |
|    fps                | 595       |
|    iterations         | 5300      |
|    time_elapsed       | 44        |
|    total_timesteps    | 26500     |
| train/                |           |
|    entropy_loss       | -0.615    |
|    explained_variance | 0.0013    |
|    learning_rate      | 0.0007    |
|    n_updates          | 5299      |
|    policy_loss        | 81.6      |
|    value_loss         | 1.33e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 20.6      |
|    ep_rew_mean        | -8.93e+03 |
| time/                 |           |
|    fps                | 596       |
|    iterations         | 5400      |
|    time_elapsed       | 45        |
|    total_timesteps    | 27000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22.9      |
|    ep_rew_mean        | -9.43e+03 |
| time/                 |           |
|    fps                | 607       |
|    iterations         | 6600      |
|    time_elapsed       | 54        |
|    total_timesteps    | 33000     |
| train/                |           |
|    entropy_loss       | -0.133    |
|    explained_variance | -2.87e-05 |
|    learning_rate      | 0.0007    |
|    n_updates          | 6599      |
|    policy_loss        | 0.771     |
|    value_loss         | 557       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 23.3      |
|    ep_rew_mean        | -9.21e+03 |
| time/                 |           |
|    fps                | 608       |
|    iterations         | 6700      |
|    time_elapsed       | 55        |
|    total_timesteps    | 33500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | -9.18e+03 |
| time/                 |           |
|    fps                | 607       |
|    iterations         | 7900      |
|    time_elapsed       | 65        |
|    total_timesteps    | 39500     |
| train/                |           |
|    entropy_loss       | -0.0788   |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 7899      |
|    policy_loss        | 2.8       |
|    value_loss         | 5.54e+03  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28.1     |
|    ep_rew_mean        | -9.1e+03 |
| time/                 |          |
|    fps                | 608      |
|    iterations         | 8000     |
|    time_elapsed       | 65       |
|    total_timesteps    | 40000    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31        |
|    ep_rew_mean        | -8.99e+03 |
| time/                 |           |
|    fps                | 610       |
|    iterations         | 9200      |
|    time_elapsed       | 75        |
|    total_timesteps    | 46000     |
| train/                |           |
|    entropy_loss       | -0.0456   |
|    explained_variance | 0.077     |
|    learning_rate      | 0.0007    |
|    n_updates          | 9199      |
|    policy_loss        | 0.357     |
|    value_loss         | 4.43e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31.3      |
|    ep_rew_mean        | -8.98e+03 |
| time/                 |           |
|    fps                | 611       |
|    iterations         | 9300      |
|    time_elapsed       | 76        |
|    total_timesteps    | 46500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 32        |
|    ep_rew_mean        | -8.91e+03 |
| time/                 |           |
|    fps                | 573       |
|    iterations         | 10500     |
|    time_elapsed       | 91        |
|    total_timesteps    | 52500     |
| train/                |           |
|    entropy_loss       | -0.257    |
|    explained_variance | 0.281     |
|    learning_rate      | 0.0007    |
|    n_updates          | 10499     |
|    policy_loss        | 134       |
|    value_loss         | 4.55e+04  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 30.3     |
|    ep_rew_mean        | -9e+03   |
| time/                 |          |
|    fps                | 574      |
|    iterations         | 10600    |
|    time_elapsed       | 92       |
|    total_timesteps    | 53000    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.4      |
|    ep_rew_mean        | -9.12e+03 |
| time/                 |           |
|    fps                | 577       |
|    iterations         | 11800     |
|    time_elapsed       | 102       |
|    total_timesteps    | 59000     |
| train/                |           |
|    entropy_loss       | -0.00184  |
|    explained_variance | 1.55e-06  |
|    learning_rate      | 0.0007    |
|    n_updates          | 11799     |
|    policy_loss        | 0.0281    |
|    value_loss         | 2.02e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25.4      |
|    ep_rew_mean        | -9.27e+03 |
| time/                 |           |
|    fps                | 578       |
|    iterations         | 11900     |
|    time_elapsed       | 102       |
|    total_timesteps    | 59500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33        |
|    ep_rew_mean        | -8.84e+03 |
| time/                 |           |
|    fps                | 585       |
|    iterations         | 13100     |
|    time_elapsed       | 111       |
|    total_timesteps    | 65500     |
| train/                |           |
|    entropy_loss       | -0.00479  |
|    explained_variance | 2.87e-05  |
|    learning_rate      | 0.0007    |
|    n_updates          | 13099     |
|    policy_loss        | 0.00703   |
|    value_loss         | 114       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30.8      |
|    ep_rew_mean        | -8.99e+03 |
| time/                 |           |
|    fps                | 585       |
|    iterations         | 13200     |
|    time_elapsed       | 112       |
|    total_timesteps    | 66000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 32        |
|    ep_rew_mean        | -8.92e+03 |
| time/                 |           |
|    fps                | 584       |
|    iterations         | 14400     |
|    time_elapsed       | 123       |
|    total_timesteps    | 72000     |
| train/                |           |
|    entropy_loss       | -0.00253  |
|    explained_variance | 0.15      |
|    learning_rate      | 0.0007    |
|    n_updates          | 14399     |
|    policy_loss        | -0.0242   |
|    value_loss         | 1.82e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33        |
|    ep_rew_mean        | -8.85e+03 |
| time/                 |           |
|    fps                | 584       |
|    iterations         | 14500     |
|    time_elapsed       | 124       |
|    total_timesteps    | 72500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.9      |
|    ep_rew_mean        | -8.97e+03 |
| time/                 |           |
|    fps                | 587       |
|    iterations         | 15700     |
|    time_elapsed       | 133       |
|    total_timesteps    | 78500     |
| train/                |           |
|    entropy_loss       | -0.0144   |
|    explained_variance | 0.963     |
|    learning_rate      | 0.0007    |
|    n_updates          | 15699     |
|    policy_loss        | -0.0178   |
|    value_loss         | 171       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.2      |
|    ep_rew_mean        | -8.97e+03 |
| time/                 |           |
|    fps                | 588       |
|    iterations         | 15800     |
|    time_elapsed       | 134       |
|    total_timesteps    | 79000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.8      |
|    ep_rew_mean        | -4.27e+03 |
| time/                 |           |
|    fps                | 566       |
|    iterations         | 17000     |
|    time_elapsed       | 150       |
|    total_timesteps    | 85000     |
| train/                |           |
|    entropy_loss       | -0.000123 |
|    explained_variance | 0.0386    |
|    learning_rate      | 0.0007    |
|    n_updates          | 16999     |
|    policy_loss        | 8.93e-05  |
|    value_loss         | 65.6      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.6      |
|    ep_rew_mean        | -3.95e+03 |
| time/                 |           |
|    fps                | 565       |
|    iterations         | 17100     |
|    time_elapsed       | 151       |
|    total_timesteps    | 85500     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 32       |
|    ep_rew_mean        | -541     |
| time/                 |          |
|    fps                | 530      |
|    iterations         | 18300    |
|    time_elapsed       | 172      |
|    total_timesteps    | 91500    |
| train/                |          |
|    entropy_loss       | -0.00982 |
|    explained_variance | 0.998    |
|    learning_rate      | 0.0007   |
|    n_updates          | 18299    |
|    policy_loss        | -0.0329  |
|    value_loss         | 167      |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 32        |
|    ep_rew_mean        | -541      |
| time/                 |           |
|    fps                | 528       |
|    iterations         | 18400     |
|    time_elapsed       | 174       |
|    total_timesteps    | 92000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31        |
|    ep_rew_mean        | 992       |
| time/                 |           |
|    fps                | 522       |
|    iterations         | 19600     |
|    time_elapsed       | 187       |
|    total_timesteps    | 98000     |
| train/                |           |
|    entropy_loss       | -0.000887 |
|    explained_variance | 0.996     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19599     |
|    policy_loss        | 0.000148  |
|    value_loss         | 12.5      |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 31       |
|    ep_rew_mean        | 992      |
| time/                 |          |
|    fps                | 521      |
|    iterations         | 19700    |
|    time_elapsed       | 188      |
|    total_timesteps    | 98500    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.4      |
|    ep_rew_mean        | -4.98e+03 |
| time/                 |           |
|    fps                | 524       |
|    iterations         | 20900     |
|    time_elapsed       | 199       |
|    total_timesteps    | 104500    |
| train/                |           |
|    entropy_loss       | -8.97e-05 |
|    explained_variance | 0.00107   |
|    learning_rate      | 0.0007    |
|    n_updates          | 20899     |
|    policy_loss        | 0.00075   |
|    value_loss         | 1.15e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.1      |
|    ep_rew_mean        | -3.5e+03  |
| time/                 |           |
|    fps                | 520       |
|    iterations         | 21000     |
|    time_elapsed       | 201       |
|    total_timesteps    | 105000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33.8      |
|    ep_rew_mean        | 578       |
| time/                 |           |
|    fps                | 501       |
|    iterations         | 22200     |
|    time_elapsed       | 221       |
|    total_timesteps    | 111000    |
| train/                |           |
|    entropy_loss       | -1.64e-05 |
|    explained_variance | 3.76e-06  |
|    learning_rate      | 0.0007    |
|    n_updates          | 22199     |
|    policy_loss        | 2.2e-05   |
|    value_loss         | 453       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33.8      |
|    ep_rew_mean        | 578       |
| time/                 |           |
|    fps                | 500       |
|    iterations         | 22300     |
|    time_elapsed       | 222       |
|    total_timesteps    | 111500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34        |
|    ep_rew_mean        | 1.19e+03  |
| time/                 |           |
|    fps                | 490       |
|    iterations         | 23500     |
|    time_elapsed       | 239       |
|    total_timesteps    | 117500    |
| train/                |           |
|    entropy_loss       | -0.000522 |
|    explained_variance | 0.999     |
|    learning_rate      | 0.0007    |
|    n_updates          | 23499     |
|    policy_loss        | -1.85e-05 |
|    value_loss         | 3.01      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34        |
|    ep_rew_mean        | 1.19e+03  |
| time/                 |           |
|    fps                | 490       |
|    iterations         | 23600     |
|    time_elapsed       | 240       |
|    total_timesteps    | 118000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.1      |
|    ep_rew_mean        | 795       |
| time/                 |           |
|    fps                | 488       |
|    iterations         | 24800     |
|    time_elapsed       | 253       |
|    total_timesteps    | 124000    |
| train/                |           |
|    entropy_loss       | -0.000144 |
|    explained_variance | 0.00316   |
|    learning_rate      | 0.0007    |
|    n_updates          | 24799     |
|    policy_loss        | -1.77e-05 |
|    value_loss         | 13        |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 34.1     |
|    ep_rew_mean        | 997      |
| time/                 |          |
|    fps                | 488      |
|    iterations         | 24900    |
|    time_elapsed       | 254      |
|    total_timesteps    | 124500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34        |
|    ep_rew_mean        | 1.19e+03  |
| time/                 |           |
|    fps                | 486       |
|    iterations         | 26100     |
|    time_elapsed       | 268       |
|    total_timesteps    | 130500    |
| train/                |           |
|    entropy_loss       | -1.65e-05 |
|    explained_variance | -0.00286  |
|    learning_rate      | 0.0007    |
|    n_updates          | 26099     |
|    policy_loss        | 9.84e-06  |
|    value_loss         | 124       |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 34       |
|    ep_rew_mean        | 1.19e+03 |
| time/                 |          |
|    fps                | 486      |
|    iterations         | 26200    |
|    time_elapsed       | 269      |
|    total_timesteps    | 131000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33.8      |
|    ep_rew_mean        | 1.08e+03  |
| time/                 |           |
|    fps                | 483       |
|    iterations         | 27400     |
|    time_elapsed       | 283       |
|    total_timesteps    | 137000    |
| train/                |           |
|    entropy_loss       | -9.62e-05 |
|    explained_variance | 0.000168  |
|    learning_rate      | 0.0007    |
|    n_updates          | 27399     |
|    policy_loss        | 0.000266  |
|    value_loss         | 1.77e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33.7      |
|    ep_rew_mean        | 973       |
| time/                 |           |
|    fps                | 483       |
|    iterations         | 27500     |
|    time_elapsed       | 284       |
|    total_timesteps    | 137500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33.6      |
|    ep_rew_mean        | 668       |
| time/                 |           |
|    fps                | 482       |
|    iterations         | 28700     |
|    time_elapsed       | 297       |
|    total_timesteps    | 143500    |
| train/                |           |
|    entropy_loss       | -0.132    |
|    explained_variance | -2.66e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 28699     |
|    policy_loss        | -95.2     |
|    value_loss         | 1.72e+05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 31.9     |
|    ep_rew_mean        | -725     |
| time/                 |          |
|    fps                | 482      |
|    iterations         | 28800    |
|    time_elapsed       | 298      |
|    total_timesteps    | 144000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30.1      |
|    ep_rew_mean        | -6.11e+03 |
| time/                 |           |
|    fps                | 486       |
|    iterations         | 30000     |
|    time_elapsed       | 308       |
|    total_timesteps    | 150000    |
| train/                |           |
|    entropy_loss       | -0.0115   |
|    explained_variance | 0.259     |
|    learning_rate      | 0.0007    |
|    n_updates          | 29999     |
|    policy_loss        | 0.295     |
|    value_loss         | 7.97e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.1      |
|    ep_rew_mean        | -6.27e+03 |
| time/                 |           |
|    fps                | 487       |
|    iterations         | 30100     |
|    time_elapsed       | 308       |
|    total_timesteps    | 150500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22.6      |
|    ep_rew_mean        | -6.84e+03 |
| time/                 |           |
|    fps                | 491       |
|    iterations         | 31300     |
|    time_elapsed       | 318       |
|    total_timesteps    | 156500    |
| train/                |           |
|    entropy_loss       | -0.0239   |
|    explained_variance | 0.976     |
|    learning_rate      | 0.0007    |
|    n_updates          | 31299     |
|    policy_loss        | 0.162     |
|    value_loss         | 1.11e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 24.6      |
|    ep_rew_mean        | -5.63e+03 |
| time/                 |           |
|    fps                | 491       |
|    iterations         | 31400     |
|    time_elapsed       | 319       |
|    total_timesteps    | 157000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 14        |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 490       |
|    iterations         | 32600     |
|    time_elapsed       | 332       |
|    total_timesteps    | 163000    |
| train/                |           |
|    entropy_loss       | -0.000454 |
|    explained_variance | 0.13      |
|    learning_rate      | 0.0007    |
|    n_updates          | 32599     |
|    policy_loss        | -0.0222   |
|    value_loss         | 4.82e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 14        |
|    ep_rew_mean        | -9.79e+03 |
| time/                 |           |
|    fps                | 490       |
|    iterations         | 32700     |
|    time_elapsed       | 333       |
|    total_timesteps    | 163500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25.8      |
|    ep_rew_mean        | -7.76e+03 |
| time/                 |           |
|    fps                | 491       |
|    iterations         | 33900     |
|    time_elapsed       | 344       |
|    total_timesteps    | 169500    |
| train/                |           |
|    entropy_loss       | -0.00218  |
|    explained_variance | 0.0682    |
|    learning_rate      | 0.0007    |
|    n_updates          | 33899     |
|    policy_loss        | 0.00284   |
|    value_loss         | 551       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.1      |
|    ep_rew_mean        | -8.34e+03 |
| time/                 |           |
|    fps                | 491       |
|    iterations         | 34000     |
|    time_elapsed       | 345       |
|    total_timesteps    | 170000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.9      |
|    ep_rew_mean        | -8.66e+03 |
| time/                 |           |
|    fps                | 493       |
|    iterations         | 35200     |
|    time_elapsed       | 356       |
|    total_timesteps    | 176000    |
| train/                |           |
|    entropy_loss       | -0.00831  |
|    explained_variance | 0.126     |
|    learning_rate      | 0.0007    |
|    n_updates          | 35199     |
|    policy_loss        | 0.0276    |
|    value_loss         | 1.85e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.2      |
|    ep_rew_mean        | -8.71e+03 |
| time/                 |           |
|    fps                | 493       |
|    iterations         | 35300     |
|    time_elapsed       | 357       |
|    total_timesteps    | 176500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33        |
|    ep_rew_mean        | -4.75e+03 |
| time/                 |           |
|    fps                | 491       |
|    iterations         | 36500     |
|    time_elapsed       | 371       |
|    total_timesteps    | 182500    |
| train/                |           |
|    entropy_loss       | -0.000421 |
|    explained_variance | 0.00534   |
|    learning_rate      | 0.0007    |
|    n_updates          | 36499     |
|    policy_loss        | 0.000855  |
|    value_loss         | 3.7e+03   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 33.5      |
|    ep_rew_mean        | -5.72e+03 |
| time/                 |           |
|    fps                | 491       |
|    iterations         | 36600     |
|    time_elapsed       | 372       |
|    total_timesteps    | 183000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27.9     |
|    ep_rew_mean        | 316      |
| time/                 |          |
|    fps                | 489      |
|    iterations         | 37800    |
|    time_elapsed       | 386      |
|    total_timesteps    | 189000   |
| train/                |          |
|    entropy_loss       | -0.00127 |
|    explained_variance | -7.42    |
|    learning_rate      | 0.0007   |
|    n_updates          | 37799    |
|    policy_loss        | -0.00376 |
|    value_loss         | 192      |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28       |
|    ep_rew_mean        | 812      |
| time/                 |          |
|    fps                | 489      |
|    iterations         | 37900    |
|    time_elapsed       | 387      |
|    total_timesteps    | 189500   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 25       |
|    ep_rew_mean        | 650      |
| time/                 |          |
|    fps                | 482      |
|    iterations         | 39100    |
|    time_elapsed       | 405      |
|    total_timesteps    | 195500   |
| train/                |          |
|    entropy_loss       | -0.00092 |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 39099    |
|    policy_loss        | 5.89e-05 |
|    value_loss         | 1.25     |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 481       |
|    iterations         | 39200     |
|    time_elapsed       | 406       |
|    total_timesteps    | 196000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 474       |
|    iterations         | 40400     |
|    time_elapsed       | 425       |
|    total_timesteps    | 202000    |
| train/                |           |
|    entropy_loss       | -0.000338 |
|    explained_variance | 1         |
|    learning_rate      | 0.0007    |
|    n_updates          | 40399     |
|    policy_loss        | -4.95e-05 |
|    value_loss         | 3.09      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 474       |
|    iterations         | 40500     |
|    time_elapsed       | 427       |
|    total_timesteps    | 202500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 467       |
|    iterations         | 41700     |
|    time_elapsed       | 445       |
|    total_timesteps    | 208500    |
| train/                |           |
|    entropy_loss       | -0.000302 |
|    explained_variance | 0.997     |
|    learning_rate      | 0.0007    |
|    n_updates          | 41699     |
|    policy_loss        | 4.96e-05  |
|    value_loss         | 6.84      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 467       |
|    iterations         | 41800     |
|    time_elapsed       | 447       |
|    total_timesteps    | 209000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 43000     |
|    time_elapsed       | 466       |
|    total_timesteps    | 215000    |
| train/                |           |
|    entropy_loss       | -0.000328 |
|    explained_variance | 1         |
|    learning_rate      | 0.0007    |
|    n_updates          | 42999     |
|    policy_loss        | -3.38e-05 |
|    value_loss         | 0.924     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 43100     |
|    time_elapsed       | 468       |
|    total_timesteps    | 215500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30.9      |
|    ep_rew_mean        | -4.84e+03 |
| time/                 |           |
|    fps                | 459       |
|    iterations         | 44300     |
|    time_elapsed       | 481       |
|    total_timesteps    | 221500    |
| train/                |           |
|    entropy_loss       | -0.0237   |
|    explained_variance | 0.85      |
|    learning_rate      | 0.0007    |
|    n_updates          | 44299     |
|    policy_loss        | 0.511     |
|    value_loss         | 1.67e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31.8      |
|    ep_rew_mean        | -4.38e+03 |
| time/                 |           |
|    fps                | 459       |
|    iterations         | 44400     |
|    time_elapsed       | 482       |
|    total_timesteps    | 222000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.3      |
|    ep_rew_mean        | -8.96e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 45600     |
|    time_elapsed       | 494       |
|    total_timesteps    | 228000    |
| train/                |           |
|    entropy_loss       | -0.034    |
|    explained_variance | 0.138     |
|    learning_rate      | 0.0007    |
|    n_updates          | 45599     |
|    policy_loss        | -26.1     |
|    value_loss         | 3.3e+07   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.4      |
|    ep_rew_mean        | -9.16e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 45700     |
|    time_elapsed       | 495       |
|    total_timesteps    | 228500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.1      |
|    ep_rew_mean        | -5.07e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 46900     |
|    time_elapsed       | 507       |
|    total_timesteps    | 234500    |
| train/                |           |
|    entropy_loss       | -0.392    |
|    explained_variance | 0.851     |
|    learning_rate      | 0.0007    |
|    n_updates          | 46899     |
|    policy_loss        | -1.96     |
|    value_loss         | 636       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.3      |
|    ep_rew_mean        | -4.46e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 47000     |
|    time_elapsed       | 509       |
|    total_timesteps    | 235000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25.8      |
|    ep_rew_mean        | -6.25e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 48200     |
|    time_elapsed       | 522       |
|    total_timesteps    | 241000    |
| train/                |           |
|    entropy_loss       | -0.00115  |
|    explained_variance | 0.964     |
|    learning_rate      | 0.0007    |
|    n_updates          | 48199     |
|    policy_loss        | 0.00209   |
|    value_loss         | 205       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.7      |
|    ep_rew_mean        | -4.92e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 48300     |
|    time_elapsed       | 524       |
|    total_timesteps    | 241500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25.9      |
|    ep_rew_mean        | -5.28e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 49500     |
|    time_elapsed       | 536       |
|    total_timesteps    | 247500    |
| train/                |           |
|    entropy_loss       | -0.0633   |
|    explained_variance | 0.298     |
|    learning_rate      | 0.0007    |
|    n_updates          | 49499     |
|    policy_loss        | 1.77      |
|    value_loss         | 3.71e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25.3      |
|    ep_rew_mean        | -5.42e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 49600     |
|    time_elapsed       | 538       |
|    total_timesteps    | 248000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27       |
|    ep_rew_mean        | -5.3e+03 |
| time/                 |          |
|    fps                | 458      |
|    iterations         | 50800    |
|    time_elapsed       | 553      |
|    total_timesteps    | 254000   |
| train/                |          |
|    entropy_loss       | -0.149   |
|    explained_variance | 0.92     |
|    learning_rate      | 0.0007   |
|    n_updates          | 50799    |
|    policy_loss        | -1.68    |
|    value_loss         | 612      |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.9      |
|    ep_rew_mean        | -5.72e+03 |
| time/                 |           |
|    fps                | 458       |
|    iterations         | 50900     |
|    time_elapsed       | 554       |
|    total_timesteps    | 254500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 38.4      |
|    ep_rew_mean        | -4.76e+03 |
| time/                 |           |
|    fps                | 458       |
|    iterations         | 52100     |
|    time_elapsed       | 567       |
|    total_timesteps    | 260500    |
| train/                |           |
|    entropy_loss       | -0.000388 |
|    explained_variance | 0.0174    |
|    learning_rate      | 0.0007    |
|    n_updates          | 52099     |
|    policy_loss        | 0.00257   |
|    value_loss         | 1.56e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 37.1      |
|    ep_rew_mean        | -5.45e+03 |
| time/                 |           |
|    fps                | 458       |
|    iterations         | 52200     |
|    time_elapsed       | 568       |
|    total_timesteps    | 261000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26        |
|    ep_rew_mean        | -3.05e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 53400     |
|    time_elapsed       | 584       |
|    total_timesteps    | 267000    |
| train/                |           |
|    entropy_loss       | -0.000466 |
|    explained_variance | -2.41     |
|    learning_rate      | 0.0007    |
|    n_updates          | 53399     |
|    policy_loss        | 0.00055   |
|    value_loss         | 143       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.8      |
|    ep_rew_mean        | -2.92e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 53500     |
|    time_elapsed       | 585       |
|    total_timesteps    | 267500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 35.6      |
|    ep_rew_mean        | -6.15e+03 |
| time/                 |           |
|    fps                | 457       |
|    iterations         | 54700     |
|    time_elapsed       | 597       |
|    total_timesteps    | 273500    |
| train/                |           |
|    entropy_loss       | -0.000224 |
|    explained_variance | -0.00345  |
|    learning_rate      | 0.0007    |
|    n_updates          | 54699     |
|    policy_loss        | 0.000312  |
|    value_loss         | 179       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.4      |
|    ep_rew_mean        | -6.45e+03 |
| time/                 |           |
|    fps                | 457       |
|    iterations         | 54800     |
|    time_elapsed       | 598       |
|    total_timesteps    | 274000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31.3      |
|    ep_rew_mean        | -7.21e+03 |
| time/                 |           |
|    fps                | 459       |
|    iterations         | 56000     |
|    time_elapsed       | 609       |
|    total_timesteps    | 280000    |
| train/                |           |
|    entropy_loss       | -0.0628   |
|    explained_variance | 0.854     |
|    learning_rate      | 0.0007    |
|    n_updates          | 55999     |
|    policy_loss        | 8         |
|    value_loss         | 232       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30        |
|    ep_rew_mean        | -6.92e+03 |
| time/                 |           |
|    fps                | 459       |
|    iterations         | 56100     |
|    time_elapsed       | 610       |
|    total_timesteps    | 280500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.4      |
|    ep_rew_mean        | -7.02e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 57300     |
|    time_elapsed       | 621       |
|    total_timesteps    | 286500    |
| train/                |           |
|    entropy_loss       | -4.74e-05 |
|    explained_variance | 0.933     |
|    learning_rate      | 0.0007    |
|    n_updates          | 57299     |
|    policy_loss        | 3.21e-05  |
|    value_loss         | 161       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.9      |
|    ep_rew_mean        | -5.46e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 57400     |
|    time_elapsed       | 623       |
|    total_timesteps    | 287000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28.6     |
|    ep_rew_mean        | -912     |
| time/                 |          |
|    fps                | 460      |
|    iterations         | 58600    |
|    time_elapsed       | 636      |
|    total_timesteps    | 293000   |
| train/                |          |
|    entropy_loss       | -0.01    |
|    explained_variance | 0.891    |
|    learning_rate      | 0.0007   |
|    n_updates          | 58599    |
|    policy_loss        | 0.0744   |
|    value_loss         | 2.9e+03  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30.2      |
|    ep_rew_mean        | -1.61e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 58700     |
|    time_elapsed       | 637       |
|    total_timesteps    | 293500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27.5     |
|    ep_rew_mean        | -1.3e+03 |
| time/                 |          |
|    fps                | 459      |
|    iterations         | 59900    |
|    time_elapsed       | 651      |
|    total_timesteps    | 299500   |
| train/                |          |
|    entropy_loss       | -0.0508  |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 59899    |
|    policy_loss        | -0.0923  |
|    value_loss         | 83.4     |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.1      |
|    ep_rew_mean        | -3.41e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 60000     |
|    time_elapsed       | 651       |
|    total_timesteps    | 300000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 32.8      |
|    ep_rew_mean        | -4.35e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 61200     |
|    time_elapsed       | 664       |
|    total_timesteps    | 306000    |
| train/                |           |
|    entropy_loss       | -0.251    |
|    explained_variance | 0.961     |
|    learning_rate      | 0.0007    |
|    n_updates          | 61199     |
|    policy_loss        | -0.553    |
|    value_loss         | 578       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.1      |
|    ep_rew_mean        | -4.45e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 61300     |
|    time_elapsed       | 665       |
|    total_timesteps    | 306500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34        |
|    ep_rew_mean        | 1.19e+03  |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 62500     |
|    time_elapsed       | 678       |
|    total_timesteps    | 312500    |
| train/                |           |
|    entropy_loss       | -0.00295  |
|    explained_variance | 0.998     |
|    learning_rate      | 0.0007    |
|    n_updates          | 62499     |
|    policy_loss        | -0.000857 |
|    value_loss         | 31.4      |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 33.4     |
|    ep_rew_mean        | 457      |
| time/                 |          |
|    fps                | 460      |
|    iterations         | 62600    |
|    time_elapsed       | 680      |
|    total_timesteps    | 313000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 21.9     |
|    ep_rew_mean        | -9.5e+03 |
| time/                 |          |
|    fps                | 460      |
|    iterations         | 63800    |
|    time_elapsed       | 692      |
|    total_timesteps    | 319000   |
| train/                |          |
|    entropy_loss       | -0.00067 |
|    explained_variance | 0.11     |
|    learning_rate      | 0.0007   |
|    n_updates          | 63799    |
|    policy_loss        | -0.00154 |
|    value_loss         | 4.51e+07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22        |
|    ep_rew_mean        | -9.49e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 63900     |
|    time_elapsed       | 693       |
|    total_timesteps    | 319500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22        |
|    ep_rew_mean        | -9.49e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 65100     |
|    time_elapsed       | 704       |
|    total_timesteps    | 325500    |
| train/                |           |
|    entropy_loss       | -8.13e-06 |
|    explained_variance | 0.995     |
|    learning_rate      | 0.0007    |
|    n_updates          | 65099     |
|    policy_loss        | -8.82e-07 |
|    value_loss         | 23.8      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22        |
|    ep_rew_mean        | -9.49e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 65200     |
|    time_elapsed       | 705       |
|    total_timesteps    | 326000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 21.9      |
|    ep_rew_mean        | -9.5e+03  |
| time/                 |           |
|    fps                | 463       |
|    iterations         | 66400     |
|    time_elapsed       | 716       |
|    total_timesteps    | 332000    |
| train/                |           |
|    entropy_loss       | -0.000206 |
|    explained_variance | 0.997     |
|    learning_rate      | 0.0007    |
|    n_updates          | 66399     |
|    policy_loss        | 5.74e-05  |
|    value_loss         | 2.95      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 21.6      |
|    ep_rew_mean        | -9.51e+03 |
| time/                 |           |
|    fps                | 463       |
|    iterations         | 66500     |
|    time_elapsed       | 717       |
|    total_timesteps    | 332500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 21.5      |
|    ep_rew_mean        | -9.51e+03 |
| time/                 |           |
|    fps                | 464       |
|    iterations         | 67700     |
|    time_elapsed       | 728       |
|    total_timesteps    | 338500    |
| train/                |           |
|    entropy_loss       | -0.18     |
|    explained_variance | 0.739     |
|    learning_rate      | 0.0007    |
|    n_updates          | 67699     |
|    policy_loss        | -11.5     |
|    value_loss         | 604       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 21.1      |
|    ep_rew_mean        | -9.53e+03 |
| time/                 |           |
|    fps                | 465       |
|    iterations         | 67800     |
|    time_elapsed       | 728       |
|    total_timesteps    | 339000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 19.4      |
|    ep_rew_mean        | -9.6e+03  |
| time/                 |           |
|    fps                | 465       |
|    iterations         | 69000     |
|    time_elapsed       | 740       |
|    total_timesteps    | 345000    |
| train/                |           |
|    entropy_loss       | -0.447    |
|    explained_variance | 0.0553    |
|    learning_rate      | 0.0007    |
|    n_updates          | 68999     |
|    policy_loss        | -3.72e+03 |
|    value_loss         | 4.61e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 19.5      |
|    ep_rew_mean        | -9.59e+03 |
| time/                 |           |
|    fps                | 465       |
|    iterations         | 69100     |
|    time_elapsed       | 741       |
|    total_timesteps    | 345500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 17.4      |
|    ep_rew_mean        | -9.67e+03 |
| time/                 |           |
|    fps                | 467       |
|    iterations         | 70300     |
|    time_elapsed       | 751       |
|    total_timesteps    | 351500    |
| train/                |           |
|    entropy_loss       | -0.104    |
|    explained_variance | 0.993     |
|    learning_rate      | 0.0007    |
|    n_updates          | 70299     |
|    policy_loss        | 0.241     |
|    value_loss         | 15.9      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 18.5      |
|    ep_rew_mean        | -9.61e+03 |
| time/                 |           |
|    fps                | 466       |
|    iterations         | 70400     |
|    time_elapsed       | 753       |
|    total_timesteps    | 352000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.1      |
|    ep_rew_mean        | -9.01e+03 |
| time/                 |           |
|    fps                | 464       |
|    iterations         | 71600     |
|    time_elapsed       | 770       |
|    total_timesteps    | 358000    |
| train/                |           |
|    entropy_loss       | -0.0117   |
|    explained_variance | 0.958     |
|    learning_rate      | 0.0007    |
|    n_updates          | 71599     |
|    policy_loss        | 0.0751    |
|    value_loss         | 2.13e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.4      |
|    ep_rew_mean        | -9.01e+03 |
| time/                 |           |
|    fps                | 465       |
|    iterations         | 71700     |
|    time_elapsed       | 770       |
|    total_timesteps    | 358500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 20        |
|    ep_rew_mean        | -9.52e+03 |
| time/                 |           |
|    fps                | 466       |
|    iterations         | 72900     |
|    time_elapsed       | 782       |
|    total_timesteps    | 364500    |
| train/                |           |
|    entropy_loss       | -0.0974   |
|    explained_variance | 0.591     |
|    learning_rate      | 0.0007    |
|    n_updates          | 72899     |
|    policy_loss        | -358      |
|    value_loss         | 8.1e+07   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 19.3      |
|    ep_rew_mean        | -9.55e+03 |
| time/                 |           |
|    fps                | 466       |
|    iterations         | 73000     |
|    time_elapsed       | 783       |
|    total_timesteps    | 365000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.2      |
|    ep_rew_mean        | -6.68e+03 |
| time/                 |           |
|    fps                | 467       |
|    iterations         | 74200     |
|    time_elapsed       | 793       |
|    total_timesteps    | 371000    |
| train/                |           |
|    entropy_loss       | -0.0557   |
|    explained_variance | 0.713     |
|    learning_rate      | 0.0007    |
|    n_updates          | 74199     |
|    policy_loss        | 0.322     |
|    value_loss         | 2.32e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.9      |
|    ep_rew_mean        | -5.56e+03 |
| time/                 |           |
|    fps                | 467       |
|    iterations         | 74300     |
|    time_elapsed       | 794       |
|    total_timesteps    | 371500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 26.9     |
|    ep_rew_mean        | -7.4e+03 |
| time/                 |          |
|    fps                | 467      |
|    iterations         | 75500    |
|    time_elapsed       | 808      |
|    total_timesteps    | 377500   |
| train/                |          |
|    entropy_loss       | -0.221   |
|    explained_variance | 0.972    |
|    learning_rate      | 0.0007   |
|    n_updates          | 75499    |
|    policy_loss        | -0.0863  |
|    value_loss         | 563      |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.1      |
|    ep_rew_mean        | -7.94e+03 |
| time/                 |           |
|    fps                | 467       |
|    iterations         | 75600     |
|    time_elapsed       | 809       |
|    total_timesteps    | 378000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 23.3      |
|    ep_rew_mean        | -4.42e+03 |
| time/                 |           |
|    fps                | 464       |
|    iterations         | 76800     |
|    time_elapsed       | 826       |
|    total_timesteps    | 384000    |
| train/                |           |
|    entropy_loss       | -0.268    |
|    explained_variance | -7.83     |
|    learning_rate      | 0.0007    |
|    n_updates          | 76799     |
|    policy_loss        | 162       |
|    value_loss         | 4.26e+05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 23        |
|    ep_rew_mean        | -3.44e+03 |
| time/                 |           |
|    fps                | 463       |
|    iterations         | 76900     |
|    time_elapsed       | 829       |
|    total_timesteps    | 384500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 24.4      |
|    ep_rew_mean        | -4.47e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 78100     |
|    time_elapsed       | 846       |
|    total_timesteps    | 390500    |
| train/                |           |
|    entropy_loss       | -0.378    |
|    explained_variance | 0.848     |
|    learning_rate      | 0.0007    |
|    n_updates          | 78099     |
|    policy_loss        | -26.1     |
|    value_loss         | 1.19e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 24.7      |
|    ep_rew_mean        | -3.85e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 78200     |
|    time_elapsed       | 847       |
|    total_timesteps    | 391000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.2      |
|    ep_rew_mean        | -5.1e+03  |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 79400     |
|    time_elapsed       | 862       |
|    total_timesteps    | 397000    |
| train/                |           |
|    entropy_loss       | -0.00363  |
|    explained_variance | 0.658     |
|    learning_rate      | 0.0007    |
|    n_updates          | 79399     |
|    policy_loss        | -0.000511 |
|    value_loss         | 13.2      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.4      |
|    ep_rew_mean        | -4.89e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 79500     |
|    time_elapsed       | 863       |
|    total_timesteps    | 397500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.1      |
|    ep_rew_mean        | -6.19e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 80700     |
|    time_elapsed       | 875       |
|    total_timesteps    | 403500    |
| train/                |           |
|    entropy_loss       | -0.00617  |
|    explained_variance | 0.827     |
|    learning_rate      | 0.0007    |
|    n_updates          | 80699     |
|    policy_loss        | 0.00222   |
|    value_loss         | 67.7      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30.1      |
|    ep_rew_mean        | -5.44e+03 |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 80800     |
|    time_elapsed       | 876       |
|    total_timesteps    | 404000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.5      |
|    ep_rew_mean        | -5.97e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 82000     |
|    time_elapsed       | 887       |
|    total_timesteps    | 410000    |
| train/                |           |
|    entropy_loss       | -0.0153   |
|    explained_variance | 0.996     |
|    learning_rate      | 0.0007    |
|    n_updates          | 81999     |
|    policy_loss        | 0.00844   |
|    value_loss         | 21.3      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.7      |
|    ep_rew_mean        | -6.32e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 82100     |
|    time_elapsed       | 888       |
|    total_timesteps    | 410500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31.6      |
|    ep_rew_mean        | -3.05e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 83300     |
|    time_elapsed       | 901       |
|    total_timesteps    | 416500    |
| train/                |           |
|    entropy_loss       | -0.00818  |
|    explained_variance | 0.872     |
|    learning_rate      | 0.0007    |
|    n_updates          | 83299     |
|    policy_loss        | 0.00223   |
|    value_loss         | 43        |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 32.1      |
|    ep_rew_mean        | -2.61e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 83400     |
|    time_elapsed       | 902       |
|    total_timesteps    | 417000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30.8      |
|    ep_rew_mean        | -3.08e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 84600     |
|    time_elapsed       | 914       |
|    total_timesteps    | 423000    |
| train/                |           |
|    entropy_loss       | -0.0101   |
|    explained_variance | 0.995     |
|    learning_rate      | 0.0007    |
|    n_updates          | 84599     |
|    policy_loss        | 0.00249   |
|    value_loss         | 4.73      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31.1      |
|    ep_rew_mean        | -2.76e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 84700     |
|    time_elapsed       | 915       |
|    total_timesteps    | 423500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 33.9     |
|    ep_rew_mean        | 95.6     |
| time/                 |          |
|    fps                | 462      |
|    iterations         | 85900    |
|    time_elapsed       | 928      |
|    total_timesteps    | 429500   |
| train/                |          |
|    entropy_loss       | -0.0233  |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 85899    |
|    policy_loss        | -0.00976 |
|    value_loss         | 5.66     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 33.5     |
|    ep_rew_mean        | 62.5     |
| time/                 |          |
|    fps                | 462      |
|    iterations         | 86000    |
|    time_elapsed       | 929      |
|    total_timesteps    | 430000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.4      |
|    ep_rew_mean        | -3.23e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 87200     |
|    time_elapsed       | 942       |
|    total_timesteps    | 436000    |
| train/                |           |
|    entropy_loss       | -0.00282  |
|    explained_variance | 0.894     |
|    learning_rate      | 0.0007    |
|    n_updates          | 87199     |
|    policy_loss        | 0.00154   |
|    value_loss         | 56.9      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.5      |
|    ep_rew_mean        | -3.33e+03 |
| time/                 |           |
|    fps                | 462       |
|    iterations         | 87300     |
|    time_elapsed       | 943       |
|    total_timesteps    | 436500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40.3      |
|    ep_rew_mean        | -6.99e+03 |
| time/                 |           |
|    fps                | 463       |
|    iterations         | 88500     |
|    time_elapsed       | 955       |
|    total_timesteps    | 442500    |
| train/                |           |
|    entropy_loss       | -0.00192  |
|    explained_variance | 0.0395    |
|    learning_rate      | 0.0007    |
|    n_updates          | 88499     |
|    policy_loss        | 0.0195    |
|    value_loss         | 5.22e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 41.3      |
|    ep_rew_mean        | -7.23e+03 |
| time/                 |           |
|    fps                | 463       |
|    iterations         | 88600     |
|    time_elapsed       | 956       |
|    total_timesteps    | 443000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.9      |
|    ep_rew_mean        | -7.13e+03 |
| time/                 |           |
|    fps                | 464       |
|    iterations         | 89800     |
|    time_elapsed       | 967       |
|    total_timesteps    | 449000    |
| train/                |           |
|    entropy_loss       | -0.0552   |
|    explained_variance | 0.696     |
|    learning_rate      | 0.0007    |
|    n_updates          | 89799     |
|    policy_loss        | 2.33      |
|    value_loss         | 1.6e+04   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.1      |
|    ep_rew_mean        | -7.38e+03 |
| time/                 |           |
|    fps                | 464       |
|    iterations         | 89900     |
|    time_elapsed       | 968       |
|    total_timesteps    | 449500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.7      |
|    ep_rew_mean        | -2.15e+03 |
| time/                 |           |
|    fps                | 463       |
|    iterations         | 91100     |
|    time_elapsed       | 982       |
|    total_timesteps    | 455500    |
| train/                |           |
|    entropy_loss       | -0.00481  |
|    explained_variance | 0.993     |
|    learning_rate      | 0.0007    |
|    n_updates          | 91099     |
|    policy_loss        | 0.0251    |
|    value_loss         | 1.47e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 30.3      |
|    ep_rew_mean        | -2.41e+03 |
| time/                 |           |
|    fps                | 463       |
|    iterations         | 91200     |
|    time_elapsed       | 983       |
|    total_timesteps    | 456000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 18.3     |
|    ep_rew_mean        | -543     |
| time/                 |          |
|    fps                | 463      |
|    iterations         | 92400    |
|    time_elapsed       | 997      |
|    total_timesteps    | 462000   |
| train/                |          |
|    entropy_loss       | -0.0109  |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 92399    |
|    policy_loss        | 0.00901  |
|    value_loss         | 30.9     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 18.1     |
|    ep_rew_mean        | -553     |
| time/                 |          |
|    fps                | 462      |
|    iterations         | 92500    |
|    time_elapsed       | 999      |
|    total_timesteps    | 462500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 23.6      |
|    ep_rew_mean        | -2.51e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 93700     |
|    time_elapsed       | 1015      |
|    total_timesteps    | 468500    |
| train/                |           |
|    entropy_loss       | -0.0402   |
|    explained_variance | 0.0565    |
|    learning_rate      | 0.0007    |
|    n_updates          | 93699     |
|    policy_loss        | -86.5     |
|    value_loss         | 7.97e+07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 23.5      |
|    ep_rew_mean        | -2.22e+03 |
| time/                 |           |
|    fps                | 461       |
|    iterations         | 93800     |
|    time_elapsed       | 1016      |
|    total_timesteps    | 469000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 25       |
|    ep_rew_mean        | 650      |
| time/                 |          |
|    fps                | 460      |
|    iterations         | 95000    |
|    time_elapsed       | 1031     |
|    total_timesteps    | 475000   |
| train/                |          |
|    entropy_loss       | -0.00265 |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 94999    |
|    policy_loss        | 0.00052  |
|    value_loss         | 3.83     |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25        |
|    ep_rew_mean        | 650       |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 95100     |
|    time_elapsed       | 1032      |
|    total_timesteps    | 475500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 19.4      |
|    ep_rew_mean        | -1.5e+03  |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 96300     |
|    time_elapsed       | 1044      |
|    total_timesteps    | 481500    |
| train/                |           |
|    entropy_loss       | -0.000197 |
|    explained_variance | 1         |
|    learning_rate      | 0.0007    |
|    n_updates          | 96299     |
|    policy_loss        | 9.13e-05  |
|    value_loss         | 36.5      |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 18.5     |
|    ep_rew_mean        | -535     |
| time/                 |          |
|    fps                | 460      |
|    iterations         | 96400    |
|    time_elapsed       | 1045     |
|    total_timesteps    | 482000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 23.4      |
|    ep_rew_mean        | -25.5     |
| time/                 |           |
|    fps                | 460       |
|    iterations         | 97600     |
|    time_elapsed       | 1059      |
|    total_timesteps    | 488000    |
| train/                |           |
|    entropy_loss       | -0.00196  |
|    explained_variance | 0.996     |
|    learning_rate      | 0.0007    |
|    n_updates          | 97599     |
|    policy_loss        | -0.000718 |
|    value_loss         | 12.7      |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 23.4     |
|    ep_rew_mean        | -22.9    |
| time/                 |          |
|    fps                | 460      |
|    iterations         | 97700    |
|    time_elapsed       | 1060     |
|    total_timesteps    | 488500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 24       |
|    ep_rew_mean        | 599      |
| time/                 |          |
|    fps                | 458      |
|    iterations         | 98900    |
|    time_elapsed       | 1078     |
|    total_timesteps    | 494500   |
| train/                |          |
|    entropy_loss       | -0.00336 |
|    explained_variance | 0.993    |
|    learning_rate      | 0.0007   |
|    n_updates          | 98899    |
|    policy_loss        | -0.0017  |
|    value_loss         | 9.58     |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 24        |
|    ep_rew_mean        | 599       |
| time/                 |           |
|    fps                | 458       |
|    iterations         | 99000     |
|    time_elapsed       | 1079      |
|    total_timesteps    | 495000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22.2      |
|    ep_rew_mean        | -5.65e+03 |
| time/                 |           |
|    fps                | 458       |
|    iterations         | 100200    |
|    time_elapsed       | 1093      |
|    total_timesteps    | 501000    |
| train/                |           |
|    entropy_loss       | -0.00597  |
|    explained_variance | 0.742     |
|    learning_rate      | 0.0007    |
|    n_updates          | 100199    |
|    policy_loss        | -0.0258   |
|    value_loss         | 504       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 19.9      |
|    ep_rew_mean        | -3.17e+03 |
| time/                 |           |
|    fps                | 458       |
|    iterations         | 100300    |
|    time_elapsed       | 1094      |
|    total_timesteps    | 501500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 25.1     |
|    ep_rew_mean        | -4.7e+03 |
| time/                 |          |
|    fps                | 457      |
|    iterations         | 101500   |
|    time_elapsed       | 1109     |
|    total_timesteps    | 507500   |
| train/                |          |
|    entropy_loss       | -0.227   |
|    explained_variance | 0.37     |
|    learning_rate      | 0.0007   |
|    n_updates          | 101499   |
|    policy_loss        | -96.6    |
|    value_loss         | 2.74e+04 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 25       |
|    ep_rew_mean        | -5.3e+03 |
| time/                 |          |
|    fps                | 457      |
|    iterations         | 101600   |
|    time_elapsed       | 1111     |
|    total_timesteps    | 508000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27.2     |
|    ep_rew_mean        | 369      |
| time/                 |          |
|    fps                | 456      |
|    iterations         | 102800   |
|    time_elapsed       | 1125     |
|    total_timesteps    | 514000   |
| train/                |          |
|    entropy_loss       | -0.0111  |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 102799   |
|    policy_loss        | -0.00395 |
|    value_loss         | 18.2     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27       |
|    ep_rew_mean        | 556      |
| time/                 |          |
|    fps                | 456      |
|    iterations         | 102900   |
|    time_elapsed       | 1126     |
|    total_timesteps    | 514500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 104100    |
|    time_elapsed       | 1141      |
|    total_timesteps    | 520500    |
| train/                |           |
|    entropy_loss       | -0.000943 |
|    explained_variance | 0.992     |
|    learning_rate      | 0.0007    |
|    n_updates          | 104099    |
|    policy_loss        | -0.00016  |
|    value_loss         | 1.74      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 104200    |
|    time_elapsed       | 1143      |
|    total_timesteps    | 521000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 24.5      |
|    ep_rew_mean        | -8.75e+03 |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 105400    |
|    time_elapsed       | 1155      |
|    total_timesteps    | 527000    |
| train/                |           |
|    entropy_loss       | -0.128    |
|    explained_variance | -1.11     |
|    learning_rate      | 0.0007    |
|    n_updates          | 105399    |
|    policy_loss        | 18.6      |
|    value_loss         | 6.45e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25.1      |
|    ep_rew_mean        | -7.22e+03 |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 105500    |
|    time_elapsed       | 1157      |
|    total_timesteps    | 527500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22.2      |
|    ep_rew_mean        | -9.24e+03 |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 106700    |
|    time_elapsed       | 1170      |
|    total_timesteps    | 533500    |
| train/                |           |
|    entropy_loss       | -0.000293 |
|    explained_variance | 0.963     |
|    learning_rate      | 0.0007    |
|    n_updates          | 106699    |
|    policy_loss        | 0.000318  |
|    value_loss         | 37.6      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 22.9      |
|    ep_rew_mean        | -9.01e+03 |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 106800    |
|    time_elapsed       | 1171      |
|    total_timesteps    | 534000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 39.4      |
|    ep_rew_mean        | -6.85e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 108000    |
|    time_elapsed       | 1182      |
|    total_timesteps    | 540000    |
| train/                |           |
|    entropy_loss       | -0.000121 |
|    explained_variance | -88.4     |
|    learning_rate      | 0.0007    |
|    n_updates          | 107999    |
|    policy_loss        | -0.00108  |
|    value_loss         | 7.48e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40.1      |
|    ep_rew_mean        | -6.22e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 108100    |
|    time_elapsed       | 1183      |
|    total_timesteps    | 540500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 41.9      |
|    ep_rew_mean        | -6.89e+03 |
| time/                 |           |
|    fps                | 457       |
|    iterations         | 109300    |
|    time_elapsed       | 1195      |
|    total_timesteps    | 546500    |
| train/                |           |
|    entropy_loss       | -0.000564 |
|    explained_variance | 0.529     |
|    learning_rate      | 0.0007    |
|    n_updates          | 109299    |
|    policy_loss        | 0.00366   |
|    value_loss         | 7.16e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | -5.63e+03 |
| time/                 |           |
|    fps                | 457       |
|    iterations         | 109400    |
|    time_elapsed       | 1196      |
|    total_timesteps    | 547000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 37.8      |
|    ep_rew_mean        | -2.68e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 110600    |
|    time_elapsed       | 1210      |
|    total_timesteps    | 553000    |
| train/                |           |
|    entropy_loss       | -0.0139   |
|    explained_variance | 0.926     |
|    learning_rate      | 0.0007    |
|    n_updates          | 110599    |
|    policy_loss        | 0.158     |
|    value_loss         | 3.3e+03   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 38.6      |
|    ep_rew_mean        | -2.42e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 110700    |
|    time_elapsed       | 1211      |
|    total_timesteps    | 553500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 42.1      |
|    ep_rew_mean        | -4.27e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 111900    |
|    time_elapsed       | 1225      |
|    total_timesteps    | 559500    |
| train/                |           |
|    entropy_loss       | -0.000463 |
|    explained_variance | 0.307     |
|    learning_rate      | 0.0007    |
|    n_updates          | 111899    |
|    policy_loss        | -0.000456 |
|    value_loss         | 6.83e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 42.1      |
|    ep_rew_mean        | -4.58e+03 |
| time/                 |           |
|    fps                | 456       |
|    iterations         | 112000    |
|    time_elapsed       | 1226      |
|    total_timesteps    | 560000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 31.4      |
|    ep_rew_mean        | -2.64e+03 |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 113200    |
|    time_elapsed       | 1241      |
|    total_timesteps    | 566000    |
| train/                |           |
|    entropy_loss       | -2.71e-05 |
|    explained_variance | -0.873    |
|    learning_rate      | 0.0007    |
|    n_updates          | 113199    |
|    policy_loss        | -7.49e-05 |
|    value_loss         | 928       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.7      |
|    ep_rew_mean        | -1.24e+03 |
| time/                 |           |
|    fps                | 455       |
|    iterations         | 113300    |
|    time_elapsed       | 1243      |
|    total_timesteps    | 566500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 20.7      |
|    ep_rew_mean        | -5.49e+03 |
| time/                 |           |
|    fps                | 454       |
|    iterations         | 114500    |
|    time_elapsed       | 1260      |
|    total_timesteps    | 572500    |
| train/                |           |
|    entropy_loss       | -0.00199  |
|    explained_variance | 0.815     |
|    learning_rate      | 0.0007    |
|    n_updates          | 114499    |
|    policy_loss        | 0.000704  |
|    value_loss         | 100       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 16.7      |
|    ep_rew_mean        | -9.07e+03 |
| time/                 |           |
|    fps                | 454       |
|    iterations         | 114600    |
|    time_elapsed       | 1262      |
|    total_timesteps    | 573000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 452       |
|    iterations         | 115800    |
|    time_elapsed       | 1279      |
|    total_timesteps    | 579000    |
| train/                |           |
|    entropy_loss       | -0.000323 |
|    explained_variance | 0.933     |
|    learning_rate      | 0.0007    |
|    n_updates          | 115799    |
|    policy_loss        | 0.000263  |
|    value_loss         | 90        |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27       |
|    ep_rew_mean        | 756      |
| time/                 |          |
|    fps                | 452      |
|    iterations         | 115900   |
|    time_elapsed       | 1281     |
|    total_timesteps    | 579500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 450       |
|    iterations         | 117100    |
|    time_elapsed       | 1300      |
|    total_timesteps    | 585500    |
| train/                |           |
|    entropy_loss       | -0.000172 |
|    explained_variance | 0.245     |
|    learning_rate      | 0.0007    |
|    n_updates          | 117099    |
|    policy_loss        | 5.2e-05   |
|    value_loss         | 3.65      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 450       |
|    iterations         | 117200    |
|    time_elapsed       | 1302      |
|    total_timesteps    | 586000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 448       |
|    iterations         | 118400    |
|    time_elapsed       | 1320      |
|    total_timesteps    | 592000    |
| train/                |           |
|    entropy_loss       | -0.00134  |
|    explained_variance | 1         |
|    learning_rate      | 0.0007    |
|    n_updates          | 118399    |
|    policy_loss        | -0.000353 |
|    value_loss         | 12.4      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 448       |
|    iterations         | 118500    |
|    time_elapsed       | 1322      |
|    total_timesteps    | 592500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 446       |
|    iterations         | 119700    |
|    time_elapsed       | 1341      |
|    total_timesteps    | 598500    |
| train/                |           |
|    entropy_loss       | -0.000132 |
|    explained_variance | 0.991     |
|    learning_rate      | 0.0007    |
|    n_updates          | 119699    |
|    policy_loss        | -5.95e-05 |
|    value_loss         | 21.7      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 445       |
|    iterations         | 119800    |
|    time_elapsed       | 1343      |
|    total_timesteps    | 599000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 444       |
|    iterations         | 121000    |
|    time_elapsed       | 1362      |
|    total_timesteps    | 605000    |
| train/                |           |
|    entropy_loss       | -0.00031  |
|    explained_variance | 0.997     |
|    learning_rate      | 0.0007    |
|    n_updates          | 120999    |
|    policy_loss        | -3.94e-05 |
|    value_loss         | 3.47      |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27       |
|    ep_rew_mean        | 756      |
| time/                 |          |
|    fps                | 443      |
|    iterations         | 121100   |
|    time_elapsed       | 1364     |
|    total_timesteps    | 605500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 441       |
|    iterations         | 122300    |
|    time_elapsed       | 1384      |
|    total_timesteps    | 611500    |
| train/                |           |
|    entropy_loss       | -0.000179 |
|    explained_variance | 0.985     |
|    learning_rate      | 0.0007    |
|    n_updates          | 122299    |
|    policy_loss        | 9.36e-06  |
|    value_loss         | 0.448     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 441       |
|    iterations         | 122400    |
|    time_elapsed       | 1386      |
|    total_timesteps    | 612000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 440       |
|    iterations         | 123600    |
|    time_elapsed       | 1403      |
|    total_timesteps    | 618000    |
| train/                |           |
|    entropy_loss       | -0.000454 |
|    explained_variance | 1         |
|    learning_rate      | 0.0007    |
|    n_updates          | 123599    |
|    policy_loss        | 8.03e-05  |
|    value_loss         | 2.87      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27        |
|    ep_rew_mean        | 756       |
| time/                 |           |
|    fps                | 440       |
|    iterations         | 123700    |
|    time_elapsed       | 1404      |
|    total_timesteps    | 618500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 35.5      |
|    ep_rew_mean        | -7.07e+03 |
| time/                 |           |
|    fps                | 439       |
|    iterations         | 124900    |
|    time_elapsed       | 1420      |
|    total_timesteps    | 624500    |
| train/                |           |
|    entropy_loss       | -0.00958  |
|    explained_variance | 0.119     |
|    learning_rate      | 0.0007    |
|    n_updates          | 124899    |
|    policy_loss        | 0.0745    |
|    value_loss         | 3.6e+03   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 35.2      |
|    ep_rew_mean        | -8.09e+03 |
| time/                 |           |
|    fps                | 439       |
|    iterations         | 125000    |
|    time_elapsed       | 1421      |
|    total_timesteps    | 625000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 39.4      |
|    ep_rew_mean        | -6.57e+03 |
| time/                 |           |
|    fps                | 439       |
|    iterations         | 126200    |
|    time_elapsed       | 1436      |
|    total_timesteps    | 631000    |
| train/                |           |
|    entropy_loss       | -0.0246   |
|    explained_variance | -3.97     |
|    learning_rate      | 0.0007    |
|    n_updates          | 126199    |
|    policy_loss        | 1.14      |
|    value_loss         | 6.53e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 39        |
|    ep_rew_mean        | -6e+03    |
| time/                 |           |
|    fps                | 439       |
|    iterations         | 126300    |
|    time_elapsed       | 1437      |
|    total_timesteps    | 631500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 38.1     |
|    ep_rew_mean        | -8.2e+03 |
| time/                 |          |
|    fps                | 438      |
|    iterations         | 127500   |
|    time_elapsed       | 1453     |
|    total_timesteps    | 637500   |
| train/                |          |
|    entropy_loss       | -0.0698  |
|    explained_variance | 0.809    |
|    learning_rate      | 0.0007   |
|    n_updates          | 127499   |
|    policy_loss        | 66.2     |
|    value_loss         | 7.97e+03 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 36.7      |
|    ep_rew_mean        | -8.59e+03 |
| time/                 |           |
|    fps                | 438       |
|    iterations         | 127600    |
|    time_elapsed       | 1454      |
|    total_timesteps    | 638000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.1      |
|    ep_rew_mean        | -2.66e+03 |
| time/                 |           |
|    fps                | 437       |
|    iterations         | 128800    |
|    time_elapsed       | 1471      |
|    total_timesteps    | 644000    |
| train/                |           |
|    entropy_loss       | -0.000723 |
|    explained_variance | -0.561    |
|    learning_rate      | 0.0007    |
|    n_updates          | 128799    |
|    policy_loss        | 0.000717  |
|    value_loss         | 4.8e+03   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.8      |
|    ep_rew_mean        | -2.45e+03 |
| time/                 |           |
|    fps                | 437       |
|    iterations         | 128900    |
|    time_elapsed       | 1472      |
|    total_timesteps    | 644500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.6      |
|    ep_rew_mean        | -4.81e+03 |
| time/                 |           |
|    fps                | 436       |
|    iterations         | 130100    |
|    time_elapsed       | 1489      |
|    total_timesteps    | 650500    |
| train/                |           |
|    entropy_loss       | -0.173    |
|    explained_variance | -1.17     |
|    learning_rate      | 0.0007    |
|    n_updates          | 130099    |
|    policy_loss        | 12.8      |
|    value_loss         | 4.89e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 26.3      |
|    ep_rew_mean        | -6.04e+03 |
| time/                 |           |
|    fps                | 436       |
|    iterations         | 130200    |
|    time_elapsed       | 1490      |
|    total_timesteps    | 651000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 36.5      |
|    ep_rew_mean        | -5.25e+03 |
| time/                 |           |
|    fps                | 435       |
|    iterations         | 131400    |
|    time_elapsed       | 1507      |
|    total_timesteps    | 657000    |
| train/                |           |
|    entropy_loss       | -0.00163  |
|    explained_variance | 0.00515   |
|    learning_rate      | 0.0007    |
|    n_updates          | 131399    |
|    policy_loss        | 0.00728   |
|    value_loss         | 1.42e+04  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 36.4      |
|    ep_rew_mean        | -4.16e+03 |
| time/                 |           |
|    fps                | 435       |
|    iterations         | 131500    |
|    time_elapsed       | 1509      |
|    total_timesteps    | 657500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40.1      |
|    ep_rew_mean        | 1.35e+03  |
| time/                 |           |
|    fps                | 433       |
|    iterations         | 132700    |
|    time_elapsed       | 1528      |
|    total_timesteps    | 663500    |
| train/                |           |
|    entropy_loss       | -0.00149  |
|    explained_variance | 0.997     |
|    learning_rate      | 0.0007    |
|    n_updates          | 132699    |
|    policy_loss        | -7.97e-06 |
|    value_loss         | 25.4      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 433       |
|    iterations         | 132800    |
|    time_elapsed       | 1530      |
|    total_timesteps    | 664000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 432       |
|    iterations         | 134000    |
|    time_elapsed       | 1550      |
|    total_timesteps    | 670000    |
| train/                |           |
|    entropy_loss       | -4.98e-05 |
|    explained_variance | 0.996     |
|    learning_rate      | 0.0007    |
|    n_updates          | 133999    |
|    policy_loss        | -1.07e-05 |
|    value_loss         | 10.4      |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 40       |
|    ep_rew_mean        | 1.64e+03 |
| time/                 |          |
|    fps                | 431      |
|    iterations         | 134100   |
|    time_elapsed       | 1552     |
|    total_timesteps    | 670500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 40       |
|    ep_rew_mean        | 1.64e+03 |
| time/                 |          |
|    fps                | 428      |
|    iterations         | 135300   |
|    time_elapsed       | 1578     |
|    total_timesteps    | 676500   |
| train/                |          |
|    entropy_loss       | -0.00152 |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 135299   |
|    policy_loss        | 0.00227  |
|    value_loss         | 170      |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 428       |
|    iterations         | 135400    |
|    time_elapsed       | 1580      |
|    total_timesteps    | 677000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 39.8      |
|    ep_rew_mean        | 1.43e+03  |
| time/                 |           |
|    fps                | 426       |
|    iterations         | 136600    |
|    time_elapsed       | 1602      |
|    total_timesteps    | 683000    |
| train/                |           |
|    entropy_loss       | -0.000743 |
|    explained_variance | 0.999     |
|    learning_rate      | 0.0007    |
|    n_updates          | 136599    |
|    policy_loss        | -8.37e-05 |
|    value_loss         | 14.1      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 39.8      |
|    ep_rew_mean        | 1.43e+03  |
| time/                 |           |
|    fps                | 425       |
|    iterations         | 136700    |
|    time_elapsed       | 1604      |
|    total_timesteps    | 683500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 424       |
|    iterations         | 137900    |
|    time_elapsed       | 1624      |
|    total_timesteps    | 689500    |
| train/                |           |
|    entropy_loss       | -5.52e-05 |
|    explained_variance | 0.99      |
|    learning_rate      | 0.0007    |
|    n_updates          | 137899    |
|    policy_loss        | -5.54e-06 |
|    value_loss         | 2.68      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 424       |
|    iterations         | 138000    |
|    time_elapsed       | 1625      |
|    total_timesteps    | 690000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 40       |
|    ep_rew_mean        | 1.64e+03 |
| time/                 |          |
|    fps                | 423      |
|    iterations         | 139200   |
|    time_elapsed       | 1642     |
|    total_timesteps    | 696000   |
| train/                |          |
|    entropy_loss       | -0.0008  |
|    explained_variance | 0.996    |
|    learning_rate      | 0.0007   |
|    n_updates          | 139199   |
|    policy_loss        | 0.00103  |
|    value_loss         | 119      |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 423       |
|    iterations         | 139300    |
|    time_elapsed       | 1644      |
|    total_timesteps    | 696500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 421       |
|    iterations         | 140500    |
|    time_elapsed       | 1665      |
|    total_timesteps    | 702500    |
| train/                |           |
|    entropy_loss       | -5.45e-05 |
|    explained_variance | 0.992     |
|    learning_rate      | 0.0007    |
|    n_updates          | 140499    |
|    policy_loss        | 7.08e-06  |
|    value_loss         | 3.92      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 421       |
|    iterations         | 140600    |
|    time_elapsed       | 1667      |
|    total_timesteps    | 703000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 40       |
|    ep_rew_mean        | 1.64e+03 |
| time/                 |          |
|    fps                | 420      |
|    iterations         | 141800   |
|    time_elapsed       | 1687     |
|    total_timesteps    | 709000   |
| train/                |          |
|    entropy_loss       | -0.00082 |
|    explained_variance | 1        |
|    learning_rate      | 0.0007   |
|    n_updates          | 141799   |
|    policy_loss        | 0.000455 |
|    value_loss         | 31.8     |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 419       |
|    iterations         | 141900    |
|    time_elapsed       | 1689      |
|    total_timesteps    | 709500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 418       |
|    iterations         | 143100    |
|    time_elapsed       | 1710      |
|    total_timesteps    | 715500    |
| train/                |           |
|    entropy_loss       | -5.11e-05 |
|    explained_variance | 0.998     |
|    learning_rate      | 0.0007    |
|    n_updates          | 143099    |
|    policy_loss        | -4.06e-06 |
|    value_loss         | 1.69      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 418       |
|    iterations         | 143200    |
|    time_elapsed       | 1711      |
|    total_timesteps    | 716000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 144400    |
|    time_elapsed       | 1733      |
|    total_timesteps    | 722000    |
| train/                |           |
|    entropy_loss       | -0.000861 |
|    explained_variance | 0.999     |
|    learning_rate      | 0.0007    |
|    n_updates          | 144399    |
|    policy_loss        | 0.000824  |
|    value_loss         | 60.5      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 40        |
|    ep_rew_mean        | 1.64e+03  |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 144500    |
|    time_elapsed       | 1735      |
|    total_timesteps    | 722500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.8      |
|    ep_rew_mean        | -8.75e+03 |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 145700    |
|    time_elapsed       | 1749      |
|    total_timesteps    | 728500    |
| train/                |           |
|    entropy_loss       | -0.00013  |
|    explained_variance | 0.267     |
|    learning_rate      | 0.0007    |
|    n_updates          | 145699    |
|    policy_loss        | -0.00936  |
|    value_loss         | 1.4e+07   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.8      |
|    ep_rew_mean        | -8.75e+03 |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 145800    |
|    time_elapsed       | 1750      |
|    total_timesteps    | 729000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.9      |
|    ep_rew_mean        | -8.75e+03 |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 147000    |
|    time_elapsed       | 1762      |
|    total_timesteps    | 735000    |
| train/                |           |
|    entropy_loss       | -0.00736  |
|    explained_variance | 0.991     |
|    learning_rate      | 0.0007    |
|    n_updates          | 146999    |
|    policy_loss        | -0.00407  |
|    value_loss         | 24.2      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 34.9      |
|    ep_rew_mean        | -8.75e+03 |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 147100    |
|    time_elapsed       | 1763      |
|    total_timesteps    | 735500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.6      |
|    ep_rew_mean        | -9.04e+03 |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 148300    |
|    time_elapsed       | 1780      |
|    total_timesteps    | 741500    |
| train/                |           |
|    entropy_loss       | -0.00193  |
|    explained_variance | 0.721     |
|    learning_rate      | 0.0007    |
|    n_updates          | 148299    |
|    policy_loss        | -0.0145   |
|    value_loss         | 2.64e+03  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 25.8      |
|    ep_rew_mean        | -9.25e+03 |
| time/                 |           |
|    fps                | 416       |
|    iterations         | 148400    |
|    time_elapsed       | 1781      |
|    total_timesteps    | 742000    |
| train/    

KeyboardInterrupt: 

In [None]:
# Report the wall-clock duration between the `start` and `end` timestamps in minutes.
# NOTE(review): the training output above ends in KeyboardInterrupt, so `end` may be
# stale or undefined on a fresh kernel -- confirm where `end` is assigned.
elapsed_minutes = (end - start) / 60
print('Total time taken: {} min'.format(elapsed_minutes))

### Design by AI

In [None]:
# Reset the environment and keep the value it returns as the starting
# observation for the rollout loop in the next cell.
obs = env.reset()

In [None]:
# Roll out the trained policy to generate a design, capped at 100 steps.
#
# `i` counts the steps that completed without ending the rollout: the step that
# triggers the stop is not counted, and i == 100 means the cap was reached.
i = 0
while i < 100:
    # deterministic=True makes the policy return its most likely action instead
    # of sampling, so the generated design is reproducible between runs.
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = env.step(action)
    # Stop when the model raises its break flag, or when the environment
    # reports the episode as finished: stepping a finished episode without
    # calling reset() is undefined behaviour under the Gym API.
    if env.M.break_flag or dones:
        break
    i += 1

In [None]:
# Number of rollout steps that completed before the loop above stopped.
# The step on which the loop broke is not counted; a value of 100 means the
# step cap was reached without a break.
print(i)

In [None]:
# Render the environment in its current (post-rollout) state.
# NOTE(review): presumably this plots the generated structure -- confirm
# against the environment's render() implementation.
env.render() 

In [None]:
# Display the value returned by the model's length() for the generated design.
# NOTE(review): presumably the total member length of the structure -- confirm
# against the definition of M.length().
env.M.length()

In [None]:
# Run the model's FEA() on the generated design and keep its result.
# NOTE(review): presumably a finite-element analysis returning displacements --
# confirm against the definition of M.FEA().
FEA_output_arr=env.M.FEA()
# Pass the FEA result to max_u() and display what it returns
# (presumably the maximum displacement -- TODO confirm).
env.M.max_u(FEA_output_arr)