In [1]:
import sys
import os
import math
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
# from osim.env import ProstheticsEnv
from prosthetics_env_with_history import ProstheticsEnvWithHistory
from td3 import TD3
# from replay_buffer import ReplayBuffer
from env_history_sampler import EnvHistorySampler


In [2]:
# Central experiment configuration, grouped by concern (environment, model, training loop).
CONFIG = dict(
    env=dict(
        integrator_accuracy=2e-3,
    ),
    model=dict(
        architecture="TD3",
    ),
    training=dict(
        # Persistence
        episode_save_load_file="prosthetics_env_history.h5",
        checkpoint_save_load_prefix="checkpoint_TD3",
        # Schedule (all counts are in environment time steps)
        start_timesteps=5e3,  # Number of time steps run with a purely random policy
        eval_freq=5e3,  # Interval between policy evaluations
        max_timesteps=1e8,  # Total training budget
        max_episode_steps=300,  # Upper bound on the length of a single episode
        # TD3 hyperparameters
        expl_noise=0.5,  # Std of the Gaussian exploration noise (was 0.1)
        batch_size=100,  # Batch size used for both actor and critic
        discount=0.99,  # Discount factor
        tau=0.005,  # Target network update rate
        policy_noise=0.2,  # Noise added to the target policy during the critic update
        noise_clip=0.5,  # Clipping range for the target policy noise
        policy_freq=2,  # Frequency of the delayed policy updates
        # Action post-processing
        frameskip=5,  # Max frameskip steps
    ),
)


In [3]:
# Output locations, all relative to the notebook's working directory.
OUTPUT_DIR = Path('.')
LOGS_DIR = OUTPUT_DIR / 'logs'
CHECKPOINTS_DIR = OUTPUT_DIR / 'checkpoints'

# Create each directory up front; exist_ok makes re-running this cell harmless.
for _output_path in (OUTPUT_DIR, LOGS_DIR, CHECKPOINTS_DIR):
    os.makedirs(_output_path, exist_ok=True)


### Observation Hacking

- Rewrite all joint_pos, body_pos to be relative to mass_center_pos
- Subtract mass_center_vel and mass_center_acc from joint_vel, body_vel, joint_acc, body_acc?
- Either compute jounce/snap, or pass multiple timesteps, or just pass acceleration from past 3 timesteps?

Initial Env Observation:
```
{
    'joint_pos': {
        'ground_pelvis': [0.0, 0.0, 0.0, 0.0, 0.94, 0.0],
        'hip_r': [0.0, 0.0, 0.0],
        'knee_r': [0.0],
        'ankle_r': [0.0],
        'hip_l': [0.0, 0.0, 0.0],
        'knee_l': [0.0],
        'ankle_l': [0.0],
        'subtalar_l': [],
        'mtp_l': [],
        'back': [-0.0872665],
        'back_0': []
    },
    'joint_vel': {
        'ground_pelvis': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        'hip_r': [0.0, 0.0, 0.0],
        'knee_r': [0.0],
        'ankle_r': [0.0],
        'hip_l': [0.0, 0.0, 0.0],
        'knee_l': [0.0],
        'ankle_l': [0.0],
        'subtalar_l': [],
        'mtp_l': [],
        'back': [0.0],
        'back_0': []
    },
    'joint_acc': {
        'ground_pelvis': [34.07237489546962, 3.219284560937942, 0.021285761200362296, 13.997154494145377, 0.8655672359505977, -0.6156967622871027],
        'hip_r': [-194.74323476194263, -4.441803696780512, 1.5931700403370996e-14],
        'knee_r': [305.46152469620915],
        'ankle_r': [9636.363025843913],
        'hip_l': [-208.86020665024324, 3.5702556374966354, -4.2521541843143495e-14],
        'knee_l': [399.3192427973721],
        'ankle_l': [809.4478175113452],
        'subtalar_l': [],
        'mtp_l': [],
        'back': [-2.3092638912203256e-14],
        'back_0': []
    },
    'body_pos': {
        'pelvis': [0.0, 0.94, 0.0],
        'femur_r': [-0.0707, 0.8738999999999999, 0.0835],
        'pros_tibia_r': [-0.07519985651753601, 0.47807930355164957, 0.0835],
        'pros_foot_r': [-0.07519985651753601, 0.04807930355164958, 0.0835],
        'femur_l': [-0.0707, 0.8738999999999999, -0.0835],
        'tibia_l': [-0.07519985651753601, 0.47807930355164957, -0.0835],
        'talus_l': [-0.07519985651753601, 0.04807930355164958, -0.0835],
        'calcn_l': [-0.123969856517536, 0.006129303551649576, -0.09142],
        'toes_l': [0.05483014348246398, 0.004129303551649576, -0.0925],
        'torso': [-0.1007, 1.0214999999999999, 0.0],
        'head': [-0.052764320996907754, 1.5694070821576522, 0.0]
    },
    'body_vel': {
        'pelvis': [0.0, 0.0, 0.0],
        'femur_r': [0.0, 0.0, 0.0],
        'pros_tibia_r': [0.0, 0.0, 0.0],
        'pros_foot_r': [0.0, 0.0, 0.0],
        'femur_l': [0.0, 0.0, 0.0],
        'tibia_l': [0.0, 0.0, 0.0],
        'talus_l': [0.0, 0.0, 0.0],
        'calcn_l': [0.0, 0.0, 0.0],
        'toes_l': [0.0, 0.0, 0.0],
        'torso': [0.0, 0.0, 0.0],
        'head': [0.0, 0.0, 0.0]
    },
    'body_acc': {
        'pelvis': [13.997154494145377, 0.8655672359505977, -0.6156967622871027],
        'femur_r': [16.25111583579615, -1.812159929997423, -0.826986568448235],
        'pros_tibia_r': [-49.070641675940735, 0.12065763836075294, -0.34299240980632545],
        'pros_foot_r': [13.18934420084581, 0.12065763836075294, 0.18269081860597952],
        'femur_l': [16.24756111367569, -1.2745394083207864, -0.826986568448235],
        'tibia_l': [-55.19198970892064, 1.093538716356541, -0.6879691696202773],
        'talus_l': [41.356517039396714, 1.093538716356541, -0.537051606700039],
        'calcn_l': [84.73177709400595, -49.336407951145645, -0.5212902634646581],
        'toes_l': [86.79971256249173, 135.5386990655368, -0.5243942154141731],
        'torso': [11.220255940164602, -2.565520916023193, -0.35118159441778396],
        'head': [-7.448239570993795, -0.9322384901609437, 1.4116668685846663]
    },
    'body_pos_rot': {
        'pelvis': [-0.0, 0.0, -0.0],
        'femur_r': [-0.0, 0.0, -0.0],
        'pros_tibia_r': [-0.0, 0.0, -0.0],
        'pros_foot_r': [-0.0, 0.0, -0.0],
        'femur_l': [-0.0, 0.0, -0.0],
        'tibia_l': [-0.0, 0.0, -0.0],
        'talus_l': [-0.0, 0.0, -0.0],
        'calcn_l': [-0.0, 0.0, -0.0],
        'toes_l': [-0.0, 0.0, -0.0],
        'torso': [-0.0, 0.0, -0.0872665],
        'head': [-0.0, 0.0, -0.0872665]
    },
    'body_vel_rot': {
        'pelvis': [0.0, 0.0, 0.0],
        'femur_r': [0.0, 0.0, 0.0],
        'pros_tibia_r': [0.0, 0.0, 0.0],
        'pros_foot_r': [0.0, 0.0, 0.0],
        'femur_l': [0.0, 0.0, 0.0],
        'tibia_l': [0.0, 0.0, 0.0],
        'talus_l': [0.0, 0.0, 0.0],
        'calcn_l': [0.0, 0.0, 0.0],
        'toes_l': [0.0, 0.0, 0.0],
        'torso': [0.0, 0.0, 0.0],
        'head': [0.0, 0.0, 0.0]
    },
    'body_acc_rot': {
        'pelvis': [3.219284560937942, 0.021285761200362296, 34.07237489546962],
        'femur_r': [-1.2225191358425698, 0.021285761200378228, -160.670859866473],
        'pros_tibia_r': [-1.2225191358425698, 0.021285761200378228, 144.79066482973616],
        'pros_foot_r': [-1.2225191358425698, 0.021285761200378228, 9781.15369067365],
        'femur_l': [-0.35097107655869353, 0.021285761200404818, -174.7878317547736],
        'tibia_l': [-0.35097107655869353, 0.021285761200404818, 224.5314110425985],
        'talus_l': [-0.35097107655869353, 0.021285761200404818, 1033.9792285539438],
        'calcn_l': [-0.35097107655869353, 0.021285761200404818, 1033.9792285539438],
        'toes_l': [-0.35097107655869353, 0.021285761200404818, 1033.9792285539438],
        'torso': [3.219284560937942, 0.021285761200362296, 34.0723748954696],
        'head': [3.219284560937942, 0.021285761200362296, 34.0723748954696]
    },
    'forces': {
        'abd_r': [219.6613927253564],
        'add_r': [144.87433100305103],
        'hamstrings_r': [194.30030504346755],
        'bifemsh_r': [42.728811234363775],
        'glut_max_r': [171.7873509605573],
        'iliopsoas_r': [158.01207984383657],
        'rect_fem_r': [99.0329705435046],
        'vasti_r': [436.79388413623326],
        'abd_l': [219.6613927253564],
        'add_l': [144.87433100305103],
        'hamstrings_l': [194.30030504346755],
        'bifemsh_l': [42.728811234363775],
        'glut_max_l': [171.7873509605573],
        'iliopsoas_l': [158.01207984383657],
        'rect_fem_l': [99.0329705435046],
        'vasti_l': [436.79388413623326],
        'gastroc_l': [273.0178325689043],
        'soleus_l': [370.0059951709156],
        'tib_ant_l': [104.05059952034294],
        'ankleSpring': [-0.0],
        'pros_foot_r_0': [-1.3573320551122304e-12, -388.7553514927188, 0.0, 32.46107184964201, -1.1333722660187127e-13, 3.726164264482634, 1.3573320551122304e-12, 388.7553514927188, 0.0, 0.0, 0.0, 25.50818238819416, 1.3573320551122304e-12, 388.7553514927188, 0.0, 0.0, 0.0, 25.50818238819416],
        'foot_l': [-1.7615592933859115e-12, -504.53063397142085, 0.0, -46.45915575255036, 1.622112753284362e-13, -4.943148065393853, 6.786660275561278e-13, 194.37767574636297, 0.0, 0.0, 0.0, 5.831330272390875, 1.0828932658297836e-12, 310.1529582250579, 0.0, 0.0, 0.0, 6.203059164501164, 1.0828932658297836e-12, 310.1529582250579, 0.0, 0.0, 0.0, 6.203059164501164],
        'HipLimit_r': [0.0, 0.0],
        'HipLimit_l': [0.0, 0.0],
        'KneeLimit_r': [-0.0, 0.0],
        'KneeLimit_l': [-0.0, 0.0],
        'AnkleLimit_r': [0.0, 0.0],
        'AnkleLimit_l': [0.0, 0.0],
        'HipAddLimit_r': [0.0, 0.0],
        'HipAddLimit_l': [0.0, 0.0]
    },
    'muscles': {
        'abd_r': {
            'activation': 0.05,
            'fiber_length': 0.07752306863700548,
            'fiber_velocity': 1.1700156898117815e-13,
            'fiber_force': 219.6613927253564
        },
        'add_r': {
            'activation': 0.05,
            'fiber_length': 0.05526137592854144,
            'fiber_velocity': 5.531257930905764e-11,
            'fiber_force': 146.25768888087705
        },
        'hamstrings_r': {
            'activation': 0.05,
            'fiber_length': 0.06355896214015513,
            'fiber_velocity': 2.1056261406660054e-14,
            'fiber_force': 202.45627069225887
        },
        'bifemsh_r': {
            'activation': 0.05,
            'fiber_length': 0.13434264681417835,
            'fiber_velocity': 9.542198984660805e-17,
            'fiber_force': 45.09919197278584
        },
        'glut_max_r': {
            'activation': 0.05,
            'fiber_length': 0.16084824667171801,
            'fiber_velocity': 1.0181982508865008e-12,
            'fiber_force': 171.7873509605573
        },
        'iliopsoas_r': {
            'activation': 0.05,
            'fiber_length': 0.13005768603600326,
            'fiber_velocity': 3.347183497651294e-11,
            'fiber_force': 159.26525950285387
        },
        'rect_fem_r': {
            'activation': 0.05,
            'fiber_length': 0.06027044615978444,
            'fiber_velocity': 2.2362024955832438e-15,
            'fiber_force': 99.63652479982161
        },
        'vasti_r': {
            'activation': 0.05,
            'fiber_length': 0.07890756873654925,
            'fiber_velocity': 6.168233828156989e-15,
            'fiber_force': 437.7385693769557
        },
        'abd_l': {
            'activation': 0.05,
            'fiber_length': 0.07752306863700548,
            'fiber_velocity': 1.1700156898117815e-13,
            'fiber_force': 219.6613927253564
        },
        'add_l': {
            'activation': 0.05,
            'fiber_length': 0.05526137592854144,
            'fiber_velocity': 5.531257930905764e-11,
            'fiber_force': 146.25768888087705
        },
        'hamstrings_l': {
            'activation': 0.05,
            'fiber_length': 0.06355896214015513,
            'fiber_velocity': 2.1056261406660054e-14,
            'fiber_force': 202.45627069225887
        },
        'bifemsh_l': {
            'activation': 0.05,
            'fiber_length': 0.13434264681417835,
            'fiber_velocity': 9.542198984660805e-17,
            'fiber_force': 45.09919197278584
        },
        'glut_max_l': {
            'activation': 0.05,
            'fiber_length': 0.16084824667171801,
            'fiber_velocity': 1.0181982508865008e-12,
            'fiber_force': 171.7873509605573
        },
        'iliopsoas_l': {
            'activation': 0.05,
            'fiber_length': 0.13005768603600326,
            'fiber_velocity': 3.347183497651294e-11,
            'fiber_force': 159.26525950285387
        },
        'rect_fem_l': {
            'activation': 0.05,
            'fiber_length': 0.06027044615978444,
            'fiber_velocity': 2.2362024955832438e-15,
            'fiber_force': 99.63652479982161
        },
        'vasti_l': {
            'activation': 0.05,
            'fiber_length': 0.07890756873654925,
            'fiber_velocity': 6.168233828156989e-15,
            'fiber_force': 437.7385693769557
        },
        'gastroc_l': {
            'activation': 0.05,
            'fiber_length': 0.05720257668702345,
            'fiber_velocity': 5.718949639274886e-14,
            'fiber_force': 282.79456495087237
        },
        'soleus_l': {
            'activation': 0.05,
            'fiber_length': 0.04494814124106819,
            'fiber_velocity': 3.4120643802478774e-10,
            'fiber_force': 406.4161372187392
        },
        'tib_ant_l': {
            'activation': 0.05,
            'fiber_length': 0.06288000983990409,
            'fiber_velocity': 8.525642140971546e-14,
            'fiber_force': 104.5158690359632
        }
    },
    'markers': {},
    'misc': {
        'mass_center_pos': [-0.08466565561225976, 0.9952730567231536, -0.003576087446004414],
        'mass_center_vel': [0.0, 0.0, 0.0],
        'mass_center_acc': [4.4008799039045146e-14, 2.570832209970726, -1.0237645330147003e-15]
    }
}
```

In [4]:
# Copied & modified from https://github.com/stanfordnmbl/osim-rl/blob/master/osim/env/osim.py#L452
def single_step_env_obs_to_model_obs(env_obs, target_vel=(3, 0, 0), include_lower_order_values=True):
    """Flatten one environment observation dict into a flat list of model features.

    Body positions, velocities and accelerations are re-expressed relative to
    the centre of mass before being collected. Features are gathered into two
    groups that are concatenated as ``lower_order + highest_order``:

    * ``lower_order``: body/joint positions and velocities, body rotations and
      rotational velocities, centre-of-mass position and velocity.
    * ``highest_order``: body/joint accelerations, rotational accelerations,
      muscle states, forces, centre-of-mass acceleration and the difference
      between the centre-of-mass velocity and ``target_vel`` (x and z).

    The input observation is never modified. (The previous implementation made
    a shallow copy and then wrote the re-centred values back into the caller's
    nested dicts, so converting the same observation twice gave different
    results.)

    Args:
        env_obs: Observation dict with the keys shown in the notebook's
            "Initial Env Observation" example.
        target_vel: Indexable of three numbers; only indices 0 (x) and 2 (z)
            are used.
        include_lower_order_values: When False, only the ``highest_order``
            group is returned.

    Returns:
        A flat list of numbers. Its length is fixed for a given model type
        (prosthetic or not) and a given set of muscles.
    """
    has_prosthetic = 'pros_foot_r' in env_obs['body_pos']

    target_vel_x = target_vel[0]
    target_vel_z = target_vel[2]

    frame = {
        'pos': np.array(env_obs['misc']['mass_center_pos']),
        'vel': np.array(env_obs['misc']['mass_center_vel']),
        'acc': np.array(env_obs['misc']['mass_center_acc']),
    }

    def relative_to(values_by_body, origin):
        # Build NEW dicts/lists so the caller's observation stays untouched.
        return {name: list(np.array(values) - origin) for name, values in values_by_body.items()}

    # Transform reference frame from 0,0,0 to center of mass:
    body_pos = relative_to(env_obs['body_pos'], frame['pos'])
    body_vel = relative_to(env_obs['body_vel'], frame['vel'])
    body_acc = relative_to(env_obs['body_acc'], frame['acc'])
    # NOTE: dividing body vel/acc by the centre-of-mass vel/acc was tried as an
    # alternative normalisation and is intentionally not applied.

    # Collect observation vector
    lower_order = []
    highest_order = []
    for body_part in ["head","torso","pelvis","femur_l","femur_r","tibia_l","tibia_r","pros_tibia_r","talus_l","talus_r","toes_l","toes_r","pros_foot_r","calcn_l","calcn_r"]:
        missing_with_prosthetic = has_prosthetic and body_part in ["toes_r","tibia_r","talus_r","calcn_r"]
        missing_without_prosthetic = not has_prosthetic and body_part in ["pros_foot_r","pros_tibia_r"]
        if missing_with_prosthetic or missing_without_prosthetic:
            # Zero padding for body parts this model does not have. The padding
            # (12 + 6) is wider than what a present body part contributes
            # (8 + 4); it is kept as is so the observation length -- and
            # therefore any saved checkpoint -- stays valid.
            lower_order += [0] * 12
            highest_order += [0] * 6
            continue
        # Only the first two components of every 3-vector are used.
        lower_order += body_pos[body_part][0:2]
        lower_order += body_vel[body_part][0:2]
        highest_order += body_acc[body_part][0:2]
        lower_order += env_obs["body_pos_rot"][body_part][0:2]
        lower_order += env_obs["body_vel_rot"][body_part][0:2]
        highest_order += env_obs["body_acc_rot"][body_part][0:2]

    for joint in ["ground_pelvis","ankle_l","ankle_r","back","hip_l","hip_r","knee_l","knee_r"]:
        lower_order += env_obs["joint_pos"][joint]
        lower_order += env_obs["joint_vel"][joint]
        highest_order += env_obs["joint_acc"][joint]

    # Sorted so the feature order does not depend on dict ordering.
    for muscle in sorted(env_obs["muscles"].keys()):
        muscle_state = env_obs["muscles"][muscle]
        highest_order += [muscle_state["activation"]]
        highest_order += [muscle_state["fiber_length"]]
        highest_order += [muscle_state["fiber_velocity"]]
        highest_order += [muscle_state["fiber_force"]]

    forces = env_obs['forces']
    for force in ['abd_l', 'add_l', 'hamstrings_l', 'bifemsh_l', 'glut_max_l', 'iliopsoas_l', 'rect_fem_l', 'vasti_l', 'gastroc_l', 'soleus_l', 'tib_ant_l', 'ankleSpring', 'pros_foot_r_0', 'foot_l', 'HipLimit_l', 'KneeLimit_l', 'AnkleLimit_l', 'HipAddLimit_l']:
        if force == 'pros_foot_r_0' and not has_prosthetic:
            # No prosthetic foot force without a prosthetic: pad instead of
            # indexing a key that is not there. (This padding used to sit in an
            # unreachable branch, so this case raised KeyError.)
            highest_order += [0] * 18
            continue
        highest_order += forces[force]
        if '_l' not in force:
            # No right-side counterpart to append.
            continue
        force_r = force.replace('_l', '_r')
        if has_prosthetic:
            # Right-side forces that are padded with zeros when a prosthetic is present.
            if force_r in ['gastroc_r', 'soleus_r', 'tib_ant_r']:
                highest_order += [0]
                continue
            if force_r == 'foot_r':
                highest_order += [0] * 24
                continue
        highest_order += forces[force_r]

    # Center of mass
    lower_order += list(frame['pos'])
    lower_order += list(frame['vel'])
    highest_order += list(frame['acc'])

    # Target velocity
    highest_order += [frame['vel'][0] - target_vel_x, frame['vel'][2] - target_vel_z]

    result = highest_order
    if include_lower_order_values:
        result = lower_order + result
    return result

def env_obs_history_to_model_obs(env_obs_history):
    """Build the model input from the most recent four environment observations.

    The newest observation contributes its full feature vector; each of the
    three preceding observations contributes only its highest-order features
    (``include_lower_order_values=False``). The pieces are concatenated newest
    first, followed by the older steps from most recent to oldest.

    Args:
        env_obs_history: Non-empty list of observation dicts, oldest first.
            When it holds fewer than four entries, the oldest available entry
            is repeated at the front to fill the gap.

    Returns:
        The concatenated features as produced by ``np.concatenate``.
    """
    recent = env_obs_history[-4:]
    n_missing = 4 - len(recent)
    # Pad at the front with copies of the oldest entry so exactly 4 steps are available.
    padded = recent[:1] * n_missing + recent

    newest_features = single_step_env_obs_to_model_obs(padded[-1])
    # Convert the older steps in chronological order, then flip them so the
    # most recent of them comes first in the output.
    older_features = [
        single_step_env_obs_to_model_obs(past_obs, include_lower_order_values=False)
        for past_obs in padded[:-1]
    ]
    older_features.reverse()
    return np.concatenate([newest_features] + older_features)


def prepare_model_observation(env):
    """Return the model observation for the environment's current episode.

    Reads the ``'obs'`` column of ``env.history(current_episode_only=True)``
    and converts that list of observations with
    ``env_obs_history_to_model_obs``.
    """
    episode_observations = env.history(current_episode_only=True)['obs'].tolist()
    return env_obs_history_to_model_obs(episode_observations)


### Action hacking

- binary activations?  Or several "bins"?
- Frameskip?
- Muscles must remain "active" for at least 10 frames once activated?  Randomized?
- Limited number of muscles can fire at one time?
- Handle prosthetic or not (strip activations of nonexistent muscles)


In [5]:
# Mutable state shared by the frameskip helpers below.
action_state = dict()


def reset_frameskip(n=8):
    """Re-initialise the frameskip state.

    Args:
        n: Upper bound (inclusive) on how many frames an action may be
            repeated after it has been issued.
    """
    action_state.update(
        n_frameskip=n,
        frames_to_skip=0,  # Zero, so the very first model action is not skipped.
        frameskip_action=None,
    )


def apply_frameskip(model_action):
    """Return either ``model_action`` or the cached action that is still being repeated.

    While frames remain to be skipped, the counter is decremented and the
    cached action is returned in place of ``model_action``. Otherwise
    ``model_action`` is cached and returned, and a new skip count is drawn
    with ``np.random.randint(n_frameskip + 1)``.
    """
    remaining = action_state.get('frames_to_skip', 0)
    if remaining != 0:
        # Still inside a skip window: count down and replay the cached action.
        action_state['frames_to_skip'] = remaining - 1
        return action_state.get('frameskip_action')

    # Skip window exhausted: draw a new one and remember this action for it.
    max_skip = action_state.get('n_frameskip', 0)
    action_state['frames_to_skip'] = np.random.randint(max_skip + 1)
    action_state['frameskip_action'] = model_action
    return model_action


def prepare_env_action(model_action):
    """Turn a model action (a list of muscle activations) into an environment action.

    Two steps are applied in order:
      1. frame skipping via ``apply_frameskip``, which may substitute a cached
         earlier action;
      2. rounding every activation to the nearest integer with numpy's
         ``round`` (binary output presumably relies on activations lying in
         [0, 1] -- confirm against the policy's output range).

    Returns:
        The processed activations as a plain Python list.
    """
    held_action = apply_frameskip(model_action)
    rounded = np.array(held_action).round()
    return rounded.tolist()
    

### Reward Hacking

- Survival reward
- Lean forward reward (to avoid models which had torso too upright) (may need to be based on speed)
- Reward for minimizing sideways velocity?
- Reward for minimizing vertical velocity (COG)?

In [6]:
def env_obs_to_custom_reward(obs):
    """Compute the shaped reward components for one environment observation.

    Most components use ``val_diff_reward``, a parabola that equals 1 on
    target, 0 at ``tolerance`` away from the target and is negative beyond
    that, multiplied by a per-component weight.

    Args:
        obs: Observation dict (the form returned with ``project=False``). The
            keys read are ``joint_pos``, ``joint_vel``, ``body_pos``,
            ``body_vel``, ``body_pos_rot`` and ``misc``; the ``pros_foot_r``
            body part must be present.

    Returns:
        Dict mapping reward name to value. Callers sum the values.

    Raises:
        ValueError: If ``obs`` is not a dict (or dict subclass).
    """
    # isinstance (rather than an exact type comparison) also accepts dict subclasses.
    if not isinstance(obs, dict):
        raise ValueError('obs must be a dict (project=False)')

    target_vel_x = 3
    target_vel_z = 0
    eps = 1e-8  # Keeps the division below defined when target_vel_x is 0.

    # Heading of the target velocity in the x/z plane.
    target_vel_theta = -np.arctan(target_vel_z/(target_vel_x+eps)) if target_vel_x >= 0 else np.pi - np.arctan(target_vel_z/(target_vel_x+eps))

    def val_diff_reward(val, target, tolerance):
        # Parabolic reward/penalty for tracking a target value within some tolerance.
        # Unit reward: 1 at target value, 0 at `tolerance` distance from target value, negative outside of `tolerance`.
        # Multiply by desired scale factor / magnitude. Don't multiply by too large of a scale factor -- it also amplifies the slope.
        # Make the tolerance bigger than you think -- it makes the slope more gradual / less severe.
        return 1 - ((val - target)/tolerance)**2

    def radians_diff_wrapped(a1, a2):
        # Absolute angular distance between two angles, in [0, pi].
        # `%` with a positive modulus already yields values in [0, 2pi), so no
        # further sign correction is needed.
        lo, hi = sorted((a1 % (2*np.pi), a2 % (2*np.pi)))
        # Two angles can't be more than pi apart in the wrapped sense: go the other way round.
        if hi - lo > np.pi:
            lo = lo + 2*np.pi
        return np.fabs(hi - lo)

    avg_knee_joint_pos = (obs['joint_pos']['knee_l'][0] + obs['joint_pos']['knee_r'][0]) / 2
    target_avg_knee_joint_pos = 60*np.pi/180

    rewards = {
        'survival': 100,
        'target_velocity_x': 3 * val_diff_reward(obs['misc']['mass_center_vel'][0], target_vel_x, 5), # 3 at target velocity, 0 at 5m/s off-target, then negative
        'target_velocity_z': 3 * val_diff_reward(obs['misc']['mass_center_vel'][2], target_vel_z, 5), # 3 at target velocity, 0 at 5m/s off-target, then negative
        'head_velocity_x': 2 * val_diff_reward(obs['body_vel']['head'][0], target_vel_x, 5), # 2 at target velocity, 0 at 5m/s off-target, then negative
        'head_velocity_z': 2 * val_diff_reward(obs['body_vel']['head'][2], target_vel_z, 5), # 2 at target velocity, 0 at 5m/s off-target, then negative
        'lean_forward_x': 5 * val_diff_reward(obs['body_pos']['head'][0] - obs['body_pos']['pelvis'][0], .1 * target_vel_x, .4), # head in front of pelvis from perspective of velocity vector
        'lean_forward_z': 5 * val_diff_reward(obs['body_pos']['head'][2] - obs['body_pos']['pelvis'][2], .1 * target_vel_z, .4), # head in front of pelvis from perspective of velocity vector
        'hips_squared': 5 * val_diff_reward(radians_diff_wrapped(target_vel_theta, obs['body_pos_rot']['pelvis'][1]), 0, np.pi),
        'knee_bent_l': 5 * val_diff_reward(obs['joint_pos']['knee_l'][0], target_avg_knee_joint_pos, np.pi), # goal range of roughly [0,120] degrees
        'knee_bent_r': 5 * val_diff_reward(obs['joint_pos']['knee_r'][0], target_avg_knee_joint_pos, np.pi), # goal range of roughly [0,120] degrees
        'low_y_vel_pelvis': 5 * val_diff_reward(obs['body_vel']['pelvis'][1], 0, 1),
        'low_y_vel_head': 5 * val_diff_reward(obs['body_vel']['head'][1], 0, 1),
        'low_y_vel_toes_l': 5 * val_diff_reward(obs['body_vel']['toes_l'][1], 0, 1),
        'low_y_vel_pros_foot_r': 5 * val_diff_reward(obs['body_vel']['pros_foot_r'][1], 0, 1),
        'knees_opposite_joint_vel': 0 if avg_knee_joint_pos < (target_avg_knee_joint_pos - 15*np.pi/180) else 3 * val_diff_reward(obs['joint_vel']['knee_l'][0], -obs['joint_vel']['knee_r'][0], np.pi), # The left knee should be opening when the right knee is closing, and vice versa
        'feet_behind_mass_x': 5 * val_diff_reward(obs['misc']['mass_center_pos'][0] - (obs['body_pos']['toes_l'][0] + obs['body_pos']['pros_foot_r'][0])/2, .1 * target_vel_x, .4),
        'feet_behind_mass_z': 5 * val_diff_reward(obs['misc']['mass_center_pos'][2] - (obs['body_pos']['toes_l'][2] + obs['body_pos']['pros_foot_r'][2])/2, .1 * target_vel_z, .4),
        # Placeholders for rewards that are not implemented yet.
        'one_foot_off_ground': 0,
        'femurs_parallel': 0,
        'absolute_foot_velocity': 0, # should be 0 at ground, 2x body velocity above ground
        'forefoot_strike': 0,
    }

#     if should_abort_episode(obs, custom_rewards=rewards):
#         rewards['abort_episode'] = -100

    return rewards
    

### Episode Hacking (Custom "done" criteria)


In [7]:
def should_abort_episode(env_obs, custom_rewards=None, verbose=False):
    """Decide whether the current episode should be cut short.

    The episode is aborted when any of the following holds:
      * the head is more than 0.2 (x direction) behind the pelvis;
      * the head's z coordinate is more than 0.5 away from 0;
      * the sum of the custom rewards is below -10.

    Args:
        env_obs: Observation dict; ``body_pos`` of ``head`` and ``pelvis`` is read.
        custom_rewards: Optional precomputed reward dict. When None, it is
            computed from ``env_obs`` with ``env_obs_to_custom_reward``.
        verbose: When True, print the reason for aborting.

    Returns:
        True if the episode should be aborted, otherwise False.
    """
    # `is not None` so that an (empty) dict passed by the caller is always used as given.
    rewards = custom_rewards if custom_rewards is not None else env_obs_to_custom_reward(env_obs)

    head_pos = env_obs['body_pos']['head']
    head_lead_x = head_pos[0] - env_obs['body_pos']['pelvis'][0]
    if head_lead_x < -.2:
        if verbose: print(f'Aborting episode due to head being > .2m behind the pelvis ({head_lead_x}).')
        return True
    if np.fabs(head_pos[2]) > 0.5:
        if verbose: print(f'Aborting episode due to head being > 0.5m away from centerline ({head_pos[2]}).')
        return True

    total_reward = sum(rewards.values())
    if total_reward < -10:
        if verbose:
            print(f'Aborting episode due to custom reward < -10 ({total_reward}):')
            # List the components that pulled the total down.
            for k, v in rewards.items():
                if v < 0:
                    print(f'  reward `{k}` = {v}')
        return True
    return False
    

### Policy Evaluator

In [8]:
def evaluate_episode(policy):
    """Run `policy` for one full episode and return the summed environment reward.

    Uses the module-level `env` and `env_step_kwargs`. Frame skipping is
    disabled (`reset_frameskip(0)`) for the evaluation.

    Only the environment's own reward is accumulated; the custom shaped
    rewards are deliberately left out so that progress is measured against
    the environment's reward.
    """
    env.reset(**env_step_kwargs)
    reset_frameskip(0)

    episode_return = 0
    finished = False
    while not finished:
        model_obs = prepare_model_observation(env)
        env_action = prepare_env_action(policy.select_action(model_obs))
        _, step_reward, finished, _ = env.step(env_action, **env_step_kwargs)
        episode_return += step_reward
    return episode_return

def evaluate_policy(policy, eval_episodes=10):
    """Run `policy` for `eval_episodes` episodes and return the mean episode reward.

    Each episode is scored by `evaluate_episode`; the mean is also printed
    between two separator lines.
    """
    reward_total = 0.
    progress = tqdm(range(eval_episodes), desc="Evaluating policy", unit="episode")
    for _ in progress:
        reward_total = reward_total + evaluate_episode(policy)

    avg_reward = reward_total / eval_episodes

    separator = "---------------------------------------"
    print(
        separator,
        "Evaluation over %d episodes: %f" % (eval_episodes, avg_reward),
        separator,
        sep="\n",
    )
    return avg_reward



In [9]:
# Keyword arguments forwarded to env.reset() and env.step() by the cells below.
env_step_kwargs = dict(project=False)

# Simulation environment (with step history), created without the visualizer
# and at the integrator accuracy set in CONFIG.
env = ProstheticsEnvWithHistory(
    integrator_accuracy=CONFIG['env']['integrator_accuracy'],
    visualize=False,
)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [10]:
# The model input size is measured from an actual prepared observation instead of
# env.observation_space.shape[0], which requires the environment to be reset first.
env.reset(**env_step_kwargs)
state_dim = prepare_model_observation(env).shape[0]

# Action size and upper action bound come straight from the action space.
# NOTE(review): int() truncates the bound; that is harmless for a bound of 1.
action_dim, max_action = env.action_space.shape[0], int(env.action_space.high[0])

(state_dim, action_dim, max_action)


(1260, 19, 1)

In [11]:
# Build the TD3 agent from the observation size, action size and action bound
# computed in the previous cell.
policy = TD3(state_dim, action_dim, max_action)

In [12]:
# Resume from the default checkpoint when one exists; otherwise keep the freshly
# constructed policy. Loading is best-effort, so a failure is reported, not raised.
try:
    print(f'Loading checkpoint from {CHECKPOINTS_DIR/CONFIG["training"]["checkpoint_save_load_prefix"]}*')
    policy.load(CHECKPOINTS_DIR, CONFIG["training"]["checkpoint_save_load_prefix"])
except Exception as load_error:
    # `except Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; the reason is printed so a corrupt checkpoint
    # is distinguishable from a missing one.
    print("Failed to load existing model checkpoint — this is normal if you're trying to train a new model (not from a checkpoint).")
    print(f"  Reason: {type(load_error).__name__}: {load_error}")


Loading checkpoint from checkpoints/checkpoint_TD3*
Failed to load existing model checkpoint — this is normal if you're trying to train a new model (not from a checkpoint).


In [13]:
# replay_buffer = ReplayBuffer()

In [14]:
# Evaluation scores; the training loop appends one entry per evaluation round.
# Disabled alternative: time and score the untrained policy before training starts.
# %%timeit -n1 -r1
# evaluations = [evaluate_policy(policy)]
evaluations = []


In [15]:
# Resume the episode history from disk when a previous run saved one; otherwise
# start from an empty frame with the same columns as the environment's history.
df_saved_episodes = (
    pd.read_hdf(CONFIG['training']['episode_save_load_file'])
    if os.path.exists(CONFIG['training']['episode_save_load_file'])
    else pd.DataFrame(columns=env.history().columns)
)

print(f'Loaded {len(df_saved_episodes)} saved episode timesteps.')


Loaded 0 saved episode timesteps.


In [16]:
# Seed the training-loop counters from the loaded episode history.
# One history row is one timestep.
total_timesteps = len(df_saved_episodes)
# Steps already accumulated towards the next evaluation round.
timesteps_since_eval = total_timesteps % CONFIG['training']['eval_freq']
# One less than the number of distinct episodes: the loop increments this
# counter every time it resets the environment.
episode_num = len(df_saved_episodes['episode_uuid'].unique()) - 1
# Starting in the "done" state makes the first loop iteration reset the environment.
done = True
# Step index of the last saved row, or 0 when there is no history.
if total_timesteps > 0:
    episode_timesteps = df_saved_episodes['i_step'].iloc[-1]
else:
    episode_timesteps = 0
total_timesteps, timesteps_since_eval, episode_num, episode_timesteps


(0, 0.0, -1, 0)

In [17]:
# Main training loop. Every iteration performs exactly one environment step.
# Whenever the previous episode has ended (`done`), the loop first trains the
# policy on the accumulated history (only once `start_timesteps` steps have been
# collected), evaluates/checkpoints if enough steps have passed, and then resets
# the environment for the next episode.
while total_timesteps < CONFIG['training']['max_timesteps']:
    if done: 
        # Training only starts after the initial random-action phase.
        if total_timesteps >= CONFIG['training']['start_timesteps']: 
            # Training data = episodes loaded from disk + everything the env has
            # recorded so far; the sampler is rebuilt from it at every episode end.
            df_env_history = pd.concat([df_saved_episodes, env.history()], ignore_index=True, copy=False)
            history_sampler = EnvHistorySampler(
                df_env_history,
                env_obs_history_to_model_obs_fn=env_obs_history_to_model_obs, 
                # The custom reward is the sum of all custom reward terms.
                env_obs_custom_reward_fn=lambda obs: sum(env_obs_to_custom_reward(obs).values()),
                env_obs_custom_done_fn=should_abort_episode,
            )
            # `episode_timesteps` (length of the episode that just ended) is the
            # second argument — presumably the number of training iterations; the
            # "Train model" progress bars in the output match it. TODO confirm.
            if CONFIG['model']['architecture'] == "TD3":
                policy.train(
                    history_sampler,#replay_buffer, 
                    episode_timesteps, 
                    CONFIG['training']['batch_size'], 
                    CONFIG['training']['discount'], 
                    CONFIG['training']['tau'], 
                    CONFIG['training']['policy_noise'], 
                    CONFIG['training']['noise_clip'], 
                    CONFIG['training']['policy_freq'],
                )
            else: 
                # Other architectures get the call without the TD3-specific
                # policy_noise / noise_clip / policy_freq arguments.
                policy.train(
                    history_sampler,#replay_buffer, 
                    episode_timesteps, 
                    CONFIG['training']['batch_size'], 
                    CONFIG['training']['discount'], 
                    CONFIG['training']['tau']
                )
        
            # Evaluate policy, Checkpoint policy, Checkpoint history
            # This is only checked at episode boundaries, so an evaluation can run
            # somewhat later than exactly every `eval_freq` steps.
            if timesteps_since_eval >= CONFIG['training']['eval_freq']:
                # Reset evaluation counter (the modulo keeps the overshoot)
                timesteps_since_eval %= CONFIG['training']['eval_freq']
                # Evaluate policy
                evaluations.append(evaluate_policy(policy))
                # Checkpoint policy: once under a name tagged with the episode number
                # and evaluation score, once under the plain prefix that the
                # checkpoint-loading cell reads.
                policy.save(CHECKPOINTS_DIR, f'{CONFIG["training"]["checkpoint_save_load_prefix"]}_episode{episode_num}_eval{evaluations[-1]:.1f}')
                policy.save(CHECKPOINTS_DIR, CONFIG["training"]["checkpoint_save_load_prefix"])
                # Checkpoint history
                # NOTE(review): evaluate_policy() has just stepped this same `env`; if
                # env.history() records those steps too, the evaluation episodes end
                # up in the saved (and later sampled) history — confirm.
                pd.concat([df_saved_episodes, env.history()], ignore_index=True, copy=False).to_hdf(CONFIG['training']['episode_save_load_file'], key='df')
                # TODO Log evaluations, etc.
        
        # Reset environment
        obs = env.reset(**env_step_kwargs)
        reset_frameskip(CONFIG['training']['frameskip'])
#         obs_dict = env.get_state_desc()
        done = False
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1 
    
    # Select action randomly or according to policy
    if total_timesteps < CONFIG['training']['start_timesteps']:
        action = env.action_space.sample()
    else:
        action = policy.select_action(prepare_model_observation(env))
        # Gaussian exploration noise with std `expl_noise` (skipped when it is 0).
        if CONFIG['training']['expl_noise'] != 0: 
            action += np.random.normal(0, CONFIG['training']['expl_noise'], size=action.shape)

    # Perform action
    action = prepare_env_action(action)
    obs, reward, done, _ = env.step(action, **env_step_kwargs)
#     new_obs_dict = env.get_state_desc()

    # If the environment did not end the episode itself, apply the custom abort criteria.
    if not done:
        done = should_abort_episode(env.get_state_desc(), verbose=True)
    # `done_bool` is forced to 0 on the step that reaches `max_episode_steps`
    # (presumably so a time-limit cut-off is not stored as a terminal state).
    # NOTE(review): it is not read anywhere in this loop while the
    # replay_buffer.add call below stays commented out.
    done_bool = 0 if episode_timesteps + 1 == CONFIG['training']['max_episode_steps'] else float(done)

    # custom_rewards = compute_rewards(new_obs_dict)
    # Only the environment's reward is tracked for the per-episode log line.
    episode_reward += reward #+ sum(custom_rewards.values())

    # Store data in replay buffer
#     replay_buffer.add((obs_dict, new_obs_dict, action, reward, done_bool, episode_num))

#     obs = new_obs
#     obs_dict = new_obs_dict

    episode_timesteps += 1
    total_timesteps += 1
    timesteps_since_eval += 1
    
    if done:
        # One summary line per finished episode, flushed so it shows up immediately.
        print(f"Total T: {total_timesteps} Episode Num: {episode_num} Episode T: {episode_timesteps} Reward: {episode_reward}")
        sys.stdout.flush()

        

-0.13876400884778411 0.06273868911010606
Aborting episode due to head being > .2m behind the pelvis (-0.20150269795789016).
Total T: 11 Episode Num: 0 Episode T: 11 Reward: 35.43506634798316
-0.15462359669252684 0.059591450039721064
Aborting episode due to head being > .2m behind the pelvis (-0.2142150467322479).
Total T: 27 Episode Num: 1 Episode T: 16 Reward: 34.57707824491329
-0.1503674592430194 0.055268684997500296
Aborting episode due to head being > .2m behind the pelvis (-0.20563614424051968).
Total T: 47 Episode Num: 2 Episode T: 20 Reward: 31.886474990890747
-0.13486544085689717 0.06552271343739556
Aborting episode due to head being > .2m behind the pelvis (-0.20038815429429274).
Total T: 57 Episode Num: 3 Episode T: 10 Reward: 35.54114907912499
-0.14341715950241202 0.07355862041109143
Aborting episode due to head being > .2m behind the pelvis (-0.21697577991350345).
Total T: 69 Episode Num: 4 Episode T: 12 Reward: 41.12720894055058
-0.14645787204372127 0.062358306584047694
Ab

-0.19147309075965144 0.019800976817729323
Aborting episode due to head being > .2m behind the pelvis (-0.21127406757738076).
Total T: 711 Episode Num: 43 Episode T: 27 Reward: 11.335725951884346
-0.14019375239456092 0.06262545976276687
Aborting episode due to head being > .2m behind the pelvis (-0.2028192121573278).
Total T: 724 Episode Num: 44 Episode T: 13 Reward: 35.24279424516603
-0.14415453227333672 0.07207441002376652
Aborting episode due to head being > .2m behind the pelvis (-0.21622894229710324).
Total T: 739 Episode Num: 45 Episode T: 15 Reward: 40.65239371405581
-0.1448257042497914 0.0646612081391531
Aborting episode due to head being > .2m behind the pelvis (-0.2094869123889445).
Total T: 752 Episode Num: 46 Episode T: 13 Reward: 36.95092482782803
-0.15432121187606407 0.055598748394860294
Aborting episode due to head being > .2m behind the pelvis (-0.20991996027092436).
Total T: 767 Episode Num: 47 Episode T: 15 Reward: 32.62538085996184
-0.15128978390600872 0.0586599140336

-0.13931820258087088 0.07217063066935563
Aborting episode due to head being > .2m behind the pelvis (-0.2114888332502265).
Total T: 1339 Episode Num: 86 Episode T: 12 Reward: 40.553722333557694
-0.1399440040539145 0.06885819220436878
Aborting episode due to head being > .2m behind the pelvis (-0.20880219625828328).
Total T: 1350 Episode Num: 87 Episode T: 11 Reward: 38.10137982614998
-0.14594495065143387 0.06896484878896438
Aborting episode due to head being > .2m behind the pelvis (-0.21490979944039823).
Total T: 1361 Episode Num: 88 Episode T: 11 Reward: 38.480205618541525
-0.1364743646336566 0.06412807718833341
Aborting episode due to head being > .2m behind the pelvis (-0.20060244182199).
Total T: 1376 Episode Num: 89 Episode T: 15 Reward: 37.04592650260125
-0.3603978993217228 -0.15803864313633492
Aborting episode due to head being > .2m behind the pelvis (-0.20235925618538786).
Total T: 1426 Episode Num: 90 Episode T: 50 Reward: -105.03291642163438
-0.15193153622342423 0.070183338

-0.1437607126500197 0.06282431160971982
Aborting episode due to head being > .2m behind the pelvis (-0.2065850242597395).
Total T: 2017 Episode Num: 129 Episode T: 19 Reward: 36.56007384574263
-0.1436047925697776 0.06490939176405872
Aborting episode due to head being > .2m behind the pelvis (-0.2085141843338363).
Total T: 2030 Episode Num: 130 Episode T: 13 Reward: 36.97525512259188
-0.14801218627841506 0.06867130991908822
Aborting episode due to head being > .2m behind the pelvis (-0.21668349619750327).
Total T: 2042 Episode Num: 131 Episode T: 12 Reward: 38.098395295080394
-0.14485072846601943 0.06678800140191749
Aborting episode due to head being > .2m behind the pelvis (-0.21163872986793691).
Total T: 2054 Episode Num: 132 Episode T: 12 Reward: 37.43504005044569
-0.14141464401918102 0.06378561057443732
Aborting episode due to head being > .2m behind the pelvis (-0.20520025459361835).
Total T: 2067 Episode Num: 133 Episode T: 13 Reward: 36.362276331703356
-0.1467449434211912 0.06030

-0.14773710897261974 0.058067418161572834
Aborting episode due to head being > .2m behind the pelvis (-0.20580452713419256).
Total T: 2681 Episode Num: 172 Episode T: 14 Reward: 33.20906181432305
-0.1714146298671788 0.039032673165753345
Aborting episode due to head being > .2m behind the pelvis (-0.21044730303293216).
Total T: 2704 Episode Num: 173 Episode T: 23 Reward: 23.140109676331498
-0.14474666311548878 0.06967614074030215
Aborting episode due to head being > .2m behind the pelvis (-0.21442280385579093).
Total T: 2716 Episode Num: 174 Episode T: 12 Reward: 39.403282680947214
-0.1536002113615742 0.04887021858197665
Aborting episode due to head being > .2m behind the pelvis (-0.20247042994355086).
Total T: 2733 Episode Num: 175 Episode T: 17 Reward: 28.398601796289583
-0.14803952769258233 0.06614584986258834
Aborting episode due to head being > .2m behind the pelvis (-0.21418537755517067).
Total T: 2747 Episode Num: 176 Episode T: 14 Reward: 37.38121069572466
-0.14168753419589664 0

-0.14010217693504462 0.06640201079717635
Aborting episode due to head being > .2m behind the pelvis (-0.20650418773222096).
Total T: 3319 Episode Num: 215 Episode T: 13 Reward: 38.29109685983724
-0.1410540482511714 0.06287550412742451
Aborting episode due to head being > .2m behind the pelvis (-0.20392955237859592).
Total T: 3334 Episode Num: 216 Episode T: 15 Reward: 34.05415631127442
-0.14793782524149166 0.07133397052049917
Aborting episode due to head being > .2m behind the pelvis (-0.21927179576199082).
Total T: 3345 Episode Num: 217 Episode T: 11 Reward: 39.55720281780917
-0.15117391685266107 0.06410926302814632
Aborting episode due to head being > .2m behind the pelvis (-0.2152831798808074).
Total T: 3358 Episode Num: 218 Episode T: 13 Reward: 36.20649547076581
-0.20765299680342392 -0.0025353133311828767
Aborting episode due to head being > .2m behind the pelvis (-0.20511768347224105).
Total T: 3401 Episode Num: 219 Episode T: 43 Reward: -3.372765902019471
-0.13663616859516903 0.

-0.14668639793881683 0.05495822333388188
Aborting episode due to head being > .2m behind the pelvis (-0.20164462127269872).
Total T: 3952 Episode Num: 258 Episode T: 15 Reward: 31.721475857769104
-0.14739734110704963 0.058262083765312535
Aborting episode due to head being > .2m behind the pelvis (-0.20565942487236216).
Total T: 3966 Episode Num: 259 Episode T: 14 Reward: 32.803838017093526
-0.1517229348308149 0.0613730183906233
Aborting episode due to head being > .2m behind the pelvis (-0.2130959532214382).
Total T: 3980 Episode Num: 260 Episode T: 14 Reward: 35.29806770236678
-0.13839344123670766 0.06656222731531765
Aborting episode due to head being > .2m behind the pelvis (-0.2049556685520253).
Total T: 3990 Episode Num: 261 Episode T: 10 Reward: 36.21429243562168
-0.14717596450082363 0.06917027134317981
Aborting episode due to head being > .2m behind the pelvis (-0.21634623584400342).
Total T: 4003 Episode Num: 262 Episode T: 13 Reward: 39.391652347745946
-0.15662868317023465 0.04

-0.13320541973209174 0.07093742212585186
Aborting episode due to head being > .2m behind the pelvis (-0.2041428418579436).
Total T: 4761 Episode Num: 301 Episode T: 11 Reward: 39.42037426021604
-0.14654169269360054 0.06628981112464029
Aborting episode due to head being > .2m behind the pelvis (-0.21283150381824084).
Total T: 4774 Episode Num: 302 Episode T: 13 Reward: 37.37513945661467
-0.13690793859495895 0.07323933935752994
Aborting episode due to head being > .2m behind the pelvis (-0.2101472779524889).
Total T: 4785 Episode Num: 303 Episode T: 11 Reward: 40.16382940437964
-0.13589472714211315 0.06552702923345098
Aborting episode due to head being > .2m behind the pelvis (-0.20142175637556414).
Total T: 4800 Episode Num: 304 Episode T: 15 Reward: 37.63004669931166
-0.1410000572238482 0.06512279096102083
Aborting episode due to head being > .2m behind the pelvis (-0.20612284818486903).
Total T: 4814 Episode Num: 305 Episode T: 14 Reward: 36.22105183129365
-0.15419643442351078 0.07337

Train model: 100%|██████████| 19/19 [00:08<00:00,  2.26batch/s]
Evaluating policy: 100%|██████████| 10/10 [03:52<00:00, 23.50s/episode]
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block0_values] [items->['episode_uuid', 'i_step', 'action', 'obs', 'reward', 'done', 'info']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)


---------------------------------------
Evaluation over 10 episodes: -355.813508
---------------------------------------
Total T: 5084 Episode Num: 320 Episode T: 69 Reward: -362.8526506398118


Train model: 100%|██████████| 69/69 [00:33<00:00,  2.12batch/s]


-0.47073989734265154 -0.2698600239285581
Aborting episode due to head being > .2m behind the pelvis (-0.2008798734140934).
Total T: 5193 Episode Num: 321 Episode T: 109 Reward: -177.5655357119931


Train model: 100%|██████████| 109/109 [00:54<00:00,  2.07batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5118540948610792).
Total T: 5273 Episode Num: 322 Episode T: 80 Reward: -285.33536162450474


Train model: 100%|██████████| 80/80 [00:39<00:00,  2.08batch/s]


-0.5010674394403609 -0.2948673851279803
Aborting episode due to head being > .2m behind the pelvis (-0.20620005431238064).
Total T: 5351 Episode Num: 323 Episode T: 78 Reward: -200.467297058775


Train model: 100%|██████████| 78/78 [00:39<00:00,  2.07batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5065480627087413).
Total T: 5429 Episode Num: 324 Episode T: 78 Reward: -71.83931703271487


Train model: 100%|██████████| 78/78 [00:39<00:00,  1.73batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5038035625096947).
Total T: 5558 Episode Num: 325 Episode T: 129 Reward: -248.95828143905135


Train model: 100%|██████████| 129/129 [01:05<00:00,  1.94batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5129665290333216).
Total T: 5634 Episode Num: 326 Episode T: 76 Reward: -142.6881445397413


Train model: 100%|██████████| 76/76 [00:39<00:00,  2.00batch/s]


-0.4176020156636406 -0.21662851013185122
Aborting episode due to head being > .2m behind the pelvis (-0.20097350553178936).
Total T: 5717 Episode Num: 327 Episode T: 83 Reward: -144.40172779774622


Train model: 100%|██████████| 83/83 [00:42<00:00,  2.00batch/s]


-0.13325082621462347 0.06857233159512181
Aborting episode due to head being > .2m behind the pelvis (-0.2018231578097453).
Total T: 5732 Episode Num: 328 Episode T: 15 Reward: 38.70219064661516


Train model: 100%|██████████| 15/15 [00:07<00:00,  1.96batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5083410381811286).
Total T: 5805 Episode Num: 329 Episode T: 73 Reward: -209.78270911551354


Train model: 100%|██████████| 73/73 [00:39<00:00,  1.83batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5089187869212283).
Total T: 5917 Episode Num: 330 Episode T: 112 Reward: -123.78735029289824


Train model: 100%|██████████| 112/112 [01:03<00:00,  1.85batch/s]


-0.35899853577575336 -0.15650550022726029
Aborting episode due to head being > .2m behind the pelvis (-0.20249303554849307).
Total T: 6023 Episode Num: 331 Episode T: 106 Reward: -106.0572263279837


Train model: 100%|██████████| 106/106 [00:59<00:00,  1.86batch/s]


-0.4435250689251402 -0.24182433315532129
Aborting episode due to head being > .2m behind the pelvis (-0.20170073576981892).
Total T: 6105 Episode Num: 332 Episode T: 82 Reward: -163.6778501760451


Train model: 100%|██████████| 82/82 [00:45<00:00,  1.79batch/s]


-0.33618916255883224 -0.13564965220445802
Aborting episode due to head being > .2m behind the pelvis (-0.20053951035437423).
Total T: 6174 Episode Num: 333 Episode T: 69 Reward: -89.83749777308388


Train model: 100%|██████████| 69/69 [00:38<00:00,  1.83batch/s]


-0.6269042692112993 -0.42305650381625953
Aborting episode due to head being > .2m behind the pelvis (-0.20384776539503974).
Total T: 6261 Episode Num: 334 Episode T: 87 Reward: -288.6142378801695


Train model: 100%|██████████| 87/87 [00:49<00:00,  1.78batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5080275222875594).
Total T: 6375 Episode Num: 335 Episode T: 114 Reward: -159.30098902111345


Train model: 100%|██████████| 114/114 [01:05<00:00,  1.78batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5069005176837501).
Total T: 6464 Episode Num: 336 Episode T: 89 Reward: -145.97249589494572


Train model: 100%|██████████| 89/89 [00:52<00:00,  1.63batch/s]


Total T: 6529 Episode Num: 337 Episode T: 65 Reward: -352.67211626816953


Train model: 100%|██████████| 65/65 [00:38<00:00,  1.74batch/s]


-0.481692383714693 -0.27763039010961643
Aborting episode due to head being > .2m behind the pelvis (-0.20406199360507654).
Total T: 6610 Episode Num: 338 Episode T: 81 Reward: -187.63806372215453


Train model: 100%|██████████| 81/81 [00:47<00:00,  1.73batch/s]


Total T: 6679 Episode Num: 339 Episode T: 69 Reward: -380.54246791408497


Train model: 100%|██████████| 69/69 [00:40<00:00,  1.71batch/s]


-0.5932732185905103 -0.3891248545204107
Aborting episode due to head being > .2m behind the pelvis (-0.20414836407009962).
Total T: 6773 Episode Num: 340 Episode T: 94 Reward: -262.95449478382176


Train model: 100%|██████████| 94/94 [00:55<00:00,  1.76batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5082405642969245).
Total T: 6873 Episode Num: 341 Episode T: 100 Reward: -150.78158593792065


Train model: 100%|██████████| 100/100 [00:59<00:00,  1.74batch/s]


Aborting episode due to custom reward < -10 (-73.11885050653228):
  reward `lean_forward_x` = -2.127979654407268
  reward `low_y_vel_pros_foot_r` = -215.876908746171
Total T: 6885 Episode Num: 342 Episode T: 12 Reward: 28.94654853500751


Train model: 100%|██████████| 12/12 [00:06<00:00,  1.76batch/s]


-0.13971158072487044 0.06444212332338842
Aborting episode due to head being > .2m behind the pelvis (-0.20415370404825886).
Total T: 6898 Episode Num: 343 Episode T: 13 Reward: 36.78266009868168


Train model: 100%|██████████| 13/13 [00:08<00:00,  1.72batch/s]


-0.43954605341355046 -0.23866136722368797
Aborting episode due to head being > .2m behind the pelvis (-0.2008846861898625).
Total T: 6995 Episode Num: 344 Episode T: 97 Reward: -160.8034068127665


Train model: 100%|██████████| 97/97 [00:57<00:00,  1.72batch/s]


-0.48466175264362893 -0.279191534217417
Aborting episode due to head being > .2m behind the pelvis (-0.20547021842621194).
Total T: 7096 Episode Num: 345 Episode T: 101 Reward: -185.8406832121635


Train model: 100%|██████████| 101/101 [01:00<00:00,  1.70batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.512198709559291).
Total T: 7285 Episode Num: 346 Episode T: 189 Reward: 28.48108089283486


Train model: 100%|██████████| 189/189 [01:54<00:00,  1.69batch/s]


-0.4436470930908668 -0.2420688524077932
Aborting episode due to head being > .2m behind the pelvis (-0.20157824068307362).
Total T: 7394 Episode Num: 347 Episode T: 109 Reward: -161.15605113067008


Train model: 100%|██████████| 109/109 [01:06<00:00,  1.68batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5079643149384292).
Total T: 7482 Episode Num: 348 Episode T: 88 Reward: -153.3071777656132


Train model: 100%|██████████| 88/88 [00:53<00:00,  1.69batch/s]


-0.5355065300302346 -0.3346407738911831
Aborting episode due to head being > .2m behind the pelvis (-0.20086575613905155).
Total T: 7581 Episode Num: 349 Episode T: 99 Reward: -223.46268542338075


Train model: 100%|██████████| 99/99 [01:00<00:00,  1.69batch/s]


-0.4754705366157462 -0.27250220318930163
Aborting episode due to head being > .2m behind the pelvis (-0.20296833342644455).
Total T: 7699 Episode Num: 350 Episode T: 118 Reward: -177.26033302686616


Train model: 100%|██████████| 118/118 [01:13<00:00,  1.29batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5077666210601949).
Total T: 7794 Episode Num: 351 Episode T: 95 Reward: -126.55942799916102


Train model: 100%|██████████| 95/95 [00:58<00:00,  1.65batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5075456713511849).
Total T: 7892 Episode Num: 352 Episode T: 98 Reward: -56.64205223396273


Train model: 100%|██████████| 98/98 [01:00<00:00,  1.69batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.502195383701156).
Total T: 7978 Episode Num: 353 Episode T: 86 Reward: -86.408588120004


Train model: 100%|██████████| 86/86 [00:53<00:00,  1.67batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5057361677752383).
Total T: 8095 Episode Num: 354 Episode T: 117 Reward: -77.51816483292315


Train model: 100%|██████████| 117/117 [01:13<00:00,  1.64batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5013913489758944).
Total T: 8266 Episode Num: 355 Episode T: 171 Reward: 10.573585914824053


Train model: 100%|██████████| 171/171 [01:48<00:00,  1.59batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5077293178417276).
Total T: 8380 Episode Num: 356 Episode T: 114 Reward: -82.61985642495672


Train model: 100%|██████████| 114/114 [01:12<00:00,  1.47batch/s]


-0.32868063462203767 -0.12701258946222552
Aborting episode due to head being > .2m behind the pelvis (-0.20166804515981215).
Total T: 8446 Episode Num: 357 Episode T: 66 Reward: -83.8004686736069


Train model: 100%|██████████| 66/66 [00:42<00:00,  1.61batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5068193970646132).
Total T: 8547 Episode Num: 358 Episode T: 101 Reward: -140.39910755515479


Train model: 100%|██████████| 101/101 [01:04<00:00,  1.60batch/s]


-0.5205656236084045 -0.3176728542930386
Aborting episode due to head being > .2m behind the pelvis (-0.20289276931536593).
Total T: 8662 Episode Num: 359 Episode T: 115 Reward: -210.7852464505941


Train model: 100%|██████████| 115/115 [01:14<00:00,  1.57batch/s]


Aborting episode due to head being > 0.5m away from centerline (-0.5041516790372866).
Total T: 8812 Episode Num: 360 Episode T: 150 Reward: -54.998461372638204


Train model: 100%|██████████| 150/150 [01:37<00:00,  1.39batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.501681791129774).
Total T: 8880 Episode Num: 361 Episode T: 68 Reward: -85.02255265473781


Train model: 100%|██████████| 68/68 [00:44<00:00,  1.56batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5008055827599394).
Total T: 8984 Episode Num: 362 Episode T: 104 Reward: -111.52393563509774


Train model: 100%|██████████| 104/104 [01:07<00:00,  1.58batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5076872703588644).
Total T: 9107 Episode Num: 363 Episode T: 123 Reward: -49.012575313394414


Train model: 100%|██████████| 123/123 [01:20<00:00,  1.56batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5086229822694416).
Total T: 9190 Episode Num: 364 Episode T: 83 Reward: -127.61058046618349


Train model: 100%|██████████| 83/83 [00:54<00:00,  1.55batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5070484752042144).
Total T: 9283 Episode Num: 365 Episode T: 93 Reward: -133.50667967505424


Train model: 100%|██████████| 93/93 [01:01<00:00,  1.55batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.50944893327487).
Total T: 9356 Episode Num: 366 Episode T: 73 Reward: -184.87725298052362


Train model: 100%|██████████| 73/73 [00:49<00:00,  1.46batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.506899479567427).
Total T: 9454 Episode Num: 367 Episode T: 98 Reward: -104.92519949745947


Train model: 100%|██████████| 98/98 [01:05<00:00,  1.47batch/s]


-0.5172206052523423 -0.3137043386709056
Aborting episode due to head being > .2m behind the pelvis (-0.20351626658143668).
Total T: 9557 Episode Num: 368 Episode T: 103 Reward: -206.35311854597245


Train model: 100%|██████████| 103/103 [01:08<00:00,  1.51batch/s]


-0.40402456140639154 -0.20329010602583814
Aborting episode due to head being > .2m behind the pelvis (-0.2007344553805534).
Total T: 9649 Episode Num: 369 Episode T: 92 Reward: -135.82855375780798


Train model: 100%|██████████| 92/92 [01:02<00:00,  1.47batch/s]


-0.41609635071716655 -0.21079170015774665
Aborting episode due to head being > .2m behind the pelvis (-0.2053046505594199).
Total T: 9779 Episode Num: 370 Episode T: 130 Reward: -142.97635532729066


Train model: 100%|██████████| 130/130 [01:28<00:00,  1.38batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5059722855117285).
Total T: 9878 Episode Num: 371 Episode T: 99 Reward: -205.31932119608032


Train model: 100%|██████████| 99/99 [01:07<00:00,  1.40batch/s]


-0.4907334839806775 -0.286382128807682
Aborting episode due to head being > .2m behind the pelvis (-0.20435135517299552).
Total T: 9983 Episode Num: 372 Episode T: 105 Reward: -189.6040460765169


Train model: 100%|██████████| 105/105 [01:11<00:00,  1.48batch/s]


-0.4645375065393619 -0.2629088770257401
Aborting episode due to head being > .2m behind the pelvis (-0.20162862951362176).
Total T: 10081 Episode Num: 373 Episode T: 98 Reward: -172.11821715173295


Train model: 100%|██████████| 98/98 [01:07<00:00,  1.43batch/s]
Evaluating policy: 100%|██████████| 10/10 [03:23<00:00, 20.30s/episode]


---------------------------------------
Evaluation over 10 episodes: -80.566454
---------------------------------------
-0.41930628771697365 -0.21633020802006525
Aborting episode due to head being > .2m behind the pelvis (-0.2029760796969084).
Total T: 10198 Episode Num: 374 Episode T: 117 Reward: -143.91988573980325


Train model: 100%|██████████| 117/117 [01:25<00:00,  1.39batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5130151120498444).
Total T: 10277 Episode Num: 375 Episode T: 79 Reward: -211.29099280888965


Train model: 100%|██████████| 79/79 [00:57<00:00,  1.40batch/s]


-0.5445850327323527 -0.33920908288984186
Aborting episode due to head being > .2m behind the pelvis (-0.20537594984251084).
Total T: 10365 Episode Num: 376 Episode T: 88 Reward: -226.8781891688541


Train model: 100%|██████████| 88/88 [01:04<00:00,  1.41batch/s]


Total T: 10453 Episode Num: 377 Episode T: 88 Reward: -348.83776750853525


Train model: 100%|██████████| 88/88 [01:05<00:00,  1.41batch/s]


-0.3712904503741099 -0.1658847856736314
Aborting episode due to head being > .2m behind the pelvis (-0.2054056647004785).
Total T: 10531 Episode Num: 378 Episode T: 78 Reward: -111.87623121609677


Train model: 100%|██████████| 78/78 [00:58<00:00,  1.40batch/s]


-0.43441488352866453 -0.2317450265424403
Aborting episode due to head being > .2m behind the pelvis (-0.20266985698622422).
Total T: 10621 Episode Num: 379 Episode T: 90 Reward: -152.38145879609942


Train model: 100%|██████████| 90/90 [01:06<00:00,  1.37batch/s]


-0.48499623365068906 -0.2835574029097583
Aborting episode due to head being > .2m behind the pelvis (-0.20143883074093077).
Total T: 10726 Episode Num: 380 Episode T: 105 Reward: -188.62249335696742


Train model: 100%|██████████| 105/105 [01:18<00:00,  1.37batch/s]


Aborting episode due to head being > 0.5m away from centerline (-0.5125660980399699).
Total T: 10866 Episode Num: 381 Episode T: 140 Reward: -170.21379788928442


Train model: 100%|██████████| 140/140 [01:45<00:00,  1.37batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5103933671759592).
Total T: 10966 Episode Num: 382 Episode T: 100 Reward: -88.91067906246913


Train model: 100%|██████████| 100/100 [01:15<00:00,  1.37batch/s]


-0.46598106889390745 -0.26204186382017386
Aborting episode due to head being > .2m behind the pelvis (-0.2039392050737336).
Total T: 11065 Episode Num: 383 Episode T: 99 Reward: -174.5442946793032


Train model: 100%|██████████| 99/99 [01:14<00:00,  1.35batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5045613115362944).
Total T: 11160 Episode Num: 384 Episode T: 95 Reward: -123.63511724717802


Train model: 100%|██████████| 95/95 [01:11<00:00,  1.35batch/s]


Aborting episode due to head being > 0.5m away from centerline (-0.5110182834889248).
Total T: 11305 Episode Num: 385 Episode T: 145 Reward: -138.26677485917196


Train model: 100%|██████████| 145/145 [01:50<00:00,  1.34batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5057443807337634).
Total T: 11435 Episode Num: 386 Episode T: 130 Reward: -99.88972061973686


Train model: 100%|██████████| 130/130 [01:38<00:00,  1.35batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5052908559286944).
Total T: 11531 Episode Num: 387 Episode T: 96 Reward: 48.51426964016063


Train model: 100%|██████████| 96/96 [01:13<00:00,  1.35batch/s]


Total T: 11602 Episode Num: 388 Episode T: 71 Reward: -297.50259626887276


Train model: 100%|██████████| 71/71 [00:55<00:00,  1.33batch/s]


-0.364604890794946 -0.16345442037531446
Aborting episode due to head being > .2m behind the pelvis (-0.20115047041963155).
Total T: 11690 Episode Num: 389 Episode T: 88 Reward: -113.80073359870848


Train model: 100%|██████████| 88/88 [01:08<00:00,  1.17batch/s]


Aborting episode due to head being > 0.5m away from centerline (-0.5041658308395528).
Total T: 11832 Episode Num: 390 Episode T: 142 Reward: -375.48773249080546


Train model: 100%|██████████| 142/142 [01:49<00:00,  1.33batch/s]


-0.39102862727224325 -0.18864459501417666
Aborting episode due to head being > .2m behind the pelvis (-0.2023840322580666).
Total T: 11915 Episode Num: 391 Episode T: 83 Reward: -124.43344310609783


Train model: 100%|██████████| 83/83 [01:04<00:00,  1.32batch/s]


Aborting episode due to head being > 0.5m away from centerline (-0.5064997800528779).
Total T: 12052 Episode Num: 392 Episode T: 137 Reward: -270.189729848181


Train model: 100%|██████████| 137/137 [01:47<00:00,  1.30batch/s]


-0.6282518760357088 -0.42775738632139515
Aborting episode due to head being > .2m behind the pelvis (-0.2004944897143136).
Total T: 12161 Episode Num: 393 Episode T: 109 Reward: -284.9831925558155


Train model: 100%|██████████| 109/109 [01:25<00:00,  1.30batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.509683336715327).
Total T: 12241 Episode Num: 394 Episode T: 80 Reward: -368.2870637640428


Train model: 100%|██████████| 80/80 [01:02<00:00,  1.32batch/s]


-0.4240872447580793 -0.2213460229641988
Aborting episode due to head being > .2m behind the pelvis (-0.2027412217938805).
Total T: 12343 Episode Num: 395 Episode T: 102 Reward: -147.44246370571275


Train model: 100%|██████████| 102/102 [01:20<00:00,  1.31batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5039567545172978).
Total T: 12418 Episode Num: 396 Episode T: 75 Reward: -264.6047717549828


Train model: 100%|██████████| 75/75 [01:00<00:00,  1.28batch/s]


-0.13985006093425204 0.06062509433918038
Aborting episode due to head being > .2m behind the pelvis (-0.2004751552734324).
Total T: 12432 Episode Num: 397 Episode T: 14 Reward: 32.900659970101884


Train model: 100%|██████████| 14/14 [00:10<00:00,  1.29batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5018899076426664).
Total T: 12530 Episode Num: 398 Episode T: 98 Reward: -141.99691049683432


Train model: 100%|██████████| 98/98 [01:18<00:00,  1.28batch/s]


Total T: 12598 Episode Num: 399 Episode T: 68 Reward: -357.74156326688285


Train model: 100%|██████████| 68/68 [00:54<00:00,  1.29batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5038577195972307).
Total T: 12683 Episode Num: 400 Episode T: 85 Reward: -89.80129051074998


Train model: 100%|██████████| 85/85 [01:08<00:00,  1.27batch/s]


-0.525138343371355 -0.32356643843999544
Aborting episode due to head being > .2m behind the pelvis (-0.20157190493135957).
Total T: 12779 Episode Num: 401 Episode T: 96 Reward: -218.0453811763876


Train model: 100%|██████████| 96/96 [01:17<00:00,  1.28batch/s]


-0.41756054843213053 -0.21559885420437197
Aborting episode due to head being > .2m behind the pelvis (-0.20196169422775856).
Total T: 12846 Episode Num: 402 Episode T: 67 Reward: -146.30012173644462


Train model: 100%|██████████| 67/67 [00:53<00:00,  1.26batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5060610574108043).
Total T: 12951 Episode Num: 403 Episode T: 105 Reward: -120.04856930173415


Train model: 100%|██████████| 105/105 [01:25<00:00,  1.26batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5040411523405681).
Total T: 13045 Episode Num: 404 Episode T: 94 Reward: -151.90574904103852


Train model: 100%|██████████| 94/94 [01:16<00:00,  1.27batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5048886766433485).
Total T: 13147 Episode Num: 405 Episode T: 102 Reward: -128.62804773228868


Train model: 100%|██████████| 102/102 [01:23<00:00,  1.26batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5077796696927486).
Total T: 13232 Episode Num: 406 Episode T: 85 Reward: -34.80129272705392


Train model: 100%|██████████| 85/85 [01:09<00:00,  1.25batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5083059897942042).
Total T: 13342 Episode Num: 407 Episode T: 110 Reward: 33.91585135573814


Train model: 100%|██████████| 110/110 [01:30<00:00,  1.09batch/s]


-0.4001235145476148 -0.1951847184081542
Aborting episode due to head being > .2m behind the pelvis (-0.2049387961394606).
Total T: 13403 Episode Num: 408 Episode T: 61 Reward: -135.08093262298016


Train model: 100%|██████████| 61/61 [00:50<00:00,  1.23batch/s]


-0.5328705916365801 -0.332126035830661
Aborting episode due to head being > .2m behind the pelvis (-0.2007445558059191).
Total T: 13513 Episode Num: 409 Episode T: 110 Reward: -222.1661177529024


Train model: 100%|██████████| 110/110 [01:30<00:00,  1.23batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5087895339755222).
Total T: 13604 Episode Num: 410 Episode T: 91 Reward: -160.0030293464116


Train model: 100%|██████████| 91/91 [01:14<00:00,  1.23batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5006702481118005).
Total T: 13683 Episode Num: 411 Episode T: 79 Reward: -121.35058948156755


Train model: 100%|██████████| 79/79 [01:06<00:00,  1.22batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5042826803754012).
Total T: 13779 Episode Num: 412 Episode T: 96 Reward: -145.22811669766048


Train model: 100%|██████████| 96/96 [01:20<00:00,  1.23batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5079357524651558).
Total T: 13850 Episode Num: 413 Episode T: 71 Reward: -224.68114949801446


Train model: 100%|██████████| 71/71 [00:59<00:00,  1.22batch/s]


-0.4029263605822295 -0.19792582941257442
Aborting episode due to head being > .2m behind the pelvis (-0.2050005311696551).
Total T: 13931 Episode Num: 414 Episode T: 81 Reward: -132.9510720860407


Train model: 100%|██████████| 81/81 [01:08<00:00,  1.18s/batch]


-0.4228125797949376 -0.22251038666930142
Aborting episode due to head being > .2m behind the pelvis (-0.20030219312563616).
Total T: 14011 Episode Num: 415 Episode T: 80 Reward: -148.27423341063272


Train model: 100%|██████████| 80/80 [01:06<00:00,  1.23batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5047030795341809).
Total T: 14152 Episode Num: 416 Episode T: 141 Reward: -201.9475539194066


Train model: 100%|██████████| 141/141 [01:58<00:00,  1.21batch/s]


-0.6840564304905441 -0.47918383284509586
Aborting episode due to head being > .2m behind the pelvis (-0.20487259764544824).
Total T: 14260 Episode Num: 417 Episode T: 108 Reward: -322.2991970544544


Train model: 100%|██████████| 108/108 [01:31<00:00,  1.22batch/s]


-0.8213761466627099 -0.6186305412224637
Aborting episode due to head being > .2m behind the pelvis (-0.2027456054402461).
Total T: 14389 Episode Num: 418 Episode T: 129 Reward: -418.4130232700524


Train model: 100%|██████████| 129/129 [01:50<00:00,  1.20batch/s]


Total T: 14461 Episode Num: 419 Episode T: 72 Reward: -352.3810710834527


Train model: 100%|██████████| 72/72 [01:01<00:00,  1.20batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5007277480060934).
Total T: 14568 Episode Num: 420 Episode T: 107 Reward: -113.30719596997807


Train model: 100%|██████████| 107/107 [01:31<00:00,  1.20batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5023557939669049).
Total T: 14709 Episode Num: 421 Episode T: 141 Reward: -205.99943385842977


Train model: 100%|██████████| 141/141 [02:00<00:00,  1.19batch/s]


-0.5021830935202118 -0.29769297742281575
Aborting episode due to head being > .2m behind the pelvis (-0.2044901160973961).
Total T: 14817 Episode Num: 422 Episode T: 108 Reward: -196.45083054599985


Train model: 100%|██████████| 108/108 [01:33<00:00,  1.19batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5109566668143836).
Total T: 14902 Episode Num: 423 Episode T: 85 Reward: -270.48452626285473


Train model: 100%|██████████| 85/85 [01:14<00:00,  1.18batch/s]


-0.6009188730971657 -0.4005526851877295
Aborting episode due to head being > .2m behind the pelvis (-0.20036618790943622).
Total T: 14996 Episode Num: 424 Episode T: 94 Reward: -270.57424824591845


Train model: 100%|██████████| 94/94 [01:22<00:00,  1.12s/batch]


-0.42331930576675375 -0.22310490954685164
Aborting episode due to head being > .2m behind the pelvis (-0.2002143962199021).
Total T: 15084 Episode Num: 425 Episode T: 88 Reward: -148.76014878590655


Train model: 100%|██████████| 88/88 [01:16<00:00,  1.18batch/s]
Evaluating policy: 100%|██████████| 10/10 [03:28<00:00, 20.90s/episode]


---------------------------------------
Evaluation over 10 episodes: -80.536562
---------------------------------------
Aborting episode due to head being > 0.5m away from centerline (-0.5014018005636663).
Total T: 15210 Episode Num: 426 Episode T: 126 Reward: -53.993086751403276


Train model: 100%|██████████| 126/126 [01:54<00:00,  1.14batch/s]


-0.5293441815981776 -0.32875358701994095
Aborting episode due to head being > .2m behind the pelvis (-0.2005905945782367).
Total T: 15301 Episode Num: 427 Episode T: 91 Reward: -219.34607413402063


Train model: 100%|██████████| 91/91 [01:22<00:00,  1.12batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5094510851746367).
Total T: 15393 Episode Num: 428 Episode T: 92 Reward: -140.3057166492464


Train model: 100%|██████████| 92/92 [01:23<00:00,  1.12batch/s]


-0.43490742136812666 -0.2287963872422748
Aborting episode due to head being > .2m behind the pelvis (-0.20611103412585186).
Total T: 15483 Episode Num: 429 Episode T: 90 Reward: -152.9271701076407


Train model: 100%|██████████| 90/90 [01:23<00:00,  1.12batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5126540501024635).
Total T: 15562 Episode Num: 430 Episode T: 79 Reward: -343.3473275856


Train model: 100%|██████████| 79/79 [01:12<00:00,  1.11batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5062327670536741).
Total T: 15631 Episode Num: 431 Episode T: 69 Reward: -134.69049007600768


Train model: 100%|██████████| 69/69 [01:03<00:00,  1.10batch/s]


-0.5373825075991854 -0.3304426363521939
Aborting episode due to head being > .2m behind the pelvis (-0.20693987124699148).
Total T: 15734 Episode Num: 432 Episode T: 103 Reward: -219.41322570074828


Train model: 100%|██████████| 103/103 [01:34<00:00,  1.10batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5058406939593301).
Total T: 15817 Episode Num: 433 Episode T: 83 Reward: -170.7624391020557


Train model: 100%|██████████| 83/83 [01:16<00:00,  1.10batch/s]


-0.14246922780864346 0.061544488913523175
Aborting episode due to head being > .2m behind the pelvis (-0.20401371672216664).
Total T: 15832 Episode Num: 434 Episode T: 15 Reward: 33.98237130327064


Train model: 100%|██████████| 15/15 [00:13<00:00,  1.09batch/s]


-0.6323166729680816 -0.4289161704782273
Aborting episode due to head being > .2m behind the pelvis (-0.2034005024898543).
Total T: 15962 Episode Num: 435 Episode T: 130 Reward: -286.7313655389872


Train model: 100%|██████████| 130/130 [02:01<00:00,  1.02s/batch]


-0.4716167782985352 -0.27114284439839786
Aborting episode due to head being > .2m behind the pelvis (-0.20047393390013735).
Total T: 16064 Episode Num: 436 Episode T: 102 Reward: -180.06420283863847


Train model: 100%|██████████| 102/102 [01:34<00:00,  1.11batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5004645870261228).
Total T: 16143 Episode Num: 437 Episode T: 79 Reward: -131.12401698488344


Train model: 100%|██████████| 79/79 [01:13<00:00,  1.09batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5052348703699829).
Total T: 16247 Episode Num: 438 Episode T: 104 Reward: -142.4055053091936


Train model: 100%|██████████| 104/104 [01:38<00:00,  1.03s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5002320042172212).
Total T: 16357 Episode Num: 439 Episode T: 110 Reward: 45.64013283865444


Train model: 100%|██████████| 110/110 [01:42<00:00,  1.10batch/s]


-0.4624896582432334 -0.2624123648451553
Aborting episode due to head being > .2m behind the pelvis (-0.20007729339807812).
Total T: 16449 Episode Num: 440 Episode T: 92 Reward: -172.29220490556438


Train model: 100%|██████████| 92/92 [01:26<00:00,  1.09batch/s]


-0.7450002619004457 -0.5379592752088348
Aborting episode due to head being > .2m behind the pelvis (-0.20704098669161086).
Total T: 16547 Episode Num: 441 Episode T: 98 Reward: -372.5081542409477


Train model: 100%|██████████| 98/98 [01:33<00:00,  1.08batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.503117928631662).
Total T: 16635 Episode Num: 442 Episode T: 88 Reward: -133.66688700205188


Train model: 100%|██████████| 88/88 [01:23<00:00,  1.08batch/s]


-0.8191647477662274 -0.6158202515223479
Aborting episode due to head being > .2m behind the pelvis (-0.2033444962438795).
Total T: 16735 Episode Num: 443 Episode T: 100 Reward: -428.682062437664


Train model: 100%|██████████| 100/100 [01:34<00:00,  1.08batch/s]


Total T: 16809 Episode Num: 444 Episode T: 74 Reward: -366.246335974679


Train model: 100%|██████████| 74/74 [01:12<00:00,  1.44s/batch]


-0.31381807853296384 -0.11375174645457514
Aborting episode due to head being > .2m behind the pelvis (-0.2000663320783887).
Total T: 16939 Episode Num: 445 Episode T: 130 Reward: -85.29296527179424


Train model: 100%|██████████| 130/130 [02:03<00:00,  1.07batch/s]


-0.29702139628011315 -0.0962809718986845
Aborting episode due to head being > .2m behind the pelvis (-0.20074042438142864).
Total T: 17007 Episode Num: 446 Episode T: 68 Reward: -66.17939793460333


Train model: 100%|██████████| 68/68 [01:05<00:00,  1.07batch/s]


-0.4659323608492191 -0.264377033994842
Aborting episode due to head being > .2m behind the pelvis (-0.20155532685437705).
Total T: 17103 Episode Num: 447 Episode T: 96 Reward: -176.52240179115955


Train model: 100%|██████████| 96/96 [01:33<00:00,  1.03batch/s]


Total T: 17168 Episode Num: 448 Episode T: 65 Reward: -348.2183143502339


Train model: 100%|██████████| 65/65 [01:01<00:00,  1.06batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5112660573523993).
Total T: 17253 Episode Num: 449 Episode T: 85 Reward: -134.43618233809042


Train model: 100%|██████████| 85/85 [01:22<00:00,  1.02batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5014363072293251).
Total T: 17339 Episode Num: 450 Episode T: 86 Reward: -111.61684234349151


Train model: 100%|██████████| 86/86 [01:23<00:00,  1.06batch/s]


-0.36033221287661776 -0.15541528743316987
Aborting episode due to head being > .2m behind the pelvis (-0.2049169254434479).
Total T: 17397 Episode Num: 451 Episode T: 58 Reward: -107.95529071873884


Train model: 100%|██████████| 58/58 [00:56<00:00,  1.01batch/s]


-0.4206235627435006 -0.21574294792325777
Aborting episode due to head being > .2m behind the pelvis (-0.20488061482024283).
Total T: 17483 Episode Num: 452 Episode T: 86 Reward: -144.22729524832954


Train model: 100%|██████████| 86/86 [01:23<00:00,  1.05batch/s]


-0.4137585533784827 -0.2117058467936747
Aborting episode due to head being > .2m behind the pelvis (-0.202052706584808).
Total T: 17571 Episode Num: 453 Episode T: 88 Reward: -141.4925046091431


Train model: 100%|██████████| 88/88 [01:26<00:00,  1.05batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5020904313691872).
Total T: 17668 Episode Num: 454 Episode T: 97 Reward: -136.80066109560826


Train model: 100%|██████████| 97/97 [01:34<00:00,  1.04batch/s]


-0.5812311564198914 -0.3810907867396623
Aborting episode due to head being > .2m behind the pelvis (-0.20014036968022914).
Total T: 17782 Episode Num: 455 Episode T: 114 Reward: -251.64556048148904


Train model: 100%|██████████| 114/114 [01:53<00:00,  1.05batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5104650799978481).
Total T: 17912 Episode Num: 456 Episode T: 130 Reward: -37.47244357482581


Train model: 100%|██████████| 130/130 [02:08<00:00,  1.03batch/s]


-0.7567851003590618 -0.5520854619940802
Aborting episode due to head being > .2m behind the pelvis (-0.20469963836498162).
Total T: 18018 Episode Num: 457 Episode T: 106 Reward: -382.2638835715986


Train model: 100%|██████████| 106/106 [01:44<00:00,  1.04batch/s]


-0.41306935401866157 -0.21116062137296268
Aborting episode due to head being > .2m behind the pelvis (-0.2019087326456989).
Total T: 18089 Episode Num: 458 Episode T: 71 Reward: -143.43602137177203


Train model: 100%|██████████| 71/71 [01:10<00:00,  1.03batch/s]


-0.5351490053427226 -0.3344668665279505
Aborting episode due to head being > .2m behind the pelvis (-0.2006821388147721).
Total T: 18194 Episode Num: 459 Episode T: 105 Reward: -222.84170982002482


Train model: 100%|██████████| 105/105 [01:43<00:00,  1.02batch/s]


-0.4409162367703858 -0.2371543190181207
Aborting episode due to head being > .2m behind the pelvis (-0.20376191775226513).
Total T: 18280 Episode Num: 460 Episode T: 86 Reward: -157.90314119926455


Train model: 100%|██████████| 86/86 [01:27<00:00,  1.15s/batch]


-0.5114015862368713 -0.30687113682772893
Aborting episode due to head being > .2m behind the pelvis (-0.20453044940914233).
Total T: 18372 Episode Num: 461 Episode T: 92 Reward: -203.05195500723053


Train model: 100%|██████████| 92/92 [01:32<00:00,  1.02batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5078060775078879).
Total T: 18454 Episode Num: 462 Episode T: 82 Reward: -113.71989300881833


Train model: 100%|██████████| 82/82 [01:22<00:00,  1.02batch/s]


-0.6431456114685976 -0.4391747184942049
Aborting episode due to head being > .2m behind the pelvis (-0.2039708929743927).
Total T: 18553 Episode Num: 463 Episode T: 99 Reward: -296.54520546192475


Train model: 100%|██████████| 99/99 [01:39<00:00,  1.01batch/s]


Aborting episode due to head being > 0.5m away from centerline (0.5146517878477829).
Total T: 18627 Episode Num: 464 Episode T: 74 Reward: -282.9550773262084


Train model: 100%|██████████| 74/74 [01:14<00:00,  1.02batch/s]


-0.5498023024290462 -0.3461596119515907
Aborting episode due to head being > .2m behind the pelvis (-0.20364269047745548).
Total T: 18729 Episode Num: 465 Episode T: 102 Reward: -230.0625165560932


Train model: 100%|██████████| 102/102 [01:44<00:00,  1.25s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.502153802515233).
Total T: 18826 Episode Num: 466 Episode T: 97 Reward: -142.47441720487586


Train model: 100%|██████████| 97/97 [01:37<00:00,  1.01batch/s]


Total T: 18882 Episode Num: 467 Episode T: 56 Reward: -298.8253208301773


Train model: 100%|██████████| 56/56 [00:57<00:00,  1.03s/batch]


Total T: 18947 Episode Num: 468 Episode T: 65 Reward: -359.2326649763976


Train model: 100%|██████████| 65/65 [01:04<00:00,  1.00s/batch]


Total T: 19022 Episode Num: 469 Episode T: 75 Reward: -388.96707415360225


Train model: 100%|██████████| 75/75 [01:16<00:00,  1.01s/batch]


-0.5854022664069125 -0.37981240240727593
Aborting episode due to head being > .2m behind the pelvis (-0.20558986399963658).
Total T: 19112 Episode Num: 470 Episode T: 90 Reward: -260.5596494511561


Train model: 100%|██████████| 90/90 [01:33<00:00,  1.14s/batch]


-0.4929141393401899 -0.2926871970765901
Aborting episode due to head being > .2m behind the pelvis (-0.20022694226359983).
Total T: 19195 Episode Num: 471 Episode T: 83 Reward: -198.4757846912065


Train model: 100%|██████████| 83/83 [01:25<00:00,  1.03s/batch]


-0.3848824414091029 -0.1835430947291666
Aborting episode due to head being > .2m behind the pelvis (-0.20133934667993628).
Total T: 19282 Episode Num: 472 Episode T: 87 Reward: -121.28999953105978


Train model: 100%|██████████| 87/87 [01:29<00:00,  1.01s/batch]


Total T: 19556 Episode Num: 473 Episode T: 274 Reward: 39.98182569334986


Train model: 100%|██████████| 274/274 [04:44<00:00,  1.01s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5039020023428586).
Total T: 19629 Episode Num: 474 Episode T: 73 Reward: -301.06665249855735


Train model: 100%|██████████| 73/73 [01:16<00:00,  1.02s/batch]


-0.339311832680046 -0.13309532865442888
Aborting episode due to head being > .2m behind the pelvis (-0.20621650402561711).
Total T: 19694 Episode Num: 475 Episode T: 65 Reward: -90.74416440343819


Train model: 100%|██████████| 65/65 [01:08<00:00,  1.02s/batch]


Total T: 19765 Episode Num: 476 Episode T: 71 Reward: -372.01202334805987


Train model: 100%|██████████| 71/71 [01:14<00:00,  1.03s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5089461068132874).
Total T: 19843 Episode Num: 477 Episode T: 78 Reward: -169.47483593985677


Train model: 100%|██████████| 78/78 [01:21<00:00,  1.02s/batch]


-0.5010028313126915 -0.2967441885605754
Aborting episode due to head being > .2m behind the pelvis (-0.20425864275211614).
Total T: 19942 Episode Num: 478 Episode T: 99 Reward: -197.18750055083896


Train model: 100%|██████████| 99/99 [01:43<00:00,  1.03s/batch]


-0.6059439170344165 -0.4057953164382083
Aborting episode due to head being > .2m behind the pelvis (-0.2001486005962082).
Total T: 20040 Episode Num: 479 Episode T: 98 Reward: -276.74871703677434


Train model: 100%|██████████| 98/98 [01:44<00:00,  1.16s/batch]
Evaluating policy: 100%|██████████| 10/10 [03:35<00:00, 21.53s/episode]


---------------------------------------
Evaluation over 10 episodes: -80.552263
---------------------------------------
-0.7377387353538707 -0.5295867543151995
Aborting episode due to head being > .2m behind the pelvis (-0.20815198103867116).
Total T: 20136 Episode Num: 480 Episode T: 96 Reward: -365.34017712853347


Train model: 100%|██████████| 96/96 [01:45<00:00,  1.07s/batch]


-0.7726929737241577 -0.5690320557868884
Aborting episode due to head being > .2m behind the pelvis (-0.20366091793726926).
Total T: 20252 Episode Num: 481 Episode T: 116 Reward: -390.1844280672774


Train model: 100%|██████████| 116/116 [02:08<00:00,  1.15s/batch]


-0.6029565984603112 -0.3941485808152208
Aborting episode due to head being > .2m behind the pelvis (-0.20880801764509044).
Total T: 20333 Episode Num: 482 Episode T: 81 Reward: -271.04592705796387


Train model: 100%|██████████| 81/81 [01:27<00:00,  1.08s/batch]


Total T: 20508 Episode Num: 483 Episode T: 175 Reward: 129.219791945221


Train model: 100%|██████████| 175/175 [03:14<00:00,  1.09s/batch]


Aborting episode due to custom reward < -10 (-88.09252537680928):
  reward `lean_forward_x` = -2.14196991244655
  reward `low_y_vel_pros_foot_r` = -231.43033843981115
Total T: 20520 Episode Num: 484 Episode T: 12 Reward: 28.00059443372837


Train model: 100%|██████████| 12/12 [00:13<00:00,  1.10s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5020956269701171).
Total T: 20593 Episode Num: 485 Episode T: 73 Reward: -151.96521611976226


Train model: 100%|██████████| 73/73 [01:21<00:00,  1.09s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5093058053439696).
Total T: 20674 Episode Num: 486 Episode T: 81 Reward: -213.4735342826619


Train model: 100%|██████████| 81/81 [01:30<00:00,  1.10s/batch]


Total T: 20755 Episode Num: 487 Episode T: 81 Reward: -349.24389845578764


Train model: 100%|██████████| 81/81 [01:30<00:00,  1.09s/batch]


-0.44507251130085174 -0.2412882027355096
Aborting episode due to head being > .2m behind the pelvis (-0.20378430856534213).
Total T: 20839 Episode Num: 488 Episode T: 84 Reward: -162.25356561510722


Train model: 100%|██████████| 84/84 [01:33<00:00,  1.09s/batch]


-0.427484500120451 -0.22517936522643164
Aborting episode due to head being > .2m behind the pelvis (-0.20230513489401938).
Total T: 20933 Episode Num: 489 Episode T: 94 Reward: -152.38028649620887


Train model: 100%|██████████| 94/94 [01:44<00:00,  1.09s/batch]


-0.47279807500522286 -0.26838757076816555
Aborting episode due to head being > .2m behind the pelvis (-0.2044105042370573).
Total T: 21021 Episode Num: 490 Episode T: 88 Reward: -179.0858596201823


Train model: 100%|██████████| 88/88 [01:38<00:00,  1.08s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5030210050958719).
Total T: 21132 Episode Num: 491 Episode T: 111 Reward: -142.21773728555894


Train model: 100%|██████████| 111/111 [02:03<00:00,  1.10s/batch]


-0.6909085725702804 -0.4878882335931532
Aborting episode due to head being > .2m behind the pelvis (-0.20302033897712718).
Total T: 21233 Episode Num: 492 Episode T: 101 Reward: -334.81767526655267


Train model: 100%|██████████| 101/101 [01:53<00:00,  1.10s/batch]


Aborting episode due to head being > 0.5m away from centerline (-0.5018064284232019).
Total T: 21359 Episode Num: 493 Episode T: 126 Reward: -190.0974864974891


Train model: 100%|██████████| 126/126 [02:23<00:00,  1.10s/batch]


-0.48712813723117 -0.285254352502295
Aborting episode due to head being > .2m behind the pelvis (-0.20187378472887496).
Total T: 21477 Episode Num: 494 Episode T: 118 Reward: -189.04183662774753


Train model: 100%|██████████| 118/118 [02:12<00:00,  1.10s/batch]


-0.5955374236589799 -0.39486597228758147
Aborting episode due to head being > .2m behind the pelvis (-0.2006714513713984).
Total T: 21597 Episode Num: 495 Episode T: 120 Reward: -259.73756791499454


Train model: 100%|██████████| 120/120 [02:15<00:00,  1.10s/batch]


-0.4819681499211987 -0.2784809947470607
Aborting episode due to head being > .2m behind the pelvis (-0.203487155174138).
Total T: 21698 Episode Num: 496 Episode T: 101 Reward: -185.3367334939004


Train model: 100%|██████████| 101/101 [01:54<00:00,  1.11s/batch]


-0.3682027385949409 -0.16648440678747933
Aborting episode due to head being > .2m behind the pelvis (-0.20171833180746157).
Total T: 21749 Episode Num: 497 Episode T: 51 Reward: -119.85933518881615


Train model: 100%|██████████| 51/51 [00:59<00:00,  1.12s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5114394041278554).
Total T: 21826 Episode Num: 498 Episode T: 77 Reward: -137.56613055025684


Train model: 100%|██████████| 77/77 [01:28<00:00,  1.12s/batch]


-0.5252911346485445 -0.32513912551786156
Aborting episode due to head being > .2m behind the pelvis (-0.20015200913068293).
Total T: 21934 Episode Num: 499 Episode T: 108 Reward: -214.59625527616737


Train model: 100%|██████████| 108/108 [02:03<00:00,  1.12s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5024232517182882).
Total T: 21998 Episode Num: 500 Episode T: 64 Reward: -93.52418857049001


Train model: 100%|██████████| 64/64 [01:14<00:00,  1.18s/batch]


-0.13876428407506847 0.061647340270513024
Aborting episode due to head being > .2m behind the pelvis (-0.2004116243455815).
Total T: 22012 Episode Num: 501 Episode T: 14 Reward: 34.05378288614108


Train model: 100%|██████████| 14/14 [00:15<00:00,  1.12s/batch]


-0.46330392597765635 -0.2627997204687786
Aborting episode due to head being > .2m behind the pelvis (-0.20050420550887776).
Total T: 22147 Episode Num: 502 Episode T: 135 Reward: -173.83895191432063


Train model: 100%|██████████| 135/135 [02:34<00:00,  1.13s/batch]


-0.4502821074877339 -0.24753368579764573
Aborting episode due to head being > .2m behind the pelvis (-0.20274842169008817).
Total T: 22244 Episode Num: 503 Episode T: 97 Reward: -160.69095508770593


Train model: 100%|██████████| 97/97 [01:55<00:00,  1.13s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5069216171352329).
Total T: 22338 Episode Num: 504 Episode T: 94 Reward: -162.64208942221723


Train model: 100%|██████████| 94/94 [01:48<00:00,  1.12s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5093866125036768).
Total T: 22420 Episode Num: 505 Episode T: 82 Reward: -109.93943252894113


Train model: 100%|██████████| 82/82 [01:35<00:00,  1.13s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5027879071052427).
Total T: 22504 Episode Num: 506 Episode T: 84 Reward: -134.0954857135766


Train model: 100%|██████████| 84/84 [01:37<00:00,  1.13s/batch]


-0.4798972207549811 -0.2772168268941894
Aborting episode due to head being > .2m behind the pelvis (-0.20268039386079167).
Total T: 22584 Episode Num: 507 Episode T: 80 Reward: -186.7164457672181


Train model: 100%|██████████| 80/80 [01:33<00:00,  1.13s/batch]


-0.8949156930123476 -0.6909619530295561
Aborting episode due to head being > .2m behind the pelvis (-0.20395373998279154).
Total T: 22700 Episode Num: 508 Episode T: 116 Reward: -481.76799009694406


Train model: 100%|██████████| 116/116 [02:14<00:00,  1.18s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5046041127007319).
Total T: 22799 Episode Num: 509 Episode T: 99 Reward: -143.35370971877825


Train model: 100%|██████████| 99/99 [01:57<00:00,  1.15s/batch]


Aborting episode due to head being > 0.5m away from centerline (-0.5078809481766253).
Total T: 22957 Episode Num: 510 Episode T: 158 Reward: -73.69178712181224


Train model: 100%|██████████| 158/158 [03:06<00:00,  1.15s/batch]


-0.6934859540749054 -0.49313036204009253
Aborting episode due to head being > .2m behind the pelvis (-0.2003555920348129).
Total T: 23057 Episode Num: 511 Episode T: 100 Reward: -341.2290613742581


Train model: 100%|██████████| 100/100 [01:57<00:00,  1.15s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5081666564819106).
Total T: 23168 Episode Num: 512 Episode T: 111 Reward: -167.95877134964192


Train model: 100%|██████████| 111/111 [02:10<00:00,  1.16s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5055835697898989).
Total T: 23252 Episode Num: 513 Episode T: 84 Reward: -145.4512352252282


Train model: 100%|██████████| 84/84 [01:39<00:00,  1.15s/batch]


-0.3450978858663967 -0.14313932448052502
Aborting episode due to head being > .2m behind the pelvis (-0.2019585613858717).
Total T: 23347 Episode Num: 514 Episode T: 95 Reward: -95.82672607114166


Train model: 100%|██████████| 95/95 [01:52<00:00,  1.16s/batch]


-0.42054346239616747 -0.2202896041533799
Aborting episode due to head being > .2m behind the pelvis (-0.20025385824278757).
Total T: 23443 Episode Num: 515 Episode T: 96 Reward: -146.6569797116032


Train model: 100%|██████████| 96/96 [01:54<00:00,  1.16s/batch]


-0.6114546285224749 -0.4024409906900902
Aborting episode due to head being > .2m behind the pelvis (-0.2090136378323847).
Total T: 23540 Episode Num: 516 Episode T: 97 Reward: -272.4023405081625


Train model: 100%|██████████| 97/97 [01:55<00:00,  1.17s/batch]


-0.33731949473569983 -0.13647040121873008
Aborting episode due to head being > .2m behind the pelvis (-0.20084909351696975).
Total T: 23607 Episode Num: 517 Episode T: 67 Reward: -92.9224195658961


Train model: 100%|██████████| 67/67 [01:20<00:00,  1.42s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5031162161843872).
Total T: 23690 Episode Num: 518 Episode T: 83 Reward: -125.38405450934358


Train model: 100%|██████████| 83/83 [01:37<00:00,  1.18s/batch]


-0.41427714469681665 -0.21154317066351325
Aborting episode due to head being > .2m behind the pelvis (-0.2027339740333034).
Total T: 23789 Episode Num: 519 Episode T: 99 Reward: -140.01212705781342


Train model: 100%|██████████| 99/99 [01:58<00:00,  1.17s/batch]


Total T: 23865 Episode Num: 520 Episode T: 76 Reward: -369.2674607024093


Train model: 100%|██████████| 76/76 [01:31<00:00,  1.17s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5049747523264294).
Total T: 23994 Episode Num: 521 Episode T: 129 Reward: 99.13724071376369


Train model: 100%|██████████| 129/129 [02:37<00:00,  1.69s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5001358296935956).
Total T: 24063 Episode Num: 522 Episode T: 69 Reward: -281.26519727836984


Train model: 100%|██████████| 69/69 [01:21<00:00,  1.19s/batch]


-0.5979869119848878 -0.39227836778147207
Aborting episode due to head being > .2m behind the pelvis (-0.20570854420341578).
Total T: 24173 Episode Num: 523 Episode T: 110 Reward: -264.8108337048499


Train model: 100%|██████████| 110/110 [02:12<00:00,  1.18s/batch]


-0.4337974217305544 -0.2308793251763746
Aborting episode due to head being > .2m behind the pelvis (-0.2029180965541798).
Total T: 24255 Episode Num: 524 Episode T: 82 Reward: -153.36432410327728


Train model: 100%|██████████| 82/82 [01:39<00:00,  1.18s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5082838303408567).
Total T: 24376 Episode Num: 525 Episode T: 121 Reward: -155.70231437940674


Train model: 100%|██████████| 121/121 [02:26<00:00,  1.19s/batch]


-0.3919420282309892 -0.1905027236450943
Aborting episode due to head being > .2m behind the pelvis (-0.20143930458589493).
Total T: 24461 Episode Num: 526 Episode T: 85 Reward: -126.23972043049496


Train model: 100%|██████████| 85/85 [01:43<00:00,  1.20s/batch]


-0.8462311710190158 -0.6454575396183302
Aborting episode due to head being > .2m behind the pelvis (-0.2007736314006856).
Total T: 24599 Episode Num: 527 Episode T: 138 Reward: -439.85742484260936


Train model: 100%|██████████| 138/138 [02:50<00:00,  1.20s/batch]


-0.4021633054705209 -0.19583737815027258
Aborting episode due to head being > .2m behind the pelvis (-0.20632592732024835).
Total T: 24694 Episode Num: 528 Episode T: 95 Reward: -128.45257255680204


Train model: 100%|██████████| 95/95 [01:56<00:00,  1.24s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5086965768778215).
Total T: 24768 Episode Num: 529 Episode T: 74 Reward: -193.85320554464238


Train model: 100%|██████████| 74/74 [01:28<00:00,  1.20s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5107028319527429).
Total T: 24857 Episode Num: 530 Episode T: 89 Reward: -154.1734434370078


Train model: 100%|██████████| 89/89 [01:49<00:00,  1.21s/batch]


Aborting episode due to head being > 0.5m away from centerline (0.5069596749150653).
Total T: 24935 Episode Num: 531 Episode T: 78 Reward: -135.51871554428703


Train model: 100%|██████████| 78/78 [01:36<00:00,  1.20s/batch]


Aborting episode due to head being > 0.5m away from centerline (-0.5172219346787169).
Total T: 25111 Episode Num: 532 Episode T: 176 Reward: -90.95824123347685


Train model: 100%|██████████| 176/176 [03:38<00:00,  1.20s/batch]
Evaluating policy: 100%|██████████| 10/10 [03:41<00:00, 22.12s/episode]


---------------------------------------
Evaluation over 10 episodes: -80.558020
---------------------------------------


MemoryError: 

In [None]:
# Shell escape: runs `shutdown` with sudo on the machine hosting the kernel.
# NOTE(review): presumably meant to power the training host off once the run
# above is over — confirm this is intended before using "Run All".
!sudo shutdown

In [65]:
# Persist the episode history: stack the previously saved episodes and the
# history gathered in this session into one frame (fresh 0..n-1 index) and
# write it to the configured episode file under key 'df'.
#
# Commented-out alternatives that used to live in this cell wrote to a
# '_start.' variant of the file with format='table' (optionally append=True).
#
# NOTE(review): pandas warns that the object-typed columns get pickled by
# PyTables, and the recorded run of this cell ended in MemoryError — the
# combined frame may not fit in memory; verify before relying on this save.
episode_frames = [df_saved_episodes, env_history]
episode_file = CONFIG['training']['episode_save_load_file']
pd.concat(episode_frames, ignore_index=True, copy=False).to_hdf(episode_file, key='df')


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block0_values] [items->['episode_uuid', 'i_step', 'action', 'obs', 'reward', 'done', 'info']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)


MemoryError: 