In [1]:
import sys
import os
import math
import json
import uuid
import datetime
import subprocess
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from models.td3 import TD3
# from osim.env import ProstheticsEnv
from environment.prosthetics_env_with_history import ProstheticsEnvWithHistory
from environment.observations import prepare_model_observation, env_obs_history_to_model_obs
# from environment.actions import prepare_env_action, reset_frameskip
from environment.rewards import env_obs_to_custom_reward
from distributed.database import persist_timesteps, persist_event, get_total_timesteps, clear_clients_for_thread
from distributed.db_history_sampler import DatabaseHistorySampler
from distributed.s3_checkpoints import load_s3_model_checkpoint, save_s3_model_checkpoint
import torch
import torch.utils.data



In [2]:
# Read the distributed-training configuration from disk and echo it so the
# exact settings used for this run are recorded in the notebook output.
CONFIG = json.loads(Path('config_distributed.json').read_text())
print(json.dumps(CONFIG, indent=4))


{
    "env": {
        "integrator_accuracy": 0.002
    },
    "model": {
        "architecture": "TD3"
    },
    "rollout": {
        "#": "Frameskip will be applied for random durations between 0 and `frameskip` timesteps.",
        "max_episode_steps": 600,
        "expl_noise": 0.25,
        "frameskip": 5
    },
    "distributed": {
        "policy_weights_dir_s3": "s3://colllin-nips-2018-prosthetics/checkpoints/",
        "policy_weights_basename": "checkpoint_TD3",
        "#": "How often (episodes) we download model weights during rollout.",
        "rollout_refresh_model_freq": 5
    },
    "training": {
        "#": "Frequency of delayed policy updates",
        "eval_freq": 2500,
        "batch_size": 100,
        "discount": 0.99,
        "tau": 0.005,
        "policy_noise": 0.2,
        "noise_clip": 0.5,
        "policy_freq": 2
    }
}


### Create Policy, Download & load latest weights

In [3]:
def _probe_env_dimensions():
    """Build a throwaway environment and measure the sizes the policy needs.

    Returns:
        tuple: ``(state_dim, action_dim, max_action)`` where ``state_dim`` is
        the length of the prepared model observation (not the raw
        ``env.observation_space``), ``action_dim`` is the length of the action
        space, and ``max_action`` is the first upper bound of the action space
        truncated to ``int``.
    """
    probe_env = ProstheticsEnvWithHistory(
        visualize=False,
        integrator_accuracy=CONFIG['env']['integrator_accuracy'],
    )
    probe_env.reset()
    model_obs = prepare_model_observation(probe_env)
    action_space = probe_env.action_space
    # The environment is only needed for these measurements; it goes out of
    # scope (and is released) as soon as this function returns.
    return model_obs.shape[0], action_space.shape[0], int(action_space.high[0])


state_dim, action_dim, max_action = _probe_env_dimensions()
state_dim, action_dim, max_action


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


(1260, 19, 1)

In [4]:
# Instantiate the TD3 policy from the observation size, action size and action
# bound computed earlier in the notebook. Weights are loaded in the next cell.
policy = TD3(state_dim, action_dim, max_action)

In [5]:
# Load the latest shared checkpoint from S3 into `policy` (passing
# map_location='cpu') and record the event in the database.
dist_cfg = CONFIG['distributed']
checkpoint_glob = f"{dist_cfg['policy_weights_dir_s3']}{dist_cfg['policy_weights_basename']}*"

print(f"Loading policy checkpoints from {checkpoint_glob}")
load_s3_model_checkpoint(
    policy,
    s3_dir=dist_cfg['policy_weights_dir_s3'],
    basename=dist_cfg['policy_weights_basename'],
    map_location='cpu',
)
persist_event('train_load_latest_checkpoint', f'Loaded policy checkpoint from {checkpoint_glob}')



Loading policy checkpoints from s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*


### Episode Hacking (Custom "done" criteria)


In [6]:
def should_abort_episode(env_obs, custom_rewards=None, verbose=False):
    """Decide whether an episode should be cut short (custom "done" criteria).

    The episode is aborted as soon as one of these checks fires, in order:

    1. ``head[0] - pelvis[0] < -0.2`` (logged as the head being more than
       0.2 m behind the pelvis),
    2. ``abs(head[2]) > 0.5`` (logged as the head being more than 0.5 m away
       from the centerline),
    3. the custom reward components sum to less than -10.

    Args:
        env_obs: Observation dict. Only ``env_obs['body_pos']['head']`` and
            ``env_obs['body_pos']['pelvis']`` are read directly.
        custom_rewards: Optional mapping of reward name -> value. When
            ``None``, the rewards are computed with
            ``env_obs_to_custom_reward(env_obs)``, but only if the two
            position checks did not already abort the episode.
        verbose: When true, print the reason for aborting.

    Returns:
        bool: ``True`` if the episode should be aborted, ``False`` otherwise.
    """
    head_pos = env_obs['body_pos']['head']
    pelvis_pos = env_obs['body_pos']['pelvis']

    head_offset = head_pos[0] - pelvis_pos[0]
    if head_offset < -.2:
        if verbose:
            print(f'Aborting episode due to head being > .2m behind the pelvis ({head_offset}).')
        return True

    if np.fabs(head_pos[2]) > 0.5:
        if verbose:
            print(f'Aborting episode due to head being > 0.5m away from centerline ({head_pos[2]}).')
        return True

    # Compute the reward only once the cheap position checks have passed.
    # `is not None` is an identity test, so it cannot be hijacked by a
    # container type that overloads `!=`.
    rewards = custom_rewards if custom_rewards is not None else env_obs_to_custom_reward(env_obs)
    total_reward = sum(rewards.values())
    if total_reward < -10:
        if verbose:
            print(f'Aborting episode due to custom reward < -10 ({total_reward}):')
            # List only the penalising components to show what went wrong.
            for k, v in rewards.items():
                if v < 0:
                    print(f'  reward `{k}` = {v}')
        return True

    return False
    

### Init dataloader

In [7]:
def _total_custom_reward(obs):
    """Collapse the per-component custom rewards for `obs` into a single sum."""
    return sum(env_obs_to_custom_reward(obs).values())


# Sampler used as the training data source. It is configured with a history
# length of 3 observations and with this notebook's custom reward and "done"
# definitions. (Presumably it draws stored timesteps from the database --
# confirm against DatabaseHistorySampler.)
history_sampler = DatabaseHistorySampler(
    n_obs_history=3,
    env_obs_history_to_model_obs_fn=env_obs_history_to_model_obs,
    env_obs_custom_done_fn=should_abort_episode,
    env_obs_custom_reward_fn=_total_custom_reward,
)

def load_batch(fake_batch):
    """Collate function: swap a batch of placeholder items for sampled data.

    The contents of `fake_batch` are ignored; only its length is used, as the
    number of samples requested from `history_sampler`.
    """
    n_samples = len(fake_batch)
    return history_sampler.sample(n_samples)

def _reset_db_clients(instance_id):
    """Worker start-up hook: call clear_clients_for_thread(); the worker id is unused."""
    return clear_clients_for_thread()


# The "dataset" is only a list of placeholder indices. Its length fixes how
# many batches make up one pass (`eval_freq` batches of `batch_size`), while
# the real data for each batch is produced by `load_batch`.
training_cfg = CONFIG['training']
fake_dataset_len = training_cfg['eval_freq'] * training_cfg['batch_size']
fake_dataset = list(range(int(fake_dataset_len)))
dataloader = torch.utils.data.DataLoader(
    fake_dataset,
    batch_size=training_cfg['batch_size'],
    collate_fn=load_batch,
    num_workers=6,
    drop_last=True,
    pin_memory=True,
    worker_init_fn=_reset_db_clients,
)




# Model Training

In [8]:
# Main training loop. Each pass:
#   1. trains the policy (for TD3: one pass over `dataloader`),
#   2. uploads the weights to S3 under the shared basename, to be picked up by
#      instances running the Rollout Distributed process,
#   3. uploads the same weights under a unique, timestamped name as a
#      historical checkpoint,
#   4. saves the weights locally and launches `evaluate_policy.py` in the
#      background.
# The loop never exits on its own; interrupt the kernel to stop training.
while True:
    # Train for `eval_freq` batches:
    if CONFIG['model']['architecture'] == "TD3":
        policy.train(
            dataloader,
            CONFIG['training']['discount'],
            CONFIG['training']['tau'],
            CONFIG['training']['policy_noise'],
            CONFIG['training']['noise_clip'],
            CONFIG['training']['policy_freq'],
        )
    else:
        policy.train(
            history_sampler,  # used in place of a replay buffer
            int(CONFIG['training']['eval_freq']),
            CONFIG['training']['batch_size'],
            CONFIG['training']['discount'],
            CONFIG['training']['tau']
        )
    persist_event('train_epoch_completed', f'Trained policy for {len(dataloader)} batches of {dataloader.batch_size}')

    # Upload policy weights to S3, to be picked up by instances running the Rollout Distributed process.
    print(f"SAving policy checkpoints to {CONFIG['distributed']['policy_weights_dir_s3']}{CONFIG['distributed']['policy_weights_basename']}*")
    save_s3_model_checkpoint(
        policy,
        s3_dir=CONFIG['distributed']['policy_weights_dir_s3'],
        basename=CONFIG['distributed']['policy_weights_basename'],
    )
    persist_event('train_update_s3_checkpoint', f'Uploaded policy checkpoint to {CONFIG["distributed"]["policy_weights_dir_s3"]}{CONFIG["distributed"]["policy_weights_basename"]}*')

    # Also upload policy weights under unique name as a historical checkpoint.
    # The name embeds the total timestep count reported by the database and
    # the current local time.
    total_timesteps = get_total_timesteps()
    evalname = f"{CONFIG['distributed']['policy_weights_basename']}_T{total_timesteps}_{datetime.datetime.now().isoformat()}"
    print(f"SAving policy checkpoints to {CONFIG['distributed']['policy_weights_dir_s3']}{evalname}*")
    save_s3_model_checkpoint(
        policy,
        s3_dir=CONFIG['distributed']['policy_weights_dir_s3'],
        basename=evalname,
    )
    persist_event('train_save_historical_checkpoint', f'Uploaded policy checkpoint to {CONFIG["distributed"]["policy_weights_dir_s3"]}{evalname}*')

    # Run Evaluation script against a local copy of the weights, stored in a
    # fresh uniquely-named directory so concurrent evaluations do not collide.
    evaldir = str(uuid.uuid4())
    print(f"SAving policy checkpoints to {evaldir}/{evalname}*")
    os.makedirs(evaldir, exist_ok=True)
    policy.save(evaldir, evalname)
    # Human-readable form of the launch; used for the log line only.
    evalcmd = f"CHECKPOINT_DIR={evaldir} CHECKPOINT_NAME={evalname} pipenv run python evaluate_policy.py"
    print(f"Launching evaluation script with cmd: `{evalcmd}`")
    # Launch without a shell: the checkpoint location travels through the
    # child's environment, so a basename containing spaces or shell
    # metacharacters cannot break or alter the command. The process is not
    # waited on; evaluation runs in the background while training continues.
    subprocess.Popen(
        ['pipenv', 'run', 'python', 'evaluate_policy.py'],
        env={**os.environ, 'CHECKPOINT_DIR': evaldir, 'CHECKPOINT_NAME': evalname},
    )
    
    

Train model: 100%|██████████| 2500/2500 [05:47<00:00,  8.49batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T90860_2018-09-26T13:49:24.037055*
SAving policy checkpoints to 5bb7964d-2562-4c4d-ab70-d3ad8907e8a3/checkpoint_TD3_T90860_2018-09-26T13:49:24.037055*
Launching evaluation script with cmd: `CHECKPOINT_DIR=5bb7964d-2562-4c4d-ab70-d3ad8907e8a3 CHECKPOINT_NAME=checkpoint_TD3_T90860_2018-09-26T13:49:24.037055 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [07:16<00:00,  8.03batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T116034_2018-09-26T13:56:42.709661*
SAving policy checkpoints to f3ae6fa0-3acb-4094-9daa-25dca51c3b56/checkpoint_TD3_T116034_2018-09-26T13:56:42.709661*
Launching evaluation script with cmd: `CHECKPOINT_DIR=f3ae6fa0-3acb-4094-9daa-25dca51c3b56 CHECKPOINT_NAME=checkpoint_TD3_T116034_2018-09-26T13:56:42.709661 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [07:55<00:00,  5.25batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T144787_2018-09-26T14:04:40.988324*
SAving policy checkpoints to c36e4b75-e519-4a44-89eb-849b59409835/checkpoint_TD3_T144787_2018-09-26T14:04:40.988324*
Launching evaluation script with cmd: `CHECKPOINT_DIR=c36e4b75-e519-4a44-89eb-849b59409835 CHECKPOINT_NAME=checkpoint_TD3_T144787_2018-09-26T14:04:40.988324 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:10<00:00,  5.10batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T174003_2018-09-26T14:12:53.801034*
SAving policy checkpoints to c35270eb-8598-4f89-a287-bfc1cfac9e58/checkpoint_TD3_T174003_2018-09-26T14:12:53.801034*
Launching evaluation script with cmd: `CHECKPOINT_DIR=c35270eb-8598-4f89-a287-bfc1cfac9e58 CHECKPOINT_NAME=checkpoint_TD3_T174003_2018-09-26T14:12:53.801034 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:14<00:00,  5.05batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T203407_2018-09-26T14:21:12.044560*
SAving policy checkpoints to 0f41befa-46dc-40a3-a8f5-d9c9e4875b6e/checkpoint_TD3_T203407_2018-09-26T14:21:12.044560*
Launching evaluation script with cmd: `CHECKPOINT_DIR=0f41befa-46dc-40a3-a8f5-d9c9e4875b6e CHECKPOINT_NAME=checkpoint_TD3_T203407_2018-09-26T14:21:12.044560 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:08<00:00,  5.12batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T233107_2018-09-26T14:29:23.896472*
SAving policy checkpoints to e8786e8a-0e68-4055-b50f-79c49261c3e6/checkpoint_TD3_T233107_2018-09-26T14:29:23.896472*
Launching evaluation script with cmd: `CHECKPOINT_DIR=e8786e8a-0e68-4055-b50f-79c49261c3e6 CHECKPOINT_NAME=checkpoint_TD3_T233107_2018-09-26T14:29:23.896472 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:13<00:00,  5.06batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T262859_2018-09-26T14:37:41.574132*
SAving policy checkpoints to a2c08ba7-d07e-4c3b-a069-567cd3b3a818/checkpoint_TD3_T262859_2018-09-26T14:37:41.574132*
Launching evaluation script with cmd: `CHECKPOINT_DIR=a2c08ba7-d07e-4c3b-a069-567cd3b3a818 CHECKPOINT_NAME=checkpoint_TD3_T262859_2018-09-26T14:37:41.574132 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:06<00:00,  7.45batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T292200_2018-09-26T14:45:52.522100*
SAving policy checkpoints to 44b815e5-ee48-4e59-90a2-5873ba980cba/checkpoint_TD3_T292200_2018-09-26T14:45:52.522100*
Launching evaluation script with cmd: `CHECKPOINT_DIR=44b815e5-ee48-4e59-90a2-5873ba980cba CHECKPOINT_NAME=checkpoint_TD3_T292200_2018-09-26T14:45:52.522100 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:05<00:00,  5.15batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T316663_2018-09-26T14:54:02.643708*
SAving policy checkpoints to 5867afba-33a7-492d-968c-e5429d5fbdee/checkpoint_TD3_T316663_2018-09-26T14:54:02.643708*
Launching evaluation script with cmd: `CHECKPOINT_DIR=5867afba-33a7-492d-968c-e5429d5fbdee CHECKPOINT_NAME=checkpoint_TD3_T316663_2018-09-26T14:54:02.643708 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:07<00:00,  5.13batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T326717_2018-09-26T15:02:15.028588*
SAving policy checkpoints to 3a6cbc10-cdde-4a05-a7b6-e08842c8381e/checkpoint_TD3_T326717_2018-09-26T15:02:15.028588*
Launching evaluation script with cmd: `CHECKPOINT_DIR=3a6cbc10-cdde-4a05-a7b6-e08842c8381e CHECKPOINT_NAME=checkpoint_TD3_T326717_2018-09-26T15:02:15.028588 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:10<00:00,  5.10batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T336865_2018-09-26T15:10:29.951491*
SAving policy checkpoints to 8b740726-0284-4c5d-b02a-74f7a53da875/checkpoint_TD3_T336865_2018-09-26T15:10:29.951491*
Launching evaluation script with cmd: `CHECKPOINT_DIR=8b740726-0284-4c5d-b02a-74f7a53da875 CHECKPOINT_NAME=checkpoint_TD3_T336865_2018-09-26T15:10:29.951491 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:07<00:00,  5.13batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T347014_2018-09-26T15:18:42.608827*
SAving policy checkpoints to c94b53fd-9163-4921-a4bd-765c332c6f56/checkpoint_TD3_T347014_2018-09-26T15:18:42.608827*
Launching evaluation script with cmd: `CHECKPOINT_DIR=c94b53fd-9163-4921-a4bd-765c332c6f56 CHECKPOINT_NAME=checkpoint_TD3_T347014_2018-09-26T15:18:42.608827 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:06<00:00,  8.26batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T357040_2018-09-26T15:26:54.092054*
SAving policy checkpoints to a4d9668b-8df5-4592-89b8-01644a81afc0/checkpoint_TD3_T357040_2018-09-26T15:26:54.092054*
Launching evaluation script with cmd: `CHECKPOINT_DIR=a4d9668b-8df5-4592-89b8-01644a81afc0 CHECKPOINT_NAME=checkpoint_TD3_T357040_2018-09-26T15:26:54.092054 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:07<00:00,  5.13batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T367127_2018-09-26T15:35:06.730147*
SAving policy checkpoints to 81123610-6540-4af4-a113-0304a8a29cee/checkpoint_TD3_T367127_2018-09-26T15:35:06.730147*
Launching evaluation script with cmd: `CHECKPOINT_DIR=81123610-6540-4af4-a113-0304a8a29cee CHECKPOINT_NAME=checkpoint_TD3_T367127_2018-09-26T15:35:06.730147 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:08<00:00,  5.12batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T377048_2018-09-26T15:43:20.319652*
SAving policy checkpoints to 4e75188e-c20d-4205-addc-3c595efc6f70/checkpoint_TD3_T377048_2018-09-26T15:43:20.319652*
Launching evaluation script with cmd: `CHECKPOINT_DIR=4e75188e-c20d-4205-addc-3c595efc6f70 CHECKPOINT_NAME=checkpoint_TD3_T377048_2018-09-26T15:43:20.319652 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:09<00:00,  5.10batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T387164_2018-09-26T15:51:37.235230*
SAving policy checkpoints to 208f4a91-0d40-4675-9bc1-a06628f16080/checkpoint_TD3_T387164_2018-09-26T15:51:37.235230*
Launching evaluation script with cmd: `CHECKPOINT_DIR=208f4a91-0d40-4675-9bc1-a06628f16080 CHECKPOINT_NAME=checkpoint_TD3_T387164_2018-09-26T15:51:37.235230 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:01<00:00,  5.19batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T397049_2018-09-26T15:59:45.275985*
SAving policy checkpoints to 8fad5d9a-2501-4615-b210-43032167ca57/checkpoint_TD3_T397049_2018-09-26T15:59:45.275985*
Launching evaluation script with cmd: `CHECKPOINT_DIR=8fad5d9a-2501-4615-b210-43032167ca57 CHECKPOINT_NAME=checkpoint_TD3_T397049_2018-09-26T15:59:45.275985 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:11<00:00,  5.09batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T407208_2018-09-26T16:08:01.871089*
SAving policy checkpoints to 9dccafb2-2fa0-4711-b12d-83a00b7ab01e/checkpoint_TD3_T407208_2018-09-26T16:08:01.871089*
Launching evaluation script with cmd: `CHECKPOINT_DIR=9dccafb2-2fa0-4711-b12d-83a00b7ab01e CHECKPOINT_NAME=checkpoint_TD3_T407208_2018-09-26T16:08:01.871089 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:09<00:00,  5.11batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T417392_2018-09-26T16:16:17.116208*
SAving policy checkpoints to 68080a93-9b1e-4717-a172-1d64d64b3206/checkpoint_TD3_T417392_2018-09-26T16:16:17.116208*
Launching evaluation script with cmd: `CHECKPOINT_DIR=68080a93-9b1e-4717-a172-1d64d64b3206 CHECKPOINT_NAME=checkpoint_TD3_T417392_2018-09-26T16:16:17.116208 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:11<00:00,  5.09batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T427288_2018-09-26T16:24:35.678515*
SAving policy checkpoints to ccc6d18f-eb92-4109-afe5-258cad6e3860/checkpoint_TD3_T427288_2018-09-26T16:24:35.678515*
Launching evaluation script with cmd: `CHECKPOINT_DIR=ccc6d18f-eb92-4109-afe5-258cad6e3860 CHECKPOINT_NAME=checkpoint_TD3_T427288_2018-09-26T16:24:35.678515 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:09<00:00,  5.11batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T437530_2018-09-26T16:32:50.591437*
SAving policy checkpoints to d024cbc3-5f27-4746-b234-105566cd46c1/checkpoint_TD3_T437530_2018-09-26T16:32:50.591437*
Launching evaluation script with cmd: `CHECKPOINT_DIR=d024cbc3-5f27-4746-b234-105566cd46c1 CHECKPOINT_NAME=checkpoint_TD3_T437530_2018-09-26T16:32:50.591437 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:02<00:00,  5.18batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T447169_2018-09-26T16:40:59.661575*
SAving policy checkpoints to d9da1aa8-e997-48e9-96c7-a5e969b85a27/checkpoint_TD3_T447169_2018-09-26T16:40:59.661575*
Launching evaluation script with cmd: `CHECKPOINT_DIR=d9da1aa8-e997-48e9-96c7-a5e969b85a27 CHECKPOINT_NAME=checkpoint_TD3_T447169_2018-09-26T16:40:59.661575 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:07<00:00,  6.01batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T457127_2018-09-26T16:49:13.119521*
SAving policy checkpoints to 0f863122-b67a-4725-9802-3d1d3f6870db/checkpoint_TD3_T457127_2018-09-26T16:49:13.119521*
Launching evaluation script with cmd: `CHECKPOINT_DIR=0f863122-b67a-4725-9802-3d1d3f6870db CHECKPOINT_NAME=checkpoint_TD3_T457127_2018-09-26T16:49:13.119521 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:10<00:00,  5.10batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T466810_2018-09-26T16:57:32.499984*
SAving policy checkpoints to 4e7364c0-d0d9-41c6-ae27-85c284a675a9/checkpoint_TD3_T466810_2018-09-26T16:57:32.499984*
Launching evaluation script with cmd: `CHECKPOINT_DIR=4e7364c0-d0d9-41c6-ae27-85c284a675a9 CHECKPOINT_NAME=checkpoint_TD3_T466810_2018-09-26T16:57:32.499984 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:06<00:00,  5.14batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T476687_2018-09-26T17:05:45.548323*
SAving policy checkpoints to 7ca2197b-cfb1-4385-afe3-a70026f256fa/checkpoint_TD3_T476687_2018-09-26T17:05:45.548323*
Launching evaluation script with cmd: `CHECKPOINT_DIR=7ca2197b-cfb1-4385-afe3-a70026f256fa CHECKPOINT_NAME=checkpoint_TD3_T476687_2018-09-26T17:05:45.548323 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:15<00:00,  5.04batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T486611_2018-09-26T17:14:10.292386*
SAving policy checkpoints to 1884c496-8968-4f7a-8172-63fcb254f0e1/checkpoint_TD3_T486611_2018-09-26T17:14:10.292386*
Launching evaluation script with cmd: `CHECKPOINT_DIR=1884c496-8968-4f7a-8172-63fcb254f0e1 CHECKPOINT_NAME=checkpoint_TD3_T486611_2018-09-26T17:14:10.292386 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:03<00:00,  5.17batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T496183_2018-09-26T17:22:20.862578*
SAving policy checkpoints to 3c007c38-f811-4210-b25b-9af798e746f2/checkpoint_TD3_T496183_2018-09-26T17:22:20.862578*
Launching evaluation script with cmd: `CHECKPOINT_DIR=3c007c38-f811-4210-b25b-9af798e746f2 CHECKPOINT_NAME=checkpoint_TD3_T496183_2018-09-26T17:22:20.862578 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:04<00:00,  5.16batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T505956_2018-09-26T17:30:37.510970*
SAving policy checkpoints to f2ea39ed-4036-42e9-beaf-1d9f7e8f65d9/checkpoint_TD3_T505956_2018-09-26T17:30:37.510970*
Launching evaluation script with cmd: `CHECKPOINT_DIR=f2ea39ed-4036-42e9-beaf-1d9f7e8f65d9 CHECKPOINT_NAME=checkpoint_TD3_T505956_2018-09-26T17:30:37.510970 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:01<00:00,  5.19batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T515776_2018-09-26T17:38:56.717782*
SAving policy checkpoints to a7ad22d3-1418-4d36-a52a-f14ff201af17/checkpoint_TD3_T515776_2018-09-26T17:38:56.717782*
Launching evaluation script with cmd: `CHECKPOINT_DIR=a7ad22d3-1418-4d36-a52a-f14ff201af17 CHECKPOINT_NAME=checkpoint_TD3_T515776_2018-09-26T17:38:56.717782 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:17<00:00,  5.03batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T525835_2018-09-26T17:47:23.569276*
SAving policy checkpoints to 382e7492-75cd-499a-a260-7101f70a1aa2/checkpoint_TD3_T525835_2018-09-26T17:47:23.569276*
Launching evaluation script with cmd: `CHECKPOINT_DIR=382e7492-75cd-499a-a260-7101f70a1aa2 CHECKPOINT_NAME=checkpoint_TD3_T525835_2018-09-26T17:47:23.569276 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:09<00:00,  5.11batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T536241_2018-09-26T17:55:50.770278*
SAving policy checkpoints to 49991646-8065-495d-85da-3a32aea4369c/checkpoint_TD3_T536241_2018-09-26T17:55:50.770278*
Launching evaluation script with cmd: `CHECKPOINT_DIR=49991646-8065-495d-85da-3a32aea4369c CHECKPOINT_NAME=checkpoint_TD3_T536241_2018-09-26T17:55:50.770278 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [07:56<00:00,  5.25batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T546357_2018-09-26T18:04:07.252086*
SAving policy checkpoints to 45b796d6-b198-4e44-acba-4e1d28d7b25b/checkpoint_TD3_T546357_2018-09-26T18:04:07.252086*
Launching evaluation script with cmd: `CHECKPOINT_DIR=45b796d6-b198-4e44-acba-4e1d28d7b25b CHECKPOINT_NAME=checkpoint_TD3_T546357_2018-09-26T18:04:07.252086 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:12<00:00,  5.07batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T556668_2018-09-26T18:12:37.690017*
SAving policy checkpoints to d72cafbb-66d8-436f-8b12-7fe8035322a2/checkpoint_TD3_T556668_2018-09-26T18:12:37.690017*
Launching evaluation script with cmd: `CHECKPOINT_DIR=d72cafbb-66d8-436f-8b12-7fe8035322a2 CHECKPOINT_NAME=checkpoint_TD3_T556668_2018-09-26T18:12:37.690017 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [07:55<00:00,  7.29batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T566612_2018-09-26T18:20:53.153737*
SAving policy checkpoints to 20e045f0-9314-41af-ad79-1254c63c8001/checkpoint_TD3_T566612_2018-09-26T18:20:53.153737*
Launching evaluation script with cmd: `CHECKPOINT_DIR=20e045f0-9314-41af-ad79-1254c63c8001 CHECKPOINT_NAME=checkpoint_TD3_T566612_2018-09-26T18:20:53.153737 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [08:15<00:00,  8.16batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T576738_2018-09-26T18:29:29.838588*
SAving policy checkpoints to 076e2d42-d0fd-4c54-a2be-4386dcc46d87/checkpoint_TD3_T576738_2018-09-26T18:29:29.838588*
Launching evaluation script with cmd: `CHECKPOINT_DIR=076e2d42-d0fd-4c54-a2be-4386dcc46d87 CHECKPOINT_NAME=checkpoint_TD3_T576738_2018-09-26T18:29:29.838588 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [07:56<00:00,  5.25batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T586604_2018-09-26T18:37:45.533023*
SAving policy checkpoints to 11d3e450-8776-4eb8-b383-b7b63d640efb/checkpoint_TD3_T586604_2018-09-26T18:37:45.533023*
Launching evaluation script with cmd: `CHECKPOINT_DIR=11d3e450-8776-4eb8-b383-b7b63d640efb CHECKPOINT_NAME=checkpoint_TD3_T586604_2018-09-26T18:37:45.533023 pipenv run python evaluate_policy.py`


Train model: 100%|██████████| 2500/2500 [07:56<00:00,  6.49batch/s]


SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3*
SAving policy checkpoints to s3://colllin-nips-2018-prosthetics/checkpoints/checkpoint_TD3_T595372_2018-09-26T18:46:01.445519*
SAving policy checkpoints to bc503e6a-2e68-4b7c-8266-2dd75f16975b/checkpoint_TD3_T595372_2018-09-26T18:46:01.445519*
Launching evaluation script with cmd: `CHECKPOINT_DIR=bc503e6a-2e68-4b7c-8266-2dd75f16975b CHECKPOINT_NAME=checkpoint_TD3_T595372_2018-09-26T18:46:01.445519 pipenv run python evaluate_policy.py`


Train model:   2%|▏         | 62/2500 [00:14<09:30,  4.27batch/s]Process Process-223:
Process Process-226:
Process Process-225:
Process Process-228:
Process Process-224:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Process Process-227:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubun

  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/pymongo/message.py", line 1463, in unpack_response
    return bson.decode_all(self.payload_document, codec_options)
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/bson/timestamp.py", line 33, in __init__
    def __init__(self, time, inc):
KeyboardInterrupt
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/pymongo/pool.py", line 745, in _raise_connection_failure
    raise error
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/pymongo/pool.py", line 579, in command
    unacknowledged=unacknowledged)
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/p

Traceback (most recent call last):
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-28c7e80da0cf>", line 10, in <module>
    CONFIG['training']['policy_freq'],
  File "/home/ubuntu/nips-2018-ai-for-prosthetics/models/td3.py", line 111, in train
    for ibatch, (x, y, u, r, d) in enumerate(tqdm(iter(replay_dataloader), desc='Train model', unit='batch')):
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/tqdm/_tqdm.py", line 937, in __iter__
    for obj in iterable:
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 330, in __next__
    idx, batch = self._get_batch()
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-pros

KeyboardInterrupt: 

  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/pymongo/network.py", line 173, in receive_message
    _receive_data_on_socket(sock, 16))
  File "/home/ubuntu/.local/share/virtualenvs/nips-2018-ai-for-prosthetics-fCqIkKV7/lib/python3.6/site-packages/pymongo/network.py", line 232, in _receive_data_on_socket
    chunk_length = sock.recv_into(mv[bytes_read:])
  File "/usr/lib/python3.6/ssl.py", line 1009, in recv_into
    return self.read(nbytes, buffer)
  File "/usr/lib/python3.6/ssl.py", line 871, in read
    return self._sslobj.read(len, buffer)
  File "/usr/lib/python3.6/ssl.py", line 631, in read
    v = self._sslobj.read(len, buffer)
KeyboardInterrupt
