In [11]:
def expert_loc(env):
    """Return the expert location string registered for an environment id.

    Args:
        env: Environment id. Supported values are 'Ant-v2', 'Humanoid-v2'
            and 'HalfCheetah-v2'.

    Returns:
        The relative path string associated with ``env``.

    Raises:
        ValueError: If ``env`` is not one of the supported ids. (Previously
            this surfaced as an UnboundLocalError with no hint about the
            offending value.)
    """
    locations = {
        'Ant-v2': 'ant/data/expert_fully_corrective',
        'Humanoid-v2': 'humanoid/data/expert_fully_corrective_T100k_saved',
        'HalfCheetah-v2': 'cheetah/data/reduce4_proper_dims',
    }
    if env not in locations:
        raise ValueError(
            'no expert location registered for env %r (known: %s)'
            % (env, ', '.join(sorted(locations)))
        )
    return locations[env]

def experiment_name(pretrain=False, during_train=False, eps=0, epoch=0, base=False):
    """Build the underscore-joined experiment name for one configuration.

    The name starts with a mode token: 'pretrain', 'epsgreedy' or 'both',
    depending on which of ``pretrain`` / ``during_train`` are set (no token
    when neither is set).

    If ``base`` is true, the token 'base' follows and nothing else is added.
    Otherwise 'epochsNN' is added when ``pretrain`` is set and ``epoch > 0``,
    and 'epsNNN' (``eps * 1000`` formatted with ``%03d``) is added when
    ``during_train`` is set and ``eps > 0``.

    Returns:
        The tokens joined with '_' (an empty string when there are none).
    """
    if pretrain and during_train:
        tokens = ['both']
    elif pretrain:
        tokens = ['pretrain']
    elif during_train:
        tokens = ['epsgreedy']
    else:
        tokens = []

    # Baseline runs carry no hyper-parameter suffixes.
    if base:
        tokens += ['base']
        return '_'.join(tokens)

    if pretrain and epoch > 0:
        tokens += ['epochs%02d' % epoch]
    if during_train and eps > 0:
        # %d drops the fractional part of eps*1000.
        tokens += ['eps%03d' % (eps*1000)]

    return '_'.join(tokens)

def format_cmd(base_cmd, exp_name, exp_id, eps=0.0, epoch=0, seed=(-1,), epochs=400):
    """Append the experiment flags to a base command line.

    Args:
        base_cmd: Command prefix; used verbatim as the first element.
        exp_name: Value for ``--exp_name``.
        exp_id: Value for ``--exp_id``.
        eps: Value for ``--eps``, formatted with ``%f``.
        epoch: Value for ``--pretrain_epochs``, formatted with ``%d``.
        seed: Iterable of seeds; each is converted with ``str`` and the
            results are space-separated after ``--seed``.
        epochs: Value for ``--epochs``. Defaults to 400, the value that was
            previously hard-coded.

    Returns:
        The full command as a single space-joined string.
    """
    parts = [
        base_cmd,
        '--exp_name %s' % exp_name,
        '--eps %f' % eps,
        '--pretrain_epochs %d' % epoch,
        '--exp_id %s' % exp_id,
        '--epochs %d' % epochs,
        '--seed %s' % ' '.join(str(s) for s in seed),
    ]
    return ' '.join(parts)

def create_commands(env='Ant-v2', algo='vpg', exp_id='test',
                    pretrain=False, during_train=False, 
                    eps=[], pretrain_epochs=[], seeds=[0,10,20]):
    """Build the list of ``run.py`` command strings for one sweep.

    When ``during_train`` is set, the list starts with one baseline command
    that has no ``--expert_loc`` or exploration flags. The remaining commands
    use a prefix carrying ``--expert_loc`` plus ``--explore_pre_training``
    and/or ``--explore_during_training``:

    * ``pretrain`` only: one command per value in ``pretrain_epochs``.
    * ``during_train`` only: one command per value in ``eps``.
    * both: one command per (eps, pretrain epoch) pair, eps varying slowest.

    Returns:
        A new list of command strings (empty when neither flag is set).
    """
    run_prefix = 'python run.py --env %s --algo %s ' % (env, algo)
    baseline_name = experiment_name(pretrain, during_train, base=True)

    commands = []
    if during_train:
        commands.append(format_cmd(run_prefix, baseline_name, exp_id, seed=seeds))

    # Every non-baseline run points at the expert location for this env.
    run_prefix = 'python run.py --env %s --algo %s --expert_loc %s ' % (env, algo, expert_loc(env))
    if pretrain:
        run_prefix += '--explore_pre_training '
    if during_train:
        run_prefix += '--explore_during_training '

    if pretrain and during_train:
        for rate in eps:
            for n_pre in pretrain_epochs:
                name = experiment_name(pretrain, during_train, eps=rate, epoch=n_pre)
                commands.append(format_cmd(run_prefix, name, exp_id,
                                           eps=rate, epoch=n_pre, seed=seeds))
    elif pretrain:
        for n_pre in pretrain_epochs:
            name = experiment_name(pretrain, during_train, epoch=n_pre)
            commands.append(format_cmd(run_prefix, name, exp_id, epoch=n_pre, seed=seeds))
    elif during_train:
        for rate in eps:
            name = experiment_name(pretrain, during_train, eps=rate)
            commands.append(format_cmd(run_prefix, name, exp_id, eps=rate, seed=seeds))

    return commands


In [None]:
import os
import sys
import subprocess
from subprocess import Popen, PIPE
import multiprocessing
import random, time
import datetime

def worker(cmd, max_delay=50):
    """Run one shell command after a random delay and report its exit status.

    Args:
        cmd: Command line string, executed through the shell.
        max_delay: Upper bound in seconds for the random delay before the
            command is launched. Defaults to 50, the value that was
            previously hard-coded.

    Returns:
        The exit code of the command.
    """
    # NOTE(review): the random delay presumably staggers start-up of the
    # pool's workers -- confirm the intent before changing it.
    time.sleep(random.random() * max_delay)

    print('running command: %s\n' % cmd)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    # communicate() drains both pipes and waits for the process to exit,
    # so no kill() is needed afterwards.
    stdout, stderr = p.communicate()

    # Surface failures instead of silently discarding the captured output.
    if p.returncode != 0:
        print('command failed (exit code %s): %s\n%s'
              % (p.returncode, cmd, stderr.decode('utf-8', 'replace')))
    return p.returncode

# Sweep configuration: every (env, algo) pair contributes one batch of
# commands built by create_commands().
envs = ['HalfCheetah-v2']
algos = ['vpg']
# eps = [.005, .01, .05, .10, .20, .30]
eps = [0.0005, .001, .0025, .005, .01]
pretrain_epochs = [10, 20, 30]
seeds = [-1, -1, -1, -1]

cmds = []
i = 0
for env in envs:
    for algo in algos:
        # Read the clock once so every field of both ids comes from the same
        # instant (separate now() calls could straddle a minute boundary).
        now = datetime.datetime.now()

        print(now.month)
        print(now.day)

        stamp = '%s_%s_%s_%s' % (now.month, now.day, now.hour, now.minute)
        # Timestamped ids; only referenced by the commented-out variant below.
        eps_id = 'eps_' + stamp
        pre_id = 'pretrain_' + stamp

        cmds = cmds + create_commands(env=env, algo=algo, 
                                      exp_id='epsgreedy_uniform_epoch500',
                                      pretrain=False, during_train=True, 
                                      pretrain_epochs=pretrain_epochs, eps=eps, 
                                      seeds=seeds)
#         cmds = cmds + create_commands(env=env, algo=algo, exp_id=pre_id,
#                                       pretrain=True, during_train=False, 
#                                       pretrain_epochs=pretrain_epochs, eps=eps, 
#                                       seeds=seeds)

num_cmds = len(cmds)
count = int(multiprocessing.cpu_count()/2)
# Use at most half the CPUs and never more workers than commands, but always
# at least one: Pool() raises ValueError for processes < 1, which happened on
# a single-core machine or with an empty command list.
num_processes = max(1, min(num_cmds, count))
print(num_processes)

pool = multiprocessing.Pool(processes=num_processes)
pool.map(worker, cmds)
pool.close()
# Wait for the worker processes to exit after the pool is closed.
pool.join()

4
4
6
running command: python run.py --env HalfCheetah-v2 --algo vpg --expert_loc cheetah/data/reduce4_proper_dims --explore_during_training  --exp_name epsgreedy_eps005 --eps 0.005000 --pretrain_epochs 0 --exp_id epsgreedy_uniform_epoch500 --epochs 400 --seed -1 -1 -1 -1

running command: python run.py --env HalfCheetah-v2 --algo vpg --expert_loc cheetah/data/reduce4_proper_dims --explore_during_training  --exp_name epsgreedy_eps010 --eps 0.010000 --pretrain_epochs 0 --exp_id epsgreedy_uniform_epoch500 --epochs 400 --seed -1 -1 -1 -1

running command: python run.py --env HalfCheetah-v2 --algo vpg --expert_loc cheetah/data/reduce4_proper_dims --explore_during_training  --exp_name epsgreedy_eps002 --eps 0.002500 --pretrain_epochs 0 --exp_id epsgreedy_uniform_epoch500 --epochs 400 --seed -1 -1 -1 -1

running command: python run.py --env HalfCheetah-v2 --algo vpg  --exp_name epsgreedy_base --eps 0.000000 --pretrain_epochs 0 --exp_id epsgreedy_uniform_epoch500 --epochs 400 --seed -1 -1 -1 