In [1]:
def expert_loc(env):
    """Return the relative location of the expert data for an environment.

    Args:
        env: Environment id. Only 'Ant-v2' and 'Humanoid-v2' have a known
            expert location.

    Returns:
        The expert data location as a relative path string.

    Raises:
        ValueError: If ``env`` is not one of the supported environment ids.
    """
    locations = {
        'Ant-v2': 'ant/data/expert_fully_corrective',
        'Humanoid-v2': 'humanoid/data/expert_fully_corrective',
    }
    if env not in locations:
        # Fail with an explicit message instead of falling through to an
        # unbound local variable.
        raise ValueError(
            'no expert location known for env %r (supported: %s)'
            % (env, ', '.join(sorted(locations)))
        )
    return locations[env]

def experiment_name(pretrain=False, during_train=False, eps=0, epoch=0, base=False):
    """Build the underscore-separated experiment name for one configuration.

    The name starts with a mode prefix ('pretrain', 'epsgreedy' or 'both';
    omitted when neither flag is set) followed by either 'base' or the
    hyper-parameter suffixes.

    Args:
        pretrain: Whether exploration is used before training.
        during_train: Whether exploration is used during training.
        eps: Epsilon value; encoded as a two-digit percentage ('eps20' for
            0.2) when ``during_train`` is set and ``eps`` is positive.
        epoch: Number of pretraining epochs; encoded as 'epochsNN' when
            ``pretrain`` is set and ``epoch`` is positive.
        base: If true, append 'base' and ignore ``eps`` and ``epoch``.

    Returns:
        The experiment name, e.g. 'epsgreedy_eps20' or 'both_epochs10_eps05'.
        An empty string when no component applies.
    """
    parts = []
    if pretrain and during_train:
        parts.append('both')
    elif pretrain:
        parts.append('pretrain')
    elif during_train:
        parts.append('epsgreedy')

    if base:
        parts.append('base')
    else:
        if pretrain and epoch > 0:
            parts.append('epochs%02d' % epoch)
        if during_train and eps > 0:
            # Round before formatting: %d truncates, and eps * 100 can land
            # just below the intended integer (0.29 * 100 is
            # 28.999999999999996), which would mislabel the run as eps28.
            parts.append('eps%02d' % round(eps * 100))

    return '_'.join(parts)

def format_cmd(base_cmd, exp_name, eps=0.0, epoch=0, seed=-1):
    """Append the per-run flags to a base command line.

    Args:
        base_cmd: Command prefix. It is used verbatim, so trailing whitespace
            in it is kept and followed by the joining space.
        exp_name: Value for ``--exp_name``.
        eps: Value for ``--eps``, formatted with ``%f``.
        epoch: Value for ``--pretrain_epochs``, formatted with ``%d``.
        seed: Value for ``--seed``, formatted with ``%d``.

    Returns:
        The full command as a single space-joined string.
    """
    flags = (
        '--exp_name=%s' % exp_name,
        '--eps=%f' % eps,
        '--pretrain_epochs=%d' % epoch,
        '--seed=%d' % seed,
    )
    return ' '.join((base_cmd,) + flags)

def create_commands(env='Ant-v2', algo='vpg',
                    pretrain=False, during_train=False,
                    eps=(), pretrain_epochs=(), seeds=(0, 10, 20)):
    """Build the list of ``run.py`` command lines for one env/algo pair.

    The result always starts with one baseline command per seed (no
    ``--expert_loc`` and no exploration flags). It is followed by one command
    per seed for every hyper-parameter setting of the selected mode:

    * ``pretrain`` only: one setting per entry of ``pretrain_epochs``.
    * ``during_train`` only: one setting per entry of ``eps``.
    * both: one setting per (eps, pretrain_epochs) combination.

    Args:
        env: Environment id passed as ``--env``. It is also resolved through
            ``expert_loc``, even when neither exploration flag is set.
        algo: Algorithm name passed as ``--algo``.
        pretrain: Add ``--explore_pre_training`` and sweep ``pretrain_epochs``.
        during_train: Add ``--explore_during_training`` and sweep ``eps``.
        eps: Epsilon values to sweep.
        pretrain_epochs: Pretraining epoch counts to sweep.
        seeds: Seeds; every configuration is repeated once per seed.

    Returns:
        A new list of command strings.
    """
    # Baseline runs: same experiment prefix, no expert data, default eps/epoch.
    base_cmd = 'python run.py --env=%s --algo=%s ' % (env, algo)
    base_exp = experiment_name(pretrain, during_train, base=True)
    cmds = [format_cmd(base_cmd, base_exp, seed=s) for s in seeds]

    base_cmd = 'python run.py --env=%s --algo=%s --expert_loc=%s ' % (env, algo, expert_loc(env))

    if pretrain:
        base_cmd = base_cmd + '--explore_pre_training '
    if during_train:
        base_cmd = base_cmd + '--explore_during_training '

    if pretrain and not during_train:
        for ep in pretrain_epochs:
            exp_name = experiment_name(pretrain, during_train, epoch=ep)
            cmds.extend(format_cmd(base_cmd, exp_name, epoch=ep, seed=s) for s in seeds)

    if during_train and not pretrain:
        for e in eps:
            exp_name = experiment_name(pretrain, during_train, eps=e)
            cmds.extend(format_cmd(base_cmd, exp_name, eps=e, seed=s) for s in seeds)

    if during_train and pretrain:
        for e in eps:
            for ep in pretrain_epochs:
                exp_name = experiment_name(pretrain, during_train, eps=e, epoch=ep)
                cmds.extend(format_cmd(base_cmd, exp_name, eps=e, epoch=ep, seed=s) for s in seeds)

    return cmds


In [None]:
import os
import sys
import subprocess
from subprocess import Popen, PIPE
import multiprocessing
import random, time

def worker(cmd, max_delay=50):
    """Run one shell command after a random start-up delay.

    Args:
        cmd: Command line to execute. It is run through the shell
            (``shell=True``), so it must come from a trusted source.
        max_delay: Upper bound, in seconds, of the uniformly random sleep
            performed before the command starts.

    Returns:
        The exit code of the command.
    """
    # Random delay before launching; presumably meant to stagger the start
    # of runs dispatched at the same time by the pool.
    time.sleep(random.random() * max_delay)

    print('running command: %s' % cmd)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    # communicate() drains both pipes and waits for the process to exit, so
    # no explicit kill is needed afterwards.
    stdout, stderr = p.communicate()

    if p.returncode != 0:
        # Surface failures instead of silently discarding the captured output.
        print('command failed (exit code %d): %s' % (p.returncode, cmd))
        if stderr:
            print(stderr.decode('utf-8', 'replace'))
    return p.returncode

# Experiment grid: every env/algo pair is swept over eps for each seed.
envs = ['Humanoid-v2', 'Ant-v2']
algos = ['ddpg']
eps = [.01, .05, .10, .20, .30]
pretrain_epochs = [5, 10, 20, 30]
seeds = [0, 10, 20]

# Exploration during training only, so pretrain_epochs is passed but the
# pretrain-only and combined sweeps are not generated.
cmds = []
for env in envs:
    for algo in algos:
        cmds.extend(create_commands(env=env, algo=algo,
                                    pretrain=False, during_train=True,
                                    pretrain_epochs=pretrain_epochs, eps=eps,
                                    seeds=seeds))

num_cmds = len(cmds)
# Use half of the CPUs, but never fewer than one process: Pool raises
# ValueError for processes < 1, which cpu_count() // 2 yields on one CPU.
count = max(1, multiprocessing.cpu_count() // 2)
num_processes = max(1, min(num_cmds, count))
print(num_processes)

pool = multiprocessing.Pool(processes=num_processes)
try:
    pool.map(worker, cmds)
finally:
    # Release the worker processes even if a task raised.
    pool.close()
    pool.join()

36
running command: python run.py --env=Ant-v2 --algo=ddpg --expert_loc=ant/data/expert_fully_corrective --explore_during_training  --exp_name=epsgreedy_eps20 --eps=0.200000 --pretrain_epochs=0 --seed=0
running command: python run.py --env=Ant-v2 --algo=ddpg  --exp_name=epsgreedy_base --eps=0.000000 --pretrain_epochs=0 --seed=20
running command: python run.py --env=Humanoid-v2 --algo=ddpg --expert_loc=humanoid/data/expert_fully_corrective --explore_during_training  --exp_name=epsgreedy_eps30 --eps=0.300000 --pretrain_epochs=0 --seed=20
running command: python run.py --env=Ant-v2 --algo=ddpg --expert_loc=ant/data/expert_fully_corrective --explore_during_training  --exp_name=epsgreedy_eps01 --eps=0.010000 --pretrain_epochs=0 --seed=10
running command: python run.py --env=Humanoid-v2 --algo=ddpg --expert_loc=humanoid/data/expert_fully_corrective --explore_during_training  --exp_name=epsgreedy_eps05 --eps=0.050000 --pretrain_epochs=0 --seed=10
running command: python run.py --env=Ant-v2 --