In [2]:
import os
import shlex
import subprocess

In [35]:
# Base directory for the single ring experiments. This is a relative path, so it is
# resolved against the notebook's current working directory.
single_folder = '../pareto/single_ring'
# NOTE(review): single_num_runs is not referenced by any of the cells visible here — confirm it is still needed.
single_num_runs = 3  # Number of training runs for each experiment

def single_ring_train_script_text(
        output_log='marcus_output-%j.log',
        run_dir='.',
        worker_kwargs=[{'circumference': 1000}],
        n_workers=45,
        n_rollouts_per_step=45,
        warmup_steps=2000,
        skip_stat_steps=5000,
        horizon=5000,
        global_reward=True,
        n_steps=50,
        alg='TRPO',
        use_critic=False,
        gamma=0.9995,
        beta=0,
        scale_ttc=1,
        scale_drac=1,
        seed_np=False,
        seed_torch=False,
        residual_transfer=False,
        mrtl=False,
        handcraft=False,
        step_save=False,
        lr=1e-4,
        wb=False,
        tb=False
    ):
    """
    Build the text of a batch script that launches ``ring.py`` for a single ring run.

    The result consists of a ``#!/bin/sh`` line, three ``#SBATCH`` directives
    (log file, a fixed 72 hour time limit, and ``-c n_workers``) and one
    ``python $F/ring.py ...`` command line. ``$F`` is left for the shell to expand.
    Apart from ``output_log`` and ``run_dir``, every argument is rendered as a
    double-quoted ``name=value`` override on that command line; the meanings listed
    below are the intended ones and are implemented by ``ring.py``, not by this function.

    Args:
        output_log (str): Log filename pattern written after ``#SBATCH -o``.
        run_dir (str): Run directory, passed as the first positional argument of ``ring.py``.
        worker_kwargs (list): List of dictionaries with worker-specific parameters.
        n_workers (int): Number of worker processes; also used for ``#SBATCH -c``.
        n_rollouts_per_step (int): Number of rollouts per training step.
        warmup_steps (int): Number of warmup steps before training.
        skip_stat_steps (int): Number of steps to skip when collecting statistics.
        horizon (int): Length of each rollout.
        global_reward (bool): Whether to train with a global reward.
        n_steps (int): Total number of training steps.
        alg (str): Algorithm name; emitted wrapped in single quotes (``alg='TRPO'``).
        use_critic (bool): Whether the algorithm uses a critic.
        gamma (float): Discount factor.
        beta (float): Coefficient for the regularization terms.
        scale_ttc (float): Scaling factor for Time-To-Collision.
        scale_drac (float): Scaling factor for deceleration rate.
        seed_np (bool | int): NumPy seeding option; this notebook passes an integer seed.
        seed_torch (bool | int): PyTorch seeding option; this notebook passes an integer seed.
        residual_transfer (bool): Whether to use residual transfer learning.
        mrtl (bool): Whether to use multi-round transfer learning.
        handcraft (bool): Whether to use handcrafted features.
        step_save (bool): Whether to save the model at each step.
        lr (float): Learning rate.
        wb (bool): Whether to log to Weights & Biases.
        tb (bool): Whether to log to TensorBoard.

    Returns:
        str: The complete script text. The command line ends with a single
        trailing space and no newline.
    """
    # Scheduler preamble; the two trailing empty entries yield the blank line
    # that separates the directives from the command.
    sbatch_header = '\n'.join([
        '#!/bin/sh',
        '',
        f'#SBATCH -o {output_log}',
        '#SBATCH --time=72:00:00          # Total run time limit (HH:MM:SS)',
        f'#SBATCH -c {n_workers}',
        '',
        '',
    ])

    # Overrides in the exact order they must appear on the command line.
    # The default worker_kwargs list is shared between calls but only read here.
    cli_overrides = (
        ('worker_kwargs', worker_kwargs),
        ('n_workers', n_workers),
        ('n_rollouts_per_step', n_rollouts_per_step),
        ('warmup_steps', warmup_steps),
        ('skip_stat_steps', skip_stat_steps),
        ('horizon', horizon),
        ('global_reward', global_reward),
        ('n_steps', n_steps),
        ('alg', f"'{alg}'"),  # the algorithm name is passed as a quoted string literal
        ('use_critic', use_critic),
        ('gamma', gamma),
        ('beta', beta),
        ('scale_ttc', scale_ttc),
        ('scale_drac', scale_drac),
        ('seed_np', seed_np),
        ('seed_torch', seed_torch),
        ('residual_transfer', residual_transfer),
        ('mrtl', mrtl),
        ('handcraft', handcraft),
        ('step_save', step_save),
        ('lr', lr),
        ('wb', wb),
        ('tb', tb),
    )
    quoted_overrides = ''.join(f' "{key}={value}"' for key, value in cli_overrides)

    return f'{sbatch_header}python $F/ring.py {run_dir}{quoted_overrides} '



In [46]:
def create_file_with_contents(file_path, contents):
    """
    Create (or overwrite) a file at the specified path and write the contents to it.

    Any missing parent directories of ``file_path`` are created first, so the
    call also works when the target directory does not exist yet. Errors are
    not raised: they are printed as ``Error: ...`` and reported through the
    return value, so the caller can avoid using a file that was never written.

    Args:
        file_path (str): The path to the file to be created.
        contents (str): The content to write into the file.

    Returns:
        bool: True if the file was written, False if an error was caught.
    """
    try:
        print(file_path)
        parent_dir = os.path.dirname(file_path)
        # dirname is '' for a bare filename; makedirs('') would raise, so skip it.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(file_path, 'w') as file:
            file.write(contents)
    except Exception as e:
        # Best-effort by design: report the problem instead of interrupting the notebook.
        print(f"Error: {e}")
        return False
    return True

def submit_job(script_path):
    """
    Submit a job to the job scheduler using the LLsub command.

    The command is run through the shell. The script path is shell-quoted
    first, so paths containing spaces or shell metacharacters are passed to
    LLsub as a single argument; ordinary paths produce the same command as before.
    The completed process is printed for inspection.

    Args:
        script_path (str): The path to the script to be submitted.

    Returns:
        tuple: The standard output and standard error from the submission command.
    """
    command = f'LLsub {shlex.quote(str(script_path))}'
    output = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    # A non-zero return code (e.g. 127 when LLsub is not installed) is only visible in this printout and in stderr.
    print(output)
    return output.stdout, output.stderr

In [53]:
random_seed = 5  # passed as both seed_np and seed_torch below

# Define the directory for storing results from seeded single ring experiments
single_ring_seed_dir = f'{single_folder}/seeding2'
cur_dir = single_ring_seed_dir

output_logs = []  # log paths collected as jobs are submitted
# NOTE(review): the log goes under train1/ while the run directory is seeding2/ — confirm this is intended.
output_log_name = f'{single_folder}/train1/train.log'
beta = 0.0 #optimize performance

# Pass the `beta` variable itself so that editing it above actually changes the generated script.
train_script_text = single_ring_train_script_text(
    output_log=output_log_name,
    run_dir=cur_dir,
    beta=beta,
    n_steps=400,
    seed_torch=random_seed,
    seed_np=random_seed,
)
train_script_path = f'{cur_dir}/train1.sh'
create_file_with_contents(train_script_path, train_script_text)

../pareto/single_ring/seeding2/train1.sh


In [54]:
# Submit the generated script; `out` and `err` are the submission command's stdout and stderr.
out, err = submit_job(train_script_path)
# Record the log path only once so that re-running this cell does not add duplicate entries.
# NOTE(review): the path is recorded even if the submission failed (e.g. LLsub not found) — consider checking `err` first.
if output_log_name not in output_logs:
    output_logs.append(output_log_name)

CompletedProcess(args='LLsub ../pareto/single_ring/seeding2/train1.sh', returncode=127, stdout='', stderr='/bin/sh: LLsub: command not found\n')


In [55]:
# Show the log paths collected in output_logs so far.
print(output_logs)

['../pareto/single_ring/train1/train.log']
