In [1]:
import itertools
import os
import subprocess

In [2]:
def mkdir(dir):
    """Create the directory `dir` if it does not already exist.

    Missing parent directories are created as well, and calling this on a
    directory that already exists is a no-op.

    Args:
        dir: path of the directory to create.

    Raises:
        FileExistsError: if `dir` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    os.makedirs(dir, exist_ok=True)

In [3]:
# global job parameters (shared by every generated SLURM job script)

job_directory = "math"                  # directory the generated .job files are written to
out_dir = f"{job_directory}/.out"       # directory used for the #SBATCH --output log files
time_str = '00-48:00:00'                # value passed to #SBATCH --time
partition = 'gpu'                       # value passed to #SBATCH --partition
ntasks = 1                              # value passed to #SBATCH --ntasks
nodes = 1                               # value passed to #SBATCH --nodes
cpu_per_task = 8                        # value passed to #SBATCH --cpus-per-task
mem_per_cpu = 2                         # value passed to #SBATCH --mem-per-cpu (a "G" suffix is appended)
n_gpus = 1                              # value passed to #SBATCH --gpus
# value passed to #SBATCH -C; the inner double quotes are kept in the script
gpus_constraints = '"rtx3090|v100|a100|rtx2080ti"'
# directory each job `cd`s into before launching train_model.py
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere
project_dir = "/home/ma2393/Documents/relational/experiments/math"


# job_directory must be created first because out_dir lives inside it
mkdir(job_directory)
mkdir(out_dir)

In [4]:
# define params of individual jobs

models = ['transformer', 'relational_abstractor', 'relational_abstractor2']
tasks = ['polynomials__add']
model_sizes = ['medium']
n_epochs = [50]
train_sizes = [-1]

# One dict per combination of the lists above (Cartesian product).
jobs_params = []

# The epoch loop variable is deliberately NOT called `n_epochs`: reusing that
# name would rebind the `n_epochs` list above to a single int once the loop
# has run, leaving the configuration in a different state than it was declared.
for model, task, model_size, epochs, train_size in itertools.product(models, tasks, model_sizes, n_epochs, train_sizes):
    jobs_params.append(dict(model=model, task=task, model_size=model_size, n_epochs=epochs, train_size=train_size))

In [5]:
# Display the generated parameter sets (one dict per job) for a visual check.
# NOTE(review): the saved output of this cell lists two entries while `models`
# contains three names -- the output looks stale; re-run to refresh it.
jobs_params

[{'model': 'transformer',
  'task': 'polynomials__add',
  'model_size': 'medium',
  'n_epochs': 50,
  'train_size': -1},
 {'model': 'relational_abstractor',
  'task': 'polynomials__add',
  'model_size': 'medium',
  'n_epochs': 50,
  'train_size': -1}]

In [6]:
# create jobs: write one SLURM batch script per entry of jobs_params and
# remember the script paths in created_jobs
created_jobs = []
for params in jobs_params:

    # NOTE(review): the name does not include n_epochs or train_size, so two
    # parameter sets that differ only in those fields would be written to the
    # same file (the later one overwriting the earlier one).
    job_name = f"math-{params['task']}-{params['model']}-{params['model_size']}"

    job_file = os.path.join(job_directory, f"{job_name}.job")

    script_lines = [
        # scheduler directives
        "#!/bin/bash\n",
        f"#SBATCH --partition={partition}\n",
        f"#SBATCH --job-name={job_name}\n",
        f"#SBATCH --output={out_dir}/{job_name}-%j.out\n",
        f"#SBATCH --ntasks={ntasks} --nodes={nodes}\n",
        f"#SBATCH --cpus-per-task={cpu_per_task}\n",
        f"#SBATCH --mem-per-cpu={mem_per_cpu}G\n",
        f"#SBATCH --time={time_str}\n",
        "#SBATCH --mail-type=ALL\n",
        f"#SBATCH -C {gpus_constraints} --gpus={n_gpus}\n",

        # environment setup
        f"cd {project_dir}\n",            # navigate to project directory
        "module restore python_env\n",    # load the saved module collection
        "conda activate tf\n",            # activate conda environment

        "nvidia-smi -L\n",                # print gpu information

        # run python script (two fragments forming a single command line)
        f"python train_model.py --model '{params['model']}' --task '{params['task']}' --model_size '{params['model_size']}' "
        f"--n_epochs {params['n_epochs']} --train_size {params['train_size']}\n",
    ]

    # writelines() expects an iterable of strings; passing it one bare string
    # per call only worked by writing the text character by character.
    with open(job_file, 'w') as fh:
        fh.writelines(script_lines)

    created_jobs.append(job_file)


In [7]:
# run jobs: ask for confirmation, then submit every generated script with sbatch
answer = input('confirm that you would like to run those jobs [Y/n]: ').strip().lower()

# Pressing Enter (empty answer) still confirms, as before; an explicit
# negative answer now aborts instead of being ignored.
if answer in ('', 'y', 'yes'):
    for job in created_jobs:
        try:
            # Argument list instead of a shell string: the path is passed to
            # sbatch verbatim, with no shell parsing of spaces or metacharacters.
            result = subprocess.run(['sbatch', job], capture_output=True, text=True)
        except OSError as err:
            # e.g. sbatch is not installed / not on PATH
            print(f'FAILED to submit {job}: {err}')
            continue

        # relay sbatch's own message ("Submitted batch job <id>")
        print(result.stdout, end='')

        # only report success when sbatch actually accepted the job
        if result.returncode == 0:
            print(f'submitted {job}')
        else:
            print(f'FAILED to submit {job} (exit code {result.returncode}): {result.stderr.strip()}')
else:
    print('aborted: no jobs were submitted')

Submitted batch job 24855268
submitted math/math-polynomials__add-transformer-medium.job
Submitted batch job 24855269
submitted math/math-polynomials__add-relational_abstractor-medium.job
