In [13]:
import subprocess
import json

from utils.iter import generate_combinations

In [20]:
def gen_sbatch(config_string):
    """Build the text of a Slurm batch script that runs ``fit.py`` on one config.

    The script requests one node with one task, 8 CPUs, 32GB of memory and one
    GPU for 40 minutes, purges loaded modules, and then runs
    ``python fit.py '<config_string>'`` through ``/bin/bash -c "..."`` inside
    a Singularity container.

    Args:
        config_string: Text handed to ``fit.py`` as its single argument. It is
            inserted verbatim between single quotes inside a double-quoted
            ``bash -c`` string, so the caller must already have
            backslash-escaped anything special in that context (for example
            double quotes).

    Returns:
        The complete batch script as a string.
    """
    # Slurm only recognises "#SBATCH" when it is the very first thing on a
    # line; an indented directive is read as a plain comment and silently
    # ignored. The header is therefore assembled from unindented lines rather
    # than from an indented triple-quoted block.
    header_lines = [
        "#!/bin/bash",
        "",
        "#SBATCH --nodes=1",
        "#SBATCH --ntasks-per-node=1",
        "#SBATCH --cpus-per-task=8",
        "#SBATCH --time=0:40:00",
        "#SBATCH --mem=32GB",
        "#SBATCH --gres=gpu:1",
        "#SBATCH --job-name=diffeo",
        "#SBATCH --mail-type=FAIL",
        "#SBATCH --mail-user=cm6627@nyu.edu",
        "#SBATCH --output=./slurm/slurm_%j.out",
        "#SBATCH --error=./slurm/slurm_%j.err",
        "",
        "module purge",
        "",
    ]

    # One logical shell command; the pieces are joined with backslash-newline
    # continuations so the generated script stays readable.
    command_parts = [
        "singularity exec --nv",
        "--overlay /scratch/cm6627/diffeo_cnn/my_env/overlay-15GB-500K.ext3:ro",
        "--overlay /vast/work/public/ml-datasets/imagenet/imagenet-val.sqf:ro",
        "--overlay /vast/work/public/ml-datasets/imagenet/imagenet-test.sqf:ro",
        "--overlay /vast/work/public/ml-datasets/imagenet/imagenet-train.sqf:ro",
        "/scratch/work/public/singularity/cuda12.3.2-cudnn9.0.0-ubuntu-22.04.4.sif",
        f"/bin/bash -c \"source /ext3/env.sh; python fit.py '{config_string}'\"",
    ]

    header = "\n".join(header_lines)
    command = " \\\n  ".join(command_parts)
    return f"{header}\n{command}\n"

def submit(config):
    """Write a batch script for ``config`` and submit it with ``sbatch``.

    The config is serialised to JSON, escaped for the quoting used by the
    script template, written to ``submit.sbatch`` in the current working
    directory (overwriting any existing file), and handed to ``sbatch``.

    Args:
        config: JSON-serialisable object passed to ``fit.py`` as its argument.

    Raises:
        subprocess.CalledProcessError: If ``sbatch`` exits with a non-zero
            status, so a failed submission is not silently skipped.
        FileNotFoundError: If the ``sbatch`` executable cannot be found.
    """
    config_string = json.dumps(config)

    # gen_sbatch places this text between single quotes inside a double-quoted
    # `bash -c "..."` argument, so it passes through two layers of shell
    # quoting. Inner layer first: a literal single quote has to close the
    # quoted span, emit an escaped quote, and reopen it.
    config_string = config_string.replace("'", "'\\''")
    # Outer layer: inside double quotes the shell still interprets these four
    # characters. The backslash must be handled first so the backslashes added
    # for the other characters are not escaped again.
    for special in ("\\", '"', "$", "`"):
        config_string = config_string.replace(special, "\\" + special)

    sbatch_string = gen_sbatch(config_string)

    with open("submit.sbatch", "w") as f:
        f.write(sbatch_string)

    # Argument list without a shell: nothing is re-parsed, and check=True
    # surfaces a rejected submission as an exception.
    subprocess.run(["sbatch", "./submit.sbatch"], check=True)

In [24]:
# Hyperparameter grid handed to generate_combinations.
# Presumably each list-valued entry is a set of alternatives that gets expanded
# into one concrete config per combination - confirm against utils.iter.
config = {
    "dropout_rate": [0.2, 0.4, 0.6],
    "batch_size": [16, 32],
    "epochs": 1500,
    "optimizer": {
        "name": "AdamW",
        "lr": [1e-3, 1e-4],
        # NOTE(review): 0.02 appears twice; unless generate_combinations
        # de-duplicates, this submits identical jobs. Confirm whether another
        # value (e.g. 0.2) was intended.
        "weight_decay": [0.02, 0.02, 2.0]
    },
    "scheduler": {
        "name": "ReduceLROnPlateau",
        "factor": 0.1,
    },
    "dataset": "resnet18_imagenet1k_train"
}

# A distinct loop variable keeps `config` bound to the grid after the loop,
# instead of leaving it pointing at the last generated combination.
for run_config in generate_combinations(config):
    submit(run_config)

Submitted batch job 57049229
Submitted batch job 57049230
Submitted batch job 57049231
Submitted batch job 57049232
Submitted batch job 57049233
Submitted batch job 57049234
Submitted batch job 57049235
Submitted batch job 57049236
Submitted batch job 57049237
Submitted batch job 57049238
Submitted batch job 57049239
Submitted batch job 57049240
Submitted batch job 57049241
Submitted batch job 57049242
Submitted batch job 57049243
Submitted batch job 57049244
Submitted batch job 57049245
Submitted batch job 57049246
Submitted batch job 57049247
Submitted batch job 57049248
Submitted batch job 57049249
Submitted batch job 57049250
Submitted batch job 57049251
Submitted batch job 57049252
Submitted batch job 57049253
Submitted batch job 57049254
Submitted batch job 57049255
Submitted batch job 57049256
Submitted batch job 57049257
Submitted batch job 57049258
Submitted batch job 57049259
Submitted batch job 57049260
Submitted batch job 57049261
Submitted batch job 57049262
Submitted batc

 data	  slurm			  submit.sbatch   utils
 fit.py  'submission GUI.ipynb'   train		  wandb
