In [2]:
import json
import os
import subprocess

from utils.iter import generate_combinations

In [3]:
def gen_sbatch(config_string):
    """Build the text of a Slurm batch script that runs ``fit.py`` on one config.

    Args:
        config_string: The run configuration as text, already escaped for
            placement inside a double-quoted bash string. It is wrapped in
            single quotes and handed to ``fit.py`` as its only argument.

    Returns:
        The complete batch script as a string, terminated by a newline.

    Note:
        The script points job logs at ``./slurm/``; that directory is expected
        to exist in the submission directory.
    """
    # sbatch only recognises "#SBATCH" when it begins a line; an indented
    # directive is treated as an ordinary comment and silently ignored. The
    # script is therefore assembled from flush-left lines instead of an
    # indented triple-quoted block.
    preamble = [
        "#!/bin/bash",
        "",
        "#SBATCH --nodes=1",
        "#SBATCH --ntasks-per-node=1",
        "#SBATCH --cpus-per-task=8",
        "#SBATCH --time=0:59:00",
        "#SBATCH --mem=32GB",
        "#SBATCH --gres=gpu:1",
        "#SBATCH --job-name=diffeo",
        "#SBATCH --mail-type=FAIL",
        "#SBATCH --mail-user=cm6627@nyu.edu",
        "#SBATCH --output=./slurm/slurm_%j.out",
        "#SBATCH --error=./slurm/slurm_%j.err",
        "",
        "module purge",
        "",
    ]

    # One logical shell command; the pieces are joined with real
    # backslash-newline continuations so the generated file stays readable.
    command = [
        "singularity exec --nv",
        "--overlay /scratch/cm6627/diffeo_cnn/my_env/overlay-15GB-500K.ext3:ro",
        "--overlay /vast/work/public/ml-datasets/imagenet/imagenet-val.sqf:ro",
        "--overlay /vast/work/public/ml-datasets/imagenet/imagenet-test.sqf:ro",
        "--overlay /vast/work/public/ml-datasets/imagenet/imagenet-train.sqf:ro",
        "/scratch/work/public/singularity/cuda12.3.2-cudnn9.0.0-ubuntu-22.04.4.sif",
        f"/bin/bash -c \"source /ext3/env.sh; python fit.py '{config_string}'\"",
    ]

    return "\n".join(preamble) + "\n" + " \\\n  ".join(command) + "\n"

def _escape_for_nested_quotes(text):
    """Escape ``text`` so it survives as '...' inside a double-quoted bash -c string."""
    # Inner layer: a single quote cannot appear inside '...', so close the
    # quoting, emit an escaped quote, and reopen it.
    text = text.replace("'", "'\\''")
    # Outer layer: inside "..." bash still interprets \, ", $ and `.
    # The backslash must be handled first so later escapes are not doubled.
    for char in ("\\", '"', "$", "`"):
        text = text.replace(char, "\\" + char)
    return text


def submit(config):
    """Write a batch script for ``config`` and submit it with ``sbatch``.

    Args:
        config: A JSON-serialisable run configuration. It is serialised with
            ``json.dumps`` and passed to ``fit.py`` through the script
            produced by ``gen_sbatch``.

    Raises:
        subprocess.CalledProcessError: If ``sbatch`` exits with a non-zero
            status, so a failed submission is not silently skipped.
        FileNotFoundError: If the ``sbatch`` executable cannot be found.
    """
    config_string = _escape_for_nested_quotes(json.dumps(config))
    sbatch_string = gen_sbatch(config_string)

    # The generated script directs job logs to ./slurm/; make sure the
    # directory is there before the job is queued.
    os.makedirs("slurm", exist_ok=True)

    with open("submit.sbatch", "w") as f:
        f.write(sbatch_string)

    # Argument list without a shell: nothing to quote, and check=True turns a
    # rejected submission into an exception instead of ignoring it.
    subprocess.run(["sbatch", "./submit.sbatch"], check=True)

In [9]:
# Shell escape: list this user's jobs currently known to the Slurm queue.
!squeue -u cm6627

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
          57190391 short,cs, submit.s   cm6627 PD       0:00      1 (Priority)


In [10]:
# Sweep specification. List-valued entries (batch_size, optimizer lr) are the
# swept axes; presumably generate_combinations expands them into one concrete
# config per combination (4 x 4 = 16 here) -- TODO confirm against utils.iter.
config = {
    "batch_size": [1, 16, 32, 64],
    "epochs": 1500,
    "model": {
        "name": "MLP_v1",
    },
    "optimizer": {
        "name": "Adam",
        "lr": [1e-2, 1e-3, 1e-4, 1e-5],
    },
    "scheduler": {
        "name": "None",
    },
    "dataset": "resnet18_layer13_imagenet1ktrain_goldfishonly"
}

# Use a distinct loop variable so `config` still refers to the full sweep
# specification after this cell has run, rather than the last combination.
for run_config in generate_combinations(config):
    submit(run_config)

Submitted batch job 57190396
Submitted batch job 57190397
Submitted batch job 57190398
Submitted batch job 57190399
Submitted batch job 57190400
Submitted batch job 57190401
Submitted batch job 57190402
Submitted batch job 57190403
Submitted batch job 57190404
Submitted batch job 57190405
Submitted batch job 57190406
Submitted batch job 57190407
Submitted batch job 57190408
Submitted batch job 57190409
Submitted batch job 57190410
Submitted batch job 57190411
