In [13]:
import subprocess
import json

from utils.iter import generate_combinations

In [49]:
def gen_sbatch(config_string):
    """Build the text of a SLURM batch script that runs ``fit.py`` for one config.

    The script requests one node / one task / 8 CPUs / 32GB / one GPU for 59
    minutes, purges loaded modules, and runs ``python fit.py '<config_string>'``
    inside a Singularity container with the environment and ImageNet overlays
    mounted read-only.

    Args:
        config_string: The configuration to hand to ``fit.py`` as its single
            argument. It is spliced inside single quotes that sit inside a
            double-quoted ``bash -c "..."`` argument, so the caller must have
            already escaped it for that context (at minimum ``"`` -> ``\\"``).

    Returns:
        The full script as a string, terminated by a newline.

    Note:
        Output/error files are directed to ``./slurm/``; presumably that
        directory has to exist before the job starts -- verify on the cluster.
    """
    # Every directive starts at column 0: sbatch only honours lines that begin
    # with "#SBATCH"; an indented one is treated as an ordinary comment and the
    # option is silently dropped.
    directives = [
        "#SBATCH --nodes=1",
        "#SBATCH --ntasks-per-node=1",
        "#SBATCH --cpus-per-task=8",
        "#SBATCH --time=0:59:00",
        "#SBATCH --mem=32GB",
        "#SBATCH --gres=gpu:1",
        "#SBATCH --job-name=diffeo",
        "#SBATCH --mail-type=FAIL",
        "#SBATCH --mail-user=cm6627@nyu.edu",
        "#SBATCH --output=./slurm/slurm_%j.out",
        "#SBATCH --error=./slurm/slurm_%j.err",
    ]

    # One logical command, emitted with shell line continuations for readability.
    container_cmd = " \\\n".join([
        "singularity exec --nv",
        "  --overlay /scratch/cm6627/diffeo_cnn/my_env/overlay-15GB-500K.ext3:ro",
        "  --overlay /vast/work/public/ml-datasets/imagenet/imagenet-val.sqf:ro",
        "  --overlay /vast/work/public/ml-datasets/imagenet/imagenet-test.sqf:ro",
        "  --overlay /vast/work/public/ml-datasets/imagenet/imagenet-train.sqf:ro",
        "  /scratch/work/public/singularity/cuda12.3.2-cudnn9.0.0-ubuntu-22.04.4.sif",
        f"  /bin/bash -c \"source /ext3/env.sh; python fit.py '{config_string}'\"",
    ])

    script_lines = ["#!/bin/bash", "", *directives, "", "module purge", "", container_cmd, ""]
    return "\n".join(script_lines)

def submit(config):
    """Write an sbatch script for ``config`` and submit it with ``sbatch``.

    The config is serialized to JSON, escaped for the quoting context used by
    ``gen_sbatch``, written to ``submit.sbatch`` in the current directory
    (overwriting any previous script), and submitted.

    Args:
        config: A JSON-serializable configuration for a single run.

    Raises:
        subprocess.CalledProcessError: If ``sbatch`` exits with a non-zero
            status, so a failed submission is not silently skipped.
        FileNotFoundError: If the ``sbatch`` executable is not on ``PATH``.
    """
    payload = json.dumps(config)

    # gen_sbatch places the string inside single quotes which are themselves
    # inside a double-quoted `bash -c "..."` argument, so two layers apply.
    # Inner layer (single quotes): close the quote, emit \', reopen.
    payload = payload.replace("'", "'\\''")
    # Outer layer (double quotes): backslash-escape the characters bash still
    # interprets there. Backslash goes first so later escapes are not doubled.
    for special in ("\\", '"', "$", "`"):
        payload = payload.replace(special, "\\" + special)

    sbatch_string = gen_sbatch(payload)

    with open("submit.sbatch", "w") as f:
        f.write(sbatch_string)

    # Argument list without a shell: a one-element list with shell=True only
    # works by accident on POSIX and adds a needless shell layer.
    subprocess.run(["sbatch", "./submit.sbatch"], check=True)

In [53]:
# Hyperparameter sweep specification. List-valued entries are the axes that
# generate_combinations is expected to expand into individual run configs
# (presumably a Cartesian product: 3 * 3 * 5 * 3 * 4 = 540 jobs -- confirm
# against utils.iter before launching).
config = {
    "batch_size": [1,16,64],
    "epochs": 1500,
    "model": {
        "name": "MLP_v2",
        "dropout_rate": [0.2, 0.02, 0.002],
    },
    "optimizer": {
        "name": "SGD",
        "lr": 1e-4,
        "momentum": [0.5, 0.7, 0.9, 0.95, 0.99]
    },
    "scheduler": {
        "name": "ReduceLROnPlateau",
        "factor": [0.1, 0.05, 0.01],
        "patience": [10, 50, 100, 250],
    },
    "dataset": "resnet18_imagenet1k_train"
}

# Use a distinct loop variable so the sweep specification in `config` is not
# overwritten by the last generated combination once the cell has run.
for run_config in generate_combinations(config):
    submit(run_config)

Submitted batch job 57062113
Submitted batch job 57062114
Submitted batch job 57062115
Submitted batch job 57062116
Submitted batch job 57062117
Submitted batch job 57062118
Submitted batch job 57062119
Submitted batch job 57062120
Submitted batch job 57062121
Submitted batch job 57062122
Submitted batch job 57062123
Submitted batch job 57062124
Submitted batch job 57062125
Submitted batch job 57062126
Submitted batch job 57062127
Submitted batch job 57062128
Submitted batch job 57062129
Submitted batch job 57062130
Submitted batch job 57062131
Submitted batch job 57062132
Submitted batch job 57062133
Submitted batch job 57062134
Submitted batch job 57062135
Submitted batch job 57062136
Submitted batch job 57062137
Submitted batch job 57062138
Submitted batch job 57062139
Submitted batch job 57062140
Submitted batch job 57062141
Submitted batch job 57062142
Submitted batch job 57062143
Submitted batch job 57062144
Submitted batch job 57062145
Submitted batch job 57062146
Submitted batc