In [3]:
import configparser
import os
import subprocess
import time
from copy import deepcopy
from itertools import product

def generate_config_combinations(config_options: dict) -> list:
    """Expand a grid of option values into one ConfigParser per combination.

    Args:
        config_options: Mapping ``{section: {option: [candidate values]}}``.

    Returns:
        A list of ``configparser.ConfigParser`` objects, one for every element
        of the Cartesian product taken first over the options inside each
        section and then across sections. Every value is stored as ``str``.
        An option with an empty candidate list yields an empty result.
    """
    section_names = list(config_options)

    # For each section, enumerate every {option: value} assignment.
    per_section_choices = [
        [dict(zip(config_options[name], values))
         for values in product(*config_options[name].values())]
        for name in section_names
    ]

    configs = []
    # Pick one assignment per section, across all sections.
    for combination in product(*per_section_choices):
        # A new parser is built on every iteration, so no defensive copy is needed.
        config = configparser.ConfigParser()
        for name, chosen in zip(section_names, combination):
            config[name] = {key: str(value) for key, value in chosen.items()}
        configs.append(config)

    return configs

In [4]:
def create_directory_for_config(base_dir: str, config_options: dict) -> str:
    """Create (if needed) a sub-directory of ``base_dir`` named after the options.

    Args:
        base_dir: Parent directory under which the sub-directory is created.
        config_options: Mapping of option name to value; each pair contributes
            a ``key=value`` fragment to the directory name, joined with ``_``.

    Returns:
        The path of the sub-directory.
    """
    # The name is built from the function's own argument; the previous body
    # referenced an undefined name `config` and raised NameError.
    sub_dir_name = "_".join(f"{key}={value}" for key, value in config_options.items())
    dir_path = os.path.join(base_dir, sub_dir_name)

    # exist_ok makes repeated calls with the same options harmless.
    os.makedirs(dir_path, exist_ok=True)
    return dir_path

In [5]:
def update_configparser(base_config:configparser.ConfigParser, update_config:configparser.ConfigParser):
    """Copy every option of ``update_config`` into ``base_config`` in place.

    Sections that ``base_config`` does not have yet are created first; options
    that already exist are overwritten. Nothing is returned.
    """
    for section_name in update_config.sections():
        section_missing = not base_config.has_section(section_name)
        if section_missing:
            base_config.add_section(section_name)

        # Transfer the options of this section one by one.
        for option, option_value in update_config.items(section_name):
            base_config.set(section_name, option, option_value)

In [6]:
def submit_sbatch(batch_dict, nsleep = 0.05):
    """Render the sbatch template, submit it with ``sbatch`` and delete it.

    Args:
        batch_dict: Mapping of placeholder text to its replacement. Every
            occurrence of each key in ``./submit_batch.ini`` is replaced by
            ``str(value)``, in the mapping's iteration order.
        nsleep: Seconds to wait after invoking ``sbatch`` before the rendered
            script is removed.
    """
    with open('./submit_batch.ini', 'r') as file:
        fstring = file.read()
    for key, value in batch_dict.items():
        # str() so numeric values do not make str.replace raise TypeError.
        fstring = fstring.replace(key, str(value))
    with open('./submit_batch.sbatch', 'w') as file:
        file.write(fstring)
    try:
        subprocess.run(["sbatch", "submit_batch.sbatch"])
        time.sleep(nsleep)
    finally:
        # Remove the rendered script even when sbatch could not be launched,
        # and do it without spawning an external `rm` process.
        os.remove('./submit_batch.sbatch')

In [58]:
import os
import copy
class JobManager:
    """Expand a hyperparameter grid into run directories, configs and sbatch scripts.

    The manager holds a base configuration read from an INI file and a
    dictionary of placeholder -> value pairs that are substituted into the
    sbatch template.
    """

    # Template read by create_sbatch_file, and the rendered script it writes.
    SBATCH_TEMPLATE_PATH = './submit_batch.ini'
    SBATCH_SCRIPT_PATH = './submit_batch.sbatch'

    def __init__(self,default_config_path:str):
        """Load the base configuration and set the default batch placeholders.

        Args:
            default_config_path: Path of the INI file holding default options.
                ``ConfigParser.read`` ignores files it cannot open, so a wrong
                path leaves the base configuration empty instead of raising.
        """
        config = configparser.ConfigParser()
        config.read(default_config_path)
        self.base_config = config
        self.batch_dict = {"NUM_NODES":"1",
                           "GPUS_PER_NODE":"1",
                           "CPUS_PER_NODE":"1",
                           "GRES":"gpu:rtx8000:1",
                           "CONDA_ENV":"dl_env",
                           "TIME":"100:00:00",
                           "MEM_PER_NODE":"1GB",
                           "PYTHON_EXE":"../main.py",
                           "PY_ARGS":"input_dir_path",
                            }
        # Stays True until set_computation_resource() is called.
        self.default_comp_res = True

    def print_config(self,config:configparser.ConfigParser):
        """Print every section of ``config`` in INI-like form."""
        for section in config.sections():
            print(f"[{section}]")
            for key, value in config[section].items():
                print(f"{key} = {value}")
            print()

    def set_computation_resource(self,num_nodes:int,gpus_per_node:int,cpus_per_node:int,gres:str = "gpu:rtx8000:1"):
        """Record the requested resources in the base config and batch options.

        Args:
            num_nodes: Stored under ``INFO/num_nodes`` and ``NUM_NODES``.
            gpus_per_node: Stored under ``INFO/gpus_per_nodes`` and ``GPUS_PER_NODE``.
            cpus_per_node: Stored under ``INFO/cpus_per_nodes`` and ``CPUS_PER_NODE``.
            gres: Stored under the ``GRES`` batch placeholder.

        Raises:
            configparser.NoSectionError: If the base config has no ``INFO`` section.
        """
        # update the base config
        # NOTE(review): the option names end in "_nodes" (plural) - confirm this
        # matches what the consumer of the config file reads.
        self.base_config.set("INFO","num_nodes",str(num_nodes))
        self.base_config.set("INFO","gpus_per_nodes",str(gpus_per_node))
        self.base_config.set("INFO","cpus_per_nodes",str(cpus_per_node))
        # update the batch options
        self.batch_dict["NUM_NODES"] = str(num_nodes)
        self.batch_dict["GPUS_PER_NODE"] = str(gpus_per_node)
        # Must be the same key __init__ defines, otherwise the template keeps
        # the default CPU count.
        self.batch_dict["CPUS_PER_NODE"] = str(cpus_per_node)
        # Honor the gres argument, which was previously accepted but unused.
        self.batch_dict["GRES"] = gres
        self.default_comp_res = False

    def generate_config_combinations(self,config_options:dict)->list:
        """Expand ``{section: {option: [values]}}`` into one ConfigParser per combination.

        Returns:
            A list of ``configparser.ConfigParser`` objects covering the
            Cartesian product over the options within each section and then
            across sections. All values are stored as ``str``.
        """
        section_names = list(config_options)

        # For each section, enumerate every {option: value} assignment.
        per_section_choices = [
            [dict(zip(config_options[name], values))
             for values in product(*config_options[name].values())]
            for name in section_names
        ]

        configs = []
        for combination in product(*per_section_choices):
            # A new parser is built on every iteration, so no copy is needed.
            config = configparser.ConfigParser()
            for name, chosen in zip(section_names, combination):
                config[name] = {key: str(value) for key, value in chosen.items()}
            configs.append(config)
        return configs

    def create_directory_from_config(self,base_dir:str, config:configparser.ConfigParser,suffix:str = "")->str:
        """Create a run directory whose name encodes the options in ``config``.

        The name is ``#SECTION#-key-value-...`` for each non-DEFAULT section,
        joined with ``-`` and followed by ``suffix``.

        Returns:
            The path of the directory, which is also printed.
        """
        folder_name_parts = []
        for section, options in config.items():
            # Skip the implicit DEFAULT section that ConfigParser always exposes.
            if section == "DEFAULT":
                continue
            folder_name_parts.append(f"#{section}#")
            for key, value in options.items():
                folder_name_parts.append(f"{key}-{value}")

        # Join all parts to form the folder name
        folder_name = "-".join(folder_name_parts) + suffix

        dir_path = os.path.join(base_dir,folder_name)
        # Create the directory if it doesn't exist
        os.makedirs(dir_path, exist_ok=True)
        print(dir_path)
        return dir_path

    def update_configparser(self,base_config:configparser.ConfigParser, update_config:configparser.ConfigParser):
        """Copy every option of ``update_config`` into ``base_config`` in place.

        Sections missing from ``base_config`` are created first, so a swept
        section that the default config does not define no longer raises
        ``NoSectionError``.
        """
        for section in update_config.sections():
            if not base_config.has_section(section):
                base_config.add_section(section)
            for key, value in update_config.items(section):
                base_config.set(section, key, value)

    def write_config(self,file_path,config):
        """Write ``config`` to ``file_path`` in INI format, overwriting it."""
        with open(file_path, 'w') as configfile:
            config.write(configfile)

    def create_sbatch_file(self,batch_dict):
        """Render the sbatch template into the sbatch script.

        Every occurrence of each key of ``batch_dict`` in the template text is
        replaced by ``str(value)``, in the mapping's iteration order.
        """
        with open(self.SBATCH_TEMPLATE_PATH, 'r') as file:
            fstring = file.read()
        for key, value in batch_dict.items():
            # str() so numeric values do not make str.replace raise TypeError.
            fstring = fstring.replace(key, str(value))
        with open(self.SBATCH_SCRIPT_PATH, 'w') as file:
            file.write(fstring)

    def submit_sbatch(self,nsleep = 0.05):
        """Submit the rendered script with ``sbatch``, wait, then delete it.

        Args:
            nsleep: Seconds to wait after invoking ``sbatch`` before removal.
        """
        try:
            subprocess.run(["sbatch", self.SBATCH_SCRIPT_PATH])
            time.sleep(nsleep)
        finally:
            # Clean up even when sbatch could not be launched, without
            # spawning an external `rm` process.
            os.remove(self.SBATCH_SCRIPT_PATH)

    def submit(self,base_dir:str,config_dict:dict,batch_dict:dict,n_repeat:int=1,dry_run:bool=True):
        """Prepare (and optionally submit) one job per config combination and repeat.

        For every repeat and every combination generated from ``config_dict``
        this creates a run directory under ``base_dir``, writes the merged
        ``config.ini`` into it and renders the sbatch script.

        Args:
            base_dir: Directory that receives one sub-directory per run.
            config_dict: ``{section: {option: [values]}}`` grid to sweep.
            batch_dict: Batch placeholder overrides applied on top of
                ``self.batch_dict``; may be empty or ``None``.
            n_repeat: Number of times each combination is repeated; the repeat
                index becomes the ``-run-NN`` directory suffix.
            dry_run: When True (the default, matching the previous behavior in
                which submission was commented out) the script is only
                rendered. Pass False to actually call ``submit_sbatch``.
        """
        if self.default_comp_res:
            print("The default computional resorcue setup is applied, use set_computation_resource() to reset if needed!")
        configs = self.generate_config_combinations(config_dict)
        print("there are {} configs in total".format(len(configs)))
        os.makedirs(base_dir, exist_ok=True)
        for i in range(n_repeat):
            suffix = f"-run-{i:02}"
            for config in configs:
                dir_path = self.create_directory_from_config(base_dir,config,suffix)
                # Work on a copy so the shared base config is never modified.
                run_config = copy.deepcopy(self.base_config)
                self.update_configparser(run_config,config)
                self.write_config(os.path.join(dir_path,"config.ini"),run_config)
                run_batch_dict = dict(self.batch_dict)
                run_batch_dict["PY_ARGS"] = dir_path
                # Caller overrides win over the defaults and over PY_ARGS.
                if batch_dict:
                    run_batch_dict.update(batch_dict)
                self.create_sbatch_file(run_batch_dict)
                if not dry_run:
                    self.submit_sbatch()

In [59]:
# Hyperparameter grid to sweep: {section: {option: [candidate values]}}.
options = {
    "SSL": {"lr": [0.1, 1.0], "batch_size": [10, 20]},
    "LC": {"lr": [0.5, 0.6], "ouptut_dim": [30, 40]},
}
# Job manager seeded with the CIFAR-10 default configuration file.
j = JobManager("../default_config_cifar10.ini")

In [60]:
# Prepare one run per grid combination under ./simulations, overriding only TIME.
j.submit(
    base_dir="./simulations",
    config_dict=options,
    batch_dict={"TIME": "1:00:00"},
)

The default computional resorcue setup is applied, use set_computation_resource() to reset if needed!
there are 16 configs in total
./simulations/#SSL#-lr-0.1-batch_size-10-#LC#-lr-0.5-ouptut_dim-30-run-00
./simulations/#SSL#-lr-0.1-batch_size-10-#LC#-lr-0.5-ouptut_dim-40-run-00
./simulations/#SSL#-lr-0.1-batch_size-10-#LC#-lr-0.6-ouptut_dim-30-run-00
./simulations/#SSL#-lr-0.1-batch_size-10-#LC#-lr-0.6-ouptut_dim-40-run-00
./simulations/#SSL#-lr-0.1-batch_size-20-#LC#-lr-0.5-ouptut_dim-30-run-00
./simulations/#SSL#-lr-0.1-batch_size-20-#LC#-lr-0.5-ouptut_dim-40-run-00
./simulations/#SSL#-lr-0.1-batch_size-20-#LC#-lr-0.6-ouptut_dim-30-run-00
./simulations/#SSL#-lr-0.1-batch_size-20-#LC#-lr-0.6-ouptut_dim-40-run-00
./simulations/#SSL#-lr-1.0-batch_size-10-#LC#-lr-0.5-ouptut_dim-30-run-00
./simulations/#SSL#-lr-1.0-batch_size-10-#LC#-lr-0.5-ouptut_dim-40-run-00
./simulations/#SSL#-lr-1.0-batch_size-10-#LC#-lr-0.6-ouptut_dim-30-run-00
./simulations/#SSL#-lr-1.0-batch_size-10-#LC#-lr-0.6-o

In [61]:
import torch

# Print torch.version.cuda only when torch reports CUDA as available.
cuda_usable = torch.cuda.is_available()
if cuda_usable:
    print(torch.version.cuda)

12.4
