In [1]:
%load_ext autoreload
%autoreload 2

import os, sys
import pandas as pd

sys.path.append(os.path.join(os.getcwd(), '../symlie'))
from misc.utils_arrays import write_lines, read_lines, clean_val, dict_to_array

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Root of the job bookkeeping folder; every other path hangs off it.
job_dir = '../jobs'

# array_file    : CSV with one row per experiment (read by main()).
# array_dir     : folder receiving one "<experiment>.txt" argument file each.
# job_array_dir : folder receiving the generated "<experiment>.job" scripts.
array_file, array_dir, job_array_dir = (
    os.path.join(job_dir, leaf) for leaf in ('arrays.csv', 'arrays', 'job_arrays')
)

In [3]:
def main(job_dir, array_file, array_dir):
    """Write one argument file per experiment listed in ``array_file``.

    The CSV is read with every cell kept as a string and indexed by its
    ``experiment`` column. Every column except ``tags`` is passed value by
    value through ``clean_val``. A ``tags`` cell has its commas removed and is
    wrapped in a single-element list. For each experiment the non-missing
    hyper-parameters are handed to ``dict_to_array`` and the result is written
    to ``<array_dir>/<experiment>.txt`` with ``write_lines``.

    Args:
        job_dir: Unused; kept so existing callers keep working.
        array_file: Path of the CSV describing the experiments.
        array_dir: Directory that receives the ``<experiment>.txt`` files.
    """
    df = pd.read_csv(array_file, dtype=object).set_index('experiment')

    skip_keys = ['experiment', 'tags']

    df_listed = pd.DataFrame({key: vals.apply(clean_val) for key, vals in df.items() if key not in skip_keys})
    # An empty tags cell is read as a float NaN, which has no .split(); leave
    # it as NaN so the dropna() below simply omits the tags entry for that row.
    df_listed['tags'] = df['tags'].apply(
        lambda x: [''.join(x.split(','))] if isinstance(x, str) else x
    )

    for experiment, hparams in df_listed.iterrows():
        output_file = os.path.join(array_dir, experiment + '.txt')
        output_lines = dict_to_array(hparams.dropna().to_dict())

        # Presumably dict_to_array yields newline-separated text, one run per
        # line -- TODO confirm against misc.utils_arrays.
        n_runs = output_lines.count('\n') + 1
        print(f"Writing {experiment} with {n_runs} lines")

        write_lines(output_file, output_lines)

# Regenerate every per-experiment argument file from the CSV.
main(job_dir=job_dir, array_file=array_file, array_dir=array_dir)

Writing sine1d with 9 lines
Writing sine1d-predict with 81 lines
Writing sine2d with 18 lines
Writing sine2d-predict with 108 lines
Writing flower with 36 lines
Writing flower-predict with 216 lines
Writing mnist with 18 lines
Writing mnist-predict with 108 lines
Writing sine1d-task with 18 lines


In [4]:
def get_template(n_jobs: int, array: str, time: str = "01:00:00", n_sims = 1, add_kwargs: str = ''): 
    """Render the text of a Slurm array-job script for one experiment.

    Args:
        n_jobs: Upper bound of the ``--array=1-<n_jobs>`` range.
        array: Experiment name; used as job name, in the log file name and to
            locate ``$HOME/thesis/SymPDE/jobs/arrays/<array>.txt``.
        time: Value for ``#SBATCH --time``.
        n_sims: Number placed after ``%`` in the ``--array`` specification.
        add_kwargs: Extra text inserted after ``--num_workers 18`` on the
            ``run.py`` command line.

    Returns:
        The script as a single string terminated by a newline.
    """
    scheduler_directives = [
        "#!/bin/bash",
        "",
        "#SBATCH --partition=gpu",
        "#SBATCH --gpus=1",
        f"#SBATCH --job-name={array}",
        "#SBATCH --ntasks=1",
        "#SBATCH --cpus-per-task=18",
        f"#SBATCH --time={time}",
        f"#SBATCH --array=1-{n_jobs}%{n_sims}",
        f"#SBATCH --output=../slurm_output/{array}_%A_%a.out",
    ]
    environment_setup = [
        "",
        f'array="{array}"',
        "",
        "ARRAY_FILE=$HOME/thesis/SymPDE/jobs/arrays/$array.txt",
        "cd $HOME/thesis/SymPDE/symlie",
        "",
        "module purge",
        "module load 2022",
        "module load Anaconda3/2022.05",
        "source activate sympde",
    ]
    # Each task runs run.py with line number $SLURM_ARRAY_TASK_ID of the
    # argument file appended to the command line.
    launch_command = [
        "",
        "srun python -u run.py \\",
        f"    --num_workers 18 {add_kwargs} \\",
        "    $(head -$SLURM_ARRAY_TASK_ID $ARRAY_FILE | tail -1)",
    ]
    return "\n".join(scheduler_directives + environment_setup + launch_command) + "\n"

# Experiments whose argument files are turned into Slurm job scripts.
# Alternative selections, kept for quick switching:
# arrays = ['sine1d', 'sine2d', 'flower', 'mnist']
# arrays = ['sine1d-predict', 'sine2d-predict', 'mnist-predict', 'flower-predict']
# arrays = ['mnist-predict']
arrays = ['sine1d-task']


# Extra run.py arguments for the two short modes.
generate_data = "--generate_data --tags dev"
dryrun = "--net TrainP --tags dev --max_epochs 1 --n_test 10 --n_val 10 --n_train 10"

# Pick exactly one (extra arguments, time limit) pair.
# add_kwargs, time = dryrun, "00:10:00"
# add_kwargs, time = generate_data, "00:10:00"
add_kwargs, time = '', "01:00:00"

# Start from an empty job_arrays folder so scripts of earlier selections do
# not linger. Create it on first use and leave any sub-directories alone.
os.makedirs(job_array_dir, exist_ok=True)
for file in os.listdir(job_array_dir):
    stale_path = os.path.join(job_array_dir, file)
    if os.path.isfile(stale_path):
        os.remove(stale_path)

for array in arrays:
    lines = read_lines(os.path.join(array_dir, f'{array}.txt'))
    n_jobs = len(lines)
    n_sims = min(10, n_jobs)  # value placed after '%' in the --array spec

    # Forward the selected time limit; without it the short presets above
    # would silently fall back to get_template's one-hour default.
    bash_script = get_template(n_jobs = n_jobs, array = array, time = time, n_sims = n_sims, add_kwargs = add_kwargs)

    write_lines(os.path.join(job_array_dir, f'{array}.job'), [bash_script])