In [122]:
%load_ext autoreload
%autoreload 2

import os, sys
import pandas as pd

sys.path.append(os.path.join(os.getcwd(), '../symlie'))
sys.path.append(os.path.join(os.getcwd(), '..'))
from symlie.misc.utils_arrays import write_lines, read_lines, clean_val, dict_to_array

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [123]:
# Root folder of the job bookkeeping, relative to the notebook's working directory.
job_dir = '../jobs'

# Locations derived from job_dir:
#   array_file    - CSV table that is read with pd.read_csv
#   array_dir     - folder receiving one '<experiment>.txt' array file per row
#   job_array_dir - folder receiving the generated '<array>.job' scripts
array_file, array_dir, job_array_dir = (
    os.path.join(job_dir, leaf) for leaf in ('arrays.csv', 'arrays', 'job_arrays')
)

In [124]:
# Read the experiment table as raw strings and index it by experiment name.
df = pd.read_csv(array_file, dtype=object).set_index('experiment')

# Columns that are not passed through clean_val.
skip_keys = ['experiment', 'tags']

# Run clean_val over every remaining cell, one column at a time.
df_listed = pd.DataFrame({
    column: raw_cells.apply(clean_val)
    for column, raw_cells in df.items()
    if column not in skip_keys
})

# Inspect how one nested hyper-parameter of the 'combi' experiment was parsed.
df_listed.loc['combi', ['implicit_layer_dims']]

implicit_layer_dims    [[[49 49 49 49] [7 7 7]]]
Name: combi, dtype: object

In [125]:
def main(job_dir, array_file, array_dir):
    """Write one array file per experiment row of the hyper-parameter table.

    The CSV at ``array_file`` is read as raw strings and indexed by its
    ``experiment`` column. Every column except ``tags`` is passed cell by cell
    through ``clean_val``; ``tags`` is turned into a one-element list holding
    the cell text with its commas removed. For each row, the non-missing
    entries are handed to ``dict_to_array`` and the result is written to
    ``<array_dir>/<experiment>.txt`` with ``write_lines``.

    Args:
        job_dir: Currently unused; kept so existing callers keep working.
        array_file: Path of the CSV table describing the experiments.
        array_dir: Directory that receives the generated ``.txt`` files.
            It is created if it does not exist yet.
    """
    df = pd.read_csv(array_file, dtype=object)
    df = df.set_index('experiment')

    skip_keys = ['experiment', 'tags']

    df_listed = pd.DataFrame({key: vals.apply(clean_val) for key, vals in df.items() if key not in skip_keys})

    # With dtype=object an empty tags cell is a float NaN, which has no
    # .split(); leave it missing so that dropna() below removes it instead of
    # raising AttributeError.
    df_listed['tags'] = df['tags'].apply(
        lambda x: [''.join(x.split(','))] if isinstance(x, str) else x
    )

    # Rows that share an experiment name are written to the same path, so
    # point that out instead of letting it pass unnoticed.
    # NOTE(review): presumably write_lines overwrites, making the last row win - confirm.
    repeated = df_listed.index[df_listed.index.duplicated()].unique().tolist()
    if repeated:
        print(f"Warning: duplicated experiment names share one output file: {repeated}")

    os.makedirs(array_dir, exist_ok=True)

    for experiment, hparams in df_listed.iterrows():
        output_file = os.path.join(array_dir, experiment + '.txt')
        output_lines = dict_to_array(hparams.dropna().to_dict())

        # Number of runs = newline count + 1.
        # NOTE(review): assumes dict_to_array returns newline-separated text - confirm.
        n_runs = output_lines.count('\n') + 1
        print(f"Writing {experiment} with {n_runs} lines")

        write_lines(output_file, output_lines)

# Regenerate the per-experiment array files in array_dir from the table in array_file.
main(job_dir, array_file, array_dir)

Writing sine1d with 9 lines
Writing sine1d-predict with 81 lines
Writing sine2d with 18 lines
Writing sine2d-predict with 108 lines
Writing flower with 36 lines
Writing flower-predict with 216 lines
Writing mnist with 18 lines
Writing mnist-predict with 108 lines
Writing sine1d-task with 18 lines
Writing noise1d with 36 lines
Writing noise2d with 18 lines
Writing sine1dtwo-hparam-predict with 72 lines
Writing sine1dlayers-predict with 24 lines
Writing sine1dmtr-predict with 24 lines
Writing mnist-noise-predict with 24 lines
Writing mnist-noise-predict with 27 lines
Writing sine1d-noise-predict with 72 lines
Writing sine1d-noise-svd-predict with 81 lines
Writing noise1d-loss with 54 lines
grid_sizes [[[1 7] [1 7]] [[1 1] [1 7]]]
implicit_layer_dims [[49 49 49 49] [7 7 7]]
Writing combi with 81 lines
implicit_layer_dims [[0] [0]]
Writing combi-vanilla with 27 lines


In [126]:
def get_template(n_jobs: int, array: str, time: str = "01:00:00", n_sims: int = 1, partition: str = 'gpu', add_kwargs: str = '', n_cpus: int = 18) -> str:
    """Render the Slurm batch script for one job array.

    The script requests one GPU and ``n_cpus`` CPU cores per task. Each array
    task picks line ``$SLURM_ARRAY_TASK_ID`` of ``jobs/arrays/<array>.txt`` and
    passes it as command-line arguments to ``run.py``.

    Args:
        n_jobs: Number of array tasks; the array index runs from 1 to ``n_jobs``.
        array: Name of the array. It is used as the job name, as the prefix of
            the log file and as the stem of the array file.
        time: Wall-time limit placed in ``#SBATCH --time``.
        n_sims: Value placed after ``%`` in ``#SBATCH --array``, i.e. the
            limit on simultaneously running tasks.
        partition: Value placed in ``#SBATCH --partition``.
        add_kwargs: Extra flags inserted verbatim into the ``run.py`` call.
        n_cpus: CPU cores per task. The same number is passed to ``run.py``
            as ``--num_workers`` so the two settings cannot drift apart.

    Returns:
        The complete script text, ending with a newline.
    """
    return f"""\
#!/bin/bash

#SBATCH --partition={partition}
#SBATCH --gpus=1
#SBATCH --job-name={array}
#SBATCH --ntasks=1
#SBATCH --cpus-per-task={n_cpus}
#SBATCH --time={time}
#SBATCH --array=1-{n_jobs}%{n_sims}
#SBATCH --output=../slurm_output/{array}_%A_%a.out
#SBATCH --exclude=gcn45

array="{array}"

ARRAY_FILE=$HOME/thesis/SymPDE/jobs/arrays/$array.txt
cd $HOME/thesis/SymPDE/symlie

module purge
module load 2022
module load Anaconda3/2022.05
source activate sympde

srun python -u run.py \\
    --num_workers {n_cpus} {add_kwargs} \\
    $(head -$SLURM_ARRAY_TASK_ID $ARRAY_FILE | tail -1)
"""

# Arrays (stems of the '<array>.txt' files) for which job scripts are generated.
# Earlier selections, kept here for quick switching:
#   ['sine1d', 'sine2d', 'flower', 'mnist']
#   ['sine1d-predict', 'sine2d-predict', 'mnist-predict', 'flower-predict']
#   ['mnist-predict']
#   ['sine1d-task']
#   ['noise2d']
#   ['sine1dtwo-hparam-predict']
#   ['sine1dlayers-predict']
#   ['sine1dmtr-predict']
#   ['mnist-noise-predict']
#   ['sine1d-noise-predict']
#   ['noise1d']
#   ['sine1d-noise-svd-predict']
#   ['noise1d-loss']
arrays = ['combi', 'combi-vanilla']

# Ready-made sets of extra command-line flags.
generate_data = "--generate_data --tags dev"
dryrun = "--net TrainP --tags dev --max_epochs 1 --n_test 10 --n_val 10 --n_train 10"

# Active (extra flags, wall-time limit) pair. Other presets that were used:
#   dryrun,        "00:10:00"
#   generate_data, "00:10:00"
#   '',            "01:00:00"
#   '',            "00:30:00"
#   '',            "00:05:00"
add_kwargs = ''
time = "00:10:00"

# Partition written into the job scripts; 'gpu' is the alternative.
partition = 'gpu_mig'


# Upper bound on the number of tasks of one array that run at the same time.
n_sims = 20

# Start from a clean output folder so scripts of earlier selections do not
# linger. Only regular files are removed; a missing folder is created.
os.makedirs(job_array_dir, exist_ok=True)
for stale_name in os.listdir(job_array_dir):
    stale_path = os.path.join(job_array_dir, stale_name)
    if os.path.isfile(stale_path):
        os.remove(stale_path)

for array in arrays:
    lines = read_lines(os.path.join(array_dir, f'{array}.txt'))
    n_jobs = len(lines)

    # Cap the concurrency per array in a separate variable: overwriting n_sims
    # here would let one small array lower the limit of every array after it.
    array_n_sims = min(n_sims, n_jobs)

    bash_script = get_template(n_jobs=n_jobs, array=array, time=time, n_sims=array_n_sims, partition=partition, add_kwargs=add_kwargs)

    write_lines(os.path.join(job_array_dir, f'{array}.job'), [bash_script])