## Create condor scheduling jobs

Generate the `condor_submit` commands for all experiments, write them to a shell script, and upload that script to the server.

In [None]:
from notebook_prelude import *

In [None]:
from utils import time_utils, git_utils

# Collect every experiment config matching <config folder>/<experiment>/<file>.yaml,
# leaving out any path that contains the marker '.disabled.'.
experiment_glob = '{}/*/*.yaml'.format(EXPERIMENT_CONFIG_FOLDER)
all_experiments = [
    config_file
    for config_file in glob(experiment_glob)
    if '.disabled.' not in config_file
]

# Experiment folders in descending scheduling priority: the earlier a name
# appears in this list, the higher the condor priority of its jobs.
# Experiments whose folder name is not listed here are skipped when the jobs
# are generated.
priorities = [
    'remove_unseen_nodes',
    'split_multi_words',
    'edge_labels',
    'content_vs_structure',
    'use_directed',
    'relabel',
    'remove_infrequent',
    'node_weights',
    'combined',
    'ngrams',
    'fast_wl_normalization',
#    'dimensionality_reduction',
]

# General job settings.
# NOTE: none of the condor_submit command templates in this notebook has a
# placeholder for `cores`, `verbose` or `create_predictions`, so these values
# do not end up in the generated commands.
cores = 16
verbose = 1
create_predictions = 'true'

# Additional command-line arguments appended to the Args of every experiment
# job. Use e.g. '--use_nested' here; the empty string adds nothing.
extra = ''

# Command template for one experiment job. Placeholders: prio, name,
# task_name, experiment (path of the config file) and extra.
tmpl = (
    'condor_submit priority="{prio}" batch_name="{name}_{task_name}" '
    'Args="--task_name {task_name} '
    '--experiment_config /home/david/bachelor-thesis/code/{experiment} {extra}" '
    'classification_job.condor'
)

# Build one condor_submit command per (task type, experiment config) pair.
# `outs` collects (priority, command) tuples; the commands are ordered by
# priority later, when the start script is written.
outs = []
for t in [TYPE_CONCEPT_MAP, TYPE_COOCCURRENCE]:
    for experiment in sorted(all_experiments):
        # Config paths look like <config folder>/<experiment name>/<file>.yaml.
        # Taking the LAST two components keeps this working no matter how many
        # path components the config folder itself consists of.
        name, name_ = experiment.split('/')[-2:]
        if name not in priorities:
            print('Missing priority for experiment: {}. Skipping.'.format(name))
            continue
        # Earlier entries in `priorities` get a higher condor priority.
        prio = 100 - priorities.index(name)
        # Co-occurrence jobs are scheduled after the concept-map job of the
        # same experiment.
        if t == TYPE_COOCCURRENCE:
            prio -= 10
        # The batch name is built from the config file name (name_), while the
        # priority is derived from the experiment folder name (name).
        cmd = tmpl.format(
            prio=prio,
            name=name_,
            task_name=t,
            experiment=experiment,
            extra=extra,
        )
        outs.append((prio, cmd))

# Jobs that do not belong to an experiment config, given as
# (priority, batch name, job arguments): the plain concept-map run, the text
# run and the three dummy runs.
fixed_jobs = [
    (-2, 'normal_concept_maps', '--task_name concept'),
    (91, 'text', '--task_type_include text'),
]
fixed_jobs.extend(
    (101, dummy_type, '--task_type_include {}'.format(dummy_type))
    for dummy_type in ('dummy_most_frequent', 'dummy_stratified', 'dummy_uniform')
)

fixed_job_tmpl = 'condor_submit priority="{prio}" batch_name="{name}" Args="{args}" classification_job.condor'
for job_prio, batch_name, job_args in fixed_jobs:
    outs.append((job_prio, fixed_job_tmpl.format(prio=job_prio, name=batch_name, args=job_args)))

import os

# Skeleton of the generated shell script. The three placeholders are filled
# with the creation time, the current git commit and the submit commands.
PRELUDE = '''#!/usr/bin/env bash

# Created: {}
# Commit:  {}

{}
'''

# Highest priority first. sorted() is stable (also with reverse=True), so jobs
# with equal priority keep the order in which they were added to `outs`.
ordered_cmds = [cmd for prio, cmd in sorted(outs, key=lambda x: x[0], reverse=True)]
cmds = ';\n\n'.join(ordered_cmds)

# open(..., 'w') does not create missing directories, so make sure the output
# folder exists before writing.
os.makedirs('tmp', exist_ok=True)
# NOTE: the file name (including its spelling) is also used by the chmod/scp/cat
# shell commands of this notebook; rename it everywhere or nowhere.
with open('tmp/start_classifaction_jobs.sh', 'w') as f:
    f.write(PRELUDE.format(time_utils.get_time_formatted(), git_utils.get_current_commit(), cmds))

print('# Jobs: {}'.format(len(outs)))

# Make the generated start script executable and copy it with scp into the
# `condor_scripts/` directory on the host `pe`.
# NOTE(review): `pe` is presumably an SSH host alias configured outside this
# notebook - confirm.
print('Uploading')
!chmod +x tmp/start_classifaction_jobs.sh
!scp tmp/start_classifaction_jobs.sh pe:condor_scripts/
print('Finished')

In [None]:
# Print the generated start script so it can be checked by eye.
!cat tmp/start_classifaction_jobs.sh