# Make granule lists and submit files for held OSG Pool Jobs
Takes as input a list generated by ```getheld.sh``` when running on a cluster on the OSG access point.

In [1]:
import pandas as pd
import numpy as np

In [2]:
def _granule_from_log(fn):
    """Return the granule file name embedded in ``fn``: from 'ATL03_' through '.h5'."""
    return fn[fn.find('ATL03_'):fn.find('.h5') + 3]


def _description_from_log(fn):
    """Return the job description embedded in ``fn``.

    This is the text between 'job_' and '_ATL03_' with everything from the
    last '-' onwards removed (presumably the cluster ID suffix).
    """
    tag = fn[fn.find('job_') + 4:fn.find('_ATL03_')]
    return tag[:tag.rfind('-')]


def _geojson_for_description(description):
    """Return the simplified geojson path that belongs to a job description.

    The first '_'-separated token is mapped to 'ANT' (when it is 'AIS') or
    'GRE' (anything else), and the second token is dropped.
    """
    parms = description.split('_')
    parms[0] = 'ANT' if parms[0] == 'AIS' else 'GRE'
    del parms[1]
    return 'geojsons/simplified_' + '_'.join(parms) + '.geojson'


def write_granule_list(input_list):
    """Split a list of held jobs into granule lists for resubmission.

    Reads ``input_list`` (a header-less CSV with the columns job file name and
    hold reason) and writes three files:

    * ``<input>_processed.csv`` - all jobs with the derived columns,
    * the "memory" list - jobs held because they exceeded ``request_memory``,
    * the "resubmit" list - all other held jobs.

    The two granule lists have no header/index and the column order
    granule, geojson, description, geojson_full. Their paths are derived from
    ``input_list`` by replacing 'hold_lists/' with 'granule_lists/' and
    'final_' with 'memory_' / 'resubmit_'.

    Parameters
    ----------
    input_list : str
        Path to the hold list CSV.

    Returns
    -------
    tuple of str
        ``(fn_mem, fn_nomem)``, the paths of the memory and resubmit lists.

    Raises
    ------
    ValueError
        If the derived output paths are not distinct from each other and from
        ``input_list`` (e.g. the name lacks 'final_' or '.csv'), because one
        file would then silently overwrite another.
    """
    fn_processed = input_list.replace('.csv', '_processed.csv')
    list_stub = input_list.replace('hold_lists/', 'granule_lists/')
    fn_mem = list_stub.replace('final_', 'memory_')
    fn_nomem = list_stub.replace('final_', 'resubmit_')

    df = pd.read_csv(input_list, header=None, names=['fn', 'hold_reason'])

    # Validate before anything is written, so a bad name leaves no partial output behind.
    out_paths = {fn_processed, fn_mem, fn_nomem}
    if len(out_paths) < 3 or input_list in out_paths:
        raise ValueError(
            "Cannot derive distinct output files from '%s': expected a name "
            "containing 'final_' and ending in '.csv'." % input_list
        )

    # An empty hold reason is read as NaN (a float); treat it as "not a memory hold"
    # instead of failing on the substring test.
    df['is_memory'] = df['hold_reason'].map(
        lambda reason: isinstance(reason, str) and 'exceeded request_memory' in reason
    ).astype(bool)
    df['granule'] = df['fn'].map(_granule_from_log)
    df['description'] = df['fn'].map(_description_from_log)
    df['geojson'] = df['description'].map(_geojson_for_description)
    df['geojson_full'] = df['geojson'].map(lambda path: path.replace('simplified_', ''))

    list_cols = ['granule', 'geojson', 'description', 'geojson_full']
    df = df[list_cols + ['fn', 'hold_reason', 'is_memory']]
    df.to_csv(fn_processed)

    df_mem = df.loc[df.is_memory, list_cols]
    df_nomem = df.loc[~df.is_memory, list_cols]

    df_mem.to_csv(fn_mem, header=False, index=False)
    print('Wrote file %s. (%i jobs)' % (fn_mem, len(df_mem)))
    df_nomem.to_csv(fn_nomem, header=False, index=False)
    print('Wrote file %s. (%i jobs)' % (fn_nomem, len(df_nomem)))

    return fn_mem, fn_nomem

def write_submit_file(list_fn, sub_fn=None, mem_gb=16, disk_gb=None):
    """Write an HTCondor submit file that queues one job per row of a granule list.

    Parameters
    ----------
    list_fn : str
        Path of the granule list; it is referenced by the ``queue ... from``
        statement (columns: granule, polygon, descriptor, polygon_full).
    sub_fn : str, optional
        Path of the submit file to write. If not given, it is derived from
        ``list_fn`` by replacing 'granule_lists/' with 'HTCondor_submit/' and
        '.csv' with '.submit'.
    mem_gb : int, optional
        Value for ``request_memory`` in GB (formatted with ``%i``).
    disk_gb : int, optional
        Value for ``request_disk`` in GB. Defaults to ``mem_gb``.

    Raises
    ------
    ValueError
        If the submit file path equals ``list_fn``, which would overwrite the
        granule list (happens when ``sub_fn`` is derived from a ``list_fn``
        containing neither 'granule_lists/' nor '.csv').
    """
    if not sub_fn:
        sub_fn = list_fn.replace('granule_lists/', 'HTCondor_submit/').replace('.csv', '.submit')
    if sub_fn == list_fn:
        raise ValueError(
            "Submit file path '%s' is identical to the granule list path; "
            "pass an explicit sub_fn." % sub_fn
        )
    if disk_gb is None:
        disk_gb = mem_gb

    lines = [
        'universe    = vanilla',
        '+SingularityImage = "osdf:///ospool/ap21/data/fliphilipp/containers/icelake-container_v1.sif"',
        'Requirements = HAS_SINGULARITY == True && OSG_HOST_KERNEL_VERSION >= 31000',
        'executable  = run_py.sh',
        'arguments = $(granule) $(polygon)',
        'max_retries = 30',
        'success_exit_code = 69',
        'transfer_input_files = detect_lakes.py, icelakes/__init__.py, icelakes/utilities.py, icelakes/nsidc.py, icelakes/detection.py, misc/test1, misc/test2, $(polygon), $(polygon_full)',
        'transfer_output_files = detection_out_data, detection_out_plot, detection_out_stat',
        'should_transfer_files = YES',
        'when_to_transfer_output = ON_EXIT',
        'log           = logs/job_$(descriptor)-$(ClusterID)_$(granule)-$(ProcID).log',
        'error         = errs/job_$(descriptor)-$(ClusterID)_$(granule)-$(ProcID).err',
        'output        = outs/job_$(descriptor)-$(ClusterID)_$(granule)-$(ProcID).out',
        'request_cpus    = 1',
        'request_memory  = %iGB' % mem_gb,
        'request_disk    = %iGB' % disk_gb,
        'queue granule,polygon,descriptor,polygon_full from %s' % list_fn,
    ]

    # The context manager closes the file even if the write fails.
    with open(sub_fn, "w") as f:
        f.write('\n'.join(lines) + '\n')

    print('Wrote file %s.\n' % sub_fn)

## Initial submit file

In [3]:
# Submit file for the initial run over the full granule list; the submit path
# is derived from the list path, and 8 GB is requested per job.
write_submit_file(
    list_fn='granule_lists/icelakes-methods.csv',
    mem_gb=8,
)

Wrote file HTCondor_submit/icelakes-methods.submit.

