In [None]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

from nlppln import WorkflowGenerator
# Working directory handed to WorkflowGenerator(working_dir=...) in the cells
# below. Set the CWL_WORKING_DIR environment variable to point it at a
# location on your own machine instead of editing this cell; when the variable
# is unset or empty, the previously hard-coded path is used.
cwl_working_dir = os.environ.get('CWL_WORKING_DIR') or '/home/jvdzwaan/cwl-working-dir/'

In [None]:
# Load all known steps (two local CWL directories plus the remote BlackLab
# indexer definition) and print the overview of what is available.
with WorkflowGenerator(working_dir=cwl_working_dir) as wf:
    for local_steps in ('../adhtools/cwl/', '../java/cwl/'):
        wf.load(steps_dir=local_steps)
    wf.load(step_file='https://raw.githubusercontent.com/arabic-digital-humanities/BlackLabIndexer-docker/master/blacklabindexer.cwl')

    step_overview = wf.list_steps()
    print(step_overview)

In [None]:
# Build the "index corpus with corpus specific metadata" workflow and save it
# next to the other adhtools CWL files.
with WorkflowGenerator(working_dir=cwl_working_dir) as wf:
    # Make the local steps and the remote BlackLab indexer step available.
    for local_steps in ('../adhtools/cwl/', '../java/cwl/'):
        wf.load(steps_dir=local_steps)
    wf.load(step_file='https://raw.githubusercontent.com/arabic-digital-humanities/BlackLabIndexer-docker/master/blacklabindexer.cwl')

    # Workflow inputs without a default value.
    generic_yaml = wf.add_input(generic_yaml='File')
    specific_yaml = wf.add_input(specific_yaml='File')
    yaml_name = wf.add_input(yaml_name='string')
    in_dir = wf.add_input(in_dir='Directory')

    # Workflow inputs that fall back to a default value.
    index_name = wf.add_input(index_name='string', default='corpus')
    action = wf.add_input(action='string', default='create')
    index_format = wf.add_input(index_format='string', default='safar-stemmer')
    xmx = wf.add_input(xmx='string', default='2G')

    # Documentation text that is attached to the workflow.
    doc = """
    Index a corpus with corpus specific metadata.
    
    Inputs:
        generic_yaml (File): yaml file containing the generic indexer configuration, 
            i.e., one of the blacklab indexer formats from 
            https://github.com/arabic-digital-humanities/index-safar.
            Specifies how to index SAFAR analyzer/stemmer output.
        specific_yaml (File): yaml file containing the corpus specific indexer 
            configuration, i.e., one of the files from 
            https://github.com/arabic-digital-humanities/corpus-blacklab-metadata-config.
            Determines what metadata and how the metadata is displayed in the corpus frontend.
        yaml_name (str): name for the file in which the generic_yaml and specific_yaml are 
            combined. In practice this should be either ``safar-analyzer.blf.yaml`` or 
            ``safar-stemmer.blf.yaml``.
        in_dir (Directory): Directory containing SAFAR XML files.
        index_name (str): The name of the index (default: corpus).
        action (str): The action that should be performed on the index, e.g., creation 
            (default: create). Other options are explained on 
            http://inl.github.io/BlackLab/indexing-with-blacklab.html#index-supported-format.
        index_format (str): The index format to be used, i.e., either ``safar-stemmer`` or 
            ``safar-analyzer`` (see https://github.com/arabic-digital-humanities/index-safar).
        xmx (str): Optional parameter to set the Java heap space (default: 2G).
    
    Outputs:
        Directory containing a BlackLab index.
    """
    wf.set_documentation(doc)

    # Step 1: combine the generic and corpus specific yaml into one config file.
    yaml_parts = [generic_yaml, specific_yaml]
    blf_format_file = wf.merge_yaml(in_files=yaml_parts, out_name=yaml_name)

    # Step 2: run the BlackLab indexer with the merged configuration.
    indexer_args = dict(
        action=action,
        config=blf_format_file,
        in_dir=in_dir,
        index_format=index_format,
        index_name=index_name,
        xmx=xmx,
    )
    indexed = wf.blacklabindexer(**indexer_args)

    # Only the index is exposed as workflow output. To also expose the merged
    # yaml file, add: wf.add_outputs(yaml=blf_format_file)
    wf.add_outputs(indexed=indexed)

    workflow_file = '../adhtools/cwl/index-corpus-specific.cwl'
    wf.save(workflow_file, wd=True, relative=False)
