In [74]:
import yaml
from os.path import join

# Folder holding the example whose configuration this notebook explores.
examples_folder = '../../crosscompute-examples/tools'
configuration_folder = examples_folder + '/ask-question'
# Alternative examples; uncomment one pair to switch the configuration.
# examples_folder = '../../crosscompute-examples/widgets'
# configuration_folder = examples_folder + '/paint-letters'
# examples_folder = '../../crosscompute-examples/reports'
# configuration_folder = examples_folder + '/randomize-histograms'
configuration_path = join(configuration_folder, 'serve.yml')
# Read inside a context manager so the file handle is closed after parsing
# instead of lingering until garbage collection.
with open(configuration_path, 'rt') as configuration_file:
    configuration = yaml.safe_load(configuration_file)
configuration

{'crosscompute': '0.9.0',
 'name': 'Ask Question',
 'version': '0.0.1',
 'input': {'variables': [{'id': 'question',
    'view': 'string',
    'path': 'question.txt'}]},
 'output': {'variables': [{'id': 'response',
    'view': 'markdown',
    'path': 'response.md'}]},
 'tests': [{'folder': 'tests/standard'}],
 'batches': [{'name': '{question}',
   'folder': 'batches/{id}',
   'configuration': {'path': 'datasets/questions.csv'}}],
 'script': {'folder': '.',
  'command': 'python -c "$(jupyter nbconvert run.ipynb --to script --stdout)"'},
 'display': {'layout': 'input'}}

In [75]:
# A "batches:" key with no value loads as None, so fall back to an empty list
# to keep later iteration over the definitions safe.
raw_batch_definitions = configuration.get('batches') or []
raw_batch_definitions

[{'name': '{question}',
  'folder': 'batches/{id}',
  'configuration': {'path': 'datasets/questions.csv'}}]

In [76]:
from crosscompute.macros import format_slug
# Filter functions that a template expression can apply after a "|",
# keyed by the name written in the template (for example "{x | slug}").
FUNCTION_BY_NAME = {'slug': format_slug}
FUNCTION_BY_NAME

{'slug': <function crosscompute.macros.web.format_slug(text)>}

In [77]:
from crosscompute.routines.configuration import get_raw_variable_definitions
# Variable definitions taken from the 'input' section of the configuration.
input_variable_definitions = get_raw_variable_definitions(configuration, 'input')
input_variable_definitions

[{'id': 'question', 'view': 'string', 'path': 'question.txt'}]

In [78]:
import re
# Matches a "{...}" placeholder only when its inner text contains a "|" with
# something on both sides; the group captures that inner text without the
# whitespace next to the braces.
FILTER_PATTERN = re.compile(r'{\s*([^}]+\|[^}]+?)\s*}')
FILTER_PATTERN.findall('{ x | slug } - {y} - {one}')

['x | slug']

In [79]:
# Matches any "{...}" placeholder that has content and captures the inner
# expression without the whitespace next to the braces; an empty "{}" is
# left unmatched.
VARIABLE_ID_PATTERN = re.compile(r'{\s*([^}]+?)\s*}')
keys = VARIABLE_ID_PATTERN.findall('{ x | slug } {y} {one} {}')
keys

['x | slug', 'y', 'one']

In [80]:
# Scratch check: splitting text that has no "|" yields a single-element list.
'y'.split('|')

['y']

In [83]:
# Sample variable values, read as a global by render_text below.
data_by_id = {'x': 'AAA BBB', 'y': 'YYY'}

In [84]:
# Scratch check: slicing from index 1 drops the first element.
'apple'[1:]

'pple'

In [85]:
# Scratch check: consecutive "|" separators leave an empty string between them.
'a||b'.split('|')

['a', '', 'b']

In [86]:
import logging

def render_text(match):
    """Return the replacement text for one ``{expression}`` placeholder.

    Meant to be used as the replacement callback of ``re.sub``. The values
    come from the module-level ``data_by_id`` and the filters from
    ``FUNCTION_BY_NAME``.

    Parameters
    ----------
    match : re.Match
        Group 0 is the whole placeholder including braces; group 1 is the
        expression inside it, either ``variable_id`` or
        ``variable_id | filter | ...``.

    Returns
    -------
    The value stored for the variable, passed through each known filter in
    order. The placeholder is returned unchanged when the variable is
    missing, so the template text survives instead of raising.
    """
    matching_text = match.group(0)
    expression_text = match.group(1)

    # An exact key match wins, even if the key itself contains "|".
    if expression_text in data_by_id:
        return data_by_id[expression_text]

    if '|' not in expression_text:
        logging.warning(
            '%s missing in batch configuration',
            expression_text)
        return matching_text

    expression_terms = expression_text.split('|')
    variable_id = expression_terms[0].strip()
    try:
        text = data_by_id[variable_id]
    except KeyError:
        logging.warning(
            '%s missing in batch configuration',
            variable_id)
        # Without a value there is nothing to filter; falling through would
        # hit an unbound ``text``.
        return matching_text

    for function_name in expression_terms[1:]:
        function_name = function_name.strip()
        if not function_name:
            # "a || b" splits into an empty term; skip it.
            continue
        try:
            f = FUNCTION_BY_NAME[function_name]
        except KeyError:
            logging.error(
                '%s not supported in %s', function_name, expression_text)
            continue
        text = f(text)
    return text

VARIABLE_ID_PATTERN.sub(render_text, '{ x | slug } {y} {one} {}')



'aaa-bbb YYY {one} {}'

In [95]:
def format_text(text, data_by_id):
    """Fill the ``{expression}`` placeholders of ``text`` from ``data_by_id``.

    Parameters
    ----------
    text : str
        Template containing placeholders such as ``{x}`` or ``{x | slug}``.
    data_by_id : dict or None
        Values keyed by variable id. When empty or None the template is
        returned untouched. A ``None`` key acts as a catch-all: a callable
        stored there is used as the ``re.sub`` callback, and any other value
        replaces every placeholder literally.

    Returns
    -------
    str
        The rendered text. Placeholders whose variable is missing are left
        as written and a warning is logged.
    """
    if not data_by_id:
        return text

    if None in data_by_id:
        replacement = data_by_id[None]
        if callable(replacement):
            render_text = replacement
        else:

            def render_text(match):
                # Return the value through a callback so backslashes in it
                # are not interpreted as regex template escapes.
                return replacement
    else:

        def render_text(match):
            matching_text = match.group(0)
            expression_text = match.group(1)
            # An exact key match wins, even if the key contains "|".
            if expression_text in data_by_id:
                return data_by_id[expression_text]
            if '|' not in expression_text:
                logging.warning(
                    '%s missing in batch configuration',
                    expression_text)
                return matching_text
            expression_terms = expression_text.split('|')
            variable_id = expression_terms[0].strip()
            try:
                rendered_text = data_by_id[variable_id]
            except KeyError:
                logging.warning(
                    '%s missing in batch configuration',
                    variable_id)
                # Nothing to filter; keep the placeholder instead of
                # touching an unbound value.
                return matching_text
            for function_name in expression_terms[1:]:
                function_name = function_name.strip()
                if not function_name:
                    continue
                try:
                    f = FUNCTION_BY_NAME[function_name]
                except KeyError:
                    logging.error(
                        '%s not supported in %s', function_name, expression_text)
                    continue
                rendered_text = f(rendered_text)
            return rendered_text

    return VARIABLE_ID_PATTERN.sub(render_text, text)

format_text('{x | slug}', {'x': 'one two'})

'one-two'

In [88]:
from crosscompute.constants import BATCH_ROUTE
from crosscompute.macros import format_slug

def make_batch_definition(folder, name, slug, data_by_id=None):
    """Build one batch definition by rendering its templates.

    ``folder``, ``name`` and ``slug`` are rendered with ``format_text`` using
    ``data_by_id``. When ``slug`` is empty, the slug is derived from the
    rendered name with ``format_slug``. The uri is ``BATCH_ROUTE`` formatted
    with the resulting slug.

    Returns a dict with the keys 'folder', 'name', 'slug' and 'uri'.
    """
    definition = {
        'folder': format_text(folder, data_by_id),
        'name': format_text(name, data_by_id),
    }
    if slug:
        definition['slug'] = format_text(slug, data_by_id)
    else:
        # No explicit slug template: fall back to the rendered name.
        definition['slug'] = format_slug(definition['name'])
    definition['uri'] = BATCH_ROUTE.format(batch_slug=definition['slug'])
    return definition

make_batch_definition('batches/{x | slug}', '{x}', '', {'x': 'a'})

{'folder': 'batches/a', 'name': 'a', 'slug': 'a', 'uri': '/b/a'}

In [None]:
from crosscompute.exceptions import CrossComputeConfigurationError

def yield_data_by_id_from_text_path(text_path, variable_definitions):
    """Yield one ``{variable_id: line}`` dict per usable line of a text file.

    Parameters
    ----------
    text_path : str
        Path of the text file; each line is one value for the variable.
    variable_definitions : list of dict
        At most one definition. Its 'id' becomes the key of each yielded
        dict; with no definitions the key is None.

    Yields
    ------
    dict
        ``{variable_id: stripped_line}`` for every line that is neither
        blank nor a comment starting with "#".

    Raises
    ------
    CrossComputeConfigurationError
        If more than one variable is defined or the file cannot be opened.
        Because this is a generator, the error surfaces on first iteration.
    """
    if len(variable_definitions) > 1:
        raise CrossComputeConfigurationError(
            'use .csv to configure multiple variables')

    try:
        variable_id = variable_definitions[0]['id']
    except IndexError:
        variable_id = None

    try:
        text_file = open(text_path, 'rt')
    except OSError as e:
        # Report as a configuration error so callers can handle it together
        # with the other batch configuration problems.
        raise CrossComputeConfigurationError(
            f'{text_path} could not be read') from e

    with text_file:
        for line in text_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            yield {variable_id: line}
    

In [90]:
import logging
from crosscompute.macros import format_slug
from crosscompute.routines.configuration import get_scalar_text
from os.path import basename, splitext

# Expand each raw batch definition from the configuration into rendered batch
# definitions. A raw definition without a 'configuration' describes a single
# batch; one with a configuration file yields one batch per entry in the file.
batch_definitions = []
for raw_batch_definition in raw_batch_definitions:
    try:
        batch_folder = get_scalar_text(raw_batch_definition, 'folder')
    except KeyError:
        logging.error('folder required for each batch')
        continue
    # The name defaults to the last component of the folder.
    batch_name = get_scalar_text(
        raw_batch_definition, 'name', basename(batch_folder))
    batch_slug = get_scalar_text(raw_batch_definition, 'slug', '')

    if 'configuration' not in raw_batch_definition:
        batch_definitions.append(make_batch_definition(
            batch_folder, batch_name, batch_slug))
        continue

    # An empty "configuration:" key loads as None; treat it as having no path.
    batch_configuration = raw_batch_definition['configuration'] or {}
    if 'path' not in batch_configuration:
        logging.error('path expected for each batch configuration')
        continue
    batch_configuration_path = batch_configuration['path']
    batch_configuration_extension = splitext(batch_configuration_path)[1]

    try:
        if batch_configuration_extension == '.txt':
            batch_definitions.extend(make_batch_definition(
                batch_folder, batch_name, batch_slug, variable_data_by_id,
            ) for variable_data_by_id in yield_data_by_id_from_text_path(join(
                configuration_folder, batch_configuration_path,
            ), input_variable_definitions))
        # TODO: handle '.csv' here; until then .csv paths reach the error
        # below even though the message recommends them.
        else:
            raise CrossComputeConfigurationError(
                f'{batch_configuration_extension} not supported for '
                'batch configuration; use .txt or .csv')
    except CrossComputeConfigurationError as e:
        # Log and move on so one bad definition does not stop the rest.
        logging.error(e)
batch_definitions

ERROR:root:.csv not supported; use .txt or .csv to configure batch


[]