# Advanced Attributes

In [1]:
# Following code is needed to preconfigure this notebook
import datetime
import sys
import os
sys.path.insert(0, os.path.abspath('../../..'))

import pyflow as pf

# Scratch area used by the course; the suite files and the job output live beneath it
scratchdir = os.path.join('/', *('path', 'to', 'scratch'))
filesdir, outdir = (os.path.join(scratchdir, leaf) for leaf in ('files', 'out'))


class CourseSuite(pf.Suite):
    """
    Suite preloaded with the defaults used throughout the course, so that the
    individual examples do not have to repeat them
    """
    def __init__(self, name, **kwargs):
        # Course-wide defaults: local host, per-suite files directory, shared
        # output directory, and a suspended default status
        defaults = dict(
            host=pf.LocalHost(),
            files=os.path.join(filesdir, name),
            home=outdir,
            defstatus=pf.state.suspended,
        )

        # Keywords supplied by the caller take precedence over the defaults
        super().__init__(name, **{**defaults, **kwargs})


class MyTask(pf.Task):

    """Counts to the double of a number, first half using a for loop then a while loop"""

    def __init__(self, name, default_value=0, **kwargs):
        """
        Parameters:
            name: name of the task; it is also echoed by the generated script.
            default_value: half of the count. Stored as the HALF variable, and
                doubled as the LIMIT variable.
            **kwargs: extra entries merged over HALF/LIMIT and forwarded to the
                ``pf.Task`` constructor as keyword arguments.
        """
        limit = 2 * default_value

        variables = {
            'HALF': default_value,
            'LIMIT': limit,
        }
        variables.update(**kwargs)

        labels = {
            'counter_label': 'count to {}'.format(limit)
        }

        script = [
            'echo "This is a counting task named {}"'.format(name),
            # First half: 1..HALF
            'for i in $(seq 1 $HALF); do echo "count $i/$LIMIT"; done',
            # Second half: HALF+1..LIMIT. The comparison is inclusive (-le) so that the
            # count actually reaches LIMIT, and the arithmetic uses the POSIX $((...))
            # form rather than the obsolete bash-only $[...]
            'i=$(($HALF+1)); while [ $i -le $LIMIT ]; do echo "count $i/$LIMIT" ; i=$(($i+1)); done'
        ]

        super().__init__(name,
                         script=script,
                         labels=labels,
                         **variables)


class MyFamily(pf.Family):
    """Family containing ``counters`` MyTask children, handed to ``pf.sequence``"""

    def __init__(self, name, counters, **kwargs):
        # The number of contained counting tasks is published as a label
        super().__init__(name, labels={'total_counters': counters}, **kwargs)

        with self:
            # One task per index; the index also serves as the task's default_value
            tasks = (MyTask(f'{name}_{index}', index) for index in range(counters))
            pf.sequence(tasks)


class LabelSetter(pf.Task):
    """Task whose script issues one ``ecflow_client --alter`` command per label"""

    def __init__(self, *args, **kwargs):
        """
        Accepts a sequence of label-value tuples
        """
        command = 'ecflow_client --alter=change label {{ LABEL.name }} "{{ VALUE }}" {{ LABEL.parent.fullname }}'

        # One templated command per (label, value) pair, in the order given
        script = []
        for label, value in args:
            script.append(pf.TemplateScript(command, LABEL=label, VALUE=value))

        # The task name is an optional keyword so that the positional arguments
        # remain free for the label-value tuples
        name = kwargs.pop('name', 'set_labels')
        super().__init__(name, script=script, **kwargs)

## Creation of Attributes

Typically, we have three methods to construct attributes (or sub nodes) attached to any specific node. Examples of each are given here both within a simple tree formulation of a suite and within a class derived from a specific **pyflow** class.

These different methods have different constraints on them, and differ in clarity and legibility in different contexts. Ultimately, the choice of which to use should come down to which is most legible in context.

Firstly, we can construct the pyflow object within a context manager containing the parent node.

In [2]:
# Objects constructed inside a node's ``with`` block are attached to that node
with pf.Suite('s', host=pf.NullHost()) as s:
    with pf.Family('f') as f:
        pf.Label('l', 'text')      # label 'l' on family 'f'
        pf.Variable('V', 'value')  # variable 'V' on family 'f'

# Last expression of the cell: display the resulting suite
s

In [3]:
class DerivedFamily(pf.Family):
    """Family 'f' that attaches its own label and variable from inside its constructor."""
    def __init__(self):
        super().__init__('f')
        # Entering the family's own context attaches the objects created below to it
        with self:
            pf.Label('l', 'text')
            pf.Variable('V', 'value')


# Instantiating the derived family inside the suite's context attaches it to the suite
with pf.Suite('s', host=pf.NullHost()) as s:
    DerivedFamily()

# Last expression of the cell: display the resulting suite
s

Secondly, objects can be allocated by using keyword arguments on the parent node constructor. These take three forms:

1. For an attribute of which there can only be one instance, the keyword argument is the lower-case string of the attribute class name. E.g. `script=`.
2. For an attribute of which there can be multiple instances, the keyword argument is the lower-case, pluralised version of the class name. E.g. `labels=`, which accepts a list or tuple.
3. **ecFlow** variables are passed in as direct keyword arguments, identified by being capitalised and valid **ecFlow** variable names.

In [4]:
# Label and variable passed as constructor keywords: a lower-case plural keyword
# for the labels, a capitalised keyword for the ecFlow variable
with pf.Suite('s', host=pf.NullHost()) as s:
    pf.Family('f', labels={'l': 'text'}, V='value')

# Last expression of the cell: display the resulting suite
s

In [5]:
class DerivedFamily(pf.Family):
    """Family 'f' whose label and variable are supplied as constructor keywords"""

    def __init__(self, **kwargs):
        # Keywords supplied by the caller override the default variable V
        variables = {'V': 'value', **kwargs}

        super().__init__('f', labels={'l': 'text'}, **variables)


# No overrides are given, so the family is built with its default variable and label
with pf.Suite('s', host=pf.NullHost()) as s:
    DerivedFamily()

# Last expression of the cell: display the resulting suite
s

Finally, unambiguously named **pyflow** objects (variables, script, ...) can be directly assigned to their parent nodes.

In [6]:
with pf.Suite('s', host=pf.NullHost()) as s:
    f = pf.Family('f')
    # Direct assignment of a capitalised attribute defines the variable V on the family
    f.V = 'value'

# Last expression of the cell: display the resulting suite
s

## Best Practice for Variables and Attributes

Best practice for pyflow is to create derived types that encapsulate all of the concerns of a given class. This means that variable and attribute creation should occur within the constructor of the class being written. This should generally take the form of a setup section, in which various children are defined, before passing them through to the constructor of the superclass. Any structural children should then be defined below.

In [7]:
class ExampleFamily(pf.Family):
    """Family illustrating the recommended layout of a derived node constructor"""

    def __init__(self, name, example_value, initial_label, **kwargs):
        """
        Parameters:
            name: name of the family.
            example_value: value given to the EXAMPLE_VARIABLE variable.
            initial_label: initial value of the label 'a_label'.
            **kwargs: overrides for the variables defined below, or more general
                properties of the superclass (such as host=).
        """

        # This structure allows the kwargs to override any of these variables if needed, or
        # to set other more general properties of the superclass (such as host=). The same
        # effect could be achieved by using kwargs.setdefault(...) and passing kwargs through.
        variables = {
            'REQUIRED_VARIABLE': 'required_value',
            'EXAMPLE_VARIABLE': example_value
        }
        variables.update(kwargs)

        labels = {
            'a_label': initial_label
        }

        super().__init__(name, labels=labels, **variables)

        # Here we define structural children
        with self:
            (
                # MyFamily requires the number of counting tasks it should contain;
                # omitting it raises a TypeError when ExampleFamily is instantiated
                MyFamily('f1', counters=2)
                >>
                MyTask('t1')
            )

## Variable substitution and expansion

Variables and attributes can be directly referred to in scripts by making use of automatically exported environment variables of the same name. For example, a `RepeatDate('YMD', ...)` object may be referred to in a script by writing `$YMD`. This will be automatically detected by **pyflow** and the variable exported.

If generating scripts, or using the templating engine, **pyflow** objects can generate their own representations. The `str()` and `repr()` functions in Python will return representations of variables that can be used in scripts (after automatic variable exporting) and in technical contexts (pre variable exporting, such as in other **ecFlow** variables) respectively.

We can access the properties of an **ecFlow** `Variable` programmatically. This allows us to make interdependencies explicit, and to generate snippets within scripts that are guaranteed to correctly use the objects.

In [8]:
with pf.Suite('s'):
    v = pf.Variable('A_VARIABLE', 1234)

# str() gives the form used in scripts, repr() the form used in technical contexts
# (such as other ecFlow variables), and .value the value the variable was given
print(str(v), repr(v), v.value)
# .name is the bare variable name; .fullname also includes the owning node
print(v.name, v.fullname)

$A_VARIABLE %A_VARIABLE% 1234
A_VARIABLE /s:A_VARIABLE


This allows us to automatically generate the correct shell-expansion of variables in the appropriate script context. Note that both Python string substitution and Jinja2 templating use the `str()` representation by default.

In [9]:
# String interpolation picks up the str() representation of the variable
text_script = f'echo "Variable value: {v}"'

print(text_script)

echo "Variable value: $A_VARIABLE"


In [10]:
# Inside a template, {{ VARIABLE }} renders the str() representation by default,
# while properties such as .name can be requested explicitly
templated_script = pf.TemplateScript(
    'echo "variable {{ VARIABLE.name }} has value {{ VARIABLE }}"',
    VARIABLE=v
)

print(templated_script)

echo "variable A_VARIABLE has value $A_VARIABLE"


Other **ecFlow** objects that set accessible values can be accessed in the same way.

In [11]:
with pf.Suite('s') as s:
    pf.RepeatDate("YMD", datetime.date(2019, 1, 1), datetime.date(2019, 12, 31))
    
print(pf.TemplateScript(
    'echo "The current date object is {{ YMD.name }}. Value={{ YMD }}',
    YMD=s.YMD
))

echo "The current date object is YMD. Value=$YMD


We can also use templating to facilitate accessing attributes using the `ecflow_client`, and to correctly set them according to mutable values (including **ecFlow** variables).

In [12]:
with pf.Suite('s', FOO='bar') as s:
    pf.Label('label', '')  # label created with an empty initial value

# Build the ecflow_client command from the objects themselves: the label supplies
# its name and the path of its parent, the variable renders as its shell form
print(pf.TemplateScript(
    'ecflow_client --alter=change label {{ LABEL.name }} "{{ VALUE }}" {{ LABEL.parent.fullname }}',
    LABEL=s.label,
    VALUE=s.FOO
))

ecflow_client --alter=change label label "$FOO" /s


## Using attributes belonging to other nodes

Attributes associated with other nodes can be used by passing the relevant attribute object to the site where it is needed. This can be facilitated by accessing children of various nodes as attributes of the parent.

In [13]:
with pf.Suite('s') as s:
    with pf.Family('family1') as f1:
        pf.Label('the_label', '')

    with pf.Family('family2') as f2:
        # The label owned by family1 is handed to a task that lives in family2
        LabelSetter((f1.the_label, "a value"), name='labeller')

# Children are reachable as attributes of their parent node
print(f2.labeller.script)

ecflow_client --alter=change label the_label "a value" /s/family1


In contexts where the relative path between nodes and attributes is required, the `relative_path` method is able to interrogate the relationships. Alternatively the `fullname` attribute will give the absolute path of nodes.

Within **pyflow** expressions it should not be necessary to generate these paths manually, as the expression generator should do the right thing. However, it is sometimes useful to refer to these components within scripts, especially as expansions within template scripts.

In [14]:
# Relative paths between nodes and attributes
print(s.family1.the_label.relative_path(s.family2))
print(s.family2.labeller.relative_path(s.family1))
print(s.family2.labeller.relative_path(s.family1.the_label))
# Absolute paths of a node and of an attribute
print(s.family2.labeller.fullname)
print(s.family1.the_label.fullname)

# The same properties and methods can be evaluated from within a template
print('\nscript: \n', pf.TemplateScript(
    'location of external node: {{ NODE.fullname }}',
    NODE=s.family2.labeller
))
print('\nscript: \n', pf.TemplateScript(
    'attribute relative path: {{ ATTRIBUTE.relative_path(NODE) }}',
    ATTRIBUTE=s.family1.the_label,
    NODE=s.family2.labeller
))

family1:the_label
family2/labeller
../family2/labeller
/s/family2/labeller
/s/family1:the_label

script: 
 location of external node: /s/family2/labeller

script: 
 attribute relative path: ../family1:the_label


## Using variables defined in parents

**ecFlow** suites inherit variables from above. If a task is making use of these variables it is very easy to end up writing tasks that assume the existence of variables in a suite already, without anything programmatically indicating or enforcing that this relationship exists.

Derived Tasks that make use of external variables should require that they be passed in from outside. If the passed-in object is not itself used to build the script (i.e. the script refers to the variable directly by its literal name), then its validity should be asserted in the code.

In [15]:
class ChildTask(pf.Task):
    """Task 'uses_var' whose script relies on a variable named EXTERNAL_VAR"""

    def __init__(self, external_variable):
        # The script hard-codes the variable name, so check that the variable
        # handed in really is the one the script refers to
        assert external_variable.name == 'EXTERNAL_VAR'

        super().__init__('uses_var', script='echo "external variable: $EXTERNAL_VAR"')


with CourseSuite('assert_external_variable') as s:
    # EXTERNAL_VAR is defined on the family and handed explicitly to the task using it
    with pf.Family('containing_family', EXTERNAL_VAR=1234) as f:
        ChildTask(f.EXTERNAL_VAR)

# Last expression of the cell: display the resulting suite
s

In [16]:
# Show the script of the task 'uses_var' held by the family
print("script:\n", f.uses_var.script, '\n')

script:
 echo "external variable: $EXTERNAL_VAR" 



If scripts are being generated or templated, then the existence of inherited variables can be enforced through generation.

In [17]:
class ChildTask(pf.Task):
    """Task 'uses_var' whose script is generated from the variable handed to it"""

    def __init__(self, external_variable):
        # The variable object itself is rendered into the template, so the script
        # cannot refer to a name other than that of the variable supplied
        echo_script = pf.TemplateScript(
            'echo "external variable: {{ VARIABLE }}"', VARIABLE=external_variable)

        super().__init__('uses_var', script=echo_script)


with CourseSuite('templated_external_variable') as s:
    # The variable may carry any name here, as the task takes the name from the object
    with pf.Family('containing_family', MY_VAR=1234) as f:
        ChildTask(f.MY_VAR)

# Last expression of the cell: display the resulting suite
s

In [18]:
# Show the script of the task 'uses_var' held by the family
print("script:\n", f.uses_var.script, '\n')

script:
 echo "external variable: $MY_VAR" 



Alternatively, we can provide default values which are overridden in the context of an externally supplied variable.

In [19]:
class TaskWithVariable(pf.Task):
    """Task echoing a variable that is either supplied by the caller or owned by the task"""

    def __init__(self, name, default_value=1234, **kwargs):
        super().__init__(name, **kwargs)

        # Note that this sort of introspective setup is one that requires constructing
        # components after calling the superclass
        if not isinstance(default_value, pf.Variable):
            # Plain value: store it as the TASK_VALUE variable of this task and
            # refer to that variable in the script
            self.TASK_VALUE = default_value
            variable = self.TASK_VALUE
        else:
            # Existing variable: refer to it as it is
            variable = default_value

        self.script = pf.TemplateScript(
            'echo "external variable: {{ VARIABLE }}"', VARIABLE=variable)


with CourseSuite('internal_or_external_variable') as s:
    with pf.Family('containing_family', MY_VAR=1234) as f:
        TaskWithVariable('external_variable', f.MY_VAR)     # existing variable object
        TaskWithVariable('external_value', f.MY_VAR.value)  # plain value taken from it
        TaskWithVariable('default_value')                   # constructor default (1234)

# Last expression of the cell: display the resulting suite
s

In [20]:
# Compare the script built from an external variable with the one built from the default
print("script external:\n", f.external_variable.script, '\n')
print("script default:\n", f.default_value.script, '\n')

script external:
 echo "external variable: $MY_VAR" 

script default:
 echo "external variable: $TASK_VALUE" 



## General node properties

Nodes and attributes have many properties that can be accessed. Here is a non-exhaustive list of useful general node properties:

- `suite` - The `Suite` object containing the node
- `host()` - The currently active `Host` object
- `anchor` - The current anchor (either `Suite` or `AnchorFamily`) containing this node
- `name` - The visible name of this node
- `fullname` - The full path of this node from the root
- `all_children` - All (direct) children of a node
- `all_executable_children` - All `Tasks` and `Families` (directly) contained within a `Family`
- `all_tasks` - All `Tasks` (directly) contained within a `Family`