In [1]:
import sys
sys.path.append('..')
from core import computation_graph
from core import graph_merge

import pandas as pd

%load_ext autoreload
%autoreload 2

## Construct a simple pipeline

In [2]:
@computation_graph.optex_process('start_dataset')
def load_dataset(name):
    """Return a small, fixed two-column DataFrame.

    Args:
        name: Accepted for the pipeline's sake; the body does not read it.

    Returns:
        A DataFrame with columns ``a`` = [1, 2, 3] and ``b`` = [6, 3, 4].
    """
    columns = {'a': [1, 2, 3], 'b': [6, 3, 4]}
    return pd.DataFrame(columns)


@computation_graph.optex_process('dataset_two')
def transform_one(first_dataset):
    """Return ``first_dataset`` with 10 added to it."""
    shifted = first_dataset + 10
    return shifted


@computation_graph.optex_process('dataset_three')
def transform_two(second_dataset):
    """Return ``second_dataset`` divided by 10."""
    scaled = second_dataset / 10
    return scaled


@computation_graph.optex_composition('final_dataset')
def process_dataset(process_dataset):
    """Apply ``transform_one`` and then ``transform_two`` to the input.

    The parameter deliberately keeps the name ``process_dataset`` (shadowing
    the function inside its own body) because that name is used as a key
    when the resulting graph is inspected later in the notebook.
    """
    # Inner call runs first, so the call order matches the two-step original.
    return transform_two(transform_one(process_dataset))

# Build the pipeline dynamically: wrap a value in an Artifact, feed it to
# load_dataset (whose body ignores the argument), then run the composition.
d = process_dataset(
    load_dataset(
        computation_graph.Artifact('ahhh')
    )
)

In [3]:
# Walk upward from the final artifact: composition process -> its input
# artifact -> the 'start_dataset' process, then display that process's parents.
(
    d.parents['final_dataset']
    .parents['process_dataset']
    .parents['start_dataset']
    .parents
)

{'name': <core.computation_graph.Artifact at 0x7fe1d9816340>}

In [4]:
# Step into the composition's first child process and follow the chain
# 'dataset_two' -> 'second_dataset' -> 'dataset_three', then display the
# parents of the artifact reached at the end.
(
    d.parents['final_dataset']
    .child_processes[0]
    .children['dataset_two']
    .children['second_dataset']
    .children['dataset_three']
    .parents
)

{'final_dataset': <core.computation_graph.Process at 0x7fe18f30f1f0>}

## Nested compositions

This pipeline contains two levels of composition: a node containing a subgraph, where that subgraph itself contains another subgraph.

In [5]:
@computation_graph.optex_process('transform_return')
def transform(transform_arg):
    """Return ``transform_arg`` with 10 added to it."""
    shifted = transform_arg + 10
    return shifted


@computation_graph.optex_composition('inner_return')
def inner_compose(inner_arg):
    """Innermost composition: a single call to ``transform``."""
    transformed = transform(inner_arg)
    return transformed


@computation_graph.optex_composition('outer_return')
def outer_compose(outer_arg):
    """Outer composition: a single call to ``inner_compose``."""
    inner_result = inner_compose(outer_arg)
    return inner_result

# Run the doubly nested composition on an Artifact wrapping 20.
# Note: this rebinds `d`, replacing the pipeline result from the earlier cell.
d = outer_compose(
    computation_graph.Artifact(20)
)

## A more complicated graph

This graph has multiple compositions, inputs, and outputs. It is useful for debugging and verifying the system works as expected.

In [8]:
# NOTE(review): same name, role and behavior as the `transform` defined in the
# "Nested compositions" cell; running this cell rebinds the name.
@computation_graph.optex_process('transform_return')
def transform(transform_arg):
    """Return ``transform_arg`` with 10 added to it."""
    shifted = transform_arg + 10
    return shifted


# NOTE(review): same name, role and behavior as the `inner_compose` defined in
# the "Nested compositions" cell; running this cell rebinds the name.
@computation_graph.optex_composition('inner_return')
def inner_compose(inner_arg):
    """Composition consisting of a single call to ``transform``."""
    transformed = transform(inner_arg)
    return transformed


@computation_graph.optex_process('combined')
def combine(combine_arg1, combine_arg2):
    """Return the sum ``combine_arg1 + combine_arg2``."""
    total = combine_arg1 + combine_arg2
    return total


@computation_graph.optex_composition(['arg2', 'combined'])
def big_func(arg1, arg2):
    """Two-input, two-output composition used to exercise the graph machinery.

    Data flow:
        arg1 -> inner_compose -> transform -> combine(., arg2) -> second output
        arg2 -> transform                                      -> first output

    Each artifact is given an explicit ``name`` so that it can be recognised
    when the graph's edge list is printed.

    Returns:
        A tuple ``(arg2_transformed, combined)``.
        NOTE(review): the decorator's role list is ['arg2', 'combined'];
        presumably roles are paired positionally with the returned values, so
        the transformed arg2 is reported under the role 'arg2', which is also
        the name of the second parameter. Confirm this is intended.
    """
    # Label the two input artifacts.
    arg1.name = "input1"
    arg2.name = "input2"
    # Branch 1: arg1 goes through the nested composition, then one more transform.
    after_inner = inner_compose(arg1)
    after_inner.name = "input1_transformed"
    after_transform_1 = transform(after_inner)
    after_transform_1.name = "input1_transformed_again"
    # Join: the twice-transformed arg1 is combined with the untransformed arg2.
    combined = combine(after_transform_1, arg2)
    combined.name = "combined"
    # Branch 2: arg2 is also transformed on its own.
    arg2_transformed = transform(arg2)
    arg2_transformed.name = "input2_transformed"
    return arg2_transformed, combined

# Execute the composition dynamically on Artifacts wrapping 1 and 3;
# x receives the first returned artifact and y the second.
x, y = big_func(
    computation_graph.Artifact(1),
    computation_graph.Artifact(3),
)

## Preprocessing a graph for merging

Before merging a pipeline, we must:
1. Generate a static Graph from the pipeline's main function
2. Replace the Graph's composition nodes with the subgraphs they contain (so that there are no child processes)
3. Write the Graph in edge-list format.

These steps are performed below

In [28]:
# Step 1: generate a static graph from the pipeline's main function.
g = computation_graph.Graph.from_process(big_func)
# Steps 2-3: remove compositions and write the result in edge-list format.
mergeable_g = graph_merge.make_expanded_graph_copy(g)
print("The (pretty-printed) edge list is:")
# Each edge is a (role, parent, child) triple; only the endpoint names are shown.
[(src.name, dst.name) for _role, src, dst in mergeable_g.edges]

The (pretty-printed) edge list is:


[('input1', 'transform'),
 ('input2', 'combine'),
 ('input2', 'transform'),
 ('transform', 'input2_transformed'),
 ('transform', 'input1_transformed'),
 ('combine', 'combined'),
 ('input1_transformed', 'transform'),
 ('transform', 'input1_transformed_again'),
 ('input1_transformed_again', 'combine')]

## Executing a statically generated graph

We can execute a statically generated graph by mapping its input Artifacts to concrete values and calling `graph_merge.execute_graph()`.

It returns a mapping from output Artifacts to the values they take on.

In [41]:
# Bind the graph's first two inputs to concrete values.
# NOTE(review): this relies on the ordering of `mergeable_g.inputs`. In the
# recorded output below, index 0 behaves as "input1" (10) and index 1 as
# "input2" (42); confirm that this ordering is guaranteed.
graph_inputs = {
    mergeable_g.inputs[0]: 10,
    mergeable_g.inputs[1]: 42
}

# Execute the static graph and print, for every returned artifact, the first
# key of its `parents` mapping followed by the computed value.
for artifact, value in graph_merge.execute_graph(mergeable_g, graph_inputs).items():
    print(f"{list(artifact.parents.keys())[0]}: {value}")

transform_return: 52
combined: 72


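We can verify that this is correct by executing the function dynamically. The values are the same, but the returned roles are not. The cause has not been confirmed (**TODO**); a likely explanation is that the expanded static graph no longer contains the `big_func` composition, so its first output keeps the role assigned by the inner `transform` process (`transform_return`), whereas the dynamic run reports the composition's own return role (`arg2`).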

In [45]:
# Run big_func dynamically on the same input values (10 and 42) and print, for
# each returned artifact, the first key of its `parents` mapping and its data.
# NOTE(review): reads the private `_data` attribute directly; presumably no
# public accessor exists — confirm.
for artifact in big_func(computation_graph.Artifact(10), computation_graph.Artifact(42)):
    print(f"{list(artifact.parents.keys())[0]}: {artifact._data}")

arg2: 52
combined: 72
