# Multiple flows

1. Flow 1 runs: it reads `bar1` as input and writes `bar2` as output.
2. Flow 2 runs: it reads both the input and the output of that Flow 1 run (`Flow1.input`, `Flow1.output`).

In [2]:
import logging

import pandas as pd
from doltpy.core import Dolt
from doltpy.core.write import import_df

# Only warnings and errors from the root logger should reach the notebook output.
logger = logging.getLogger()
logger.setLevel(logging.WARNING)


def head_commit(repo):
    """Return the id of the newest commit in ``repo``.

    Takes the first key of ``repo.log(number="1")``, which this notebook
    treats as the latest commit id.
    """
    return next(iter(repo.log(number="1")))


# Create a fresh Dolt repository in the current working directory.
dolt = Dolt.init(".")

# Two versions of the same table: v2 has the three v1 rows plus three new ones.
df_v1 = pd.DataFrame({"A": [1, 1, 1], "B": [1, 1, 1]})
df_v2 = pd.DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": [1, 1, 1, 2, 2, 2]})

# Version 1: create `bar`, using the materialised DataFrame index as primary key.
import_df(dolt, "bar", df_v1.reset_index(), ["index"], "create")
dolt.add("bar")  # staging the one changed table is enough; no extra add(".") needed
dolt.commit("Initialize bar")
v1 = head_commit(dolt)

# Version 2: update `bar` in place with the extended frame.
import_df(dolt, "bar", df_v2.reset_index(), ["index"], "update")
dolt.add("bar")
dolt.commit("Add rows to bar")
v2 = head_commit(dolt)

In [3]:
# Run the first flow (demo_one.py) inside the project's Poetry environment.
# The run id printed on the "Workflow starting (run-id ...)" log line
# identifies this run for the downstream flow.
!poetry run python3 demo_one.py run

[35m[1mMetaflow 2.2.5.post33+gitc20afd4[0m[35m[22m executing [0m[31m[1mMultiFlowDemo1[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-18 11:56:14.443 [0m[1mWorkflow starting (run-id 1610999774434109):[0m
[35m2021-01-18 11:56:14.450 [0m[32m[1610999774434109/start/1 (pid 36833)] [0m[1mTask is starting.[0m
[35m2021-01-18 11:56:15.530 [0m[32m[1610999774434109/start/1 (pid 36833)] [0m[22m01-18 11:56:15 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:56:15.564 [0m[32m[1610999774434109/start/1 (pid 36833)] [0m[22m01-18 11:56:15 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[3

[35m2021-01-18 11:56:19.815 [0m[32m[1610999774434109/end/3 (pid 36913)] [0m[22m01-18 11:56:19 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes[0m
[35m2021-01-18 11:56:19.822 [0m[32m[1610999774434109/end/3 (pid 36913)] [0m[22m01-18 11:56:19 doltpy.core.system_helpers INFO     No processes to clean up, exiting[0m
[35m2021-01-18 11:56:19.975 [0m[32m[1610999774434109/end/3 (pid 36913)] [0m[1mTask finished successfully.[0m
[35m2021-01-18 11:56:19.976 [0m[1mDone![0m
01-18 11:56:19 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes
01-18 11:56:19 doltpy.core.system_helpers INFO     No processes to clean up, exiting


In [None]:
# TODO: pluck flow from last step

In [4]:
!poetry run python3 demo_two.py run --flow-dep MultiFlowDemo1/1610999774434109

[35m[1mMetaflow 2.2.5.post33+gitc20afd4[0m[35m[22m executing [0m[31m[1mMultiFlowDemo2[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-18 11:56:32.769 [0m[1mWorkflow starting (run-id 1610999792759585):[0m
[35m2021-01-18 11:56:32.775 [0m[32m[1610999792759585/start/1 (pid 36928)] [0m[1mTask is starting.[0m
[35m2021-01-18 11:56:33.837 [0m[32m[1610999792759585/start/1 (pid 36928)] [0m[22m01-18 11:56:33 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:56:33.934 [0m[32m[1610999792759585/start/1 (pid 36928)] [0m[22m01-18 11:56:33 doltpy.core.dolt INFO     flow_name,run_id,step_name,task_id,kind,database,table_name,commit,timestamp[

[35m2021-01-18 11:56:36.167 [0m[32m[1610999792759585/middle/2 (pid 36980)] [0m[22m01-18 11:56:36 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:56:36.195 [0m[32m[1610999792759585/middle/2 (pid 36980)] [0m[22m01-18 11:56:36 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:56:36.254 [0m[32m[1610999792759585/middle/2 (pid 36980)] [0m[22m01-18 11:56:36 doltpy.core.dolt INFO     * master                                        	a7hoh2l3phq5irslbupo7qpljd936kk5[0m
[35m2021-01-18 11:56:36.364 [0m[32m[1610999792759585/middle/2 (pid 36980)] [0m[22m[0m
[35m2021-01-18 11:56:36.364 [0m[32m[1610999792759585/middle/2 (pid 36980)] [0m[22m01-18 11:56:36 doltpy.core.write.write INFO     No import mode specified, table exists, using "update"[0m
[35m2021-01-18 11:56:36.453 [0m[32m[1610999792759585/middle/2 (pid 36980)] [0m[22m01-18 1

In [6]:
# Print the source of the second flow so the reader can see how it uses the
# --flow-dep parameter to find the upstream run's reads and writes.
!cat demo_two.py

import logging

logger = logging.getLogger()

import pickle
import time

from metaflow import FlowSpec, step, DoltDT, Parameter
from metaflow.datatools.dolt import DoltRun
import pandas as pd
from sklearn import tree

class MultiFlowDemo2(FlowSpec):

    flow_dep = Parameter('flow-dep',  help="Specifc the tag for the input version", required=True)

    @step
    def start(self):
        flow, run = self.flow_dep.split("/")
        d = DoltRun(flow_name=flow, run_id=run)
        f_input = d.reads[0]
        f_output = d.writes[0]
        with DoltDT(run=self) as dolt:
            self.inp1 = dolt.read_table(f_input.table_name, commit=f_input.commit)
            self.inp2 = dolt.read_table(f_output.table_name, commit=f_output.commit)

        self.next(self.middle)

    @step
    def middle(self):
        with DoltDT(run=self) as dolt:

            df = self.inp1 + self.inp2

            dolt.write_table(table_name='baz', df=df, pks=['index'])

      