# Multiple flows

1. Flow 1 runs: it reads `bar1` as its input and writes `bar2` as its output.
2. Flow 2 runs: it takes both Flow 1's input and Flow 1's output as its own inputs.

In [1]:
import logging

logger = logging.getLogger()
logger.setLevel(logging.WARNING)

import pandas as pd
from doltpy.core import Dolt
from doltpy.core.write import import_df

def _latest_commit(repo):
    """Return the first key of ``repo.log(number="1")``.

    The log is requested with a limit of one entry and is read as a mapping;
    its first key is what this notebook stores as a version identifier
    (presumably the commit hash of HEAD -- confirm against the doltpy docs).
    """
    return next(iter(repo.log(number="1").keys()))


# Initialise a Dolt repository in the current working directory.
dolt = Dolt.init(".")

# Two versions of the same table: v2 holds the three v1 rows plus three new ones.
df_v1 = pd.DataFrame({"A": [1, 1, 1], "B": [1, 1, 1]})
df_v2 = pd.DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": [1, 1, 1, 2, 2, 2]})

# Version 1: create table `bar`. reset_index() exposes the DataFrame index as an
# "index" column, which is passed as the primary key.
import_df(dolt, "bar", df_v1.reset_index(), ["index"], "create")
# Stage only `bar`, matching the second commit below (the original also ran a
# redundant `dolt.add(".")` right before this call).
dolt.add("bar")
dolt.commit("Initialize bar")
v1 = _latest_commit(dolt)

# Version 2: same table and key, imported in "update" mode.
import_df(dolt, "bar", df_v2.reset_index(), ["index"], "update")
dolt.add("bar")
dolt.commit("Add rows to bar")
v2 = _latest_commit(dolt)

01-18 11:24:19 doltpy.core.dolt INFO     Initializing Dolt repo in existing dir .
01-18 11:24:19 doltpy.core.dolt INFO     Creating a new repo in .
01-18 11:24:19 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306
01-18 11:24:19 doltpy.core.write.write INFO     Importing to table bar in dolt directory located in ., import mode create
01-18 11:24:19 doltpy.core.dolt INFO     Rows Processed: 3, Additions: 3, Modifications: 0, Had No Effect: 0
01-18 11:24:19 doltpy.core.dolt INFO     
01-18 11:24:19 doltpy.core.dolt INFO     
01-18 11:24:19 doltpy.core.dolt INFO     commit 27p8h38rkdpcq7t0hcsc0c8kimurkd19
Author: Max Hoffman <maximilian.wolfgang1@gmail.com>
Date:   Mon Jan 18 11:24:19 -0800 2021

	Initialize bar


01-18 11:24:19 doltpy.core.write.write INFO     Importing to table bar in dolt directory located in ., import mode update
01-18 11:24:19 doltpy.core.dolt INFO     Rows Processed: 6, Additions: 3, Modifications: 0, Had No Effect: 3
01

In [2]:
# Run the first flow (demo_one.py) inside the project's Poetry environment.
!poetry run python3 demo_one.py run

[35m[1mMetaflow 2.2.5.post32+git1ea1770[0m[35m[22m executing [0m[31m[1mMultiFlowDemo1[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-18 11:24:50.103 [0m[1mWorkflow starting (run-id 1610997890092238):[0m
[35m2021-01-18 11:24:50.113 [0m[32m[1610997890092238/start/1 (pid 35732)] [0m[1mTask is starting.[0m
[35m2021-01-18 11:24:51.968 [0m[32m[1610997890092238/start/1 (pid 35732)] [0m[22m01-18 11:24:51 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:24:52.015 [0m[32m[1610997890092238/start/1 (pid 35732)] [0m[22m01-18 11:24:52 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[3

[35m2021-01-18 11:24:55.303 [0m[32m[1610997890092238/middle/2 (pid 35774)] [0m[1mTask finished successfully.[0m
[35m2021-01-18 11:24:55.310 [0m[32m[1610997890092238/end/3 (pid 35817)] [0m[1mTask is starting.[0m
[35m2021-01-18 11:24:56.430 [0m[32m[1610997890092238/end/3 (pid 35817)] [0m[22m01-18 11:24:56 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes[0m
[35m2021-01-18 11:24:56.437 [0m[32m[1610997890092238/end/3 (pid 35817)] [0m[22m01-18 11:24:56 doltpy.core.system_helpers INFO     No processes to clean up, exiting[0m
[35m2021-01-18 11:24:56.599 [0m[32m[1610997890092238/end/3 (pid 35817)] [0m[1mTask finished successfully.[0m
[35m2021-01-18 11:24:56.600 [0m[1mDone![0m
01-18 11:24:56 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes
01-18 11:24:56 doltpy.core.system_helpers INFO     No processes to clean up, exiting


In [None]:
# TODO: pluck the Flow 1 flow name and run id from the previous step's output instead of hard-coding them in the next cell

In [4]:
# Run the second flow. --flow-dep is "<flow name>/<run id>"; the run id here was
# copied by hand from the Flow 1 output and must be updated after every new Flow 1 run.
!poetry run python3 demo_two.py run --flow-dep MultiFlowDemo1/1610997890092238

[35m[1mMetaflow 2.2.5.post32+git1ea1770[0m[35m[22m executing [0m[31m[1mMultiFlowDemo2[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-18 11:26:05.971 [0m[1mWorkflow starting (run-id 1610997965963222):[0m
[35m2021-01-18 11:26:05.977 [0m[32m[1610997965963222/start/1 (pid 35837)] [0m[1mTask is starting.[0m
[35m2021-01-18 11:26:07.025 [0m[32m[1610997965963222/start/1 (pid 35837)] [0m[22m01-18 11:26:07 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:26:07.117 [0m[32m[1610997965963222/start/1 (pid 35837)] [0m[22m01-18 11:26:07 doltpy.core.dolt INFO     flow_name,run_id,step_name,task_id,kind,database,table_name,commit,timestamp[

[35m2021-01-18 11:26:08.316 [0m[32m[1610997965963222/start/1 (pid 35837)] [0m[1mTask finished successfully.[0m
[35m2021-01-18 11:26:08.323 [0m[32m[1610997965963222/middle/2 (pid 35889)] [0m[1mTask is starting.[0m
[35m2021-01-18 11:26:09.672 [0m[32m[1610997965963222/middle/2 (pid 35889)] [0m[22m01-18 11:26:09 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:26:09.705 [0m[32m[1610997965963222/middle/2 (pid 35889)] [0m[22m01-18 11:26:09 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-18 11:26:09.767 [0m[32m[1610997965963222/middle/2 (pid 35889)] [0m[22m01-18 11:26:09 doltpy.core.dolt INFO     * master                                        	t23vjks4005suj719a5psn9d9v8lthjg[0m
[35m2021-01-18 11:26:09.873 [0m[32m[1610997965963222/middle/2 (pid 35889)] [0m[22m[0m
[35m2021-01-18 11:26:09.873 [0m[32m[1610997965963222/middle/

In [6]:
# Print the source of the second flow for reference.
!cat demo_two.py

import logging

logger = logging.getLogger()

import pickle
import time

from metaflow import FlowSpec, step, DoltDT, Parameter
from metaflow.datatools.dolt import DoltRun
import pandas as pd
from sklearn import tree

class MultiFlowDemo2(FlowSpec):

    flow_dep = Parameter('flow-dep',  help="Specifc the tag for the input version", required=True)

    @step
    def start(self):
        flow, run = self.flow_dep.split("/")
        d = DoltRun(flow_name=flow, run_id=run)
        f_input = d.reads[0]
        f_output = d.writes[0]
        with DoltDT(run=self) as dolt:
            self.inp1 = dolt.read_table(f_input.table_name, commit=f_input.commit)
            self.inp2 = dolt.read_table(f_output.table_name, commit=f_output.commit)

        self.next(self.middle)

    @step
    def middle(self):
        with DoltDT(run=self) as dolt:

            df = self.inp1 + self.inp2

            dolt.write_table(table_name='baz', df=df, pks=['index'])

      