# Reproducibility - resume

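1. Create and commit two versions of the `bar` dataframe -> `v1`, `v2`
2. Run flow 1 (`succeeds_second.py`) with `v1` data -> crashes because flow 2 (`succeeds_first.py`) hasn't run yet
3. Run flow 2 (`succeeds_first.py`) with `v2` data
4. Resume flow 1 (`succeeds_second.py`) successfully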

In [3]:
import pandas as pd
from doltpy.core import Dolt
from doltpy.core.write import import_df

def _commit_snapshot(repo, frame, mode, message):
    """Import ``frame`` into table "bar", commit it, and return the version key.

    Args:
        repo: the Dolt repository object to write to.
        frame: DataFrame to import; its index is turned into an "index"
            column, which serves as the primary key.
        mode: import mode handed to ``import_df`` ("create" or "update" here).
        message: commit message.

    Returns:
        The first key of the one-entry log read right after the commit
        (presumably the new commit's hash - confirm against doltpy).
    """
    import_df(repo, "bar", frame.reset_index(), ["index"], mode)
    repo.add("bar")
    repo.commit(message)
    return list(repo.log(number="1").keys())[0]


# Initialise the Dolt repository "foo" that both flows read from.
dolt = Dolt.init("foo")

# Second snapshot = first snapshot plus three additional rows.
df_v1 = pd.DataFrame({"A": [1] * 3, "B": [1] * 3})
df_v2 = pd.DataFrame({"A": [1] * 3 + [2] * 3, "B": [1] * 3 + [2] * 3})

# v1 / v2 are interpolated into the `--bar-version` arguments of the run cells.
v1 = _commit_snapshot(dolt, df_v1, "create", "Initialize bar")
v2 = _commit_snapshot(dolt, df_v2, "update", "Add rows to bar")

01-17 16:53:34 doltpy.core.dolt INFO     Creating directory foo
01-17 16:53:34 doltpy.core.dolt INFO     Creating a new repo in foo
01-17 16:53:34 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306
01-17 16:53:34 doltpy.core.write.write INFO     Importing to table bar in dolt directory located in foo, import mode create
01-17 16:53:35 doltpy.core.dolt INFO     Rows Processed: 3, Additions: 3, Modifications: 0, Had No Effect: 0
01-17 16:53:35 doltpy.core.dolt INFO     
01-17 16:53:35 doltpy.core.dolt INFO     commit ldjnaiqi9r5n22lalefvt5r2e3eipjkt
Author: Max Hoffman <maximilian.wolfgang1@gmail.com>
Date:   Sun Jan 17 16:53:35 -0800 2021

	Initialize bar


01-17 16:53:35 doltpy.core.write.write INFO     Importing to table bar in dolt directory located in foo, import mode update
01-17 16:53:35 doltpy.core.dolt INFO     Rows Processed: 6, Additions: 3, Modifications: 0, Had No Effect: 3
01-17 16:53:35 doltpy.core.dolt INFO     
01-17 16:53:35

In [2]:
# Show the source of the flow that is attempted first and resumed at the end.
!cat succeeds_second.py

import pickle

from metaflow import FlowSpec, step, DoltDT, Parameter, Flow
import pandas as pd
from sklearn import tree

class SucceedsSecondDemo(FlowSpec):
    """Read table ``bar`` from the ``foo`` Dolt database, double column ``B``
    and write the result to table ``baz``.

    ``start`` also looks up ``SucceedsFirstDemo`` through the Metaflow client,
    so this flow stops in ``start`` until that flow has a successful run.
    """

    # Version of the input data to read; supplied as --bar-version on the command line.
    bar_version = Parameter('bar-version',  help="Specifc the tag for the input version", required=True)

    @step
    def start(self):
        """Load ``bar`` at the requested version and check the upstream flow.

        Raises:
            RuntimeError: if ``SucceedsFirstDemo`` has no successful run.
        """
        # Read the version requested with --bar-version rather than always
        # reading "master"; this mirrors how SucceedsFirstDemo picks its input.
        # NOTE(review): SucceedsFirstDemo passes doltdb_path='foo' where this
        # flow passes database='foo' - confirm which keyword the installed
        # DoltDT expects and align the two flows.
        with DoltDT(run=self, database='foo', branch=self.bar_version) as dolt:
            self.df = dolt.read_table('bar')

        # This lookup is the dependency on the other flow (presumably it raises
        # when SucceedsFirstDemo has never been run - confirm against the
        # Metaflow client docs). The explicit check covers the remaining case
        # where the flow exists but no run of it succeeded.
        first = Flow("SucceedsFirstDemo").latest_successful_run
        if first is None:
            raise RuntimeError("SucceedsFirstDemo has no successful run yet")

        self.next(self.middle)

    @step
    def middle(self):
        """Double column ``B`` of the loaded frame and write it to table ``baz``."""
        with DoltDT(run=self, database='foo', branch=self.bar_version) as dolt:
            # df is the object returned by self.df, so the column assignment
            # below is made in place on that frame.
            df = self.df
            df["B"] = df["B"].map(lambda x: x*2)

            dolt.write_table(table_name='baz', df=df, pks=['index'])

        self.next(self.end)

    @step
    def end(self):
        """Terminal step; nothing left to do."""
        pass


# Running the file as a script hands control to Metaflow's command line.
if __name__ == '__main__':
    SucceedsSecondDemo()


In [36]:
# Show the source of the flow whose successful run SucceedsSecondDemo looks up.
!cat succeeds_first.py

import pickle

from metaflow import FlowSpec, step, DoltDT, Parameter
import pandas as pd
from sklearn import tree

class SucceedsFirstDemo(FlowSpec):
    """Three-step flow over the ``foo`` Dolt database.

    Loads table ``bar`` at the version named by ``--bar-version``, doubles
    column ``B`` and stores the result as table ``baz``.
    """

    bar_version = Parameter(
        'bar-version',
        help="Specifc the tag for the input version",
        required=True,
    )

    @step
    def start(self):
        """Load table ``bar`` into the ``df`` artifact."""
        source = DoltDT(run=self, doltdb_path='foo', branch=self.bar_version)
        with source as dolt:
            self.df = dolt.read_table('bar')

        self.next(self.middle)

    @step
    def middle(self):
        """Double column ``B`` and write the frame to table ``baz``."""

        def twice(value):
            return value * 2

        target = DoltDT(run=self, doltdb_path='foo', branch=self.bar_version)
        with target as dolt:
            # frame is the object returned by self.df, so assigning the
            # column updates that frame in place.
            frame = self.df
            frame["B"] = frame["B"].map(twice)

            dolt.write_table(table_name='baz', df=frame, pks=['index'])

        self.next(self.end)

    @step
    def end(self):
        """Terminal step; no work is performed."""
        pass


# Script entry point: instantiating the flow starts Metaflow's command line.
if __name__ == "__main__":
    SucceedsFirstDemo()


In [20]:
# Step 2: first attempt at SucceedsSecondDemo, passing the v1 identifier from the setup cell.
!poetry run python3 succeeds_second.py run --bar-version $v1

[35m[1mMetaflow 2.2.5.post22+git5ab5047[0m[35m[22m executing [0m[31m[1mSucceedsSecondDemo[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-17 17:37:45.407 [0m[1mWorkflow starting (run-id 1610933865398710):[0m
[35m2021-01-17 17:37:45.418 [0m[32m[1610933865398710/start/1 (pid 29031)] [0m[1mTask is starting.[0m
[35m2021-01-17 17:37:46.482 [0m[32m[1610933865398710/start/1 (pid 29031)] [0m[22m01-17 17:37:46 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-17 17:37:46.536 [0m[32m[1610933865398710/start/1 (pid 29031)] [0m[22m01-17 17:37:46 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m

01-17 17:37:47 doltpy.core.system_helpers INFO     No processes to clean up, exiting


In [21]:
# Step 3: run SucceedsFirstDemo, passing the v2 identifier from the setup cell.
!poetry run python3 succeeds_first.py run --bar-version $v2

[35m[1mMetaflow 2.2.5.post22+git5ab5047[0m[35m[22m executing [0m[31m[1mSucceedsFirstDemo[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-17 17:38:05.472 [0m[1mWorkflow starting (run-id 1610933885461124):[0m
[35m2021-01-17 17:38:05.480 [0m[32m[1610933885461124/start/1 (pid 29078)] [0m[1mTask is starting.[0m
[35m2021-01-17 17:38:06.613 [0m[32m[1610933885461124/start/1 (pid 29078)] [0m[22m01-17 17:38:06 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-17 17:38:06.642 [0m[32m[1610933885461124/start/1 (pid 29078)] [0m[22m01-17 17:38:06 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m


[35m2021-01-17 17:38:09.642 [0m[32m[1610933885461124/middle/2 (pid 29112)] [0m[1mTask finished successfully.[0m
[35m2021-01-17 17:38:09.649 [0m[32m[1610933885461124/end/3 (pid 29152)] [0m[1mTask is starting.[0m
[35m2021-01-17 17:38:10.822 [0m[32m[1610933885461124/end/3 (pid 29152)] [0m[22m01-17 17:38:10 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes[0m
[35m2021-01-17 17:38:10.829 [0m[32m[1610933885461124/end/3 (pid 29152)] [0m[22m01-17 17:38:10 doltpy.core.system_helpers INFO     No processes to clean up, exiting[0m
[35m2021-01-17 17:38:10.987 [0m[32m[1610933885461124/end/3 (pid 29152)] [0m[1mTask finished successfully.[0m
[35m2021-01-17 17:38:10.988 [0m[1mDone![0m
01-17 17:38:10 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes
01-17 17:38:10 doltpy.core.system_helpers INFO     No processes to clean up, exiting


In [34]:
# Step 4: resume SucceedsSecondDemo, naming `start` as the step to resume from.
# NOTE(review): the saved output below reports Metaflow 2.2.5.post19 and
# 2021-01-15 timestamps, whereas the run cells above report post22 and
# 2021-01-17 - it looks like output from an earlier session; re-run to refresh.
!poetry run python3 succeeds_second.py resume start

[35m[1mMetaflow 2.2.5.post19+gitd26b8cd[0m[35m[22m executing [0m[31m[1mSucceedsSecondDemo[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-15 15:38:34.003 [0m[22mGathering required information to resume run (this may take a bit of time)...[0m
[35m2021-01-15 15:38:34.011 [0m[1mWorkflow starting (run-id 1610753914002543):[0m
[35m2021-01-15 15:38:34.018 [0m[32m[1610753914002543/start/1 (pid 12116)] [0m[1mTask is starting.[0m
[35m2021-01-15 15:38:35.150 [0m[32m[1610753914002543/start/1 (pid 12116)] [0m[22m01-15 15:38:35 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-15 15:38:35.240 [0m[32m[1610753914002543/start/1 (pid 12116)] [0m

[35m2021-01-15 15:38:39.040 [0m[32m[1610753914002543/end/3 (pid 12181)] [0m[22m01-15 15:38:39 doltpy.core.system_helpers INFO     No processes to clean up, exiting[0m
[35m2021-01-15 15:38:39.202 [0m[32m[1610753914002543/end/3 (pid 12181)] [0m[1mTask finished successfully.[0m
[35m2021-01-15 15:38:39.203 [0m[1mDone![0m
01-15 15:38:39 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes
01-15 15:38:39 doltpy.core.system_helpers INFO     No processes to clean up, exiting
