In [10]:
!dolt clone vinai/iris-test

cloning https://doltremoteapi.dolthub.com/vinai/iris-test
[31merror: data repository already exists at iris-test[0m


In [2]:
!dolt clone vinai/iris-model-results

cloning https://doltremoteapi.dolthub.com/vinai/iris-model-results
Retrieving remote informatio0 of 63 chunks complete. 0 chunks being downloaded currently0 of 63 chunks complete. 4 chunks being downloaded currently0 of 63 chunks complete. 8 chunks being downloaded currently4 of 63 chunks complete. 4 chunks being downloaded currently8 of 63 chunks complete. 0 chunks being downloaded currently8 of 63 chunks complete. 2 chunks being downloaded currently10 of 63 chunks complete. 0 chunks being downloaded currentl10 of 63 chunks complete. 4 chunks being downloaded currentl14 of 63 chunks complete. 0 chunks being downloaded currentl14 of 63 chunks complete. 4 chunks being downloaded currentl18 of 63 chunks complete. 0 chunks being downloaded currentl18 of 63 chunks complete. 1 chunks being downloaded currentl19 of 63 chunks complete. 0 chunks being downloaded currentl19 of 63 chunks complete. 4 chunks being downloaded currentl23 of 63 chunks complete. 0 chunks being downloaded currentl23 of

In [4]:
!cat iris_demo.py

from metaflow import FlowSpec, step, DoltDT
import pandas as pd
import pickle
from sklearn import tree

class DoltMLDemoFlow(FlowSpec):
    @step
    def start(self):
        # Start by getting original dataset
        with DoltDT(run=self, doltdb_path='iris-test') as dolt:
            self.test_set = dolt.read_table('iris-test')

        self.next(self.predict)

    @step
    def predict(self):
        with DoltDT(run=self, doltdb_path='iris-model-results') as dolt:
            self.model = pickle.load(open('model.p', 'rb'))
            self.model_type = 'Decision Tree'

            samples = self.test_set['sample']
            y_true = self.test_set['species']
            y_true = y_true.rename('labels')

            test = self.test_set.drop(columns=['species', 'sample'])
            predictions = pd.Series(self.model.predict(test))
            predictions = predictions.rename('predictions')

            self.result = pd.concat([samples, y_true, predictio

In [5]:
!poetry run python3 iris_demo.py run

[35m[1mMetaflow 2.2.5.post14+git4337f78[0m[35m[22m executing [0m[31m[1mDoltMLDemoFlow[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-14 12:05:54.259 [0m[1mWorkflow starting (run-id 1610654754251345):[0m
[35m2021-01-14 12:05:54.265 [0m[32m[1610654754251345/start/1 (pid 2731)] [0m[1mTask is starting.[0m
[35m2021-01-14 12:05:55.315 [0m[32m[1610654754251345/start/1 (pid 2731)] [0m[22m01-14 12:05:55 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-14 12:05:55.587 [0m[32m[1610654754251345/start/1 (pid 2731)] [0m[22m01-14 12:05:55 doltpy.core.dolt INFO     * master                                        	cnt6q9n22svhdvb1n3g90kk43k3b9aol

In [4]:
from metaflow import Flow, get_metadata
from metaflow.datatools.dolt import DoltDT
from doltpy.core import Dolt

def print_data_map(data_map):
    for run_step in data_map.keys():
        for table in data_map[run_step]:
            print('{}, {}'.format(run_step, table))
            #print(data_map[run_step][table])


print("Current metadata provider: %s" % get_metadata())
doltdb_path = './iris-test'
flow = Flow('DoltMLDemoFlow')
run = flow.latest_successful_run
print("Using run: %s" % str(run))

'''
Ex 1: Get all the inputs used by a specific run of a flow
'''
doltdt = DoltDT(run, doltdb_path, 'master')
data_map_for_run = doltdt.get_reads(steps=['start'])
print_data_map(data_map_for_run)

'''
Ex 2: Get all the inputs used by a specific step of a run of a flow
'''
# doltdt = DoltDT(run, doltdb_path, 'vinai/add-rotten-data')
# data_map_for_run = doltdt.get_reads(steps=['start'])
# print_data_map(data_map_for_run)

'''
Ex 3 Outputs are handled identically
'''
doltdt = DoltDT(run, doltdb_path, 'vinai/add-rotten-data')
data_map_flow_outputs = doltdt.get_writes(steps=['stats'])
print_data_map(data_map_flow_outputs)

# d = Dolt('imdb-reviews')

01-14 12:56:25 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306


Current metadata provider: local@/Users/max-hoffman/Documents/sandbox/dolt/metaflow/dolt-demos
Using run: Run('DoltMLDemoFlow/1610654754251345')


01-14 12:56:25 doltpy.core.dolt INFO     * master                                        	cnt6q9n22svhdvb1n3g90kk43k3b9aol

01-14 12:56:25 doltpy.core.dolt INFO     sample,sepal_length,sepal_width,petal_length,petal_width,species
3,4.7,3.2,1.3,0.2,setosa
5,5,3.6,1.4,0.2,setosa
10,4.9,3.1,1.5,0.1,setosa
29,5.2,3.4,1.4,0.2,setosa
32,5.4,3.4,1.5,0.4,setosa
35,4.9,3.1,1.5,0.1,setosa
40,5.1,3.4,1.5,0.2,setosa
42,4.5,2.3,1.3,0.3,setosa
51,7,3.2,4.7,1.4,versicolor
58,4.9,2.4,3.3,1,versicolor
60,5.2,2.7,3.9,1.4,versicolor
62,5.9,3,4.2,1.5,versicolor
63,6,2.2,4,1,versicolor
65,5.6,2.9,3.6,1.3,versicolor
67,5.6,3,4.5,1.5,versicolor
69,6.2,2.2,4.5,1.5,versicolor
70,5.6,2.5,3.9,1.1,versicolor
73,6.3,2.5,4.9,1.5,versicolor
74,6.1,2.8,4.7,1.2,versicolor
75,6.4,2.9,4.3,1.3,versicolor
133,6.4,2.8,5.6,2.2,virginica
136,7.7,3,6.1,2.3,virginica
143,5.8,2.7,5.1,1.9,virginica
147,6.3,2.5,5,1.9,virginica
79,6,2.9,4.5,1.5,versicolor
89,5.6,3,4.1,1.3,versicolor
91,5.5,2.6,4.4,1.2,versicolor
104,6.3,2.9,5.6,1.

1610654754251345/start, iris-test


'\nEx 3 Outputs are handled identically\n'

In [2]:
print('SELECT * FROM `iris-test` AS OF "cnt6q9n22svhdvb1n3g90kk43k3b9aol"', '--result-format', 'csv'], b'', b'could not find a value for this hash\n')

SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' (<ipython-input-2-c65f291c9618>, line 1)

In [5]:
data_map_for_run

{'1610654754251345/start': {'iris-test':    sample sepal_length sepal_width petal_length petal_width     species
  0       3          4.7         3.2          1.3         0.2      setosa
  1       5            5         3.6          1.4         0.2      setosa
  2      10          4.9         3.1          1.5         0.1      setosa
  3      29          5.2         3.4          1.4         0.2      setosa
  4      32          5.4         3.4          1.5         0.4      setosa
  5      35          4.9         3.1          1.5         0.1      setosa
  6      40          5.1         3.4          1.5         0.2      setosa
  7      42          4.5         2.3          1.3         0.3      setosa
  8      51            7         3.2          4.7         1.4  versicolor
  9      58          4.9         2.4          3.3           1  versicolor
  10     60          5.2         2.7          3.9         1.4  versicolor
  11     62          5.9           3          4.2         1.5  versicolor