# DoltHub Demo

1. Clone repos locally from DoltHub remote
2. Run flow locally
3. Inspect results

In [1]:
!dolt clone vinai/iris-test

cloning https://doltremoteapi.dolthub.com/vinai/iris-test
Retrieving remote informatio0 of 9 chunks complete. 0 chunks being downloaded currentl0 of 9 chunks complete. 2 chunks being downloaded currentl2 of 9 chunks complete. 0 chunks being downloaded currentl2 of 9 chunks complete. 7 chunks being downloaded currentl9 of 9 chunks complete. 0 chunks being downloaded currently.


In [2]:
!dolt clone vinai/iris-model-results

cloning https://doltremoteapi.dolthub.com/vinai/iris-model-results
Retrieving remote informatio0 of 63 chunks complete. 0 chunks being downloaded currently0 of 63 chunks complete. 4 chunks being downloaded currently4 of 63 chunks complete. 0 chunks being downloaded currently4 of 63 chunks complete. 4 chunks being downloaded currently4 of 63 chunks complete. 6 chunks being downloaded currently8 of 63 chunks complete. 2 chunks being downloaded currently10 of 63 chunks complete. 0 chunks being downloaded currentl10 of 63 chunks complete. 4 chunks being downloaded currentl10 of 63 chunks complete. 5 chunks being downloaded currentl10 of 63 chunks complete. 9 chunks being downloaded currentl11 of 63 chunks complete. 8 chunks being downloaded currentl15 of 63 chunks complete. 4 chunks being downloaded currentl19 of 63 chunks complete. 0 chunks being downloaded currentl19 of 63 chunks complete. 4 chunks being downloaded currentl19 of 63 chunks complete. 5 chunks being downloaded currentl23 of

In [3]:
# Print the source of the flow script that is executed in the next step.
!cat iris_demo.py

import logging

logger = logging.getLogger()

from metaflow import FlowSpec, step, DoltDT
import pandas as pd
import pickle
from sklearn import tree

class DoltMLDemoFlow(FlowSpec):
    @step
    def start(self):
        # Start by getting original dataset
        with DoltDT(run=self, database='iris-test') as dolt:
            self.test_set = dolt.read_table('iris-test')

        self.next(self.predict)

    @step
    def predict(self):
        with DoltDT(run=self, database='iris-model-results') as dolt:
            self.model = pickle.load(open('model.p', 'rb'))
            self.model_type = 'Decision Tree'

            samples = self.test_set['sample']
            y_true = self.test_set['species']
            y_true = y_true.rename('labels')

            test = self.test_set.drop(columns=['species', 'sample'])
            predictions = pd.Series(self.model.predict(test))
            predictions = predictions.rename('predictions')

            self.re

In [4]:
# Execute iris_demo.py with its `run` argument through `poetry run`.
!poetry run python3 iris_demo.py run

[35m[1mMetaflow 2.2.5.post24+git1f18147[0m[35m[22m executing [0m[31m[1mDoltMLDemoFlow[0m[35m[22m[0m[35m[22m for [0m[31m[1muser:max-hoffman[0m[35m[22m[K[0m[35m[22m[0m
[35m[22mValidating your flow...[K[0m[35m[22m[0m
[32m[1m    The graph looks good![K[0m[32m[1m[0m
[35m[22mRunning pylint...[K[0m[35m[22m[0m
[32m[1m    Pylint is happy![K[0m[32m[1m[0m
[35m2021-01-17 19:04:21.920 [0m[1mWorkflow starting (run-id 1610939061912544):[0m
[35m2021-01-17 19:04:21.929 [0m[32m[1610939061912544/start/1 (pid 31821)] [0m[1mTask is starting.[0m
[35m2021-01-17 19:04:23.494 [0m[32m[1610939061912544/start/1 (pid 31821)] [0m[22m01-17 19:04:23 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[35m2021-01-17 19:04:23.554 [0m[32m[1610939061912544/start/1 (pid 31821)] [0m[22m01-17 19:04:23 doltpy.core.dolt INFO     Creating engine for Dolt SQL Server instance running on 127.0.0.1:3306[0m
[3

[35m2021-01-17 19:04:28.248 [0m[32m[1610939061912544/end/3 (pid 31895)] [0m[22m01-17 19:04:28 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes[0m
[35m2021-01-17 19:04:28.256 [0m[32m[1610939061912544/end/3 (pid 31895)] [0m[22m01-17 19:04:28 doltpy.core.system_helpers INFO     No processes to clean up, exiting[0m
[35m2021-01-17 19:04:28.425 [0m[32m[1610939061912544/end/3 (pid 31895)] [0m[1mTask finished successfully.[0m
[35m2021-01-17 19:04:28.425 [0m[1mDone![0m
01-17 19:04:28 doltpy.core.system_helpers INFO     Before exiting cleaning up child processes
01-17 19:04:28 doltpy.core.system_helpers INFO     No processes to clean up, exiting


In [7]:
!dolt sql -q "SELECT `flow_name`, `run_id` from metadata;"

+----------------+------------------+
| flow_name      | run_id           |
+----------------+------------------+
| DoltMLDemoFlow | 1610920919008498 |
| DoltMLDemoFlow | 1610920919008498 |
| DoltMLDemoFlow | 1610939061912544 |
| DoltMLDemoFlow | 1610939061912544 |
+----------------+------------------+


In [11]:
import logging

# Raise the root logger's threshold to WARNING before the Dolt integration is
# imported -- presumably to keep INFO lines like those in the flow run output
# out of the cells that follow.
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

from metaflow.datatools.dolt import DoltRun

# Handle for the flow run executed earlier in this notebook
# (run-id 1610939061912544); its `reads` and `writes` are inspected next.
d = DoltRun(
    flow_name="DoltMLDemoFlow",
    run_id="1610939061912544",
)

In [12]:
# Display the data attached to the first entry (index 0) of the run's reads.
d.reads[0].data

Unnamed: 0,sample,sepal_length,sepal_width,petal_length,petal_width,species
0,3,4.7,3.2,1.3,0.2,setosa
1,5,5.0,3.6,1.4,0.2,setosa
2,10,4.9,3.1,1.5,0.1,setosa
3,29,5.2,3.4,1.4,0.2,setosa
4,32,5.4,3.4,1.5,0.4,setosa
5,35,4.9,3.1,1.5,0.1,setosa
6,40,5.1,3.4,1.5,0.2,setosa
7,42,4.5,2.3,1.3,0.3,setosa
8,51,7.0,3.2,4.7,1.4,versicolor
9,58,4.9,2.4,3.3,1.0,versicolor


In [13]:
# Display the data attached to the first entry (index 0) of the run's writes.
d.writes[0].data

Unnamed: 0,sample,labels,predictions
0,3,setosa,setosa
1,5,setosa,setosa
2,10,setosa,setosa
3,29,setosa,setosa
4,32,setosa,setosa
5,35,setosa,setosa
6,40,setosa,setosa
7,42,setosa,setosa
8,51,versicolor,versicolor
9,58,versicolor,versicolor
