In [11]:
from datetime import datetime

from metaflow import Metaflow,Flow, Step

import pandas as pd

In [12]:
# List the flows that the Metaflow client can see
known_flows = Metaflow().flows
print(known_flows)

[Flow('ArchetypeEstimator')]


## Monitoring of the flow (ArchetypeEstimator)

In [13]:
# Monitor the execution of the flow
flowname = "ArchetypeEstimator"

# Collect flow and runs information
flow = Flow(flowname)
runs = list(flow)

In [14]:
# Show the first five entries of `runs` (the most recent runs are listed first)
runs[0:5]

[Run('ArchetypeEstimator/11'),
 Run('ArchetypeEstimator/10'),
 Run('ArchetypeEstimator/9'),
 Run('ArchetypeEstimator/8'),
 Run('ArchetypeEstimator/7')]

In [5]:
# Collect, for every successful run of the flow, its timing, its configuration,
# the score of the selected model and the per-deck predictions.
#
# Produces:
#   df_rundetails     - one row per successful run (columns listed in RUN_COLUMNS)
#   df_allpredictions - one column per successful run, indexed by deckid

TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S"
TIMESTAMP_LENGTH = 19  # len("YYYY-MM-DDTHH:MM:SS")
PRINT_EVERY = 10       # print a summary for one run out of PRINT_EVERY
RUN_COLUMNS = [
    "runid",
    "startdate",
    "enddate",
    "timeexecution",
    "type_execution",
    "nbr_cards",
    "firstsample_training",
    "parametersRF",
    "accuracy",
]


def _to_datetime(stamp):
    """Return a Metaflow timestamp as a `datetime`.

    Strings are parsed from their leading ``YYYY-MM-DDTHH:MM:SS`` part, so
    whatever follows the seconds is ignored. Values that are already
    `datetime` objects are returned unchanged.
    NOTE(review): newer Metaflow clients appear to return `datetime` objects
    for `created_at` / `finished_at` - confirm against the installed version.
    """
    if isinstance(stamp, datetime):
        return stamp
    return datetime.strptime(str(stamp)[:TIMESTAMP_LENGTH], TIMESTAMP_FORMAT)


informations = []
allpredictions = []
for i, run in enumerate(runs):

    # Only successful runs are inspected
    if not run.successful:
        continue

    step_start = Step(f"{flowname}/{run.id}/start")
    step_end = run.end_task

    # Fetch the artifacts of the start task once and read both values from it
    start_data = step_start.task.data
    nbr_cardsselected = start_data.limittopcards
    type_execution = start_data.tags_script

    # Collect general information on the flow (start date, end date, execution time)
    startdate = _to_datetime(step_start.created_at)
    enddate = _to_datetime(step_end.finished_at)
    timeexecution = (enddate - startdate).total_seconds()

    # Navigate on the variables produced by the flow:
    # collect the first sample of the training set
    step_segment_decks = Step(f"{flowname}/{run.id}/segment_decks")
    sample_details = (
        step_segment_decks.task.data.df_decks_totrain
        .iloc[0][["deckid", "deckname", "archetype"]]
        .values
    )

    # Collect the accuracy and the parameters of the best model
    step = Step(f"{flowname}/{run.id}/select_and_score")
    scored = step.task.data
    accuracy = scored.accuracy
    parameters = scored.parameters

    # Collect the predictions of the model as a single column named after the run.
    # Built without in-place mutation so the frame loaded from the run is never modified.
    predictions = (
        scored.df_scored[["deckid", "prediction"]]
        .set_index("deckid")
        .rename(columns={"prediction": run.id})
    )
    allpredictions.append(predictions)

    # Print a summary from time to time
    if i % PRINT_EVERY == 0:
        print(f"Run:{run.id}")
        print(f"Started at {startdate.strftime(TIMESTAMP_FORMAT)}")
        print(f"Run for {timeexecution} seconds")
        print("Number of cards selected :", nbr_cardsselected)
        print("First sample of the training set", sample_details)
        print(f"The best RF with {parameters} haa an accuracy of {round(accuracy,2)}")

    # Store the details on the run
    information = [
        run.id,
        startdate,
        enddate,
        timeexecution,
        type_execution,
        nbr_cardsselected,
        str(sample_details),
        parameters,
        accuracy,
    ]
    informations.append(information)

# Wrap up the information collected on the runs
df_rundetails = pd.DataFrame(informations, columns=RUN_COLUMNS)
# pd.concat raises ValueError on an empty list: fall back to an empty frame when no run succeeded
df_allpredictions = pd.concat(allpredictions, axis=1) if allpredictions else pd.DataFrame()

Run:11
Started at 2020-01-10T18:51:11
Run for 216.0 seconds
Number of cards selected : 7
First sample of the training set [616941 'Karazhan Flood Druid (Cheap)' 'Beast Druid']
The best RF with {'n_estimators': 200, 'criterion': 'entropy', 'max_depth': 4} haa an accuracy of 0.26
Run:1
Started at 2020-01-09T18:24:38
Run for 72.0 seconds
Number of cards selected : 33
First sample of the training set [1049171 '678909876' 'Control Paladin']
The best RF with {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 16} haa an accuracy of 0.33


In [6]:
# Shuffle the run details and show the first rows of the shuffled frame
shuffled_rundetails = df_rundetails.sample(frac=1)
shuffled_rundetails.head()

Unnamed: 0,runid,startdate,enddate,timeexecution,type_execution,nbr_cards,firstsample_training,parametersRF,accuracy
3,8,2020-01-10 13:21:27,2020-01-10 13:45:17,1430.0,nolayer,19,[954452 'Stealthy Rogue' 'Prince Rogue'],"{'n_estimators': 400, 'criterion': 'entropy', ...",0.336364
7,1,2020-01-09 18:24:38,2020-01-09 18:25:50,72.0,mlflow_layer,33,[1049171 '678909876' 'Control Paladin'],"{'n_estimators': 100, 'criterion': 'gini', 'ma...",0.327273
5,3,2020-01-09 22:23:05,2020-01-09 22:24:14,69.0,nolayer,34,[1217292 'Grotesque Wild Murloc Shaman Deck' '...,"{'n_estimators': 400, 'criterion': 'gini', 'ma...",0.345455
1,10,2020-01-10 18:47:22,2020-01-10 18:48:31,69.0,nolayer,14,[736109 '[70% WR] Pirogue' 'Pirate Rogue'],"{'n_estimators': 200, 'criterion': 'entropy', ...",0.263636
6,2,2020-01-09 22:12:24,2020-01-09 22:13:31,67.0,nolayer,12,[573980 'Zoolock' 'Zoolock'],"{'n_estimators': 400, 'criterion': 'gini', 'ma...",0.245455


In [7]:
# Look up the select_and_score step of run 8 and show the environment recorded for its task
inspected_run_id = 8
step = Step(f"{flowname}/{inspected_run_id}/select_and_score")
step.task.environment_info

{'type': 'conda',
 'explicit': ['openssl=1.1.1d=h516909a_0',
  'numpy=1.17.0=py37h95a1406_0',
  'libstdcxx-ng=9.2.0=hdf63c60_2',
  'cffi=1.13.2=py37h8022711_0',
  'certifi=2019.11.28=py37_0',
  'scikit-learn=0.22.1=py37hcdab131_1',
  'libgcc-ng=9.2.0=h24d8f2e_2',
  'libblas=3.8.0=14_openblas',
  'requests=2.22.0=py37_1',
  'libgfortran-ng=7.3.0=hdf63c60_4',
  'libcblas=3.8.0=14_openblas',
  'jmespath=0.9.4=py_0',
  'pycparser=2.19=py37_1',
  'libgomp=9.2.0=h24d8f2e_2',
  'boto3=1.9.235=py_0',
  'ncurses=6.1=hf484d3e_1002',
  'botocore=1.12.253=py_0',
  'python=3.7.4=h265db76_1',
  'click=7.0=py_0',
  'ca-certificates=2019.11.28=hecc5488_0',
  'joblib=0.14.1=py_0',
  'six=1.13.0=py37_0',
  'xz=5.2.4=h14c3975_1001',
  'libedit=3.1.20181209=hc058e9b_0',
  's3transfer=0.2.1=py37_0',
  '_openmp_mutex=4.5=0_gnu',
  'libopenblas=0.3.7=h5ec1e0e_6',
  'pytz=2019.3=py_0',
  'tk=8.6.10=hed695b0_0',
  'pysocks=1.7.1=py37_0',
  'python-dateutil=2.8.1=py_0',
  'pyopenssl=19.1.0=py37_0',
  'scipy=1.4

In [15]:
# Location of the code package stored for the task of the step selected earlier
code_package = step.task.code
code_package.path

's3://sc-882357307949-pp-detjkcxwnmoe2-metaflows3bucket-m7g0qak5fu2o/metaflow/ArchetypeEstimator/data/2c/2ce02537b3bce2528c58d4278c7bb1a86510be77'

## Monitoring the MLflow runs (mlruns)

In [8]:
import mlflow

In [9]:
# MLflow experiment whose runs are loaded into a DataFrame.
# `experiment_ids` is documented as a list of IDs, so the single ID is wrapped in a list
# rather than passed as a bare string.
EXPERIMENT_ID = "1"
df_runs = mlflow.search_runs(experiment_ids=[EXPERIMENT_ID])

In [10]:
# Display the runs returned by the MLflow search (pandas elides the middle rows of a long frame)
df_runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.time_testing,metrics.time_training,metrics.accuracy,params.criterion,params.n_estimators,params.max_depth,tags.taskid,tags.metaflow_runid,tags.mlflow.source.git.commit,tags.mlflow.source.name,tags.mlflow.source.type,tags.stepname,tags.username,tags.mlflow.user
0,d99583b002b94e8182c1bc056215721f,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 18:25:26.826000+00:00,2020-01-09 18:25:28.006000+00:00,0.625565,0.625565,0.300000,gini,200,16,22,1,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
1,86e5280f8fd347d28c29aa8ec43fadf3,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 18:25:26.629000+00:00,2020-01-09 18:25:28.501000+00:00,1.164860,1.164860,0.309091,gini,400,16,21,1,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
2,a1a1cdf0a2b844199e44487bfa597c29,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 18:25:26.508000+00:00,2020-01-09 18:25:27.544000+00:00,0.401690,0.401690,0.327273,gini,100,16,18,1,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
3,0e2b7f598f874e21b2748ef4437f5d61,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 18:25:26.502000+00:00,2020-01-09 18:25:27.809000+00:00,0.764293,0.764293,0.327273,entropy,200,8,20,1,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
4,734379064032468587fc800667294514,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 18:25:26.397000+00:00,2020-01-09 18:25:27.475000+00:00,0.498743,0.498743,0.154545,entropy,200,2,19,1,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,0301ef2a9bbf41068dc17d38345bb1a7,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:52.060000+00:00,2020-01-09 17:02:53.090000+00:00,0.525378,0.525378,0.154545,entropy,100,,19,1578589360841130,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
131,56bdc468c57146d9b5f137caeb819f83,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.766000+00:00,2020-01-09 17:02:52.996000+00:00,0.663630,0.663630,0.154545,entropy,200,32,21,1578589360841130,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
132,3fa03fe46f6045d884f191e86295bee2,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.705000+00:00,2020-01-09 17:02:52.850000+00:00,0.476407,0.476407,0.154545,gini,100,16,20,1578589360841130,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
133,c8a350a886514b11bc73b1e12e19d46d,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.573000+00:00,2020-01-09 17:02:52.397000+00:00,0.256078,0.256078,0.145455,entropy,100,8,18,1578589360841130,f755792182c445c6beb094d266fb2a7d276985b6,pipeline_crossflow.py,LOCAL,build_model,ubuntu,ubuntu
