In [1]:
from datetime import datetime

from metaflow import Metaflow,Flow, Step

import pandas as pd

In [8]:
import metaflow

In [9]:
# Show the version string of the imported metaflow package
metaflow.__version__

'2.0.1'

In [2]:
# List the flows exposed by the Metaflow client
known_flows = Metaflow().flows
print(known_flows)

[Flow('ArchetypeEstimator')]


## Monitoring of the flow (ArchetypeEstimator)

In [10]:
# Name of the flow whose executions are inspected in the cells below
flowname = "ArchetypeEstimator"

# Build the flow handle, then materialise all of its runs into a list
flow = Flow(flowname)
runs = [run for run in flow]

In [11]:
# Display the first five entries of `runs`
# NOTE(review): presumably these are the five most recent runs, assuming the
# flow yields its runs newest first - confirm against the Metaflow client docs
runs[:5]

[Run('ArchetypeEstimator/1')]

In [5]:
# Walk through every run of the flow and, for each successful one, collect:
#   - its timing (start date, end date, duration in seconds),
#   - its configuration (number of cards selected, type of execution),
#   - the first sample of its training set,
#   - the parameters and accuracy of its best model,
#   - its predictions (one column per run, indexed by deckid).
# `runs` and `flowname` are defined in an earlier cell.
informations = []
allpredictions = []
for i, run in enumerate(runs):

    # Unsuccessful runs are skipped entirely
    if not run.successful:
        continue

    step_start = Step(f"{flowname}/{run.id}/start")
    step_end = run.end_task

    # Read the artifacts of the start step once instead of once per attribute
    start_data = step_start.task.data
    nbr_cardsselected = start_data.limittopcards
    type_execution = start_data.tags_script

    # Collect general informations on the flow (startdate, enddate, execution time)
    # The last four characters of each timestamp are dropped before parsing
    # NOTE(review): presumably a sub-second / timezone suffix - confirm for
    # the installed Metaflow version
    startdate = datetime.strptime(step_start.created_at[:-4], "%Y-%m-%dT%H:%M:%S")
    enddate = datetime.strptime(step_end.finished_at[:-4], "%Y-%m-%dT%H:%M:%S")
    timeexecution = (enddate - startdate).total_seconds()

    # Navigate on the variables produced by the flow
    # Collect the first sample of the training set
    step_segment_decks = Step(f"{flowname}/{run.id}/segment_decks")
    sample_details = step_segment_decks.task.data.df_decks_totrain.iloc[0][["deckid","deckname","archetype"]].values

    # Collect the accuracy and the parameters of the best model
    # (the name `step` is kept: a later cell inspects `step.task`)
    step = Step(f"{flowname}/{run.id}/select_and_score")
    scored_data = step.task.data
    accuracy = scored_data.accuracy
    parameters = scored_data.parameters

    # Collect the predictions of the model as a single column named after the
    # run and indexed by deckid. A new frame is built instead of mutating the
    # column subset in place.
    predictions = (
        scored_data.df_scored[["deckid","prediction"]]
        .set_index("deckid")
        .rename(columns={"prediction": run.id})
    )
    allpredictions.append(predictions)

    # Print a progress report for the runs whose position in `runs` is a multiple of 10
    if i % 10 == 0:
        print(f"Run:{run.id}")
        print(f"Started at {step_start.created_at[:-4]}")
        print(f"Run for {timeexecution} seconds")
        print("Number of cards selected :", nbr_cardsselected)
        print("First sample of the training set", sample_details)
        print(f"The best RF with {parameters} haa an accuracy of {round(accuracy,2)}")

    # Store the details on the run
    information = [run.id, startdate, enddate, timeexecution, type_execution, nbr_cardsselected, run.successful, str(sample_details), parameters, accuracy]
    informations.append(information)

# Wrap up the information collected on the runs
df_rundetails = pd.DataFrame(informations, columns = ["runid","startdate","enddate","timeexecution","type_execution","nbr_cards","is_successful","firstsample_training","parametersRF","accuracy"])

# pd.concat raises a ValueError on an empty list, so fall back to an empty
# frame when no run was successful
if allpredictions:
    df_allpredictions = pd.concat(allpredictions, axis = 1)
else:
    df_allpredictions = pd.DataFrame()

Run:1
Started at 2020-01-09T18:24:38
Run for 72.0 seconds
Number of cards selected : 33
First sample of the training set [1049171 '678909876' 'Control Paladin']
The best RF with {'n_estimators': 100, 'criterion': 'gini', 'max_depth': 16} haa an accuracy of 0.33


In [6]:
# Shuffle the run details, then show the first rows of the shuffled frame
shuffled_rundetails = df_rundetails.sample(frac=1)
shuffled_rundetails.head()

Unnamed: 0,runid,startdate,enddate,timeexecution,type_execution,nbr_cards,is_successful,firstsample_training,parametersRF,accuracy
0,1,2020-01-09 18:24:38,2020-01-09 18:25:50,72.0,mlflow_layer,33,True,[1049171 '678909876' 'Control Paladin'],"{'n_estimators': 100, 'criterion': 'gini', 'ma...",0.327273


In [7]:
# Preview the first rows of the predictions gathered from the runs
# (one column per run, indexed by deckid)
df_allpredictions.head()

Unnamed: 0_level_0,1
deckid,Unnamed: 1_level_1
335523,Control Priest
479242,Tempo Mage
207073,N'Zoth Paladin
267084,Secrets Hunter
464777,Token Druid


In [70]:
# Show the `environment_info` and `code` attributes of the task behind `step`.
# NOTE(review): `step` is not defined in this cell; it is the variable left
# over from the run-collection loop (the select_and_score step of the last
# successful run processed), so this cell fails if that loop found no
# successful run.
step.task.environment_info, step.task.code

(None, None)

## Monitoring mlruns

In [77]:
import mlflow

In [78]:
# Search the runs recorded under MLflow experiment "1".
# `experiment_ids` is documented as a list of experiment IDs, so the ID is
# wrapped in a list instead of being passed as a bare string.
df_runs = mlflow.search_runs(experiment_ids=["1"])

In [79]:
# Display the table of MLflow runs returned by the search
df_runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy,metrics.time_testing,metrics.time_training,params.max_depth,params.n_estimators,params.criterion,tags.mlflow.source.git.commit,tags.metaflow_runid,tags.taskid,tags.mlflow.source.name,tags.stepname,tags.mlflow.user,tags.mlflow.source.type,tags.username
0,0134191e04f74329bc2dc7e91aa2bd0b,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:10:17.115000+00:00,2020-01-09 17:10:20.037000+00:00,0.336364,1.697639,1.697639,,400,entropy,f755792182c445c6beb094d266fb2a7d276985b6,1578589807410776,21,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
1,6a2227da4bfc46849c8586cc82cb2239,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:10:17.109000+00:00,2020-01-09 17:10:19.543000+00:00,0.336364,1.419619,1.419619,,400,entropy,f755792182c445c6beb094d266fb2a7d276985b6,1578589807410776,17,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
2,2b7399c76520470c8ee7a0f16554b6d4,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:10:16.922000+00:00,2020-01-09 17:10:19.670000+00:00,0.363636,1.512061,1.512061,32,400,gini,f755792182c445c6beb094d266fb2a7d276985b6,1578589807410776,19,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
3,fb0d842bc9344be8ae413ed152678b9b,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:10:16.905000+00:00,2020-01-09 17:10:18.158000+00:00,0.336364,0.343687,0.343687,16,100,gini,f755792182c445c6beb094d266fb2a7d276985b6,1578589807410776,18,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
4,8c65139cdeee4660b7ab9ff7eb002ce4,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:10:16.735000+00:00,2020-01-09 17:10:19.146000+00:00,0.345455,1.267203,1.267203,,400,gini,f755792182c445c6beb094d266fb2a7d276985b6,1578589807410776,20,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,0301ef2a9bbf41068dc17d38345bb1a7,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:52.060000+00:00,2020-01-09 17:02:53.090000+00:00,0.154545,0.525378,0.525378,,100,entropy,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,19,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
126,56bdc468c57146d9b5f137caeb819f83,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.766000+00:00,2020-01-09 17:02:52.996000+00:00,0.154545,0.663630,0.663630,32,200,entropy,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,21,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
127,3fa03fe46f6045d884f191e86295bee2,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.705000+00:00,2020-01-09 17:02:52.850000+00:00,0.154545,0.476407,0.476407,16,100,gini,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,20,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
128,c8a350a886514b11bc73b1e12e19d46d,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.573000+00:00,2020-01-09 17:02:52.397000+00:00,0.145455,0.256078,0.256078,8,100,entropy,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,18,pipeline_crossflow.py,build_model,ubuntu,LOCAL,ubuntu
