In [16]:
from datetime import datetime

from metaflow import Metaflow,Flow, Step

import pandas as pd

In [17]:
# List the flows known to the Metaflow client
available_flows = Metaflow().flows
print(available_flows)

[Flow('ArchetypeEstimator')]


## Monitoring of the flow (ArchetypeEstimator)

In [18]:
# Name of the flow whose executions are inspected in the cells below
flowname = "ArchetypeEstimator"

# Open the flow and materialize its runs into a list so they can be
# sliced and iterated several times
flow = Flow(flowname)
runs = [*flow]

In [19]:
# Display the first five entries of `runs`; in the output below these are the
# highest run ids, i.e. the most recent executions
runs[:5]

[Run('ArchetypeEstimator/1578692048443190'),
 Run('ArchetypeEstimator/1578685465592613'),
 Run('ArchetypeEstimator/1578685360207251'),
 Run('ArchetypeEstimator/1578589807410776'),
 Run('ArchetypeEstimator/1578589792399730')]

In [20]:
# Walk every run of the flow and, for each successful one, collect:
#   - one summary row (dates, duration, tags, first training sample, best model) -> df_rundetails
#   - one column of predictions indexed by deckid and named after the run       -> df_allpredictions

# Format of the timestamps once their last four characters have been stripped
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S"

informations = []
allpredictions = []
for i, run in enumerate(runs):

    # Only successful runs are inspected
    if not run.successful:
        continue

    step_start = Step(f"{flowname}/{run.id}/start")
    step_end = run.end_task

    # Resolve the task data of the start step once instead of once per attribute
    start_data = step_start.task.data
    nbr_cardsselected = start_data.limittopcards
    type_execution = start_data.tags_script

    # Collect general information on the flow (start date, end date, execution time).
    # The last four characters of each timestamp are dropped before parsing,
    # so the parsed dates have a one-second resolution.
    started_at = step_start.created_at[:-4]
    startdate = datetime.strptime(started_at, TIMESTAMP_FORMAT)
    enddate = datetime.strptime(step_end.finished_at[:-4], TIMESTAMP_FORMAT)
    timeexecution = (enddate - startdate).total_seconds()

    # Navigate the variables produced by the flow:
    # collect the first sample of the training set
    step_segment_decks = Step(f"{flowname}/{run.id}/segment_decks")
    sample_details = step_segment_decks.task.data.df_decks_totrain.iloc[0][["deckid", "deckname", "archetype"]].values

    # Collect the accuracy and the parameters of the best model.
    # `step` keeps its name because a later cell reads it after the loop.
    step = Step(f"{flowname}/{run.id}/select_and_score")
    score_data = step.task.data
    accuracy = score_data.accuracy
    parameters = score_data.parameters

    # Collect the predictions of the model as a new one-column frame
    # (indexed by deckid, column named after the run) without in-place mutation
    predictions = (
        score_data.df_scored[["deckid", "prediction"]]
        .set_index(["deckid"])
        .rename(columns={"prediction": run.id})
    )
    allpredictions.append(predictions)

    # Print a progress report for every tenth entry of `runs`
    if i % 10 == 0:
        print(f"Run:{run.id}")
        print(f"Started at {started_at}")
        print(f"Run for {timeexecution} seconds")
        print("Number of cards selected :", nbr_cardsselected)
        print("First sample of the training set", sample_details)
        print(f"The best RF with {parameters} haa an accuracy of {round(accuracy,2)}")

    # Store the details on the run
    information = [run.id, startdate, enddate, timeexecution, type_execution, nbr_cardsselected, str(sample_details), parameters, accuracy]
    informations.append(information)

# Wrap up the information collected on the runs
df_rundetails = pd.DataFrame(informations, columns = ["runid","startdate","enddate","timeexecution","type_execution","nbr_cards","firstsample_training","parametersRF","accuracy"])
# pd.concat raises on an empty list, so fall back to an empty frame when no run was successful
df_allpredictions = pd.concat(allpredictions, axis = 1) if allpredictions else pd.DataFrame()

Run:1578692048443190
Started at 2020-01-10T21:34:08
Run for 10.0 seconds
Number of cards selected : 36
First sample of the training set [1199269 'full legendary for ach' 'Shudderwock Shaman']
The best RF with {'n_estimators': 200, 'criterion': 'gini', 'max_depth': None} haa an accuracy of 0.37
Run:1578589696415904
Started at 2020-01-09T17:08:16
Run for 14.0 seconds
Number of cards selected : 13
First sample of the training set [982358 'Set your bets!' 'Yogg Mage']
The best RF with {'n_estimators': 200, 'criterion': 'entropy', 'max_depth': 32} haa an accuracy of 0.34
Run:1578589524777917
Started at 2020-01-09T17:05:24
Run for 15.0 seconds
Number of cards selected : 14
First sample of the training set [884259 'Infest Control Druid' 'Ramp Druid']
The best RF with {'n_estimators': 400, 'criterion': 'gini', 'max_depth': 16} haa an accuracy of 0.3
Run:1578585094454738
Started at 2020-01-09T15:51:34
Run for 14.0 seconds
Number of cards selected : 36
First sample of the training set [1229871 '

In [21]:
# Display up to five randomly chosen rows of the run summary.
# Only the rows shown are drawn (no full shuffle), and random_state pins the
# draw so re-executing the notebook displays the same rows.
df_rundetails.sample(n = min(5, len(df_rundetails)), random_state = 42)

Unnamed: 0,runid,startdate,enddate,timeexecution,type_execution,nbr_cards,firstsample_training,parametersRF,accuracy
0,1578692048443190,2020-01-10 21:34:08,2020-01-10 21:34:18,10.0,nolayer,36,[1199269 'full legendary for ach' 'Shudderwock...,"{'n_estimators': 200, 'criterion': 'gini', 'ma...",0.372727
30,1578585094454738,2020-01-09 15:51:34,2020-01-09 15:51:48,14.0,nolayer,36,[1229871 'Sanctuary of Kabal' 'Reno Priest'],"{'n_estimators': 200, 'criterion': 'entropy', ...",0.409091
18,1578589559885182,2020-01-09 17:05:59,2020-01-09 17:06:12,13.0,mlflow_layer,1,[748849 'freeze mage wild' 'Freeze Mage'],"{'n_estimators': 100, 'criterion': 'entropy', ...",0.209091
16,1578589590426245,2020-01-09 17:06:30,2020-01-09 17:06:46,16.0,mlflow_layer,19,[1139487 'Token Druid' 'Token Druid'],"{'n_estimators': 400, 'criterion': 'entropy', ...",0.336364
28,1578589360841130,2020-01-09 17:02:40,2020-01-09 17:02:55,15.0,mlflow_layer,7,[1050655 'dude-adin' 'Aggro Paladin'],"{'n_estimators': 100, 'criterion': 'entropy', ...",0.154545


In [8]:
# NOTE(review): `step` is not defined in this cell. It is the variable left over from
# the run-collection loop above, i.e. the select_and_score step of the last successful
# run that loop processed. The execution count (In [8]) is also out of sequence with
# the surrounding cells, so confirm this still works after Restart & Run All.
step.task.environment_info

## Monitoring mlruns

In [22]:
import mlflow

In [23]:
# Load the runs of MLflow experiment "1" into df_runs.
# experiment_ids is documented as a list of experiment ids, so the id is wrapped in a list.
df_runs = mlflow.search_runs(experiment_ids=["1"])

In [24]:
# Show the MLflow runs table (the rendered output below is elided in the middle with a "..." row)
df_runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.time_testing,metrics.accuracy,metrics.time_training,params.criterion,params.n_estimators,params.max_depth,tags.mlflow.user,tags.username,tags.mlflow.source.git.commit,tags.metaflow_runid,tags.mlflow.source.name,tags.taskid,tags.mlflow.source.type,tags.stepname
0,df556378fd4c443aad666ec2fe440daa,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-10 19:44:35.758000+00:00,2020-01-10 19:44:36.963000+00:00,0.641280,0.272727,0.641280,entropy,200,4,ubuntu,ubuntu,e1443d805f5eba0ee63bc8087031fe195c981925,1578685465592613,pipeline_crossflow.py,21,LOCAL,build_model
1,de0762f8a2ba4fcb8a11a485b20abdfd,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-10 19:44:35.246000+00:00,2020-01-10 19:44:36.660000+00:00,0.670688,0.363636,0.670688,gini,200,32,ubuntu,ubuntu,e1443d805f5eba0ee63bc8087031fe195c981925,1578685465592613,pipeline_crossflow.py,19,LOCAL,build_model
2,282b5924acaf4c45886070e9402204da,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-10 19:44:35.166000+00:00,2020-01-10 19:44:36.486000+00:00,0.747085,0.363636,0.747085,gini,200,16,ubuntu,ubuntu,e1443d805f5eba0ee63bc8087031fe195c981925,1578685465592613,pipeline_crossflow.py,20,LOCAL,build_model
3,d9f8ff7d16c64fcd9752ce56c1f4b62a,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-10 19:44:35.162000+00:00,2020-01-10 19:44:36.638000+00:00,0.875242,0.190909,0.875242,entropy,400,2,ubuntu,ubuntu,e1443d805f5eba0ee63bc8087031fe195c981925,1578685465592613,pipeline_crossflow.py,18,LOCAL,build_model
4,79eb892d5887415f80582924d0b13478,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-10 19:44:35.109000+00:00,2020-01-10 19:44:36.003000+00:00,0.420034,0.254545,0.420034,gini,200,4,ubuntu,ubuntu,e1443d805f5eba0ee63bc8087031fe195c981925,1578685465592613,pipeline_crossflow.py,17,LOCAL,build_model
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,0301ef2a9bbf41068dc17d38345bb1a7,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:52.060000+00:00,2020-01-09 17:02:53.090000+00:00,0.525378,0.154545,0.525378,entropy,100,,ubuntu,ubuntu,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,pipeline_crossflow.py,19,LOCAL,build_model
136,56bdc468c57146d9b5f137caeb819f83,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.766000+00:00,2020-01-09 17:02:52.996000+00:00,0.663630,0.154545,0.663630,entropy,200,32,ubuntu,ubuntu,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,pipeline_crossflow.py,21,LOCAL,build_model
137,3fa03fe46f6045d884f191e86295bee2,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.705000+00:00,2020-01-09 17:02:52.850000+00:00,0.476407,0.154545,0.476407,gini,100,16,ubuntu,ubuntu,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,pipeline_crossflow.py,20,LOCAL,build_model
138,c8a350a886514b11bc73b1e12e19d46d,1,FINISHED,file:///home/ubuntu/development/metaflow-exper...,2020-01-09 17:02:51.573000+00:00,2020-01-09 17:02:52.397000+00:00,0.256078,0.145455,0.256078,entropy,100,8,ubuntu,ubuntu,f755792182c445c6beb094d266fb2a7d276985b6,1578589360841130,pipeline_crossflow.py,18,LOCAL,build_model
