In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes so edits to imported source files are picked up automatically.
%load_ext autoreload
%autoreload 2

In [None]:
import hydra
import mlflow
import pandas as pd
import plotly.express as px

In [None]:
# Compose the 'train' Hydra configuration from the project's config module.
# Hydra is only initialized for the duration of the with-block; the composed
# configuration object is kept in `cfg`.
with hydra.initialize_config_module(config_module='smc01.postprocessing.conf'):
    cfg = hydra.compose(config_name='train')

In [None]:
# MLflow client bound to the tracking server named in the Hydra config.
client = mlflow.tracking.MlflowClient(
    tracking_uri=cfg.logging.mlflow.tracking_uri,
)

In [None]:
# Column layout of the frame built by runs_to_df; declared up front so that an
# empty run list still yields a frame with the expected columns.
_RUN_COLUMNS = [
    'model',
    'train_begin',
    'val_begin',
    'min_rmse',
    'start_time',
    'end_time',
    'station_subset',
    'run_name',
    'freeze_upper',
    'split_name',
]


def _first_param(params, *keys, default=''):
    """Return the value of the first of ``keys`` present in ``params``, else ``default``."""
    for key in keys:
        if key in params:
            return params[key]
    return default


def runs_to_df(runs, mlflow_client=None):
    """Summarize MLflow runs as a DataFrame with one row per run.

    Parameters
    ----------
    runs : iterable
        Run objects exposing ``info.run_id``, ``info.start_time``,
        ``info.end_time``, ``data.params`` and ``data.tags``.
    mlflow_client : optional
        Object providing ``get_metric_history(run_id, key)``. When omitted,
        the notebook-level ``client`` is used.

    Returns
    -------
    pandas.DataFrame
        Columns as listed in ``_RUN_COLUMNS``. ``start_time`` and ``end_time``
        are converted from epoch milliseconds to datetimes. ``min_rmse`` is the
        smallest non-NaN 'Val/RMSE' value logged for the run, or missing when
        there is none. An empty ``runs`` gives an empty frame with the same
        columns.
    """
    if mlflow_client is None:
        # Fall back to the client defined at notebook level.
        mlflow_client = client

    rows = []
    for run in runs:
        params = run.data.params

        history = mlflow_client.get_metric_history(run.info.run_id, 'Val/RMSE')
        # Drop NaN entries: min() over a sequence containing NaN depends on
        # where the NaN happens to sit in the sequence.
        rmse_values = [m.value for m in history if not pd.isna(m.value)]

        rows.append({
            # Parameter names appear with either '.' or '/' as the separator,
            # so both spellings are tried for every nested parameter.
            'model': _first_param(params, 'model._target_', 'model/_target_').split('.')[-1],
            'train_begin': _first_param(params, 'split.train_begin', 'split/train_begin'),
            'val_begin': _first_param(params, 'split.val_begin', 'split/val_begin'),
            'min_rmse': min(rmse_values) if rmse_values else None,
            'start_time': run.info.start_time,
            'end_time': run.info.end_time,
            'station_subset': _first_param(
                params, 'dataset/station_set_file', 'dataset.station_set_file'
            ),
            # A run without a name tag gets '' instead of raising KeyError.
            'run_name': run.data.tags.get('mlflow.runName', ''),
            # Parameter values are compared as strings; only 'True' counts.
            'freeze_upper': params.get('freeze_upper') == 'True',
            'split_name': _first_param(params, 'split.name', 'split/name'),
        })

    df = pd.DataFrame(rows, columns=_RUN_COLUMNS)
    for column in ('start_time', 'end_time'):
        df[column] = pd.to_datetime(df[column], unit='ms')

    return df

In [None]:
# Fetch the active runs of MLflow experiment "2" with no extra filter.
# NOTE(review): MLflow paginates search results; confirm the experiment has
# fewer runs than the default `max_results`, or page through with `page_token`.
exp_02_runs = client.search_runs(
    # The documented signature takes a list of experiment ID strings.
    experiment_ids=["2"],
    filter_string="",
    run_view_type=mlflow.entities.ViewType.ACTIVE_ONLY,
)

In [None]:
# Fetch the active runs of MLflow experiment "10" with no extra filter.
# NOTE(review): MLflow paginates search results; confirm the experiment has
# fewer runs than the default `max_results`, or page through with `page_token`.
exp_10_runs = client.search_runs(
    # The documented signature takes a list of experiment ID strings.
    experiment_ids=["10"],
    filter_string="",
    run_view_type=mlflow.entities.ViewType.ACTIVE_ONLY,
)

In [None]:
# Stack the run summaries of both experiments into a single frame.
# ignore_index=True renumbers the rows so each run has a unique label instead
# of the two inputs' labels being repeated.
df = pd.concat(
    [runs_to_df(exp_02_runs), runs_to_df(exp_10_runs)],
    ignore_index=True,
)

In [None]:
# Elapsed time of each run, computed as end time minus start time.
df['duration'] = df['end_time'].sub(df['start_time'])

In [None]:
# Total run time across all collected runs. Only the duration column is
# summed: a frame-wide sum would concatenate the string columns and cannot
# add up the datetime columns.
df['duration'].sum()