In [None]:
import glob
import os
import pandas as pd
from upath import UPath
import json
import time
import pathlib
from prefect.client import Client
from prefect.backend.flow import FlowView
from prefect.backend.flow_run import FlowRunView, watch_flow_run
from prefect.backend import FlowRunView
from cmip6_downscaling.methods.common.containers import RunParameters
from cmip6_downscaling.utils import str_to_hash
from cmip6_downscaling import __version__ as version, config
from cmip6_downscaling.methods.bcsd.flow import flow as bcsd_flow

In [None]:
# Prefect API client for this notebook session.
# NOTE(review): run_flow builds its own Client, and nothing visible in this notebook
# reads this module-level instance -- confirm it is still needed.
client = Client()

In [None]:
# Register the BCSD flow under the 'cmip6' project; the returned id is what run_flow
# is given when flow runs are created further down.
flow_id = bcsd_flow.register(project_name='cmip6')

In [None]:
def retrieve_test_parms():
    """Return the paths of every ``.json`` parameter file in the flow-runner test config dir.

    The pattern is relative, so the matches depend on the current working directory;
    the order is whatever ``glob.glob`` yields.
    """
    json_pattern = '../../configs/flow_runner_test/*.json'
    return glob.glob(json_pattern)


def run_flow(flow_id: str, param_file_path: str) -> "tuple[str, FlowRunView, str, str, str]":
    """Create a flow run from a JSON parameter file and print its cloud URL.

    Parameters
    ----------
    flow_id : str
        Id of the registered flow to run.
    param_file_path : str
        Path to a JSON file; its decoded content is passed as the run parameters.

    Returns
    -------
    tuple
        ``(flow_run_id, flow_run, run_url, flow_hash, param_file_path)`` where
        ``flow_run`` is the ``FlowRunView`` of the new run, ``run_url`` is the URL
        returned by ``Client.get_cloud_url`` and ``flow_hash`` is ``str_to_hash``
        applied to the raw file text.
    """
    # The hash is computed from the raw file text, before JSON parsing.
    param_text = pathlib.Path(param_file_path).read_text()
    flow_hash = str_to_hash(param_text)
    param_dict = json.loads(param_text)

    client = Client()
    flow_run_id = client.create_flow_run(flow_id=flow_id, parameters=param_dict)
    flow_run = FlowRunView.from_flow_run_id(flow_run_id)
    run_url = client.get_cloud_url("flow-run", flow_run_id)
    print(run_url)
    return flow_run_id, flow_run, run_url, flow_hash, param_file_path


def check_flow_status(flow_run):
    """Block until ``flow_run`` reports a finished state, then return the latest view.

    While the state is not finished, sleeps 3 seconds and refreshes the view with
    ``get_latest()`` before checking again.
    """
    while True:
        if flow_run.state.is_finished():
            return flow_run
        time.sleep(3)
        flow_run = flow_run.get_latest()


def rerun_flow(flow_run, flow_run_id, param_file):
    """Wait for a flow run to finish and retry it once if it did not succeed.

    Parameters
    ----------
    flow_run : FlowRunView
        View of the run to wait on.
    flow_run_id : str
        Id of that run. Kept for call compatibility; it is not used to launch the retry.
    param_file : str
        Path of the JSON parameter file the run was created from.

    Returns
    -------
    str
        ``''`` when the first run or its retry is successful, otherwise the
        parameter file path of the unsuccessful retry.
    """
    flow_run = check_flow_status(flow_run)
    if flow_run.state.is_successful():
        return ''

    # run_flow needs the id of the registered *flow*; the id of the run being retried
    # is not valid there, so take the flow id from the run's own view.
    # we can add more retries here -- more complex for now
    _, retry_run, _, _, param_file_path = run_flow(flow_run.flow_id, param_file)
    retry_run = check_flow_status(retry_run)

    # Apply the same success test as the first attempt, so a retry that finished in any
    # state other than success is reported rather than silently treated as OK.
    return '' if retry_run.state.is_successful() else param_file_path

In [None]:
# Submit one flow run per test parameter file; every run is created before any is awaited.
list_of_params = retrieve_test_parms()
runs = [run_flow(flow_id, param_file_path) for param_file_path in list_of_params]

# Wait on each run through rerun_flow and collect the parameter files it hands back
# (a non-empty return value marks a file whose run did not go through).
failed_param_file_list = []
for flow_run_id, flow_run, run_url, flow_hash, param_file_path in runs:
    rerun_flow_status = rerun_flow(flow_run, flow_run_id, param_file_path)
    if rerun_flow_status:
        failed_param_file_list.append(rerun_flow_status)

In [None]:
# Results location: the configured "storage.results.uri" joined with the package version.
results_dir = UPath(config.get("storage.results.uri")) / version

# Downscaling method handled by this notebook; `method` is read by run_all_param_files.
method = 'bcsd'
downscaling_methods = ['bcsd']

# Prefect CLI command templates. NOTE(review): in the visible notebook these are only
# referenced from commented-out code -- confirm whether they are still needed.
_prefect_register_str = 'prefect register --project "cmip6" -p ../methods/{downscaling_method}/flow.py'
_prefect_run_str = 'prefect run -i "{flow_run_id}" --param-file {param_file}'

In [None]:
def create_run_params_from_json(parameter_fpath: str) -> RunParameters:
    """Build a ``RunParameters`` from a JSON parameter file.

    The file is loaded with ``pandas.read_json``. Each scalar field is taken from the
    first row of the column of the same name; ``train_dates`` and ``predict_dates``
    are two-element lists made of the first two rows of ``train_period`` and
    ``predict_period`` respectively.
    """
    # Scratch notes carried over from the earlier version of this function:
    # run_parameters.run_id_hash
    # target2 = results_dir / 'runs' / run_parameters.run_id / 'latest.json'

    frame = pd.read_json(parameter_fpath)

    scalar_fields = (
        'method',
        'obs',
        'model',
        'member',
        'grid_label',
        'table_id',
        'scenario',
        'variable',
        'latmin',
        'latmax',
        'lonmin',
        'lonmax',
    )
    # getattr keeps attribute-style column access (and its error behaviour).
    kwargs = {name: getattr(frame, name).iloc[0] for name in scalar_fields}

    period_fields = (('train_dates', 'train_period'), ('predict_dates', 'predict_period'))
    for target, column in period_fields:
        kwargs[target] = [getattr(frame, column).iloc[0], getattr(frame, column).iloc[1]]

    return RunParameters(**kwargs)


def register_flow(method: str) -> str:
    """Register the BCSD flow in the 'cmip6' Prefect project and return the id from ``register``.

    ``method`` is accepted but not consulted: the BCSD flow is registered whatever
    value is passed.
    """
    print('registering flow on prefect cloud')
    return bcsd_flow.register(project_name='cmip6')


def check_run_failed(run_id: str) -> bool:
    """Return True when the flow run identified by ``run_id`` is in a failed state.

    The run is looked up once with ``FlowRunView.from_flow_run_id`` and the state of
    that view is tested with ``is_failed()``.
    """
    # A single lookup is enough; fetching the view a second time only repeats the API call.
    flow_run = FlowRunView.from_flow_run_id(run_id)
    return flow_run.state.is_failed()


def check_run_status(run_id: str) -> bool:
    """Return True when the flow run identified by ``run_id`` is in a finished state.

    NOTE(review): a later definition in this file reuses the name ``check_run_status``
    (taking a list of run ids); once that definition runs, this version is no longer
    reachable by name.
    """
    return FlowRunView.from_flow_run_id(run_id).state.is_finished()


# failed_runs = []
# while flow_state_finished_status == False:
#         flow_run = FlowRunView.from_flow_run_id(run_id)
#         flow_state_finished_status = flow_run.state.is_finished()
#         time.sleep(60)
#     flow_status_is_failed = FlowRunView.from_flow_run_id(run_id).state.is_failed()
#     if flow_status_is_failed: #if True, run has failed
#         failed_runs.append(run_id)


# def run_flow(param_file: str, flow_id: str) -> list:

#         print(param_file)
#         sys_output = os.popen(
#             _prefect_run_str.format(flow_run_id=flow_id, param_file=param_file)
#         ).read()
#         run_id = sys_output.split('UUID: ')[1].split('\n')[0]
#         run_url = sys_output.split('URL: ')[1].split('\n')[0]
#         print(run_url)

#         return [run_id, run_url]


def check_run_status(run_id_list: list) -> list:
    """Return the ids from ``run_id_list`` whose flow runs are in a failed state.

    Parameters
    ----------
    run_id_list : list
        Flow run ids to inspect.

    Returns
    -------
    list
        The failed run ids, in input order. Empty when ``run_id_list`` is empty.

    Runs are not waited on here. NOTE(review): a run that is still in progress is
    simply not reported as failed -- confirm callers only pass finished runs.
    """
    # Do not call check_run_status(run_id) in here: that name is bound to this very
    # function, so the call would recurse on the id string instead of checking a single
    # run. The per-run failure check is all that is needed to build the result.
    return [run_id for run_id in run_id_list if check_run_failed(run_id)]


def run_all_param_files(param_file_list: list) -> list:
    """Register the flow and start one flow run per parameter file.

    Parameters
    ----------
    param_file_list : list
        Paths of JSON parameter files to launch.

    Returns
    -------
    list
        The flow run ids, in the same order as ``param_file_list``. The list is also
        printed.

    Reads the module-level ``method`` and passes it to ``register_flow``.
    """
    flow_id = register_flow(method)
    run_id_list = []
    for param_file in param_file_list:
        # run_flow takes (flow_id, param_file_path); the run id is the first item it returns.
        flow_run_id = run_flow(flow_id, param_file)[0]
        run_id_list.append(flow_run_id)
    print(run_id_list)
    return run_id_list

In [None]:
# First pass: collect the test parameter files and hand them to run_all_param_files,
# which registers the flow and starts one run per file.
param_file_list = retrieve_test_parms()
run_id_list = run_all_param_files(param_file_list)

In [None]:
# Display the value captured from the first pass.
run_id_list

In [None]:
# Second pass: find the failed runs from the first pass and relaunch their parameter files.
failed_runs = check_run_status(run_id_list)

# run_all_param_files iterates over parameter file paths, not run ids, so map each failed
# run id back to the file it was launched from.
# Assumes run_id_list holds one run id per entry of param_file_list, in the same order.
failed_run_ids = set(failed_runs)
failed_param_files = [
    param_file
    for param_file, run_id in zip(param_file_list, run_id_list)
    if run_id in failed_run_ids
]
second_failed_runs = run_all_param_files(failed_param_files)