In [5]:
import importlib
import sys
import getpass
import uuid
import os
import copy
from jobmon.client.tool import Tool 
from pathlib import Path

# NOTE: Imports required for the execution logic
import idd_climate_models.constants as rfc
from idd_climate_models.dictionary_utils import parse_results
from idd_climate_models.io_compare_utils import compare_model_validation, filter_tc_risk_models
from idd_climate_models.validation_functions import create_validation_dict, validate_all_models_in_source 
# ----------------------------------------------------------------------------

# --- CONSTANT DEFINITIONS ---
# Values read from the shared project constants module (imported as `rfc`).
repo_name = rfc.repo_name
package_name = rfc.package_name
NUM_DRAWS = rfc.NUM_DRAWS
TC_RISK_INPUT_PATH = rfc.TC_RISK_INPUT_PATH
TC_RISK_OUTPUT_PATH = rfc.TC_RISK_OUTPUT_PATH

# Resolves to <REPO_ROOT>/<repo_name>/src/<package_name>/01_run_tc_risk
SCRIPT_ROOT = rfc.REPO_ROOT / repo_name / "src" / package_name / "01_run_tc_risk"

# Configuration
DATA_SOURCE = "cmip6"

# (data type, io type) pairs for the two sides compared in Stage 1.
INPUT_DATA_TYPE, INPUT_IO_TYPE = "tc_risk", "input"
OUTPUT_DATA_TYPE, OUTPUT_IO_TYPE = "tc_risk", "output"

# ============================================================================
# STAGE 1: DATA SETUP & VALIDATION (Identify Complete Inputs)
# ============================================================================

# Compare the input side against the output side for DATA_SOURCE.
# The returned mapping's "output_validation_dict" entry is consumed by the
# next cell.
validation_info = compare_model_validation(
    data_source=DATA_SOURCE,
    input_data_type=INPUT_DATA_TYPE,
    input_io_type=INPUT_IO_TYPE,
    output_data_type=OUTPUT_DATA_TYPE,
    output_io_type=OUTPUT_IO_TYPE,
    verbose=False,
)




Validation complete for: tc_risk, input, cmip6
Summary: 22/22 models complete. Parsed log (up to 'scenario') written to /mnt/team/rapidresponse/pub/tropical-storms/tc_risk/input/cmip6/validation_log.json

Validation complete for: tc_risk, output, cmip6
Summary: 6/22 models complete. Parsed log (up to 'basin') written to /mnt/team/rapidresponse/pub/tropical-storms/tc_risk/output/cmip6/validation_log.json


In [6]:
# `validation_info` is the value returned by compare_model_validation(...) in the
# previous cell. Only its "output_validation_dict" entry is needed here; it is
# passed to filter_tc_risk_models, whose result is read below by three keys.
output_validation_dict = validation_info["output_validation_dict"]
filtered_results = filter_tc_risk_models(output_validation_dict)

# Group 1: models that are complete.
complete_models = filtered_results["complete_models"]
print(f"✅ Models Complete: {complete_models}")

# Group 2: models that are missing all basins other than GL.
only_gl_models = filtered_results["missing_all_other_basins"]
print(f"⚠️ Models with Only GL Basin: {only_gl_models}")

# Group 3: models that have some basins but don't have enough files.
# This entry is a dictionary of {model_name: [issue_details, ...]}; only the
# model names (the keys) are printed.
insufficient_file_models = filtered_results["incomplete_file_count"]
print(f"❌ Models with Insufficient Files (Criterion #3 Failures): {set(insufficient_file_models)}")

✅ Models Complete: {'MPI-ESM1-2-HR', 'EC-Earth3-Veg-LR', 'EC-Earth3-Veg', 'EC-Earth3', 'MRI-ESM2-0', 'MIROC6'}
⚠️ Models with Only GL Basin: {'ACCESS-ESM1-5', 'BCC-CSM2-MR', 'IPSL-CM6A-LR', 'CanESM5', 'IITM-ESM', 'MPI-ESM1-2-LR', 'CMCC-CM2-SR5', 'CMCC-ESM2', 'ACCESS-CM2', 'INM-CM4-8', 'TaiESM1', 'NorESM2-MM', 'NESM3', 'NorESM2-LM', 'FGOALS-g3', 'INM-CM5-0'}
❌ Models with Insufficient Files (Criterion #3 Failures): set()
