In [None]:
import importlib
import sys
import getpass
import uuid
from jobmon.client.tool import Tool  # type: ignore
from pathlib import Path

import idd_climate_models.constants as rfc
from idd_climate_models.validate_model_functions import *

# Repository layout, read from the package-wide constants module.
repo_name, package_name = rfc.repo_name, rfc.package_name
DATA_DIR = rfc.DATA_PATH
# Directory holding the scripts that the Jobmon tasks execute.
SCRIPT_ROOT = rfc.REPO_ROOT / repo_name / "src" / package_name / "clean_and_split"

# Validate the CMIP6 inputs under DATA_DIR, then keep only the models that the
# validation helpers report as complete.
results = validate_all_models(DATA_DIR, data_source='cmip6', verbose=False)
complete_models = get_complete_models(results)

print(f"\nComplete models: {len(complete_models)}")
print(complete_models)

# Jobmon setup: identify the submitting user and prepare the log directories.
# NOTE(review): `user` is not referenced anywhere in the visible part of this
# file; presumably it was meant to namespace the log directory - confirm.
user = getpass.getuser()

log_dir = Path("/mnt/team/idd/pub/")
stdout_dir, stderr_dir = log_dir / "stdout", log_dir / "stderr"

# Create the log root first, then its stdout/stderr children; existing
# directories are left as they are.
for directory in (log_dir, stdout_dir, stderr_dir):
    directory.mkdir(parents=True, exist_ok=True)

# Cluster accounting settings shared by the workflow and its task template.
project = "proj_rapidresponse"  # Adjust this to your project name if needed
queue = 'all.q'

# A fresh UUID goes into the workflow name, so every execution of this script
# creates a workflow with a different name.
wf_uuid = uuid.uuid4()
tool_name = f"{package_name}_fill_and_split_tool"
tool = Tool(name=tool_name)

workflow = tool.create_workflow(
    name=f"{tool_name}_workflow_{wf_uuid}",
    max_concurrently_running=10000,  # Adjust based on system capacity
)

# Workflow-level default compute resources on the "slurm" cluster.
workflow_default_resources = {
    "memory": "15G",
    "cores": 1,
    "runtime": "5m",
    "queue": queue,
    "project": project,
    "stdout": str(stdout_dir),
    "stderr": str(stderr_dir),
}
workflow.set_default_compute_resources_from_dict(
    cluster_name="slurm",
    dictionary=workflow_default_resources,
)

# Task template: one invocation of fill_and_yearly_split.py per input file.
# Every command-line flag is a node argument, so the flag list and the command
# string are both derived from the same list to keep them in sync.
fill_split_node_args = [
    "model", "variant", "scenario", "variable", "grid", "time_period", "file_path",
]

# The script path is resolved now; each "--flag {flag}" placeholder is left
# for Jobmon to fill in per task. The command ends with a single trailing space.
fill_split_command = (
    f"python {SCRIPT_ROOT}/fill_and_yearly_split.py "
    + " ".join(f"--{arg} {{{arg}}}" for arg in fill_split_node_args)
    + " "
)

# NOTE(review): the template name below does not mention fill/split and looks
# like a leftover from another pipeline - confirm before renaming it, since it
# is the name passed to Jobmon for this template.
task_template = tool.get_task_template(
    template_name="malaria_as_calculation",
    default_cluster_name="slurm",
    default_compute_resources={
        "memory": "30G",
        "cores": 4,
        "runtime": "5m",
        "queue": queue,
        "project": project,
        "stdout": str(stdout_dir),
        "stderr": str(stderr_dir),
    },
    command_template=fill_split_command,
    node_args=fill_split_node_args,
    task_args=[],
    op_args=[],
)

# Build one task per input file by walking the nested validation results:
# model -> variant -> scenario -> variable -> grid -> time period -> files.
tasks = []
for model in complete_models:
    for variant, variant_info in results[model]['variants'].items():
        for scenario, scenario_info in variant_info['scenarios'].items():
            for variable, variable_info in scenario_info['variables'].items():
                for grid, grid_info in variable_info['grids'].items():
                    for time_period, period_info in grid_info['time_periods'].items():
                        for file_path in period_info['files']:
                            # Each level's key is passed through as a node argument.
                            task = task_template.create_task(
                                model=model,
                                variant=variant,
                                scenario=scenario,
                                variable=variable,
                                grid=grid,
                                time_period=time_period,
                                file_path=file_path,
                            )
                            tasks.append(task)

print(f"Number of tasks: {len(tasks)}")

# Submit the workflow in three gated stages: add tasks -> bind -> run.
# Each stage runs only if the previous one succeeded, so an empty task list or
# a failed bind is reported once instead of cascading into a second, misleading
# "submission failed" message from a run() that could not have worked.
if not tasks:
    print("⚠️ No tasks added to workflow. Check task generation.")
else:
    workflow.add_tasks(tasks)
    print("✅ Tasks successfully added to workflow.")

    try:
        workflow.bind()
    except Exception as e:
        # Top-level notebook boundary: report the failure and stop here rather
        # than raising.
        print(f"❌ Workflow binding failed: {e}")
    else:
        print("✅ Workflow successfully bound.")
        print(f"Running workflow with ID {workflow.workflow_id}.")
        print("For full information see the Jobmon GUI:")
        print(f"https://jobmon-gui.ihme.washington.edu/#/workflow/{workflow.workflow_id}")

        try:
            status = workflow.run()
        except Exception as e:
            print(f"❌ Workflow submission failed: {e}")
        else:
            print(f"Workflow {workflow.workflow_id} completed with status {status}.")
