12 changes: 0 additions & 12 deletions .pre-commit-config.yaml
@@ -18,16 +18,4 @@ repos:
stages: [commit-msg]
verbose: true
entry: bash -c 'commitlint --edit || exit 0'
- repo: local
hooks:
- id: zizmor
name: zizmor
entry: bash -c 'poetry run zizmor .github/workflows/*' --
language: system
pass_filenames: false
# - repo: https://github.com/woodruffw/zizmor-pre-commit
# rev: v1.5.2
# hooks:
# - id: zizmor
# args: [ .github/workflows/* ]

242 changes: 206 additions & 36 deletions tidy3d/web/api/webapi.py
@@ -41,7 +41,7 @@
from tidy3d.web.core.http_util import get_version as _get_protocol_version
from tidy3d.web.core.http_util import http
from tidy3d.web.core.task_core import BatchDetail, BatchTask, Folder, SimulationTask
from tidy3d.web.core.task_info import ChargeType, TaskInfo
from tidy3d.web.core.task_info import AsyncJobDetail, ChargeType, TaskInfo
from tidy3d.web.core.types import PayType

from .connect_util import REFRESH_TIME, get_grid_points_str, get_time_steps_str, wait_for_connection
@@ -95,6 +95,206 @@ def _batch_detail(resource_id: str):
return BatchTask(resource_id).detail(batch_type="RF_SWEEP")


def _batch_detail_error(resource_id: str) -> Optional[WebError]:
"""Processes a failed batch job to generate a detailed error.

This function inspects the status of a batch detail object. If the status
indicates an error, it logs the failure and constructs a specific `WebError`
object to be returned. For validation failures, it parses and aggregates
detailed error messages from each subtask.

Args:
resource_id (str): The identifier of the batch resource that failed.

Returns:
An instance of `WebError` if the batch failed, otherwise `None`.
"""
try:
batch_detail = BatchTask(batch_id=resource_id).detail(batch_type="RF_SWEEP")
status = batch_detail.totalStatus.value
except Exception as e:
log.error(f"Could not retrieve batch details for '{resource_id}': {e}")
return WebError(f"Failed to retrieve status for batch '{resource_id}'.")

if status not in ERROR_STATES:
return None

log.error(f"The ComponentModeler batch '{resource_id}' has failed with status: {status}")

if (
status == "validate_fail"
and hasattr(batch_detail, "validateErrors")
and batch_detail.validateErrors
):
error_details = []
for key, error_str in batch_detail.validateErrors.items():
try:
error_dict = json.loads(error_str)
validation_error = error_dict.get("validation_error", "Unknown validation error.")
msg = f"- Subtask '{key}' failed: {validation_error}"
log.error(msg)
error_details.append(msg)
except (json.JSONDecodeError, TypeError):
# Handle cases where the error string isn't valid JSON
log.error(f"Could not parse validation error for subtask '{key}'.")
error_details.append(f"- Subtask '{key}': Could not parse error details.")

details_string = "\n".join(error_details)
full_error_msg = (
"One or more subtasks failed validation. Please fix the component modeler configuration.\n"
f"Details:\n{details_string}"
)
return WebError(full_error_msg)

# Handle all other generic error states
else:
error_msg = (
f"Batch '{resource_id}' failed with status '{status}'. Check server "
"logs for details or contact customer support."
)
return WebError(error_msg)


def _upload_component_modeler_subtasks(
resource_id: str, verbose: bool = True, solver_version: Optional[str] = None
) -> Optional[WebError]:
"""Kicks off and monitors the split and validation of component modeler tasks.

This function orchestrates a two-phase process. First, it initiates a
server-side asynchronous job to split the components of a modeler batch.
It monitors this job's progress by polling the API and parsing the
response into an `AsyncJobDetail` model until the job completes or fails.

If the split is successful, the function proceeds to the second phase:
triggering a batch validation via `batch.check()`. It then monitors this
validation process by polling for `BatchDetail` updates. The progress bar,
if verbose, reflects the status according to a predefined state mapping.

    Finally, it processes the terminal state of the validation. If a
    'validate_fail' status occurs, it parses detailed error messages for each
    failed subtask and includes them in the returned `WebError`.

Args:
resource_id (str): The identifier for the batch resource to be processed.
verbose (bool): If True, displays progress bars and logs detailed
status messages to the console during the operation.
        solver_version (Optional[str]): Solver version with which to run validation.

    Returns:
        A `WebError` if the asynchronous split job fails or the subsequent batch
        validation ends in an error state (including per-subtask details for a
        'validate_fail' status), otherwise `None`.
"""
console = get_logging_console() if verbose else None
final_error = None
batch_type = "RF_SWEEP"

split_path = "tidy3d/async-biz/component-modeler-split"
payload = {
"batchType": batch_type,
"batchId": resource_id,
"fileName": "modeler.hdf5.gz",
"protocolVersion": _get_protocol_version(),
}

if verbose:
console.log("Starting Modeler and Subtasks Validation...")

initial_resp = http.post(split_path, payload)
split_job_detail = AsyncJobDetail(**initial_resp)
monitor_split_path = f"{split_path}?asyncId={split_job_detail.asyncId}"

if verbose:
progress_bar = Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeElapsedColumn(),
console=console,
)

with progress_bar as progress:
description = "Upload Subtasks"
pbar = progress.add_task(description, completed=split_job_detail.progress, total=100)
while True:
split_job_raw_result = http.get(monitor_split_path)
split_job_detail = AsyncJobDetail(**split_job_raw_result)

progress.update(
pbar, completed=split_job_detail.progress, description=f"[blue]{description}"
)

if split_job_detail.status in END_STATES:
progress.update(
pbar,
completed=split_job_detail.progress,
description=f"[green]{description}",
)
break
time.sleep(RUN_REFRESH_TIME)

if split_job_detail.status in ERROR_STATES:
msg = split_job_detail.message or "An unknown error occurred."
final_error = WebError(f"Component modeler split job failed: {msg}")

if not final_error:
description = "Validating"
pbar = progress.add_task(
completed=10, total=100, description=f"[blue]{description}"
)
batch = BatchTask(resource_id)
batch.check(solver_version=solver_version, batch_type=batch_type)

while True:
batch_detail = batch.detail(batch_type=batch_type)
status = batch_detail.totalStatus
progress_percent = STATE_PROGRESS_PERCENTAGE.get(status, 0)
progress.update(
pbar, completed=progress_percent, description=f"[blue]{description}"
)

if status in POST_VALIDATE_STATES:
progress.update(pbar, completed=100, description=f"[green]{description}")
task_mapping = json.loads(split_job_detail.result)
console.log(
f"Uploaded Subtasks: \n{_task_dict_to_url_bullet_list(task_mapping)}"
)
progress.refresh()
break
elif status in ERROR_STATES:
progress.update(pbar, completed=0, description=f"[red]{description}")
progress.refresh()
break
time.sleep(RUN_REFRESH_TIME)

else:
# Non-verbose mode: Poll for split job completion.
while True:
split_job_raw_result = http.get(monitor_split_path)
split_job_detail = AsyncJobDetail(**split_job_raw_result)
if split_job_detail.status in END_STATES:
break
time.sleep(RUN_REFRESH_TIME)

# Check for split job failure.
if split_job_detail.status in ERROR_STATES:
msg = split_job_detail.message or "An unknown error occurred."
final_error = WebError(f"Component modeler split job failed: {msg}")

# If split succeeded, poll for validation completion.
if not final_error:
batch = BatchTask(resource_id)
batch.check(solver_version=solver_version, batch_type=batch_type)
while True:
batch_detail = batch.detail(batch_type=batch_type)
status = batch_detail.totalStatus
if status in POST_VALIDATE_STATES or status in END_STATES:
break
time.sleep(RUN_REFRESH_TIME)

    return final_error or _batch_detail_error(resource_id=resource_id)


def _task_dict_to_url_bullet_list(data_dict: dict) -> str:
"""
Converts a dictionary into a string formatted as a bullet point list.
@@ -107,6 +307,8 @@ def _task_dict_to_url_bullet_list(data_dict: dict) -> str:
"""
# Use a list comprehension to format each key-value pair
# and then join them together with newline characters.
if data_dict is None:
raise WebError("Error in subtask dictionary data.")
return "\n".join([f"- {key}: '{value}'" for key, value in data_dict.items()])


@@ -391,26 +593,7 @@ def upload(
)

if task_type == "RF":
split_path = "tidy3d/projects/component-modeler-split"
payload = {
"batchType": "RF_SWEEP",
"batchId": resource_id,
"fileName": "modeler.hdf5.gz",
"protocolVersion": _get_protocol_version(),
}
resp = http.post(split_path, payload)
if verbose:
console = get_logging_console()
console.log(
f"Child simulation subtasks are being uploaded to \n{_task_dict_to_url_bullet_list(resp)}"
)
# split (modeler-specific)
batch = BatchTask(resource_id)
# Kick off server-side validation for the RF batch.
batch.check(solver_version=solver_version, batch_type="RF_SWEEP")
if verbose:
# Validation phase
console.log("Validating component modeler and subtask simulations...")
_upload_component_modeler_subtasks(resource_id=resource_id, verbose=verbose)

estimate_cost(task_id=resource_id, solver_version=solver_version, verbose=verbose)

@@ -555,7 +738,7 @@ def start(
solver_version=solver_version, batch_type="RF_SWEEP", worker_group=worker_group
)
if verbose:
console.log(f"Component Modeler '{task_id}' validation succeeded. Starting to solve...")
console.log(f"Component Modeler '{task_id}' validated. Solving...")
return

if priority is not None and (priority < 1 or priority > 10):
@@ -1407,20 +1590,7 @@ def estimate_cost(
return est_flex_unit

elif status in ERROR_STATES:
log.error(f"The ComponentModeler '{task_id}' has failed: {status}")

if status == "validate_fail":
assert d.validateErrors is not None
for key, error in d.validateErrors.items():
# I don't like this ideally but would like to control the endpoint to make this better
error_dict = json.loads(error)
validation_error = error_dict["validation_error"]
log.error(
f"Subtask '{key}' has failed to validate:"
f" \n {validation_error} \n "
f"Fix your component modeler configuration. "
f"Generate subtask simulations locally using `ComponentModelerType.sim_dict`."
)
return _batch_detail_error(resource_id=task_id)

raise WebError("Could not get estimated cost!")

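For reference, a minimal sketch of the per-subtask error aggregation that `_batch_detail_error` performs. It is illustrative only and not part of the diff; the subtask keys and messages are hypothetical, and the payload shape (subtask key mapped to a JSON string containing a `validation_error` field) is inferred from the parsing code above.

# Illustrative sketch (not part of the diff); the entries below are hypothetical.
import json

validate_errors = {
    "smatrix_port_1": '{"validation_error": "Monitor extends outside the simulation domain."}',
    "smatrix_port_2": "not-json",  # unparsable entry, falls into the except branch
}

error_details = []
for key, error_str in validate_errors.items():
    try:
        validation_error = json.loads(error_str).get("validation_error", "Unknown validation error.")
        error_details.append(f"- Subtask '{key}' failed: {validation_error}")
    except (json.JSONDecodeError, TypeError):
        # Mirrors the fallback used when the server returns a non-JSON error string.
        error_details.append(f"- Subtask '{key}': Could not parse error details.")

print("\n".join(error_details))
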
36 changes: 36 additions & 0 deletions tidy3d/web/core/task_info.py
@@ -317,3 +317,39 @@ class BatchDetail(TaskBase):
message: str = None
tasks: list[BatchMember] = []
validateErrors: dict = None


class AsyncJobDetail(TaskBase):
"""
Provides a detailed view of an asynchronous job and its sub-tasks.

    This model represents a long-running operation. The 'result' attribute holds
    the output of a completed job which, for orchestration jobs, is often a
    JSON string mapping sub-task names to their unique IDs.

Attributes:
asyncId: The unique identifier for the asynchronous job.
status: The current overall status of the job (e.g., 'RUNNING', 'COMPLETED').
progress: The completion percentage of the job (from 0.0 to 100.0).
createdAt: The timestamp when the job was created.
completedAt: The timestamp when the job finished (successfully or not).
tasks: A dictionary mapping logical task keys to their unique task IDs.
This is often populated by parsing the 'result' of an orchestration task.
result: The raw string output of the completed job. If the job spawns other
tasks, this is expected to be a JSON string detailing those tasks.
taskBlockInfo: Information on any dependencies blocking the job from running.
message: A human-readable message about the job's status.
"""

asyncId: str
status: str
progress: Optional[float] = None
createdAt: Optional[datetime] = None
completedAt: Optional[datetime] = None
tasks: Optional[dict[str, str]] = None
result: Optional[str] = None
taskBlockInfo: Optional[TaskBlockInfo] = None
message: Optional[str] = None


AsyncJobDetail.update_forward_refs()
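
As a usage illustration (a minimal sketch, not part of the diff), an `AsyncJobDetail` can be built from a polled response and its `result` parsed into the subtask mapping, mirroring how `_upload_component_modeler_subtasks` uses it. The response payload and status value below are hypothetical.

# Illustrative sketch (not part of the diff); the response payload below is hypothetical.
import json

from tidy3d.web.core.task_info import AsyncJobDetail

resp = {
    "asyncId": "abc-123",  # hypothetical async job id
    "status": "success",   # hypothetical terminal status string
    "progress": 100.0,
    "result": '{"smatrix_port_1": "task-001", "smatrix_port_2": "task-002"}',
}

detail = AsyncJobDetail(**resp)
if detail.result:
    # For orchestration jobs, `result` is a JSON string mapping subtask names to task IDs.
    task_mapping = json.loads(detail.result)
    print("\n".join(f"- {name}: '{task_id}'" for name, task_id in task_mapping.items()))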