Skip to content

Commit

Permalink
fix:show the error message when job is deleted in bohrium (#304)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
xiaoyeqiannian and pre-commit-ci[bot] committed Jan 29, 2023
1 parent 99e2611 commit e955685
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 19 deletions.
28 changes: 15 additions & 13 deletions dpdispatcher/dp_cloud_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,19 @@ def do_submit(self, job):
job.job_state = JobStatus.waiting
return job_id

def _get_job_detail(self, job_id, group_id):
check_return = self.api.get_job_detail(job_id)
assert check_return is not None, (
f"Failed to retrieve tasks information. To resubmit this job, please "
f"try again, if this problem still exists please delete the submission "
f"file and try again.\nYou can check submission.submission_hash in the "
f'previous log or type `grep -rl "{job_id}:job_group_id:{group_id}" '
f"~/.dpdispatcher/dp_cloud_server/` to find corresponding file. "
f"You can try with command:\n "
f'rm $(grep -rl "{job_id}:job_group_id:{group_id}" ~/.dpdispatcher/dp_cloud_server/)'
)
return check_return

def check_status(self, job):
if job.job_id == "":
return JobStatus.unsubmitted
Expand All @@ -138,26 +151,15 @@ def check_status(self, job):
dlog.debug(
f"debug: check_status; job.job_id:{job_id}; job.job_hash:{job.job_hash}"
)
check_return = None
# print("api",self.api_version,self.input_data.get('job_group_id'),job.job_id)
check_return = self.api.get_tasks(job_id, group_id)
assert check_return is not None, (
f"Failed to retrieve tasks information. To resubmit this job, please "
f"try again, if this problem still exists please delete the submission "
f"file and try again.\nYou can check submission.submission_hash in the "
f'previous log or type `grep -rl "{job_id}:job_group_id:{group_id}" '
f"~/.dpdispatcher/dp_cloud_server/` to find corresponding file. "
f"You can try with command:\n "
f'rm $(grep -rl "{job_id}:job_group_id:{group_id}" ~/.dpdispatcher/dp_cloud_server/)'
)
check_return = self._get_job_detail(job_id, group_id)
try:
dp_job_status = check_return["status"]
except IndexError as e:
dlog.error(
f"cannot find job information in bohrium for job {job.job_id}. check_return:{check_return}; retry one more time after 60 seconds"
)
time.sleep(60)
retry_return = self.api.get_tasks(job_id, group_id)
retry_return = self._get_job_detail(job_id, group_id)
try:
dp_job_status = retry_return["status"]
except IndexError as e:
Expand Down
16 changes: 12 additions & 4 deletions dpdispatcher/dpcloudserver/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,18 @@ def _is_none(_in):
s = s[0].lower() + s[1:]
return regex.sub(lambda m: m.group(0)[-1].upper(), s)

def get_tasks(self, job_id, group_id, page=1, per_page=10):
ret = self.get(
f"brm/v1/job/{job_id}",
)
def get_job_detail(self, job_id):
    """Request the detail record of one job from ``brm/v1/job/{job_id}``.

    Returns the result of ``self.get`` on success. When the request raises
    ``RequestInfoException`` whose first argument equals 200, the error is
    logged and ``None`` is returned; any other ``RequestInfoException`` is
    re-raised unchanged.
    """
    try:
        ret = self.get(
            f"brm/v1/job/{job_id}",
        )
    except RequestInfoException as e:
        # NOTE(review): args[0] is presumably a status code carried by the
        # exception - confirm against RequestInfoException's raisers.
        # An exception without args cannot match 200, so it is propagated
        # instead of failing here with an unrelated IndexError.
        if not e.args or e.args[0] != 200:
            raise

        dlog.error(f"get job detail error {e}", stack_info=ENABLE_STACK)
        return None

    return ret

def get_log(self, job_id):
Expand Down
4 changes: 2 additions & 2 deletions dpdispatcher/dpcloudserver/temp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ def test_commit_job(self):
job_id = api.job_create(
self.test_data["job_type"], self.test_data["job_resources"], self.test_data
)
tasks = api.get_tasks(job_id)
tasks = api.get_job_detail(job_id)
print(tasks)

def test_get_tasks(self):
    # Print the job detail of every job listed by api.get_jobs().
    print("----------", sys._getframe().f_code.co_name)
    jobs = api.get_jobs()
    for j in jobs:
        # Only the get_job_detail lookup is kept: the former get_tasks call
        # was overwritten immediately and omitted its required group_id.
        tasks = api.get_job_detail(j["id"])
        print(tasks)

# def test_download(self):
Expand Down

0 comments on commit e955685

Please sign in to comment.