Skip to content

Commit

Permalink
Merge pull request #356 from brianhlin/stable.dont-hold-removed-jobs
Browse files Browse the repository at this point in the history
Do not put removed jobs on hold
  • Loading branch information
brianhlin committed Jul 1, 2020
2 parents 00558a8 + bf3e851 commit ae284d6
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 4 deletions.
4 changes: 2 additions & 2 deletions config/01-ce-router-defaults.conf.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ GRIDMANAGER_MAX_SUBMITTED_JOBS_PER_RESOURCE = $(CONDORCE_MAX_JOBS)
# Only route jobs with a valid, non-expired proxy for either the vanilla or standard universe.
JOB_ROUTER_SOURCE_JOB_CONSTRAINT = (target.x509userproxysubject =!= UNDEFINED) && (target.x509UserProxyExpiration =!= UNDEFINED) && (time() < target.x509UserProxyExpiration) && (target.JobUniverse =?= 5 || target.JobUniverse =?= 1)

# Put jobs on hold if they meet any of the following requirements
# Put jobs on hold if they are not in the removed state and meet any of the following requirements:
# 1. Is missing a proxy subject or proxy expiration date.
# 2. Has an expired proxy and is not running.
# 3. Has not been routed by the CE and is not a standard, vanilla, scheduler, or local universe job.
# 4. Has not been routed by the CE and has been idle for more than 30 minutes.
SYSTEM_PERIODIC_HOLD = (x509userproxysubject =?= UNDEFINED) || (x509UserProxyExpiration =?= UNDEFINED) || (time() > x509UserProxyExpiration && JobStatus =!= 2) || (RoutedBy is null && JobUniverse =!= 1 && JobUniverse =!= 5 && JobUniverse =!= 7 && JobUniverse =!= 12) || ((JobStatus =?= 1 && time() - EnteredCurrentStatus > 1800) && RoutedToJobId is null && RoutedJob =!= true)
SYSTEM_PERIODIC_HOLD = (JobStatus != 3) && ((x509userproxysubject =?= UNDEFINED) || (x509UserProxyExpiration =?= UNDEFINED) || (time() > x509UserProxyExpiration && JobStatus =!= 2) || (RoutedBy is null && JobUniverse =!= 1 && JobUniverse =!= 5 && JobUniverse =!= 7 && JobUniverse =!= 12) || ((JobStatus =?= 1 && time() - EnteredCurrentStatus > 1800) && RoutedToJobId is null && RoutedJob =!= true))

SYSTEM_PERIODIC_HOLD_REASON = \
strcat("HTCondor-CE held job due to ", \
Expand Down
4 changes: 2 additions & 2 deletions config/01-ce-router.conf.in
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ CONDORCE_MAX_JOBS = 10000
# Only route jobs with a valid, non-expired proxy for either the vanilla or standard universe.
JOB_ROUTER_SOURCE_JOB_CONSTRAINT = (target.x509userproxysubject =!= UNDEFINED) && (target.x509UserProxyExpiration =!= UNDEFINED) && (time() < target.x509UserProxyExpiration) && (target.JobUniverse =?= 5 || target.JobUniverse =?= 1)

# Put jobs on hold if they meet any of the following requirements
# Put jobs on hold if they are not in the removed state and meet any of the following requirements:
# 1. Is missing a proxy subject or proxy expiration date.
# 2. Has an expired proxy and is not running.
# 3. Has not been routed by the CE and is not a standard, vanilla, scheduler, or local universe job.
# 4. Has not been routed by the CE and has been idle for more than 30 minutes.
SYSTEM_PERIODIC_HOLD = (x509userproxysubject =?= UNDEFINED) || (x509UserProxyExpiration =?= UNDEFINED) || (time() > x509UserProxyExpiration && JobStatus =!= 2) || (RoutedBy is null && JobUniverse =!= 1 && JobUniverse =!= 5 && JobUniverse =!= 7 && JobUniverse =!= 12) || ((JobStatus =?= 1 && time() - EnteredCurrentStatus > 1800) && RoutedToJobId is null && RoutedJob =!= true)
SYSTEM_PERIODIC_HOLD = (JobStatus != 3) && ((x509userproxysubject =?= UNDEFINED) || (x509UserProxyExpiration =?= UNDEFINED) || (time() > x509UserProxyExpiration && JobStatus =!= 2) || (RoutedBy is null && JobUniverse =!= 1 && JobUniverse =!= 5 && JobUniverse =!= 7 && JobUniverse =!= 12) || ((JobStatus =?= 1 && time() - EnteredCurrentStatus > 1800) && RoutedToJobId is null && RoutedJob =!= true))

SYSTEM_PERIODIC_HOLD_REASON = \
strcat("HTCondor-CE held job due to ", \
Expand Down
2 changes: 2 additions & 0 deletions rpm/htcondor-ce.spec
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,8 @@ install -m 0755 -d -p $RPM_BUILD_ROOT/%{_sysconfdir}/condor-ce/bosco_override
* Tue Jun 23 2020 Brian Lin <blin@cs.wisc.edu> - 3.4.3-1
- Fix a stacktrace with the BDII provider when `HTCONDORCE_SPEC` isn't
defined in the local HTCondor configuration
- Fix a race condition that could result in removed jobs being put
on hold

* Mon Jun 15 2020 Brian Lin <blin@cs.wisc.edu> - 3.4.2-1
- Replace APEL uploader SchedD cron with init and systemd services
Expand Down

0 comments on commit ae284d6

Please sign in to comment.