Skip to content

Commit

Permalink
YARN-3231. FairScheduler: Changing queueMaxRunningApps interferes wit…
Browse files Browse the repository at this point in the history
…h pending jobs. (Siqi Li via kasha)
  • Loading branch information
kambatla committed Mar 5, 2015
1 parent 430b537 commit 22426a1
Show file tree
Hide file tree
Showing 4 changed files with 348 additions and 6 deletions.
3 changes: 3 additions & 0 deletions hadoop-yarn-project/CHANGES.txt
Expand Up @@ -697,6 +697,9 @@ Release 2.7.0 - UNRELEASED

YARN-3131. YarnClientImpl should check FAILED and KILLED state in
submitApplication (Chang Li via jlowe)

YARN-3231. FairScheduler: Changing queueMaxRunningApps interferes with pending
jobs. (Siqi Li via kasha)

Release 2.6.0 - 2014-11-18

Expand Down
Expand Up @@ -1477,6 +1477,7 @@ public void onReload(AllocationConfiguration queueInfo) {
allocConf = queueInfo;
allocConf.getDefaultSchedulingPolicy().initialize(clusterResource);
queueMgr.updateAllocationConfiguration(allocConf);
maxRunningEnforcer.updateRunnabilityOnReload();
}
}
}
Expand Down
Expand Up @@ -104,6 +104,26 @@ public void trackNonRunnableApp(FSAppAttempt app) {
usersNonRunnableApps.put(user, app);
}

/**
 * Re-evaluates pending applications after the allocation configuration has
 * been reloaded, i.e. when the scheduler is reinitialized.
 *
 * Checks whether any non-runnable applications have become runnable now
 * that the max running apps of a queue may have changed. Candidate app
 * lists are gathered starting from the root queue and then passed to
 * updateAppsRunnability.
 *
 * Runs in O(n) where n is the number of apps that are non-runnable and in
 * the queues that went from having no slack to having slack.
 */
public void updateRunnabilityOnReload() {
  List<List<FSAppAttempt>> candidateAppLists =
      new ArrayList<List<FSAppAttempt>>();
  gatherPossiblyRunnableAppLists(
      scheduler.getQueueManager().getRootQueue(), candidateAppLists);

  // No upper bound on newly runnable apps is known here, so pass
  // Integer.MAX_VALUE to keep the scan from breaking out early.
  updateAppsRunnability(candidateAppLists, Integer.MAX_VALUE);
}

/**
* Checks to see whether any other applications are runnable now that the given
* application has been removed from the given queue, and makes them so.
Expand Down Expand Up @@ -156,6 +176,19 @@ public void updateRunnabilityOnAppRemoval(FSAppAttempt app, FSLeafQueue queue) {
}
}

updateAppsRunnability(appsNowMaybeRunnable,
appsNowMaybeRunnable.size());
}

/**
* Checks whether applications are now runnable by iterating through each
* one of them and checking whether its queue and user have slack.
*
* If we know how many apps can become runnable, there is no need to iterate
* through all apps; maxRunnableApps is used to break out of the iteration.
*/
private void updateAppsRunnability(List<List<FSAppAttempt>>
appsNowMaybeRunnable, int maxRunnableApps) {
// Scan through and check whether this means that any apps are now runnable
Iterator<FSAppAttempt> iter = new MultiListStartTimeIterator(
appsNowMaybeRunnable);
Expand All @@ -173,9 +206,7 @@ public void updateRunnabilityOnAppRemoval(FSAppAttempt app, FSLeafQueue queue) {
next.getQueue().addApp(appSched, true);
noLongerPendingApps.add(appSched);

// No more than one app per list will be able to be made runnable, so
// we can stop looking after we've found that many
if (noLongerPendingApps.size() >= appsNowMaybeRunnable.size()) {
if (noLongerPendingApps.size() >= maxRunnableApps) {
break;
}
}
Expand All @@ -194,11 +225,10 @@ public void updateRunnabilityOnAppRemoval(FSAppAttempt app, FSLeafQueue queue) {

if (!usersNonRunnableApps.remove(appSched.getUser(), appSched)) {
LOG.error("Waiting app " + appSched + " expected to be in "
+ "usersNonRunnableApps, but was not. This should never happen.");
+ "usersNonRunnableApps, but was not. This should never happen.");
}
}
}

/**
* Updates the relevant tracking variables after a runnable app with the given
* queue and user has been removed.
Expand Down

0 comments on commit 22426a1

Please sign in to comment.