Skip to content

Commit

Permalink
YARN-10287.Update scheduler-conf corrupts the CS configuration when r…
Browse files Browse the repository at this point in the history
…emoving queue which is referred in queue mapping
  • Loading branch information
Ashutosh Gupta committed Jun 29, 2022
1 parent 2d133a5 commit 82ac2ec
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 12 deletions.
Expand Up @@ -455,6 +455,7 @@ public void reinitialize(Configuration newConf, RMContext rmContext,
reinitializeQueues(this.conf);
} catch (Throwable t) {
this.conf = oldConf;
reinitializeQueues(this.conf);
refreshMaximumAllocation(
ResourceUtils.fetchMaximumAllocationFromConfig(this.conf));
throw new IOException("Failed to re-init queues : " + t.getMessage(),
Expand Down
Expand Up @@ -44,6 +44,8 @@
import org.apache.hadoop.yarn.webapp.JerseyTestBase;
import org.apache.hadoop.yarn.webapp.dao.QueueConfigInfo;
import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.apache.hadoop.yarn.webapp.util.YarnWebServiceUtils;

import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
Expand All @@ -69,6 +71,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.ORDERING_POLICY;
import static org.junit.Assert.assertTrue;

Expand Down Expand Up @@ -145,7 +148,7 @@ public void setUp() throws Exception {
private static void setupQueueConfiguration(
CapacitySchedulerConfiguration config) {
config.setQueues(CapacitySchedulerConfiguration.ROOT,
new String[]{"a", "b", "c"});
new String[]{"a", "b", "c", "mappedqueue"});

final String a = CapacitySchedulerConfiguration.ROOT + ".a";
config.setCapacity(a, 25f);
Expand All @@ -166,6 +169,11 @@ private static void setupQueueConfiguration(
final String c1 = c + ".c1";
config.setQueues(c, new String[] {"c1"});
config.setCapacity(c1, 0f);

final String d = CapacitySchedulerConfiguration.ROOT + ".d";
config.setCapacity(d, 0f);
config.set(CapacitySchedulerConfiguration.QUEUE_MAPPING,
"g:hadoop:mappedqueue");
}

public TestRMWebServicesConfigurationMutation() {
Expand Down Expand Up @@ -201,14 +209,14 @@ private CapacitySchedulerConfiguration getSchedulerConf()
public void testGetSchedulerConf() throws Exception {
CapacitySchedulerConfiguration orgConf = getSchedulerConf();
assertNotNull(orgConf);
assertEquals(3, orgConf.getQueues("root").length);
assertEquals(4, orgConf.getQueues("root").length);
}

@Test
public void testFormatSchedulerConf() throws Exception {
CapacitySchedulerConfiguration newConf = getSchedulerConf();
assertNotNull(newConf);
assertEquals(3, newConf.getQueues("root").length);
assertEquals(4, newConf.getQueues("root").length);

SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo();
Map<String, String> nearEmptyCapacity = new HashMap<>();
Expand All @@ -234,7 +242,7 @@ public void testFormatSchedulerConf() throws Exception {
.put(ClientResponse.class);
newConf = getSchedulerConf();
assertNotNull(newConf);
assertEquals(4, newConf.getQueues("root").length);
assertEquals(5, newConf.getQueues("root").length);

// Format the scheduler config and validate root.formattest is not present
response = r.path("ws").path("v1").path("cluster")
Expand All @@ -243,7 +251,7 @@ public void testFormatSchedulerConf() throws Exception {
.accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
assertEquals(Status.OK.getStatusCode(), response.getStatus());
newConf = getSchedulerConf();
assertEquals(3, newConf.getQueues("root").length);
assertEquals(4, newConf.getQueues("root").length);
}

private long getConfigVersion() throws Exception {
Expand All @@ -269,7 +277,7 @@ public void testSchedulerConfigVersion() throws Exception {
public void testAddNestedQueue() throws Exception {
CapacitySchedulerConfiguration orgConf = getSchedulerConf();
assertNotNull(orgConf);
assertEquals(3, orgConf.getQueues("root").length);
assertEquals(4, orgConf.getQueues("root").length);

WebResource r = resource();

Expand Down Expand Up @@ -304,7 +312,7 @@ public void testAddNestedQueue() throws Exception {
assertEquals(Status.OK.getStatusCode(), response.getStatus());
CapacitySchedulerConfiguration newCSConf =
((CapacityScheduler) rm.getResourceScheduler()).getConfiguration();
assertEquals(4, newCSConf.getQueues("root").length);
assertEquals(5, newCSConf.getQueues("root").length);
assertEquals(2, newCSConf.getQueues("root.d").length);
assertEquals(25.0f, newCSConf.getNonLabeledQueueCapacity(new QueuePath("root.d.d1")),
0.01f);
Expand All @@ -313,7 +321,7 @@ public void testAddNestedQueue() throws Exception {

CapacitySchedulerConfiguration newConf = getSchedulerConf();
assertNotNull(newConf);
assertEquals(4, newConf.getQueues("root").length);
assertEquals(5, newConf.getQueues("root").length);
}

@Test
Expand Down Expand Up @@ -343,7 +351,7 @@ public void testAddWithUpdate() throws Exception {
assertEquals(Status.OK.getStatusCode(), response.getStatus());
CapacitySchedulerConfiguration newCSConf =
((CapacityScheduler) rm.getResourceScheduler()).getConfiguration();
assertEquals(4, newCSConf.getQueues("root").length);
assertEquals(5, newCSConf.getQueues("root").length);
assertEquals(25.0f, newCSConf.getNonLabeledQueueCapacity(new QueuePath("root.d")), 0.01f);
assertEquals(50.0f, newCSConf.getNonLabeledQueueCapacity(new QueuePath("root.b")), 0.01f);
}
Expand Down Expand Up @@ -504,6 +512,46 @@ public void testStopWithRemoveQueue() throws Exception {
assertEquals("a1", newCSConf.getQueues("root.a")[0]);
}

@Test
public void testRemoveQueueWhichHasQueueMapping() throws Exception {
WebResource r = resource();

ClientResponse response;
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();

// Validate Queue 'mappedqueue' exists before deletion
assertNotNull("Failed to setup CapacityScheduler Configuration",
cs.getQueue("mappedqueue"));

// Set state of queue 'mappedqueue' to STOPPED.
SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo();
Map<String, String> stoppedParam = new HashMap<>();
stoppedParam.put(CapacitySchedulerConfiguration.STATE, QueueState.STOPPED.toString());
QueueConfigInfo stoppedInfo = new QueueConfigInfo("root.mappedqueue", stoppedParam);
updateInfo.getUpdateQueueInfo().add(stoppedInfo);

// Remove queue 'mappedqueue' using update scheduler-conf
updateInfo.getRemoveQueueInfo().add("root.mappedqueue");
response = r.path("ws").path("v1").path("cluster").path("scheduler-conf")
.queryParam("user.name", userName).accept(MediaType.APPLICATION_JSON)
.entity(YarnWebServiceUtils.toJson(updateInfo, SchedConfUpdateInfo.class),
MediaType.APPLICATION_JSON).put(ClientResponse.class);
String responseText = response.getEntity(String.class);

// Queue 'mappedqueue' deletion will fail as there is queue mapping present
assertEquals(Status.BAD_REQUEST.getStatusCode(), response.getStatus());
assertTrue(responseText.contains(
"Failed to re-init queues : " + "org.apache.hadoop.yarn.exceptions.YarnException:"
+ " Path root 'mappedqueue' does not exist. Path 'mappedqueue' is invalid"));

// Validate queue 'mappedqueue' exists after above failure
CapacitySchedulerConfiguration newCSConf =
((CapacityScheduler) rm.getResourceScheduler()).getConfiguration();
assertEquals(4, newCSConf.getQueues("root").length);
assertNotNull("CapacityScheduler Configuration is corrupt",
cs.getQueue("mappedqueue"));
}

@Test
public void testStopWithConvertLeafToParentQueue() throws Exception {
WebResource r = resource();
Expand Down Expand Up @@ -558,7 +606,7 @@ public void testRemoveParentQueue() throws Exception {
assertEquals(Status.OK.getStatusCode(), response.getStatus());
CapacitySchedulerConfiguration newCSConf =
((CapacityScheduler) rm.getResourceScheduler()).getConfiguration();
assertEquals(2, newCSConf.getQueues("root").length);
assertEquals(3, newCSConf.getQueues("root").length);
assertNull(newCSConf.getQueues("root.c"));
}

Expand Down Expand Up @@ -589,7 +637,7 @@ public void testRemoveParentQueueWithCapacity() throws Exception {
assertEquals(Status.OK.getStatusCode(), response.getStatus());
CapacitySchedulerConfiguration newCSConf =
((CapacityScheduler) rm.getResourceScheduler()).getConfiguration();
assertEquals(2, newCSConf.getQueues("root").length);
assertEquals(3, newCSConf.getQueues("root").length);
assertEquals(100.0f, newCSConf.getNonLabeledQueueCapacity(new QueuePath("root.b")),
0.01f);
}
Expand Down Expand Up @@ -621,7 +669,7 @@ public void testRemoveMultipleQueues() throws Exception {
assertEquals(Status.OK.getStatusCode(), response.getStatus());
CapacitySchedulerConfiguration newCSConf =
((CapacityScheduler) rm.getResourceScheduler()).getConfiguration();
assertEquals(1, newCSConf.getQueues("root").length);
assertEquals(2, newCSConf.getQueues("root").length);
}

private void stopQueue(String... queuePaths) throws Exception {
Expand Down

0 comments on commit 82ac2ec

Please sign in to comment.