[ML] Update the number of allocations per nlp process (#86277)
Adds a method to DeploymentManager to update the number of 
allocations per process as implemented in elastic/ml-cpp#2258.
davidkyle committed May 5, 2022
1 parent 8d0f0e8 commit 6318be5
Showing 28 changed files with 998 additions and 433 deletions.
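
The DeploymentManager changes themselves are not visible in the excerpt below, so here is a rough sketch of how such an update method could dispatch the new control-message action added by this commit. The method name updateNumAllocations, the request-id counter, the executor, and the ProcessContext accessor are illustrative assumptions only; just the ControlMessagePyTorchAction constructor arguments match the code added further down in this diff.

    // Sketch only, imagined as a method inside DeploymentManager; not the committed implementation.
    void updateNumAllocations(
        ProcessContext processContext,
        int numAllocationThreads,
        TimeValue timeout,
        ActionListener<ThreadSettings> listener
    ) {
        var action = new ControlMessagePyTorchAction(
            processContext.getModelId(),            // hypothetical accessor
            requestIdCounter.incrementAndGet(),     // hypothetical request id source
            numAllocationThreads,
            timeout,
            processContext,
            threadPool,
            listener
        );
        executorServiceForProcess.execute(action);  // hypothetical executor feeding the native process
    }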
@@ -57,7 +57,6 @@ public void cleanUpAfterTest() {
         cleanUp();
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/ml-cpp/pull/2258")
     public void testMemoryStats() throws Exception {
 
         deployTrainedModel();

@@ -8,7 +8,6 @@
 package org.elasticsearch.xpack.ml.integration;
 
 import org.apache.http.util.EntityUtils;
-import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.client.Request;
 import org.elasticsearch.client.Response;
 import org.elasticsearch.client.ResponseException;
@@ -75,8 +74,6 @@
  * torch.jit.save(traced_model, "simplemodel.pt")
  * ## End Python
  */
-
-@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/ml-cpp/pull/2258")
 public class PyTorchModelIT extends ESRestTestCase {
 
     private static final String BASIC_AUTH_VALUE_SUPER_USER = UsernamePasswordToken.basicAuthHeaderValue(
@@ -254,7 +251,7 @@ public void testDeploymentStats() throws IOException {
                 stats.get(0)
             );
             assertThat(responseMap.toString(), requiredNativeMemory, is(not(nullValue())));
-            assertThat(requiredNativeMemory, equalTo((int) (ByteSizeValue.ofMb(270).getBytes() + 2 * RAW_MODEL_SIZE)));
+            assertThat(requiredNativeMemory, equalTo((int) (ByteSizeValue.ofMb(240).getBytes() + 2 * RAW_MODEL_SIZE)));
 
             Response humanResponse = client().performRequest(new Request("GET", "/_ml/trained_models/" + modelId + "/_stats?human"));
             var humanResponseMap = entityAsMap(humanResponse);
@@ -276,7 +273,7 @@
                 stringRequiredNativeMemory,
                 is(not(nullValue()))
             );
-            assertThat(stringRequiredNativeMemory, equalTo("270mb"));
+            assertThat(stringRequiredNativeMemory, equalTo("240mb"));
             stopDeployment(modelId);
         };
 
@@ -165,7 +165,6 @@ public void testMLFeatureResetFailureDueToPipelines() throws Exception {
         assertThat(isResetMode(), is(false));
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/ml-cpp/pull/2258")
     public void testMLFeatureResetWithModelDeployment() throws Exception {
         createModelDeployment();
         client().execute(ResetFeatureStateAction.INSTANCE, new ResetFeatureStateRequest()).actionGet();

@@ -0,0 +1,111 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.ml.inference.deployment;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.threadpool.Scheduler;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.ml.MachineLearning;

import java.util.concurrent.atomic.AtomicBoolean;

abstract class AbstractPyTorchAction<T> extends AbstractRunnable {

    private final String modelId;
    private final long requestId;
    private final TimeValue timeout;
    private final Scheduler.Cancellable timeoutHandler;
    private final DeploymentManager.ProcessContext processContext;
    private final AtomicBoolean notified = new AtomicBoolean();

    private final ActionListener<T> listener;

    protected AbstractPyTorchAction(
        String modelId,
        long requestId,
        TimeValue timeout,
        DeploymentManager.ProcessContext processContext,
        ThreadPool threadPool,
        ActionListener<T> listener
    ) {
        this.modelId = modelId;
        this.requestId = requestId;
        this.timeout = timeout;
        this.timeoutHandler = threadPool.schedule(
            this::onTimeout,
            ExceptionsHelper.requireNonNull(timeout, "timeout"),
            MachineLearning.UTILITY_THREAD_POOL_NAME
        );
        this.processContext = processContext;
        this.listener = listener;
    }

    void onTimeout() {
        if (notified.compareAndSet(false, true)) {
            processContext.getTimeoutCount().incrementAndGet();
            processContext.getResultProcessor().ignoreResponseWithoutNotifying(String.valueOf(requestId));
            listener.onFailure(
                new ElasticsearchStatusException("timeout [{}] waiting for inference result", RestStatus.REQUEST_TIMEOUT, timeout)
            );
            return;
        }
        getLogger().debug("[{}] request [{}] received timeout after [{}] but listener already alerted", modelId, requestId, timeout);
    }

    void onSuccess(T result) {
        timeoutHandler.cancel();
        if (notified.compareAndSet(false, true)) {
            listener.onResponse(result);
            return;
        }
        getLogger().debug("[{}] request [{}] received inference response but listener already notified", modelId, requestId);
    }

    @Override
    public void onFailure(Exception e) {
        timeoutHandler.cancel();
        if (notified.compareAndSet(false, true)) {
            processContext.getResultProcessor().ignoreResponseWithoutNotifying(String.valueOf(requestId));
            listener.onFailure(e);
            return;
        }
        getLogger().debug(
            () -> new ParameterizedMessage("[{}] request [{}] received failure but listener already notified", modelId, requestId),
            e
        );
    }

    protected void onFailure(String errorMessage) {
        onFailure(new ElasticsearchStatusException("Error in inference process: [" + errorMessage + "]", RestStatus.INTERNAL_SERVER_ERROR));
    }

    boolean isNotified() {
        return notified.get();
    }

    long getRequestId() {
        return requestId;
    }

    String getModelId() {
        return modelId;
    }

    DeploymentManager.ProcessContext getProcessContext() {
        return processContext;
    }

    protected abstract Logger getLogger();
}

@@ -0,0 +1,103 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.ml.inference.deployment;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.ml.inference.pytorch.results.PyTorchResult;
import org.elasticsearch.xpack.ml.inference.pytorch.results.ThreadSettings;

import java.io.IOException;

class ControlMessagePyTorchAction extends AbstractPyTorchAction<ThreadSettings> {

    private static final Logger logger = LogManager.getLogger(InferencePyTorchAction.class);

    private final int numAllocationThreads;

    private enum ControlMessageTypes {
        AllocationThreads
    };

    ControlMessagePyTorchAction(
        String modelId,
        long requestId,
        int numAllocationThreads,
        TimeValue timeout,
        DeploymentManager.ProcessContext processContext,
        ThreadPool threadPool,
        ActionListener<ThreadSettings> listener
    ) {
        super(modelId, requestId, timeout, processContext, threadPool, listener);
        this.numAllocationThreads = numAllocationThreads;
    }

    @Override
    protected void doRun() throws Exception {
        if (isNotified()) {
            // Should not execute request as it has already timed out while waiting in the queue
            logger.debug(
                () -> new ParameterizedMessage(
                    "[{}] skipping control message on request [{}] as it has timed out",
                    getModelId(),
                    getRequestId()
                )
            );
            return;
        }

        final String requestIdStr = String.valueOf(getRequestId());
        try {
            var message = buildControlMessage(requestIdStr, numAllocationThreads);

            getProcessContext().getResultProcessor()
                .registerRequest(requestIdStr, ActionListener.wrap(this::processResponse, this::onFailure));

            getProcessContext().getProcess().get().writeInferenceRequest(message);
        } catch (IOException e) {
            logger.error(new ParameterizedMessage("[{}] error writing control message to the inference process", getModelId()), e);
            onFailure(ExceptionsHelper.serverError("Error writing control message to the inference process", e));
        } catch (Exception e) {
            onFailure(e);
        }
    }

    public static BytesReference buildControlMessage(String requestId, int numAllocationThreads) throws IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.field("request_id", requestId);
        builder.field("control", ControlMessageTypes.AllocationThreads.ordinal());
        builder.field("num_allocations", numAllocationThreads);
        builder.endObject();

        // BytesReference.bytes closes the builder
        return BytesReference.bytes(builder);
    }

    public void processResponse(PyTorchResult result) {
        if (result.isError()) {
            onFailure(result.errorResult().error());
            return;
        }
        onSuccess(result.threadSettings());
    }

    @Override
    protected Logger getLogger() {
        return logger;
    }

}
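
For reference, this is roughly what the control message built by buildControlMessage looks like on the wire. The wrapper class below is illustrative only and is not part of this commit; it assumes the ml plugin classes are on the classpath, and the request id "42" and the allocation count 4 are arbitrary example values.

package org.elasticsearch.xpack.ml.inference.deployment;

// Illustrative example, not part of the commit: print the JSON control message.
public class ControlMessageExample {
    public static void main(String[] args) throws Exception {
        var message = ControlMessagePyTorchAction.buildControlMessage("42", 4);
        // Prints: {"request_id":"42","control":0,"num_allocations":4}
        // "control" is 0 because AllocationThreads is the first (and only) ControlMessageTypes constant.
        System.out.println(message.utf8ToString());
    }
}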
