From f883a8390b735ea12ed8225ea91907f015dfd949 Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Tue, 6 Dec 2022 09:41:42 -0800 Subject: [PATCH] [serving] Avoid using special config.properties for DeepSpeed (#363) --- serving/docker/deepspeed.Dockerfile | 2 +- serving/docker/deepspeed.config.properties | 6 ------ .../main/java/ai/djl/serving/ModelServer.java | 19 +++++++++---------- 3 files changed, 10 insertions(+), 17 deletions(-) delete mode 100644 serving/docker/deepspeed.config.properties diff --git a/serving/docker/deepspeed.Dockerfile b/serving/docker/deepspeed.Dockerfile index 46b8838c7..589dbe2da 100644 --- a/serving/docker/deepspeed.Dockerfile +++ b/serving/docker/deepspeed.Dockerfile @@ -34,7 +34,7 @@ CMD ["serve"] COPY scripts scripts/ RUN mkdir -p /opt/djl/conf && \ mkdir -p /opt/djl/deps -COPY deepspeed.config.properties /opt/djl/conf/config.properties +COPY config.properties /opt/djl/conf/config.properties RUN apt-get update && \ scripts/install_djl_serving.sh $djl_version && \ diff --git a/serving/docker/deepspeed.config.properties b/serving/docker/deepspeed.config.properties deleted file mode 100644 index 9fa2ba518..000000000 --- a/serving/docker/deepspeed.config.properties +++ /dev/null @@ -1,6 +0,0 @@ -inference_address=http://0.0.0.0:8080 -management_address=http://0.0.0.0:8080 -model_store=/opt/ml/model -load_models=ALL -load_on_devices=0 -#model_url_pattern=.* diff --git a/serving/src/main/java/ai/djl/serving/ModelServer.java b/serving/src/main/java/ai/djl/serving/ModelServer.java index 6a939caae..42d0746a8 100644 --- a/serving/src/main/java/ai/djl/serving/ModelServer.java +++ b/serving/src/main/java/ai/djl/serving/ModelServer.java @@ -364,7 +364,7 @@ private void initModelStore() throws IOException { String modelUrl = matcher.group(3); String version = null; String engineName = null; - String deviceMapping = null; + String deviceMapping = "*"; String modelName; if (endpoint != null) { String[] tokens = endpoint.split(":", -1); @@ -389,12 +389,9 @@ private void initModelStore() throws IOException { continue; } } - String[] devices = {null}; - if (deviceMapping != null) { - DependencyManager.getInstance().installEngine(engineName); - Engine engine = Engine.getEngine(engineName); - devices = parseDevices(deviceMapping, engine, pair.getValue()); - } + DependencyManager.getInstance().installEngine(engineName); + Engine engine = Engine.getEngine(engineName); + String[] devices = parseDevices(deviceMapping, engine, pair.getValue()); WlmConfigManager wlmc = WlmConfigManager.getInstance(); ModelInfo modelInfo = @@ -410,13 +407,12 @@ private void initModelStore() throws IOException { wlmc.getMaxBatchDelay(), wlmc.getBatchSize()); Workflow workflow = new Workflow(modelInfo); - String[] finalDevices = devices; CompletableFuture f = modelManager .registerWorkflow(workflow) .thenAccept( v -> { - for (String deviceName : finalDevices) { + for (String deviceName : devices) { modelManager.initWorkers(workflow, deviceName, -1, -1); } }) @@ -627,7 +623,8 @@ private String[] parseDevices(String devices, Engine engine, Path modelDir) { if ("*".equals(devices)) { int gpuCount = engine.getGpuCount(); if (gpuCount > 0) { - if ("Python".equals(engine.getEngineName())) { + String engineName = engine.getEngineName(); + if ("Python".equals(engineName)) { Properties prop = getServingProperties(modelDir); String v = Utils.getenv("TENSOR_PARALLEL_DEGREE", "-1"); v = prop.getProperty("option.tensor_parallel_degree", v); @@ -642,6 +639,8 @@ private String[] parseDevices(String devices, Engine engine, Path modelDir) { } gpuCount = procs; } + } else if ("DeepSpeed".equals(engineName)) { + return new String[] {"0"}; } return IntStream.range(0, gpuCount)