diff --git a/serving/docker/deepspeed.Dockerfile b/serving/docker/deepspeed.Dockerfile
index 46b8838c7..589dbe2da 100644
--- a/serving/docker/deepspeed.Dockerfile
+++ b/serving/docker/deepspeed.Dockerfile
@@ -34,7 +34,7 @@ CMD ["serve"]
COPY scripts scripts/
RUN mkdir -p /opt/djl/conf && \
mkdir -p /opt/djl/deps
-COPY deepspeed.config.properties /opt/djl/conf/config.properties
+COPY config.properties /opt/djl/conf/config.properties
RUN apt-get update && \
scripts/install_djl_serving.sh $djl_version && \
diff --git a/serving/docker/deepspeed.config.properties b/serving/docker/deepspeed.config.properties
deleted file mode 100644
index 9fa2ba518..000000000
--- a/serving/docker/deepspeed.config.properties
+++ /dev/null
@@ -1,6 +0,0 @@
-inference_address=http://0.0.0.0:8080
-management_address=http://0.0.0.0:8080
-model_store=/opt/ml/model
-load_models=ALL
-load_on_devices=0
-#model_url_pattern=.*
diff --git a/serving/src/main/java/ai/djl/serving/ModelServer.java b/serving/src/main/java/ai/djl/serving/ModelServer.java
index 6a939caae..42d0746a8 100644
--- a/serving/src/main/java/ai/djl/serving/ModelServer.java
+++ b/serving/src/main/java/ai/djl/serving/ModelServer.java
@@ -364,7 +364,7 @@ private void initModelStore() throws IOException {
String modelUrl = matcher.group(3);
String version = null;
String engineName = null;
- String deviceMapping = null;
+ String deviceMapping = "*";
String modelName;
if (endpoint != null) {
String[] tokens = endpoint.split(":", -1);
@@ -389,12 +389,9 @@ private void initModelStore() throws IOException {
continue;
}
}
- String[] devices = {null};
- if (deviceMapping != null) {
- DependencyManager.getInstance().installEngine(engineName);
- Engine engine = Engine.getEngine(engineName);
- devices = parseDevices(deviceMapping, engine, pair.getValue());
- }
+ DependencyManager.getInstance().installEngine(engineName);
+ Engine engine = Engine.getEngine(engineName);
+ String[] devices = parseDevices(deviceMapping, engine, pair.getValue());
WlmConfigManager wlmc = WlmConfigManager.getInstance();
ModelInfo modelInfo =
@@ -410,13 +407,12 @@ private void initModelStore() throws IOException {
wlmc.getMaxBatchDelay(),
wlmc.getBatchSize());
Workflow workflow = new Workflow(modelInfo);
- String[] finalDevices = devices;
CompletableFuture f =
modelManager
.registerWorkflow(workflow)
.thenAccept(
v -> {
- for (String deviceName : finalDevices) {
+ for (String deviceName : devices) {
modelManager.initWorkers(workflow, deviceName, -1, -1);
}
})
@@ -627,7 +623,8 @@ private String[] parseDevices(String devices, Engine engine, Path modelDir) {
if ("*".equals(devices)) {
int gpuCount = engine.getGpuCount();
if (gpuCount > 0) {
- if ("Python".equals(engine.getEngineName())) {
+ String engineName = engine.getEngineName();
+ if ("Python".equals(engineName)) {
Properties prop = getServingProperties(modelDir);
String v = Utils.getenv("TENSOR_PARALLEL_DEGREE", "-1");
v = prop.getProperty("option.tensor_parallel_degree", v);
@@ -642,6 +639,8 @@ private String[] parseDevices(String devices, Engine engine, Path modelDir) {
}
gpuCount = procs;
}
+ } else if ("DeepSpeed".equals(engineName)) {
+ return new String[] {"0"};
}
return IntStream.range(0, gpuCount)