Merged
13 changes: 7 additions & 6 deletions docs/reference/ingest/processors/inference.asciidoc
@@ -15,11 +15,11 @@ ingested in the pipeline.
.{infer-cap} Options
[options="header"]
|======
| Name | Required | Default | Description
| `model_id` | yes | - | (String) The ID or alias for the trained model.
| `target_field` | no | `ml.inference.<processor_tag>` | (String) Field added to incoming documents to contain results objects.
| `field_map` | no | If defined the model's default field map | (Object) Maps the document field names to the known field names of the model. This mapping takes precedence over any default mappings provided in the model configuration.
| `inference_config` | no | The default settings defined in the model | (Object) Contains the inference type and its options.
| Name | Required | Default | Description
| `model_id` | yes | - | (String) The ID or alias for the trained model, or the ID of the deployment.
| `target_field` | no | `ml.inference.<processor_tag>` | (String) Field added to incoming documents to contain results objects.
| `field_map` | no | If defined the model's default field map | (Object) Maps the document field names to the known field names of the model. This mapping takes precedence over any default mappings provided in the model configuration.
| `inference_config` | no | The default settings defined in the model | (Object) Contains the inference type and its options.
include::common-options.asciidoc[]
|======

@@ -28,7 +28,7 @@ include::common-options.asciidoc[]
--------------------------------------------------
{
"inference": {
"model_id": "flight_delay_regression-1571767128603",
"model_id": "model_deployment_for_inference",
"target_field": "FlightDelayMin_prediction_infer",
"field_map": {
"your_field": "my_field"
@@ -384,6 +384,7 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizati
[discrete]
[[inference-processor-config-example]]
==== {infer-cap} processor examples

[source,js]
--------------------------------------------------
"inference":{
4 changes: 4 additions & 0 deletions docs/reference/ml/ml-shared.asciidoc
@@ -498,6 +498,10 @@ that document will not be used for training, but a prediction with the trained
model will be generated for it. It is also known as continuous target variable.
end::dependent-variable[]

tag::deployment-id[]
A unique identifier for the deployment of the model.
end::deployment-id[]

tag::desc-results[]
If true, the results are sorted in descending order.
end::desc-results[]
@@ -6,12 +6,12 @@
<titleabbrev>Clear trained model deployment cache</titleabbrev>
++++

Clears a trained model deployment cache on all nodes where the trained model is assigned.
Clears the {infer} cache on all nodes where the deployment is assigned.

[[clear-trained-model-deployment-cache-request]]
== {api-request-title}

`POST _ml/trained_models/<model_id>/deployment/cache/_clear`
`POST _ml/trained_models/<deployment_id>/deployment/cache/_clear`

[[clear-trained-model-deployment-cache-prereq]]
== {api-prereq-title}
@@ -22,16 +22,16 @@ Requires the `manage_ml` cluster privilege. This privilege is included in the
[[clear-trained-model-deployment-cache-desc]]
== {api-description-title}

A trained model deployment may have an inference cache enabled. As requests are handled by each allocated node,
their responses may be cached on that individual node. Calling this API clears the caches without restarting the
deployment.
A trained model deployment may have an inference cache enabled. As requests are
handled by each allocated node, their responses may be cached on that individual
node. Calling this API clears the caches without restarting the deployment.
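
For instance, assuming a deployment with the hypothetical ID
`my_model_for_search`, its {infer} cache can be cleared without restarting the
deployment:

[source,console]
--------------------------------------------------
POST _ml/trained_models/my_model_for_search/deployment/cache/_clear
--------------------------------------------------
// TEST[skip:TBD]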

[[clear-trained-model-deployment-cache-path-params]]
== {api-path-parms-title}

`<model_id>`::
`deployment_id`::
(Required, string)
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=deployment-id]

[[clear-trained-model-deployment-cache-example]]
== {api-examples-title}
@@ -16,11 +16,11 @@ Retrieves usage information for trained models.

`GET _ml/trained_models/_all/_stats` +

`GET _ml/trained_models/<model_id>/_stats` +
`GET _ml/trained_models/<model_id_or_deployment_id>/_stats` +

`GET _ml/trained_models/<model_id>,<model_id_2>/_stats` +
`GET _ml/trained_models/<model_id_or_deployment_id>,<model_id_2_or_deployment_id_2>/_stats` +

`GET _ml/trained_models/<model_id_pattern*>,<model_id_2>/_stats`
`GET _ml/trained_models/<model_id_pattern*_or_deployment_id_pattern*>,<model_id_2_or_deployment_id_2>/_stats`


[[ml-get-trained-models-stats-prereq]]
@@ -33,17 +33,20 @@ Requires the `monitor_ml` cluster privilege. This privilege is included in the
[[ml-get-trained-models-stats-desc]]
== {api-description-title}

You can get usage information for multiple trained models in a single API
request by using a comma-separated list of model IDs or a wildcard expression.
You can get usage information for multiple trained models or trained model
deployments in a single API request by using a comma-separated list of model
IDs, deployment IDs, or a wildcard expression.


[[ml-get-trained-models-stats-path-params]]
== {api-path-parms-title}

`<model_id>`::
`<model_id_or_deployment_id>`::
(Optional, string)
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id-or-alias]

The unique identifier of the model or the deployment. If a model has multiple
deployments, and the ID of one of the deployments matches the model ID, then the
model ID takes precedence; the results are returned for all deployments of the
model.
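
As a sketch, assuming a trained model with the hypothetical ID `my_model` that
has one or more deployments, the following request returns usage information
for all of its deployments:

[source,console]
--------------------------------------------------
GET _ml/trained_models/my_model/_stats
--------------------------------------------------
// TEST[skip:TBD]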

[[ml-get-trained-models-stats-query-params]]
== {api-query-parms-title}
@@ -116,6 +119,9 @@ The detailed allocation state related to the nodes.
The desired number of nodes for model allocation.
======

`deployment_id`:::
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=deployment-id]

`error_count`:::
(integer)
The sum of `error_count` for all nodes in the deployment.
@@ -16,6 +16,7 @@ directly from the {infer} cache.
== {api-request-title}

`POST _ml/trained_models/<model_id>/_infer`
`POST _ml/trained_models/<deployment_id>/_infer`

////
[[infer-trained-model-prereq]]
@@ -32,8 +33,15 @@ directly from the {infer} cache.
== {api-path-parms-title}

`<model_id>`::
(Required, string)
(Optional, string)
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id-or-alias]
If you specify the `model_id` in the API call, and the model has multiple
deployments, a random deployment will be used. If the `model_id` matches the ID
of one of the deployments, that deployment will be used.
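
For example, a request against a deployment with the hypothetical ID
`my_model_for_search` might look like the following; the input field name
(`text_field` here) depends on the model's configured input:

[source,console]
--------------------------------------------------
POST _ml/trained_models/my_model_for_search/_infer
{
  "docs": [
    {
      "text_field": "The quick brown fox jumps over the lazy dog"
    }
  ]
}
--------------------------------------------------
// TEST[skip:TBD]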

`<deployment_id>`::
(Optional, string)
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=deployment-id]

[[infer-trained-model-query-params]]
== {api-query-parms-title}
@@ -35,8 +35,7 @@ An alias must be unique and refer to only a single trained model. However,
you can have multiple aliases for each trained model.

API Restrictions:
+
--

* You are not allowed to update an alias such that it references a different
trained model ID and the model uses a different type of {dfanalytics}. For example,
this situation occurs if you have a trained model for
@@ -45,7 +44,6 @@ alias from one type of trained model to another.
* You cannot update an alias between a `pytorch` model and a {dfanalytics} model.
* You cannot update the alias from a deployed `pytorch` model to one
not currently deployed.
--

If you use this API to update an alias and there are very few input fields in
common between the old and new trained models for the model alias, the API
@@ -25,6 +25,11 @@ Currently only `pytorch` models are supported for deployment. Once deployed
the model can be used by the <<inference-processor,{infer-cap} processor>>
in an ingest pipeline or directly in the <<infer-trained-model>> API.

A model can be deployed multiple times by using deployment IDs. A deployment ID
must be unique and should not match any other deployment ID or model ID, unless
it is the same as the ID of the model being deployed. If `deployment_id` is not
set, it defaults to the `model_id`.

Scaling inference performance can be achieved by setting the parameters
`number_of_allocations` and `threads_per_allocation`.

@@ -60,6 +65,11 @@ model. The default value is the size of the model as reported by the
`model_size_bytes` field in the <<get-trained-models-stats>>. To disable the
cache, `0b` can be provided.

`deployment_id`::
(Optional, string)
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=deployment-id]
Defaults to `model_id`.

`number_of_allocations`::
(Optional, integer)
The total number of allocations this model is assigned across {ml} nodes.
@@ -150,3 +160,25 @@ The API returns the following results:
}
}
----


[[start-trained-model-deployment-deployment-id-example]]
=== Using deployment IDs

The following example starts a new deployment for the `my_model` trained model
with the ID `my_model_for_ingest`. The deployment ID can be used in {infer} API
calls or in {infer} processors.

[source,console]
--------------------------------------------------
POST _ml/trained_models/my_model/deployment/_start?deployment_id=my_model_for_ingest
--------------------------------------------------
// TEST[skip:TBD]

The `my_model` trained model can be deployed again with a different ID:

[source,console]
--------------------------------------------------
POST _ml/trained_models/my_model/deployment/_start?deployment_id=my_model_for_search
--------------------------------------------------
// TEST[skip:TBD]
@@ -11,7 +11,7 @@ Stops a trained model deployment.
[[stop-trained-model-deployment-request]]
== {api-request-title}

`POST _ml/trained_models/<model_id>/deployment/_stop`
`POST _ml/trained_models/<deployment_id>/deployment/_stop`

[[stop-trained-model-deployment-prereq]]
== {api-prereq-title}
@@ -27,9 +27,9 @@ Deployment is required only for trained models that have a PyTorch `model_type`.
[[stop-trained-model-deployment-path-params]]
== {api-path-parms-title}

`<model_id>`::
`<deployment_id>`::
(Required, string)
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=deployment-id]


[[stop-trained-model-deployment-query-params]]
@@ -40,9 +40,9 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-deployments]

`force`::
(Optional, Boolean) If true, the deployment is stopped even if it or one of its model aliases
is referenced by ingest pipelines. You can't use these pipelines until you restart the model
deployment.
(Optional, Boolean) If true, the deployment is stopped even if it or one of its
model aliases is referenced by ingest pipelines. You can't use these pipelines
until you restart the model deployment.

////
[role="child_attributes"]
@@ -55,7 +55,12 @@ deployment.
== {api-response-codes-title}
////

////
[[stop-trained-model-deployment-example]]
== {api-examples-title}
////

The following example stops the `my_model_for_search` deployment:

[source,console]
--------------------------------------------------
POST _ml/trained_models/my_model_for_search/deployment/_stop
--------------------------------------------------
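
If the deployment is still referenced by an ingest pipeline, the `force` query
parameter can be added to stop it anyway; note that such pipelines cannot be
used until the deployment is restarted:

[source,console]
--------------------------------------------------
POST _ml/trained_models/my_model_for_search/deployment/_stop?force=true
--------------------------------------------------
// TEST[skip:TBD]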
@@ -14,7 +14,7 @@ beta::[]
[[update-trained-model-deployment-request]]
== {api-request-title}

`POST _ml/trained_models/<model_id>/deployment/_update`
`POST _ml/trained_models/<deployment_id>/deployment/_update`


[[update-trained-model-deployments-prereqs]]
@@ -32,9 +32,9 @@ You can either increase or decrease the number of allocations of such a deployment.
[[update-trained-model-deployments-path-parms]]
== {api-path-parms-title}

`<model_id>`::
`<deployment_id>`::
(Required, string)
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=deployment-id]
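
For example, assuming a deployment with the hypothetical ID
`my_model_for_search`, its number of allocations could be updated as follows:

[source,console]
--------------------------------------------------
POST _ml/trained_models/my_model_for_search/deployment/_update
{
  "number_of_allocations": 4
}
--------------------------------------------------
// TEST[skip:TBD]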

[[update-trained-model-deployment-request-body]]
== {api-request-body-title}