From 7c141633c912bd563794bbe798b02562fdb44e44 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Wed, 12 Mar 2025 08:22:50 -0700 Subject: [PATCH] Add OpenAI inference details (#3929) (cherry picked from commit 5fda5989d5c4ab21850ee1682d795c3e747c133b) --- output/openapi/elasticsearch-openapi.json | 175 ++++++++- .../elasticsearch-serverless-openapi.json | 175 ++++++++- output/schema/schema.json | 336 +++++++++++++++++- output/typescript/types.ts | 30 ++ specification/_doc_ids/table.csv | 4 + .../_json_spec/inference.put.openai.json | 35 ++ specification/inference/_types/Services.ts | 19 +- .../inference/put_openai/PutOpenAiRequest.ts | 144 ++++++++ .../inference/put_openai/PutOpenAiResponse.ts | 24 ++ .../request/PutOpenAiRequestExample1.yaml | 13 + .../request/PutOpenAiRequestExample2.yaml | 12 + .../put_watsonx/PutWatsonxRequest.ts | 2 +- 12 files changed, 920 insertions(+), 49 deletions(-) create mode 100644 specification/_json_spec/inference.put.openai.json create mode 100644 specification/inference/put_openai/PutOpenAiRequest.ts create mode 100644 specification/inference/put_openai/PutOpenAiResponse.ts create mode 100644 specification/inference/put_openai/examples/request/PutOpenAiRequestExample1.yaml create mode 100644 specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 67c87ae3ae..63623cb0e3 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17718,13 +17718,99 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{openai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-openai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_openai:OpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "openai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_openai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_openai:OpenAIServiceSettings" + }, + "task_settings": { + 
"$ref": "#/components/schemas/inference.put_openai:OpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. The embeddings created by requests to this endpoint will have 128 dimensions.", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" + }, + "PutOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{watsonx_inference_id}": { "put": { "tags": [ "inference" ], "summary": "Create a Watsonx inference endpoint", - "description": "Creates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "operationId": "inference-put-watsonx", "parameters": [ { @@ -76718,19 +76804,19 @@ "type": "object", "properties": { "max_chunk_size": { - "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "description": "The maximum size of a chunk in words.\nThis value 
cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).", "type": "number" }, "overlap": { - "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "description": "The number of overlapping words for chunks.\nIt is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.", "type": "number" }, "sentence_overlap": { - "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "description": "The number of overlapping sentences for chunks.\nIt is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.", "type": "number" }, "strategy": { - "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "description": "The chunking strategy: `sentence` or `word`.", "type": "string" } } @@ -76740,6 +76826,76 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_openai:OpenAITaskType": { + "type": "string", + "enum": [ + "chat_completion", + "completion", + "text_embedding" + ] + }, + "inference.put_openai:ServiceType": { + "type": "string", + "enum": [ + "openai" + ] + }, + "inference.put_openai:OpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://platform.openai.com/api-keys" + }, + "description": "A valid API key of your OpenAI account.\nYou can find your OpenAI API keys in your OpenAI account under the API keys section.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "dimensions": { + "description": "The number of dimensions the resulting output embeddings should have.\nIt is supported only in `text-embedding-3` and later models.\nIf it is not set, the OpenAI defined default for the model is used.", + "type": "number" + }, + "model_id": { + "externalDocs": { + "url": "https://platform.openai.com/docs/guides/embeddings/what-are-embeddings" + }, + "description": "The name of the model to use for the inference task.\nRefer to the OpenAI documentation for the list of available text embedding models.", + "type": "string" + }, + "organization_id": { + "description": "The unique identifier for your organization.\nYou can find the Organization ID in your OpenAI account under *Settings > Organizations*.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL endpoint to use for the requests.\nIt can be changed for testing purposes.", + "type": "string" + } + }, + "required": [ + "api_key", + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_openai:OpenAITaskSettings": { + "type": "object", + "properties": { + "user": { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be
used for abuse detection.", + "type": "string" + } + } + }, "inference.put_watsonx:WatsonxTaskType": { "type": "string", "enum": [ @@ -76796,15 +76952,6 @@ "url" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference._types:RerankedInferenceResult": { "type": "object", "properties": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 10789e1afe..316adbddfa 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9689,13 +9689,99 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{openai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-openai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_openai:OpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "openai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_openai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_openai:OpenAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_openai:OpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 128 dimensions.", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" + }, + "PutOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{watsonx_inference_id}": { "put": { "tags": [ "inference" ], "summary": "Create a Watsonx inference endpoint", - "description": "Creates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "operationId": "inference-put-watsonx", "parameters": [ { @@ -48053,19 +48139,19 @@ "type": "object", "properties": { "max_chunk_size": { - "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "description": "The maximum size of a chunk in words.\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).", "type": "number" }, "overlap": { - "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "description": "The number of overlapping words for 
chunks.\nIt is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.", "type": "number" }, "sentence_overlap": { - "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "description": "The number of overlapping sentences for chunks.\nIt is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.", "type": "number" }, "strategy": { - "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "description": "The chunking strategy: `sentence` or `word`.", "type": "string" } } @@ -48075,6 +48161,76 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_openai:OpenAITaskType": { + "type": "string", + "enum": [ + "chat_completion", + "completion", + "text_embedding" + ] + }, + "inference.put_openai:ServiceType": { + "type": "string", + "enum": [ + "openai" + ] + }, + "inference.put_openai:OpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://platform.openai.com/api-keys" + }, + "description": "A valid API key of your OpenAI account.\nYou can find your OpenAI API keys in your OpenAI account under the API keys section.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "dimensions": { + "description": "The number of dimensions the resulting output embeddings should have.\nIt is supported only in `text-embedding-3` and later models.\nIf it is not set, the OpenAI defined default for the model is used.", + "type": "number" + }, + "model_id": { + "externalDocs": { + "url": "https://platform.openai.com/docs/guides/embeddings/what-are-embeddings" + }, + "description": "The name of the model to use for the inference task.\nRefer to the OpenAI documentation for the list of available text embedding models.", + "type": "string" + }, + "organization_id": { + "description": "The unique identifier for your organization.\nYou can find the Organization ID in your OpenAI account under *Settings > Organizations*.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL endpoint to use for the requests.\nIt can be changed for testing purposes.", + "type": "string" + } + }, + "required": [ + "api_key", + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_openai:OpenAITaskSettings": { + "type": "object", + "properties": { + "user": { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_watsonx:WatsonxTaskType": { "type": "string", "enum": [ @@ -48131,15 +48287,6 @@ "url" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - }
- }, "inference._types:RerankedInferenceResult": { "type": "object", "properties": { diff --git a/output/schema/schema.json b/output/schema/schema.json index f406840378..9f2db386d2 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9306,6 +9306,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-openai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html", + "name": "inference.put_openai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_openai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_openai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{openai_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -9318,7 +9363,7 @@ "visibility": "public" } }, - "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Watsonx inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": 
\"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-watsonx", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html", "name": "inference.put_watsonx", @@ -148437,7 +148482,7 @@ }, "properties": [ { - "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "description": "The maximum size of a chunk in words.\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).", "name": "max_chunk_size", "required": false, "serverDefault": 250, @@ -148450,7 +148495,7 @@ } }, { - "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "description": "The number of overlapping words for chunks.\nIt is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.", "name": "overlap", "required": false, "serverDefault": 100, @@ -148463,7 +148508,7 @@ } }, { - "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "description": "The number of overlapping sentences for chunks.\nIt is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.", "name": "sentence_overlap", "required": false, "serverDefault": 1, @@ -148476,7 +148521,7 @@ } }, { - "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "description": "The chunking strategy: `sentence` or `word`.", "name": "strategy", "required": false, "serverDefault": "sentence", @@ -148489,7 +148534,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L60-L90" + "specLocation": "inference/_types/Services.ts#L60-L89" }, { "kind": "interface", @@ -148655,7 +148700,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L96-L101" + "specLocation": "inference/_types/Services.ts#L95-L100" }, { "kind": "interface", @@ -148688,7 +148733,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L92-L92", + "specLocation": "inference/_types/Services.ts#L91-L91", "type": { "kind": "user_defined_value" } @@ -148772,7 +148817,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L94-L94", + "specLocation": "inference/_types/Services.ts#L93-L93", "type": { "kind": "user_defined_value" } @@ -149806,6 +149851,277 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "name": { + "name": "OpenAIServiceSettings", + "namespace": "inference.put_openai" + }, + "properties": [ + { + "description": "A valid API key of your OpenAI account.\nYou can find your OpenAI API keys in your OpenAI account under the API keys section.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and 
the updated API key.", + "extDocId": "openai-api-keys", + "extDocUrl": "https://platform.openai.com/api-keys", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The number of dimensions the resulting output embeddings should have.\nIt is supported only in `text-embedding-3` and later models.\nIf it is not set, the OpenAI defined default for the model is used.", + "name": "dimensions", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the OpenAI documentation for the list of available text embedding models.", + "extDocId": "openai-models", + "extDocUrl": "https://platform.openai.com/docs/guides/embeddings/what-are-embeddings", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The unique identifier for your organization.\nYou can find the Organization ID in your OpenAI account under *Settings > Organizations*.", + "name": "organization_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from OpenAI.\nThe `openai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `3000`.\nFor `completion`, it is set to `500`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL endpoint to use for the requests.\nIt can be changed for testing purposes.", + "name": "url", + "required": false, + "serverDefault": "https://api.openai.com/v1/embeddings", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L94-L136" + }, + { + "kind": "interface", + "name": { + "name": "OpenAITaskSettings", + "namespace": "inference.put_openai" + }, + "properties": [ + { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L138-L144" + }, + { + "kind": "enum", + "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "OpenAITaskType", + "namespace": "inference.put_openai" + }, + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L84-L88" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name":
"InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `openai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_openai" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenAIServiceSettings", + "namespace": "inference.put_openai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenAITaskSettings", + "namespace": "inference.put_openai" + } + } + } + ] + }, + "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 128 dimensions.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" + }, + "PutOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_openai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenAITaskType", + "namespace": "inference.put_openai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "openai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L28-L82" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_openai" + }, + "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "openai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_openai" + }, + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L90-L92" + }, { "kind": "request", "attachedBehaviors": [ @@ -149840,7 +150156,7 @@ } ] }, - "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Watsonx inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically 
deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { "InferenceRequestExample1": { "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task.", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index e2903b2d45..32b39ffc9f 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13219,6 +13219,36 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutOpenaiOpenAIServiceSettings { + api_key: string + dimensions?: integer + model_id: string + organization_id?: string + rate_limit?: InferenceRateLimitSetting + url?: string +} + +export interface InferencePutOpenaiOpenAITaskSettings { + user?: string +} + +export type InferencePutOpenaiOpenAITaskType = 'chat_completion' | 'completion' | 'text_embedding' + +export interface InferencePutOpenaiRequest extends RequestBase { + task_type: InferencePutOpenaiOpenAITaskType + openai_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutOpenaiServiceType + service_settings: InferencePutOpenaiOpenAIServiceSettings + task_settings?: InferencePutOpenaiOpenAITaskSettings + } +} + +export type InferencePutOpenaiResponse = InferenceInferenceEndpointInfo + +export type InferencePutOpenaiServiceType = 'openai' + export interface InferencePutWatsonxRequest extends RequestBase { task_type: InferencePutWatsonxWatsonxTaskType watsonx_inference_id: Id diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index d25b687cf7..b4ea1b4724 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -318,10 +318,12 @@ inference-api-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/delete-inference-api.html inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-inference-api.html inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html +inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html inference-api-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html inference-api-update,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html +inference-chunking,https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-processor.html info-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/info-api.html
ingest,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ingest.html @@ -436,6 +438,8 @@ node-roles,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/modu nodes-api-shutdown-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/delete-shutdown.html nodes-api-shutdown-status,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-shutdown.html nodes-api-shutdown,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-shutdown.html +openai-api-keys,https://platform.openai.com/api-keys +openai-models,https://platform.openai.com/docs/guides/embeddings/what-are-embeddings optimistic-concurrency,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/optimistic-concurrency-control.html paginate-search-results,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/paginate-search-results.html painless-contexts,https://www.elastic.co/guide/en/elasticsearch/painless/{branch}/painless-contexts.html diff --git a/specification/_json_spec/inference.put.openai.json b/specification/_json_spec/inference.put.openai.json new file mode 100644 index 0000000000..5405206c8c --- /dev/null +++ b/specification/_json_spec/inference.put.openai.json @@ -0,0 +1,35 @@ +{ + "inference.put_openai": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html", + "description": "Configure an OpenAI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{openai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "openai_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index f6d6a66ac1..a5c72d352f 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -62,28 +62,27 @@ export class InferenceEndpointInfo extends InferenceEndpoint { */ export class InferenceChunkingSettings extends InferenceEndpoint { /** - * Specifies the maximum size of a chunk in words - * This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy) + * The maximum size of a chunk in words. + * This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy). * @server_default 250 */ max_chunk_size?: integer /** - * Specifies the number of overlapping words for chunks - * Only for `word` chunking strategy - * This value cannot be higher than the half of `max_chunk_size` + * The number of overlapping words for chunks. + * It is applicable only to a `word` chunking strategy. + * This value cannot be higher than half the `max_chunk_size` value. * @server_default 100 */ overlap?: integer /** - * Specifies the number of overlapping sentences for chunks - * Only for `sentence` chunking strategy - * It can be either `1` or `0` + * The number of overlapping sentences for chunks. + * It is applicable only for a `sentence` chunking strategy. + * It can be either `1` or `0`. 
* @server_default 1 */ sentence_overlap?: integer /** - * Specifies the chunking strategy - * It could be either `sentence` or `word` + * The chunking strategy: `sentence` or `word`. * @server_default sentence */ strategy?: string diff --git a/specification/inference/put_openai/PutOpenAiRequest.ts b/specification/inference/put_openai/PutOpenAiRequest.ts new file mode 100644 index 0000000000..886905600e --- /dev/null +++ b/specification/inference/put_openai/PutOpenAiRequest.ts @@ -0,0 +1,144 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { integer } from '@_types/Numeric' + +/** + * Create an OpenAI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `openai` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_openai + * @availability stack since=8.12.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-openai + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{openai_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API. + */ + task_type: OpenAITaskType + /** + * The unique identifier of the inference endpoint. + */ + openai_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `openai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `openai` service. + */ + service_settings: OpenAIServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. 
+ */ + task_settings?: OpenAITaskSettings + } +} + +export enum OpenAITaskType { + chat_completion, + completion, + text_embedding +} + +export enum ServiceType { + openai +} + +export class OpenAIServiceSettings { + /** + * A valid API key of your OpenAI account. + * You can find your OpenAI API keys in your OpenAI account under the API keys section. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id openai-api-keys + */ + api_key: string + /** + * The number of dimensions the resulting output embeddings should have. + * It is supported only in `text-embedding-3` and later models. + * If it is not set, the OpenAI defined default for the model is used. + */ + dimensions?: integer + /** + * The name of the model to use for the inference task. + * Refer to the OpenAI documentation for the list of available text embedding models. + * @ext_doc_id openai-models + */ + model_id: string + /** + * The unique identifier for your organization. + * You can find the Organization ID in your OpenAI account under *Settings > Organizations*. + */ + organization_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from OpenAI. + * The `openai` service sets a default number of requests allowed per minute depending on the task type. + * For `text_embedding`, it is set to `3000`. + * For `completion`, it is set to `500`. + */ + rate_limit?: RateLimitSetting + /** + * The URL endpoint to use for the requests. + * It can be changed for testing purposes. + * @server_default https://api.openai.com/v1/embeddings + */ + url?: string +} + +export class OpenAITaskSettings { + /** + * For a `completion` or `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. + */ + user?: string +} diff --git a/specification/inference/put_openai/PutOpenAiResponse.ts b/specification/inference/put_openai/PutOpenAiResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_openai/PutOpenAiResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_openai/examples/request/PutOpenAiRequestExample1.yaml b/specification/inference/put_openai/examples/request/PutOpenAiRequestExample1.yaml new file mode 100644 index 0000000000..5b1d6932a7 --- /dev/null +++ b/specification/inference/put_openai/examples/request/PutOpenAiRequestExample1.yaml @@ -0,0 +1,13 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. The embeddings created by requests to this endpoint will have 128 dimensions. +# method_request: "PUT _inference/text_embedding/openai-embeddings" +# type: "request" +value: |- + { + "service": "openai", + "service_settings": { + "api_key": "OpenAI-API-Key", + "model_id": "text-embedding-3-small", + "dimensions": 128 + } + } diff --git a/specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml b/specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml new file mode 100644 index 0000000000..d21fd0d2aa --- /dev/null +++ b/specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml @@ -0,0 +1,12 @@ +summary: A completion task +description: Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type. +# method_request: "PUT _inference/completion/openai-completion" +# type: "request" +value: |- + { + "service": "openai", + "service_settings": { + "api_key": "OpenAI-API-Key", + "model_id": "gpt-3.5-turbo" + } + } diff --git a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts index 92b0eedb59..2721524316 100644 --- a/specification/inference/put_watsonx/PutWatsonxRequest.ts +++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts @@ -24,7 +24,7 @@ import { Id } from '@_types/common' /** * Create a Watsonx inference endpoint. * - * Creates an inference endpoint to perform an inference task with the `watsonxai` service. + * Create an inference endpoint to perform an inference task with the `watsonxai` service. * You need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service. * You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform. *
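Reviewer note: as a sanity check on the generated TypeScript surface, the sketch below shows how the new declarations from output/typescript/types.ts compose into a request value, mirroring PutOpenAiRequestExample1. It assumes those declarations (and the pre-existing Id, integer, and InferenceRateLimitSetting aliases they reference) are in scope; the endpoint identifier, placeholder API key, rate limit, and user values are illustrative only and are not part of this patch.

// A typed request for the new inference.put_openai API, equivalent to
// `PUT _inference/text_embedding/openai-embeddings` in PutOpenAiRequestExample1.
const request: InferencePutOpenaiRequest = {
  task_type: 'text_embedding',              // 'chat_completion' | 'completion' | 'text_embedding'
  openai_inference_id: 'openai-embeddings', // Id: the unique identifier of the inference endpoint
  body: {
    service: 'openai',                      // the only InferencePutOpenaiServiceType value
    service_settings: {
      api_key: 'OpenAI-API-Key',            // required; cannot be retrieved or changed after creation
      model_id: 'text-embedding-3-small',   // required
      dimensions: 128,                      // optional; supported by text-embedding-3 and later models
      rate_limit: { requests_per_minute: 3000 } // optional; 3000 is the documented text_embedding default
    },
    task_settings: {
      user: 'example-user'                  // optional, hypothetical value; can help OpenAI abuse detection
    }
  }
}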