Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
0fb460a
Starting completion model
jonathan-buttner Dec 6, 2024
8ef932a
Adding model
jonathan-buttner Dec 6, 2024
bb97600
initial implementation of request and response handling, manager, and…
Dec 9, 2024
31f3f2c
Working response from openai
jonathan-buttner Dec 9, 2024
7213932
Update docs/changelog/118301.yaml
jonathan-buttner Dec 9, 2024
ae9dbf7
Fixing comment
jonathan-buttner Dec 10, 2024
147ba77
Adding some initial tests
jonathan-buttner Dec 11, 2024
9d4e02e
Moving tests around
jonathan-buttner Dec 11, 2024
9f78b40
Address some TODOs
jaybcee Dec 18, 2024
b09b3f5
Remove a TODO
jaybcee Dec 18, 2024
b92724b
[CI] Auto commit changes from spotless
Dec 18, 2024
25ab348
Delete docs/changelog/118301.yaml
jaybcee Dec 19, 2024
7168dc6
Rename EISUnifiedChatCompletionResponseHandler
jaybcee Dec 19, 2024
91daa01
Renames to ElasticInferenceServiceUnifiedCompletionRequestManager
jaybcee Dec 19, 2024
7842b6c
Renames EISUnifiedChatCompletionRequest
jaybcee Dec 19, 2024
7f44d04
Renames and comments
jaybcee Dec 19, 2024
afc8ebc
propagateTraceContext extraction
jaybcee Dec 19, 2024
346ceba
Clean up trace
jaybcee Dec 19, 2024
a206fab
Address comments
jaybcee Dec 23, 2024
cef606d
Update x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/…
jaybcee Jan 7, 2025
a5e91d9
Update x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/…
jaybcee Jan 7, 2025
6933c49
[CI] Auto commit changes from spotless
Jan 7, 2025
31fe29c
Address comments
jaybcee Jan 7, 2025
8ebe833
add default endpoint for EIS completion
Jan 7, 2025
1be3aca
Avoid using immutable map for constructing EISCompletionModel
Jan 7, 2025
0278776
Update docs/changelog/119694.yaml
maxhniebergall Jan 7, 2025
eeb12b3
actually include service settings
Jan 7, 2025
a619561
[CI] Auto commit changes from spotless
Jan 7, 2025
f9e6b7c
Update changemessage
maxhniebergall Jan 7, 2025
59c8791
match ElasticsearchInternalService implementation of defaults
Jan 8, 2025
5d430eb
Update tests
Jan 8, 2025
ad8c7ab
[CI] Auto commit changes from spotless
Jan 8, 2025
84b654c
[CI] Auto commit changes from spotless
Jan 8, 2025
1687bba
update model name constant
Jan 9, 2025
ef802b0
[CI] Auto commit changes from spotless
Jan 9, 2025
986654b
fix merge conflicts
Jan 9, 2025
1d79df7
remove uncessary comment
Jan 9, 2025
fa74489
remove todo
Jan 9, 2025
5b1a509
Replace local constant with class variable
Jan 9, 2025
1cd60f6
Delete docs/changelog/119694.yaml
maxhniebergall Jan 10, 2025
5a74d97
Merge branch 'main' into defaultEISEndpoint
jonathan-buttner Jan 10, 2025
8657c09
Adding the model id name and refactoring
jonathan-buttner Jan 13, 2025
69868af
Merge branch 'main' of github.com:elastic/elasticsearch into defaultE…
jonathan-buttner Jan 13, 2025
b06af9b
Refactoring so we only create the model once
jonathan-buttner Jan 13, 2025
c13fc53
Merge branch 'main' into defaultEISEndpoint
jonathan-buttner Jan 13, 2025
cb219e6
Merge branch 'main' into defaultEISEndpoint
jonathan-buttner Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import java.util.stream.Stream;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.equalToIgnoringCase;
import static org.hamcrest.Matchers.hasSize;
Expand All @@ -58,7 +57,7 @@ public void testCRUD() throws IOException {
}

var getAllModels = getAllModels();
int numModels = 12;
int numModels = 13;
assertThat(getAllModels, hasSize(numModels));

var getSparseModels = getModels("_all", TaskType.SPARSE_EMBEDDING);
Expand Down Expand Up @@ -553,8 +552,8 @@ private static String expectedResult(String input) {
}
}

public void testGetZeroModels() throws IOException {
public void testGetCompletionModels() throws IOException {
var models = getModels("_all", TaskType.COMPLETION);
assertThat(models, empty());
assertEquals(models.size(), 1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.inference.ChunkedInference;
import org.elasticsearch.inference.EmptySecretSettings;
import org.elasticsearch.inference.EmptyTaskSettings;
import org.elasticsearch.inference.InferenceServiceConfiguration;
import org.elasticsearch.inference.InferenceServiceResults;
import org.elasticsearch.inference.InputType;
Expand All @@ -42,6 +44,7 @@
import org.elasticsearch.xpack.inference.services.SenderService;
import org.elasticsearch.xpack.inference.services.ServiceComponents;
import org.elasticsearch.xpack.inference.services.elastic.completion.ElasticInferenceServiceCompletionModel;
import org.elasticsearch.xpack.inference.services.elastic.completion.ElasticInferenceServiceCompletionServiceSettings;
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
import org.elasticsearch.xpack.inference.telemetry.TraceContext;

Expand All @@ -67,10 +70,14 @@ public class ElasticInferenceService extends SenderService {
public static final String NAME = "elastic";
public static final String ELASTIC_INFERENCE_SERVICE_IDENTIFIER = "Elastic Inference Service";

private final ElasticInferenceServiceComponents elasticInferenceServiceComponents;

private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.COMPLETION);
private static final String SERVICE_NAME = "Elastic";
private static final String DEFAULT_EIS_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";
private static final String DEFAULT_EIS_CHAT_COMPLETION_ENDPOINT_ID_V1 = ".eis-alpha-1";
Copy link
Member

@joshdevins joshdevins Jan 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we name this the same way we name other default endpoints? I think we settled on modelId-providerName which would mean: .rainbow-sprinkles-elastic?

private static final Set<String> DEFAULT_EIS_ENDPOINT_IDS = Set.of(DEFAULT_EIS_CHAT_COMPLETION_ENDPOINT_ID_V1);

private final ElasticInferenceServiceComponents elasticInferenceServiceComponents;
private final List<Model> defaultEndpoints;

public ElasticInferenceService(
HttpRequestSender.Factory factory,
Expand All @@ -79,6 +86,23 @@ public ElasticInferenceService(
) {
super(factory, serviceComponents);
this.elasticInferenceServiceComponents = elasticInferenceServiceComponents;
this.defaultEndpoints = initDefaultEndpoints();
}

private List<Model> initDefaultEndpoints() {
    // Exactly one default endpoint is preconfigured: the v1 chat completion model.
    final Model completionDefault = v1DefaultCompletionModel();
    return List.of(completionDefault);
}

private ElasticInferenceServiceCompletionModel v1DefaultCompletionModel() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this initialization of the default endpoint conditional on the health check? We want to add an ACL in the gateway which will dynamically control if EIS is available or not. In the case that this customer does not have access to EIS, will it skip creating the default endpoint?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a great point. The current implementation does not consider that. I believe I also read that individual models might be available separately for different customers. Which means that we'd need EIS enabled for the customer, and this particular model.

@timgrein @maxjakob @jaybcee

Since it sounds like we're going to keep the information in memory to determine what is available to this cluster, I wonder if we could pass that information along via the ElasticInferenceServiceComponents in the constructor 🤔 . Although if we ever go with a caching approach then the default inference endpoints should attempt to be created on each call to retrieve the defaults 🤔

I think we should postpone merging this PR until the logic that determines whether EIS is available, and which models are available, is merged.

return new ElasticInferenceServiceCompletionModel(
DEFAULT_EIS_CHAT_COMPLETION_ENDPOINT_ID_V1,
TaskType.COMPLETION,
NAME,
new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_EIS_CHAT_COMPLETION_MODEL_ID_V1, null),
EmptyTaskSettings.INSTANCE,
EmptySecretSettings.INSTANCE,
elasticInferenceServiceComponents
);
}

@Override
Expand Down Expand Up @@ -175,6 +199,17 @@ public void parseRequestConfig(
Map<String, Object> config,
ActionListener<Model> parsedModelListener
) {
if (DEFAULT_EIS_ENDPOINT_IDS.contains(inferenceEntityId)) {
parsedModelListener.onFailure(
new ElasticsearchStatusException(
"[{}] is a reserved inference Id. Cannot create a new inference endpoint with a reserved Id",
RestStatus.BAD_REQUEST,
inferenceEntityId
)
);
return;
}

Comment on lines +202 to +212
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing wrong here, but I don't quite understand whats going on. Can you explain

  1. What this is loosely?
  2. How could this happen?
  3. What is the result of it happening? Does ES fail to boot?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah sure:

What this is loosely?

During a PUT request to persist a new inference endpoint, this code is checking to see if the inferenceEntityId that the user is requesting for the new endpoint matches one of the default ones we've reserved. We don't allow that because it could cause clashes and we wouldn't know how to handle inference requests against that inference endpoint.

How could this happen?

This can happen when a user is creating a new inference endpoint like this:

PUT http://localhost:9200/_inference/.eis-alpha-1
{
    "service": "elastic",
    "task_type": "completion",
    "service_settings": {
         ...
    }
}

It's protecting us from a user trying to use a reserved value for their inference endpoint id

What is the result of it happening? Does ES fail to boot?

The user's PUT request will result in a 400 error. I don't believe this code path would be executed during boot up.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! Let's move this check to TransportPutInferenceModelAction then the check will apply to all the services.

ModelRegistry knows about the default Ids via the addDefaultIds(defaultIds) function which is called by the InferencePlugin at node start up.

In TransportPutInferenceModelAction#masterOperation the same check can be made by querying ModelRegistry

try {
Map<String, Object> serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
Map<String, Object> taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
Expand Down Expand Up @@ -210,6 +245,16 @@ public EnumSet<TaskType> supportedTaskTypes() {
return supportedTaskTypes;
}

@Override
public List<DefaultConfigId> defaultConfigIds() {
    // Advertise the single reserved endpoint id so the registry knows about the
    // v1 chat completion default.
    var completionDefault = new DefaultConfigId(DEFAULT_EIS_CHAT_COMPLETION_ENDPOINT_ID_V1, TaskType.COMPLETION, this);
    return List.of(completionDefault);
}

@Override
public void defaultConfigs(ActionListener<List<Model>> defaultsListener) {
    // Respond immediately with the endpoint models built once in the constructor
    // (via initDefaultEndpoints); no remote lookup happens here.
    defaultsListener.onResponse(defaultEndpoints);
}

private static ElasticInferenceServiceModel createModel(
String inferenceEntityId,
TaskType taskType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public class ElasticInferenceServiceSettings {
public ElasticInferenceServiceSettings(Settings settings) {
    // Resolve both configured service URL settings from the node's settings.
    this.eisGatewayUrl = EIS_GATEWAY_URL.get(settings);
    this.elasticInferenceServiceUrl = ELASTIC_INFERENCE_SERVICE_URL.get(settings);
}

public static List<Setting<?>> getSettingsDefinitions() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public ElasticInferenceServiceCompletionModel(

}

ElasticInferenceServiceCompletionModel(
public ElasticInferenceServiceCompletionModel(
String inferenceEntityId,
TaskType taskType,
String service,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.common.ValidationException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.inference.ModelConfigurations;
import org.elasticsearch.inference.ServiceSettings;
import org.elasticsearch.xcontent.XContentBuilder;
Expand Down Expand Up @@ -60,7 +61,7 @@ public static ElasticInferenceServiceCompletionServiceSettings fromMap(Map<Strin
private final String modelId;
private final RateLimitSettings rateLimitSettings;

public ElasticInferenceServiceCompletionServiceSettings(String modelId, RateLimitSettings rateLimitSettings) {
/**
 * Creates service settings for an EIS completion endpoint.
 *
 * @param modelId the EIS model id this endpoint targets; must not be null
 * @param rateLimitSettings optional rate limit; DEFAULT_RATE_LIMIT_SETTINGS is used when null
 */
public ElasticInferenceServiceCompletionServiceSettings(String modelId, @Nullable RateLimitSettings rateLimitSettings) {
    this.modelId = Objects.requireNonNull(modelId);
    // Null-safe fallback to the service-wide default rate limit.
    this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
}
Expand Down
Loading