From 886736c2f42ff079dc35ef7307fd7e5239139e67 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 13 Jun 2025 21:56:20 +0100 Subject: [PATCH] Fix model settings propagation when merging semantic text fields (#129438) Ensure that model settings are correctly set during mapping merges. While this is not an issue currently, since the underlying embedding field is not customizable, this fix is required for correct behavior in #119967. --- .../mapper/SemanticTextFieldMapper.java | 81 +++++++++++-------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 280241b5f240b..37974a044e75f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -212,7 +212,6 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); - private MinimalServiceSettings resolvedModelSettings; private Function inferenceFieldBuilder; public static Builder from(SemanticTextFieldMapper mapper) { @@ -235,14 +234,18 @@ public Builder( super(name); this.modelRegistry = modelRegistry; this.useLegacyFormat = InferenceMetadataFieldsMapper.isEnabled(indexSettings.getSettings()) == false; - this.inferenceFieldBuilder = c -> createInferenceField( - c, - indexSettings.getIndexVersionCreated(), - useLegacyFormat, - modelSettings.get(), - bitSetProducer, - indexSettings - ); + this.inferenceFieldBuilder = c -> { + // Resolve the model setting from the registry if it has not been set yet. + var resolvedModelSettings = modelSettings.get() != null ? modelSettings.get() : getResolvedModelSettings(c, false); + return createInferenceField( + c, + indexSettings.getIndexVersionCreated(), + useLegacyFormat, + resolvedModelSettings, + bitSetProducer, + indexSettings + ); + }; } public Builder setInferenceId(String id) { @@ -283,29 +286,26 @@ protected void merge(FieldMapper mergeWith, Conflicts conflicts, MapperMergeCont inferenceFieldBuilder = c -> mergedInferenceField; } - @Override - public SemanticTextFieldMapper build(MapperBuilderContext context) { - if (useLegacyFormat && copyTo.copyToFields().isEmpty() == false) { - throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support [copy_to]"); - } - if (useLegacyFormat && multiFieldsBuilder.hasMultiFields()) { - throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support multi-fields"); + /** + * Returns the {@link MinimalServiceSettings} defined in this builder if set; + * otherwise, resolves and returns the settings from the registry. + */ + private MinimalServiceSettings getResolvedModelSettings(MapperBuilderContext context, boolean logWarning) { + if (context.getMergeReason() == MapperService.MergeReason.MAPPING_RECOVERY) { + // the model registry is not available yet + return null; } - - if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && modelSettings.get() == null) { - try { - /* - * If the model is not already set and we are not in a recovery scenario, resolve it using the registry. - * Note: We do not set the model in the mapping at this stage. Instead, the model will be added through - * a mapping update during the first ingestion. - * This approach allows mappings to reference inference endpoints that may not yet exist. - * The only requirement is that the referenced inference endpoint must be available at the time of ingestion. - */ - resolvedModelSettings = modelRegistry.getMinimalServiceSettings(inferenceId.get()); - if (resolvedModelSettings != null) { - validateServiceSettings(resolvedModelSettings, null); - } - } catch (ResourceNotFoundException exc) { + try { + /* + * If the model is not already set and we are not in a recovery scenario, resolve it using the registry. + * Note: We do not set the model in the mapping at this stage. Instead, the model will be added through + * a mapping update during the first ingestion. + * This approach allows mappings to reference inference endpoints that may not yet exist. + * The only requirement is that the referenced inference endpoint must be available at the time of ingestion. + */ + return modelRegistry.getMinimalServiceSettings(inferenceId.get()); + } catch (ResourceNotFoundException exc) { + if (logWarning) { /* We allow the inference ID to be unregistered at this point. * This will delay the creation of sub-fields, so indexing and querying for this field won't work * until the corresponding inference endpoint is created. @@ -318,8 +318,22 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { inferenceId.get() ); } - } else { - resolvedModelSettings = modelSettings.get(); + return null; + } + } + + @Override + public SemanticTextFieldMapper build(MapperBuilderContext context) { + if (useLegacyFormat && copyTo.copyToFields().isEmpty() == false) { + throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support [copy_to]"); + } + if (useLegacyFormat && multiFieldsBuilder.hasMultiFields()) { + throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support multi-fields"); + } + + var resolvedModelSettings = modelSettings.get(); + if (modelSettings.get() == null) { + resolvedModelSettings = getResolvedModelSettings(context, true); } if (modelSettings.get() != null) { @@ -366,7 +380,6 @@ private void validateServiceSettings(MinimalServiceSettings settings, MinimalSer + settings.taskType().name() ); } - if (resolved != null && settings.canMergeWith(resolved) == false) { throw new IllegalArgumentException( "Mismatch between provided and registered inference model settings. "