From 18c548d321329ae291d4f2683dbe40de2dc64252 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Sat, 16 Aug 2025 09:54:55 -0700 Subject: [PATCH 01/33] Replace transport version utils with a build service Several transport version build tasks have a need to access the repository-wide transport version resources. Thus far it has been done through several utility methods. This commit moves these utility methods into an encapsulated build service that is shared across the transport version tasks. The advantage is that no paths are used, the build service encapsulates access to the resources and understands internally how to find the correct filesystem path and load it. --- .../GenerateTransportVersionManifestTask.java | 19 +- .../transport/TransportVersionReference.java | 11 + .../TransportVersionReferencesPlugin.java | 9 +- .../TransportVersionResourcesPlugin.java | 14 +- .../TransportVersionResourcesService.java | 210 +++++++++++++++ .../transport/TransportVersionUtils.java | 56 ---- ...alidateTransportVersionReferencesTask.java | 22 +- ...ValidateTransportVersionResourcesTask.java | 239 ++++++------------ 8 files changed, 325 insertions(+), 255 deletions(-) create mode 100644 build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java delete mode 100644 build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionUtils.java diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/GenerateTransportVersionManifestTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/GenerateTransportVersionManifestTask.java index cb39a08a6aa44..153fab4723ac0 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/GenerateTransportVersionManifestTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/GenerateTransportVersionManifestTask.java @@ -10,10 
+10,14 @@ package org.elasticsearch.gradle.internal.transport; import org.gradle.api.DefaultTask; -import org.gradle.api.file.DirectoryProperty; import org.gradle.api.file.RegularFileProperty; +import org.gradle.api.provider.Property; +import org.gradle.api.services.ServiceReference; import org.gradle.api.tasks.InputDirectory; +import org.gradle.api.tasks.Optional; import org.gradle.api.tasks.OutputFile; +import org.gradle.api.tasks.PathSensitive; +import org.gradle.api.tasks.PathSensitivity; import org.gradle.api.tasks.TaskAction; import java.io.IOException; @@ -24,15 +28,24 @@ import java.nio.file.attribute.BasicFileAttributes; public abstract class GenerateTransportVersionManifestTask extends DefaultTask { + + @ServiceReference("transportVersionResources") + abstract Property getTransportResources(); + @InputDirectory - public abstract DirectoryProperty getDefinitionsDirectory(); + @Optional + @PathSensitive(PathSensitivity.RELATIVE) + public Path getDefinitionsDirectory() { + return getTransportResources().get().getDefinitionsDir(); + } @OutputFile public abstract RegularFileProperty getManifestFile(); @TaskAction public void generateTransportVersionManifest() throws IOException { - Path definitionsDir = getDefinitionsDirectory().get().getAsFile().toPath(); + + Path definitionsDir = getDefinitionsDirectory(); Path manifestFile = getManifestFile().get().getAsFile().toPath(); try (var writer = Files.newBufferedWriter(manifestFile)) { Files.walkFileTree(definitionsDir, new SimpleFileVisitor<>() { diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReference.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReference.java index 5c89b41db799d..f94f4fc6d9b6b 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReference.java +++ 
b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReference.java @@ -12,12 +12,15 @@ import org.gradle.api.attributes.Attribute; import org.gradle.api.attributes.AttributeContainer; +import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import static org.gradle.api.artifacts.type.ArtifactTypeDefinition.ARTIFACT_TYPE_ATTRIBUTE; @@ -43,6 +46,14 @@ static void addArtifactAttribute(AttributeContainer attributes) { attributes.attribute(REFERENCES_ATTRIBUTE, true); } + static Set collectNames(Iterable referencesFiles) throws IOException { + Set names = new HashSet<>(); + for (var referencesFile : referencesFiles) { + listFromFile(referencesFile.toPath()).stream().map(TransportVersionReference::name).forEach(names::add); + } + return names; + } + @Override public String toString() { return name + "," + location; diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java index 60012feac5da3..4850bd39bb242 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java @@ -13,15 +13,12 @@ import org.gradle.api.Plugin; import org.gradle.api.Project; import org.gradle.api.artifacts.Configuration; -import org.gradle.api.file.Directory; import org.gradle.api.tasks.SourceSet; import org.gradle.language.base.plugins.LifecycleBasePlugin; -import static org.elasticsearch.gradle.internal.transport.TransportVersionUtils.getDefinitionsDirectory; -import static 
org.elasticsearch.gradle.internal.transport.TransportVersionUtils.getResourcesDirectory; - public class TransportVersionReferencesPlugin implements Plugin { + @Override public void apply(Project project) { project.getPluginManager().apply(LifecycleBasePlugin.class); @@ -46,10 +43,6 @@ public void apply(Project project) { .register("validateTransportVersionReferences", ValidateTransportVersionReferencesTask.class, t -> { t.setGroup("Transport Versions"); t.setDescription("Validates that all TransportVersion references used in the project have an associated definition file"); - Directory definitionsDir = getDefinitionsDirectory(getResourcesDirectory(project)); - if (definitionsDir.getAsFile().exists()) { - t.getDefinitionsDirectory().set(definitionsDir); - } t.getReferencesFile().set(collectTask.get().getOutputFile()); }); project.getTasks().named(LifecycleBasePlugin.CHECK_TASK_NAME).configure(t -> t.dependsOn(validateTask)); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java index 699cd4294ce9e..958e69c3ad983 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java @@ -20,15 +20,18 @@ import java.util.Map; -import static org.elasticsearch.gradle.internal.transport.TransportVersionUtils.getDefinitionsDirectory; -import static org.elasticsearch.gradle.internal.transport.TransportVersionUtils.getResourcesDirectory; - public class TransportVersionResourcesPlugin implements Plugin { @Override public void apply(Project project) { project.getPluginManager().apply(LifecycleBasePlugin.class); + project.getGradle().getSharedServices().registerIfAbsent("transportVersionResources", 
TransportVersionResourcesService.class, spec -> { + Directory transportResources = project.getLayout().getProjectDirectory().dir("src/main/resources/transport"); + spec.getParameters().getResourcesDirectory().set(transportResources); + spec.getParameters().getRootDirectory().set(project.getRootProject().getRootDir()); + }); + DependencyHandler depsHandler = project.getDependencies(); Configuration tvReferencesConfig = project.getConfigurations().create("globalTvReferences"); tvReferencesConfig.setCanBeConsumed(false); @@ -46,10 +49,6 @@ public void apply(Project project) { .register("validateTransportVersionDefinitions", ValidateTransportVersionResourcesTask.class, t -> { t.setGroup("Transport Versions"); t.setDescription("Validates that all defined TransportVersion constants are used in at least one project"); - Directory resourcesDir = getResourcesDirectory(project); - if (resourcesDir.getAsFile().exists()) { - t.getResourcesDirectory().set(resourcesDir); - } t.getReferencesFiles().setFrom(tvReferencesConfig); }); project.getTasks().named(LifecycleBasePlugin.CHECK_TASK_NAME).configure(t -> t.dependsOn(validateTask)); @@ -58,7 +57,6 @@ public void apply(Project project) { .register("generateTransportVersionManifest", GenerateTransportVersionManifestTask.class, t -> { t.setGroup("Transport Versions"); t.setDescription("Generate a manifest resource for all the known transport version definitions"); - t.getDefinitionsDirectory().set(getDefinitionsDirectory(getResourcesDirectory(project))); t.getManifestFile().set(project.getLayout().getBuildDirectory().file("generated-resources/manifest.txt")); }); project.getTasks().named(JavaPlugin.PROCESS_RESOURCES_TASK_NAME, Copy.class).configure(t -> { diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java new file mode 100644 index 
0000000000000..241af929f1abd --- /dev/null +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java @@ -0,0 +1,210 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.gradle.internal.transport; + +import org.gradle.api.file.DirectoryProperty; +import org.gradle.api.services.BuildService; +import org.gradle.api.services.BuildServiceParameters; +import org.gradle.process.ExecOperations; +import org.gradle.process.ExecResult; + +import javax.inject.Inject; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiFunction; + +public abstract class TransportVersionResourcesService implements BuildService { + + public interface Parameters extends BuildServiceParameters { + DirectoryProperty getResourcesDirectory(); + DirectoryProperty getRootDirectory(); + } + + @Inject + public abstract ExecOperations getExecOperations(); + + private static final Path DEFINITIONS_DIR = Path.of("definitions"); + private static final Path NAMED_DIR = DEFINITIONS_DIR.resolve("named"); + private static final Path INITIAL_DIR = DEFINITIONS_DIR.resolve("initial"); + private static final Path LATEST_DIR = 
Path.of("latest"); + + private final Path resourcesDir; + private final Path rootDir; + private final AtomicReference> mainResources = new AtomicReference<>(null); + private final AtomicReference> changedResources = new AtomicReference<>(null); + + @Inject + public TransportVersionResourcesService(Parameters params) { + this.resourcesDir = params.getResourcesDirectory().get().getAsFile().toPath(); + this.rootDir = params.getRootDirectory().get().getAsFile().toPath(); + } + + /** + * Return the transport version resources directory for this repository. + * This should be an input to any tasks reading resources from this service. + */ + Path getResourcesDir() { + return resourcesDir; + } + + /** + * Return the transport version definitions directory for this repository. + * This should be an input to any tasks that only read definitions from this service. + */ + Path getDefinitionsDir() { + return resourcesDir.resolve(DEFINITIONS_DIR); + } + + // return the path, relative to the resources dir, of a named definition + private Path getNamedDefinitionRelativePath(String name) { + return NAMED_DIR.resolve(name + ".csv"); + } + + /** Return all named definitions, mapped by their name. 
*/ + Map getNamedDefinitions() throws IOException { + Map definitions = new HashMap<>(); + // temporarily include initial in named until validation understands the distinction + for (var dir : List.of(NAMED_DIR, INITIAL_DIR)) { + try (var definitionsStream = Files.list(resourcesDir.resolve(dir))) { + for (var definitionFile : definitionsStream.toList()) { + String contents = Files.readString(definitionFile, StandardCharsets.UTF_8).strip(); + var definition = TransportVersionDefinition.fromString(definitionFile.getFileName().toString(), contents); + definitions.put(definition.name(), definition); + } + } + } + return definitions; + } + + /** Retrieve the given named definition as it exists on the main branch, or null if it does not exist there */ + TransportVersionDefinition getNamedDefinitionFromMain(String name) { + String resourcePath = getNamedDefinitionRelativePath(name).toString(); + return getMainFile(resourcePath, TransportVersionDefinition::fromString); + } + + /** Test whether the given named definition exists */ + boolean namedDefinitionExists(String name) { + return Files.exists(resourcesDir.resolve(getNamedDefinitionRelativePath(name))); + } + + /** Return the path within the repository of the given named definition */ + Path getRepositoryPath(TransportVersionDefinition definition) { + return rootDir.relativize(resourcesDir.resolve(getNamedDefinitionRelativePath(definition.name()))); + } + + /** Read all latest files and return them mapped by their release branch */ + Map getLatestByReleaseBranch() throws IOException { + Map latests = new HashMap<>(); + try (var stream = Files.list(resourcesDir.resolve(LATEST_DIR))) { + for (var latestFile : stream.toList()) { + String contents = Files.readString(latestFile, StandardCharsets.UTF_8).strip(); + var latest = TransportVersionLatest.fromString(latestFile.getFileName().toString(), contents); + latests.put(latest.name(), latest); + } + } + return latests; + } + + /** Retrieve the latest transport version for the given release branch on main */ + 
TransportVersionLatest getLatestFromMain(String releaseBranch) { + String resourcePath = getLatestRelativePath(releaseBranch).toString(); + return getMainFile(resourcePath, TransportVersionLatest::fromString); + } + + /** Return the path within the repository of the given latest */ + Path getRepositoryPath(TransportVersionLatest latest) { + return rootDir.relativize(resourcesDir.resolve(getLatestRelativePath(latest.branch()))); + } + + private Path getLatestRelativePath(String releaseBranch) { + return LATEST_DIR.resolve(releaseBranch + ".csv"); + } + + // Return the transport version resources paths that exist in main + private Set getMainResources() { + if (mainResources.get() == null) { + synchronized (mainResources) { + String output = gitCommand("ls-tree", "--name-only", "-r", "main", "."); + + HashSet resources = new HashSet<>(); + Collections.addAll(resources, output.split(System.lineSeparator())); + mainResources.set(resources); + } + } + return mainResources.get(); + } + + // Return the transport version resources paths that have been changed relative to main + private Set getChangedResources() { + if (changedResources.get() == null) { + synchronized (changedResources) { + String output = gitCommand("diff", "--name-only", "main", "."); + + HashSet resources = new HashSet<>(); + Collections.addAll(resources, output.split(System.lineSeparator())); + changedResources.set(resources); + } + } + return changedResources.get(); + } + + // Read a transport version resource from the main branch, or return null if it doesn't exist on main + private T getMainFile(String resourcePath, BiFunction parser) { + if (getMainResources().contains(resourcePath) == false) { + return null; + } + String content = gitCommand("show", "main:./" + resourcePath).strip(); + return parser.apply(resourcePath, content); + } + + // run a git command, relative to the transport version resources directory + private String gitCommand(String... 
args) { + ByteArrayOutputStream stdout = new ByteArrayOutputStream(); + + List command = new ArrayList<>(); + Collections.addAll(command, "git", "-C", getResourcesDir().toString()); + Collections.addAll(command, args); + + ExecResult result = getExecOperations().exec(spec -> { + spec.setCommandLine(command); + spec.setStandardOutput(stdout); + spec.setErrorOutput(stdout); + spec.setIgnoreExitValue(true); + }); + + if (result.getExitValue() != 0) { + throw new RuntimeException( + "git command failed with exit code " + + result.getExitValue() + + System.lineSeparator() + + "command: " + + String.join(" ", command) + + System.lineSeparator() + + "output:" + + System.lineSeparator() + + stdout.toString(StandardCharsets.UTF_8) + ); + } + + return stdout.toString(StandardCharsets.UTF_8); + } +} diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionUtils.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionUtils.java deleted file mode 100644 index 0aa2b173d2466..0000000000000 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionUtils.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.gradle.internal.transport; - -import org.gradle.api.Project; -import org.gradle.api.file.Directory; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; - -class TransportVersionUtils { - - static Path definitionFilePath(Directory resourcesDirectory, String name) { - return getDefinitionsDirectory(resourcesDirectory).getAsFile().toPath().resolve("named/" + name + ".csv"); - } - - static Path latestFilePath(Directory resourcesDirectory, String name) { - return getLatestDirectory(resourcesDirectory).getAsFile().toPath().resolve(name + ".csv"); - } - - static TransportVersionDefinition readDefinitionFile(Path file) throws IOException { - String contents = Files.readString(file, StandardCharsets.UTF_8).strip(); - return TransportVersionDefinition.fromString(file.getFileName().toString(), contents); - } - - static TransportVersionLatest readLatestFile(Path file) throws IOException { - String contents = Files.readString(file, StandardCharsets.UTF_8).strip(); - return TransportVersionLatest.fromString(file.getFileName().toString(), contents); - } - - static Directory getDefinitionsDirectory(Directory resourcesDirectory) { - return resourcesDirectory.dir("definitions"); - } - - static Directory getLatestDirectory(Directory resourcesDirectory) { - return resourcesDirectory.dir("latest"); - } - - static Directory getResourcesDirectory(Project project) { - var projectName = project.findProperty("org.elasticsearch.transport.definitionsProject"); - if (projectName == null) { - projectName = ":server"; - } - Directory projectDir = project.project(projectName.toString()).getLayout().getProjectDirectory(); - return projectDir.dir("src/main/resources/transport"); - } -} diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionReferencesTask.java 
b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionReferencesTask.java index 2a19900076ec7..2ddfeb2f4d060 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionReferencesTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionReferencesTask.java @@ -10,8 +10,9 @@ package org.elasticsearch.gradle.internal.transport; import org.gradle.api.DefaultTask; -import org.gradle.api.file.DirectoryProperty; import org.gradle.api.file.RegularFileProperty; +import org.gradle.api.provider.Property; +import org.gradle.api.services.ServiceReference; import org.gradle.api.tasks.CacheableTask; import org.gradle.api.tasks.InputDirectory; import org.gradle.api.tasks.InputFile; @@ -21,9 +22,7 @@ import org.gradle.api.tasks.TaskAction; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; -import java.util.function.Predicate; /** * Validates that each transport version named reference has a constant definition. 
@@ -31,10 +30,15 @@ @CacheableTask public abstract class ValidateTransportVersionReferencesTask extends DefaultTask { + @ServiceReference("transportVersionResources") + abstract Property getTransportResources(); + @InputDirectory @Optional @PathSensitive(PathSensitivity.RELATIVE) - public abstract DirectoryProperty getDefinitionsDirectory(); + public Path getDefinitionsDir() { + return getTransportResources().get().getDefinitionsDir(); + } @InputFile @PathSensitive(PathSensitivity.RELATIVE) @@ -42,17 +46,11 @@ public abstract class ValidateTransportVersionReferencesTask extends DefaultTask @TaskAction public void validateTransportVersions() throws IOException { - final Predicate referenceChecker; - if (getDefinitionsDirectory().isPresent()) { - Path definitionsDir = getDefinitionsDirectory().getAsFile().get().toPath(); - referenceChecker = (name) -> Files.exists(definitionsDir.resolve("named/" + name + ".csv")); - } else { - referenceChecker = (name) -> false; - } Path namesFile = getReferencesFile().get().getAsFile().toPath(); + TransportVersionResourcesService resources = getTransportResources().get(); for (var tvReference : TransportVersionReference.listFromFile(namesFile)) { - if (referenceChecker.test(tvReference.name()) == false) { + if (resources.namedDefinitionExists(tvReference.name()) == false) { throw new RuntimeException( "TransportVersion.fromName(\"" + tvReference.name() diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java index 7e99720a6d76e..47131bb809768 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java @@ -13,7 +13,8 @@ import 
org.gradle.api.DefaultTask; import org.gradle.api.file.ConfigurableFileCollection; -import org.gradle.api.file.DirectoryProperty; +import org.gradle.api.provider.Property; +import org.gradle.api.services.ServiceReference; import org.gradle.api.tasks.CacheableTask; import org.gradle.api.tasks.InputDirectory; import org.gradle.api.tasks.InputFiles; @@ -21,34 +22,18 @@ import org.gradle.api.tasks.PathSensitive; import org.gradle.api.tasks.PathSensitivity; import org.gradle.api.tasks.TaskAction; -import org.gradle.process.ExecOperations; -import org.gradle.process.ExecResult; -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Collections; +import java.util.Collection; import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.BiFunction; -import java.util.function.Function; import java.util.regex.Pattern; -import javax.inject.Inject; - -import static org.elasticsearch.gradle.internal.transport.TransportVersionReference.listFromFile; -import static org.elasticsearch.gradle.internal.transport.TransportVersionUtils.definitionFilePath; -import static org.elasticsearch.gradle.internal.transport.TransportVersionUtils.latestFilePath; -import static org.elasticsearch.gradle.internal.transport.TransportVersionUtils.readDefinitionFile; -import static org.elasticsearch.gradle.internal.transport.TransportVersionUtils.readLatestFile; - /** * Validates that each defined transport version constant is referenced by at least one project. 
*/ @@ -58,7 +43,9 @@ public abstract class ValidateTransportVersionResourcesTask extends DefaultTask @InputDirectory @Optional @PathSensitive(PathSensitivity.RELATIVE) - public abstract DirectoryProperty getResourcesDirectory(); + public Path getResourcesDir() { + return getResources().get().getResourcesDir(); + } @InputFiles @PathSensitive(PathSensitivity.RELATIVE) @@ -68,197 +55,118 @@ private record IdAndDefinition(TransportVersionId id, TransportVersionDefinition private static final Pattern NAME_FORMAT = Pattern.compile("[a-z0-9_]+"); - private final Path rootPath; - private final ExecOperations execOperations; - - // all transport version names referenced - private final Set allNames = new HashSet<>(); - // direct lookup of definition by name - private final Map definitions = new HashMap<>(); - // which resource files already existed - private final Set existingResources = new HashSet<>(); - // reverse lookup of ids back to name - private final Map definedIds = new HashMap<>(); - // lookup of base ids back to definition - private final Map> idsByBase = new HashMap<>(); - // direct lookup of latest for each branch - Map latestByBranch = new HashMap<>(); - - @Inject - public ValidateTransportVersionResourcesTask(ExecOperations execOperations) { - this.execOperations = execOperations; - this.rootPath = getProject().getRootProject().getLayout().getProjectDirectory().getAsFile().toPath(); - } + @ServiceReference("transportVersionResources") + abstract Property getResources(); @TaskAction public void validateTransportVersions() throws IOException { - Path resourcesDir = getResourcesDirectory().getAsFile().get().toPath(); - Path definitionsDir = resourcesDir.resolve("definitions"); - Path latestDir = resourcesDir.resolve("latest"); - - // first check which resource files already exist in main - recordExistingResources(); + Set referencedNames = TransportVersionReference.collectNames(getReferencesFiles()); + Map definitions = 
getResources().get().getNamedDefinitions(); + Map> idsByBase = collectIdsByBase(definitions.values()); + Map latestByReleaseBranch = getResources().get().getLatestByReleaseBranch(); - // then collect all names referenced in the codebase - for (var referencesFile : getReferencesFiles()) { - listFromFile(referencesFile.toPath()).stream().map(TransportVersionReference::name).forEach(allNames::add); - } // now load all definitions, do some validation and record them by various keys for later quick lookup // NOTE: this must run after loading referenced names and existing definitions // NOTE: this is sorted so that the order of cross validation is deterministic - for (String subDir : List.of("initial", "named")) { - try (var definitionsStream = Files.list(definitionsDir.resolve(subDir)).sorted()) { - for (var definitionFile : definitionsStream.toList()) { - recordAndValidateDefinition(readDefinitionFile(definitionFile)); - } - } + for (var definition : definitions.values()) { + validateDefinition(definition, referencedNames); } // cleanup base lookup so we can check ids // NOTE: this must run after definition recording for (var entry : idsByBase.entrySet()) { - cleanupAndValidateBase(entry.getKey(), entry.getValue()); + validateBase(entry.getKey(), entry.getValue()); } // now load all latest versions and do validation // NOTE: this must run after definition recording and idsByBase cleanup - try (var latestStream = Files.list(latestDir)) { - for (var latestFile : latestStream.toList()) { - recordAndValidateLatest(readLatestFile(latestFile)); - } + + for (var latest : latestByReleaseBranch.values()) { + validateLatest(latest, definitions, idsByBase); } } - private String gitCommand(String... 
args) { - final ByteArrayOutputStream stdout = new ByteArrayOutputStream(); - - List command = new ArrayList<>(); - Collections.addAll(command, "git", "-C", rootPath.toAbsolutePath().toString()); - Collections.addAll(command, args); - - ExecResult result = execOperations.exec(spec -> { - spec.setCommandLine(command); - spec.setStandardOutput(stdout); - spec.setErrorOutput(stdout); - spec.setIgnoreExitValue(true); - }); - - if (result.getExitValue() != 0) { - throw new RuntimeException( - "git command failed with exit code " - + result.getExitValue() - + System.lineSeparator() - + "command: " - + String.join(" ", command) - + System.lineSeparator() - + "output:" - + System.lineSeparator() - + stdout.toString(StandardCharsets.UTF_8) - ); + private Map> collectIdsByBase(Collection definitions) { + Map> idsByBase = new HashMap<>(); + + // first collect all ids, organized by base + for (TransportVersionDefinition definition : definitions) { + for (TransportVersionId id : definition.ids()) { + idsByBase.computeIfAbsent(id.base(), k -> new ArrayList<>()).add(new IdAndDefinition(id, definition)); + } } - return stdout.toString(StandardCharsets.UTF_8); - } + // now sort the ids within each base so we can check density later + for (var ids : idsByBase.values()) { + // first sort the ids list so we can check compactness and quickly lookup the highest id later + ids.sort(Comparator.comparingInt(a -> a.id().complete())); + } - private void recordExistingResources() { - String resourcesPath = relativePath(getResourcesDirectory().getAsFile().get().toPath()); - String output = gitCommand("ls-tree", "--name-only", "-r", "main", resourcesPath); - Collections.addAll(existingResources, output.split(System.lineSeparator())); + return idsByBase; } - private void recordAndValidateDefinition(TransportVersionDefinition definition) { - definitions.put(definition.name(), definition); - // record the ids for each base id so we can ensure compactness later - for (TransportVersionId id : 
definition.ids()) { - idsByBase.computeIfAbsent(id.base(), k -> new ArrayList<>()).add(new IdAndDefinition(id, definition)); - } + private void validateDefinition(TransportVersionDefinition definition, Set referencedNames) { // validate any modifications Map existingIdsByBase = new HashMap<>(); - TransportVersionDefinition originalDefinition = readExistingDefinition(definition.name()); + TransportVersionDefinition originalDefinition = getResources().get().getNamedDefinitionFromMain(definition.name()); if (originalDefinition != null) { int primaryId = definition.ids().get(0).complete(); int originalPrimaryId = originalDefinition.ids().get(0).complete(); if (primaryId != originalPrimaryId) { - throwDefinitionFailure(definition.name(), "has modified primary id from " + originalPrimaryId + " to " + primaryId); + throwDefinitionFailure(definition, "has modified primary id from " + originalPrimaryId + " to " + primaryId); } originalDefinition.ids().forEach(id -> existingIdsByBase.put(id.base(), id)); } - if (allNames.contains(definition.name()) == false && definition.name().startsWith("initial_") == false) { - throwDefinitionFailure(definition.name(), "is not referenced"); + if (referencedNames.contains(definition.name()) == false && definition.name().startsWith("initial_") == false) { + throwDefinitionFailure(definition, "is not referenced"); } if (NAME_FORMAT.matcher(definition.name()).matches() == false) { - throwDefinitionFailure(definition.name(), "does not have a valid name, must be lowercase alphanumeric and underscore"); + throwDefinitionFailure(definition, "does not have a valid name, must be lowercase alphanumeric and underscore"); } if (definition.ids().isEmpty()) { - throwDefinitionFailure(definition.name(), "does not contain any ids"); + throwDefinitionFailure(definition, "does not contain any ids"); } if (Comparators.isInOrder(definition.ids(), Comparator.reverseOrder()) == false) { - throwDefinitionFailure(definition.name(), "does not have ordered ids"); + 
throwDefinitionFailure(definition, "does not have ordered ids"); } for (int ndx = 0; ndx < definition.ids().size(); ++ndx) { TransportVersionId id = definition.ids().get(ndx); - String existing = definedIds.put(id.complete(), definition.name()); - if (existing != null) { - throwDefinitionFailure( - definition.name(), - "contains id " + id + " already defined in [" + definitionRelativePath(existing) + "]" - ); - } - if (ndx == 0) { // TODO: initial versions will only be applicable to a release branch, so they won't have an associated // main version. They will also be loaded differently in the future, but until they are separate, we ignore them here. if (id.patch() != 0 && definition.name().startsWith("initial_") == false) { - throwDefinitionFailure(definition.name(), "has patch version " + id.complete() + " as primary id"); + throwDefinitionFailure(definition, "has patch version " + id.complete() + " as primary id"); } } else { if (id.patch() == 0) { - throwDefinitionFailure(definition.name(), "contains bwc id [" + id + "] with a patch part of 0"); + throwDefinitionFailure(definition, "contains bwc id [" + id + "] with a patch part of 0"); } } // check modifications of ids on same branch, ie sharing same base TransportVersionId maybeModifiedId = existingIdsByBase.get(id.base()); if (maybeModifiedId != null && maybeModifiedId.complete() != id.complete()) { - throwDefinitionFailure(definition.name(), "modifies existing patch id from " + maybeModifiedId + " to " + id); + throwDefinitionFailure(definition, "modifies existing patch id from " + maybeModifiedId + " to " + id); } } } - private TransportVersionDefinition readExistingDefinition(String name) { - return readExistingFile(name, this::definitionRelativePath, TransportVersionDefinition::fromString); - } - - private TransportVersionLatest readExistingLatest(String branch) { - return readExistingFile(branch, this::latestRelativePath, TransportVersionLatest::fromString); - } - - private T readExistingFile(String 
name, Function pathFunction, BiFunction parser) { - String relativePath = pathFunction.apply(name); - if (existingResources.contains(relativePath) == false) { - return null; - } - String content = gitCommand("show", "main:" + relativePath).strip(); - return parser.apply(relativePath, content); - } - - private void recordAndValidateLatest(TransportVersionLatest latest) { - latestByBranch.put(latest.branch(), latest); - + private void validateLatest(TransportVersionLatest latest, Map definitions, Map> idsByBase) { TransportVersionDefinition latestDefinition = definitions.get(latest.name()); if (latestDefinition == null) { - throwLatestFailure(latest.branch(), "contains transport version name [" + latest.name() + "] which is not defined"); + throwLatestFailure(latest, "contains transport version name [" + latest.name() + "] which is not defined"); } if (latestDefinition.ids().contains(latest.id()) == false) { + Path relativePath = getResources().get().getRepositoryPath(latestDefinition); throwLatestFailure( - latest.branch(), - "has id " + latest.id() + " which is not in definition [" + definitionRelativePath(latest.name()) + "]" + latest, + "has id " + latest.id() + " which is not in definition [" + relativePath + "]" ); } @@ -266,7 +174,7 @@ private void recordAndValidateLatest(TransportVersionLatest latest) { IdAndDefinition lastId = baseIds.getLast(); if (lastId.id().complete() != latest.id().complete()) { throwLatestFailure( - latest.branch(), + latest, "has id " + latest.id() + " from [" @@ -281,49 +189,44 @@ private void recordAndValidateLatest(TransportVersionLatest latest) { ); } - TransportVersionLatest existingLatest = readExistingLatest(latest.branch()); + TransportVersionLatest existingLatest = getResources().get().getLatestFromMain(latest.branch()); if (existingLatest != null) { if (latest.id().patch() != 0 && latest.id().base() != existingLatest.id().base()) { - throwLatestFailure(latest.branch(), "modifies base id from " + existingLatest.id().base() + 
" to " + latest.id().base()); + throwLatestFailure(latest, "modifies base id from " + existingLatest.id().base() + " to " + latest.id().base()); } } } - private void cleanupAndValidateBase(int base, List ids) { - // first sort the ids list so we can check compactness and quickly lookup the highest id later - ids.sort(Comparator.comparingInt(a -> a.id().complete())); - + private void validateBase(int base, List ids) { // TODO: switch this to a fully dense check once all existing transport versions have been migrated IdAndDefinition previous = ids.getLast(); for (int ndx = ids.size() - 2; ndx >= 0; --ndx) { - IdAndDefinition next = ids.get(ndx); - // note that next and previous are reversed here because we are iterating in reverse order - if (previous.id().complete() - 1 != next.id().complete()) { + IdAndDefinition current = ids.get(ndx); + + if (previous.id().equals(current.id())) { + Path existingDefinitionPath = getResources().get().getRepositoryPath(previous.definition); + throwDefinitionFailure( + current.definition(), + "contains id " + current.id + " already defined in [" + existingDefinitionPath + "]" + ); + } + + if (previous.id().complete() - 1 != current.id().complete()) { throw new IllegalStateException( - "Transport version base id " + base + " is missing patch ids between " + next.id() + " and " + previous.id() + "Transport version base id " + base + " is missing patch ids between " + current.id() + " and " + previous.id() ); } - previous = next; + previous = current; } } - private void throwDefinitionFailure(String name, String message) { - throw new IllegalStateException("Transport version definition file [" + definitionRelativePath(name) + "] " + message); - } - - private void throwLatestFailure(String branch, String message) { - throw new IllegalStateException("Latest transport version file [" + latestRelativePath(branch) + "] " + message); - } - - private String definitionRelativePath(String name) { - return 
relativePath(definitionFilePath(getResourcesDirectory().get(), name)); - } - - private String latestRelativePath(String branch) { - return relativePath(latestFilePath(getResourcesDirectory().get(), branch)); + private void throwDefinitionFailure(TransportVersionDefinition definition, String message) { + Path relativePath = getResources().get().getRepositoryPath(definition); + throw new IllegalStateException("Transport version definition file [" + relativePath + "] " + message); } - private String relativePath(Path file) { - return rootPath.relativize(file).toString(); + private void throwLatestFailure(TransportVersionLatest latest, String message) { + Path relativePath = getResources().get().getRepositoryPath(latest); + throw new IllegalStateException("Latest transport version file [" + relativePath + "] " + message); } } From 303ba81c6d10806fd6aaf52fbdae3207837edb01 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 16 Aug 2025 17:03:15 +0000 Subject: [PATCH 02/33] [CI] Auto commit changes from spotless --- .../transport/TransportVersionReferencesPlugin.java | 1 - .../transport/TransportVersionResourcesPlugin.java | 12 +++++++----- .../transport/TransportVersionResourcesService.java | 5 +++-- .../ValidateTransportVersionResourcesTask.java | 12 ++++++------ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java index 4850bd39bb242..da3f056825aeb 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionReferencesPlugin.java @@ -18,7 +18,6 @@ public class TransportVersionReferencesPlugin implements Plugin { - @Override public void apply(Project 
project) { project.getPluginManager().apply(LifecycleBasePlugin.class); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java index 958e69c3ad983..4512cfb32cfac 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java @@ -26,11 +26,13 @@ public class TransportVersionResourcesPlugin implements Plugin { public void apply(Project project) { project.getPluginManager().apply(LifecycleBasePlugin.class); - project.getGradle().getSharedServices().registerIfAbsent("transportVersionResources", TransportVersionResourcesService.class, spec -> { - Directory transportResources = project.getLayout().getProjectDirectory().dir("src/main/resources/transport"); - spec.getParameters().getResourcesDirectory().set(transportResources); - spec.getParameters().getRootDirectory().set(project.getRootProject().getRootDir()); - }); + project.getGradle() + .getSharedServices() + .registerIfAbsent("transportVersionResources", TransportVersionResourcesService.class, spec -> { + Directory transportResources = project.getLayout().getProjectDirectory().dir("src/main/resources/transport"); + spec.getParameters().getResourcesDirectory().set(transportResources); + spec.getParameters().getRootDirectory().set(project.getRootProject().getRootDir()); + }); DependencyHandler depsHandler = project.getDependencies(); Configuration tvReferencesConfig = project.getConfigurations().create("globalTvReferences"); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java 
b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java index 241af929f1abd..5e234aa38ed7f 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java @@ -15,8 +15,6 @@ import org.gradle.process.ExecOperations; import org.gradle.process.ExecResult; -import javax.inject.Inject; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -32,10 +30,13 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiFunction; +import javax.inject.Inject; + public abstract class TransportVersionResourcesService implements BuildService { public interface Parameters extends BuildServiceParameters { DirectoryProperty getResourcesDirectory(); + DirectoryProperty getRootDirectory(); } diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java index 47131bb809768..68478564569f0 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java @@ -65,7 +65,6 @@ public void validateTransportVersions() throws IOException { Map> idsByBase = collectIdsByBase(definitions.values()); Map latestByReleaseBranch = getResources().get().getLatestByReleaseBranch(); - // now load all definitions, do some validation and record them by various keys for later quick lookup // NOTE: this must run after loading referenced names and existing definitions // NOTE: this is sorted so that 
the order of cross validation is deterministic @@ -157,17 +156,18 @@ private void validateDefinition(TransportVersionDefinition definition, Set definitions, Map> idsByBase) { + private void validateLatest( + TransportVersionLatest latest, + Map definitions, + Map> idsByBase + ) { TransportVersionDefinition latestDefinition = definitions.get(latest.name()); if (latestDefinition == null) { throwLatestFailure(latest, "contains transport version name [" + latest.name() + "] which is not defined"); } if (latestDefinition.ids().contains(latest.id()) == false) { Path relativePath = getResources().get().getRepositoryPath(latestDefinition); - throwLatestFailure( - latest, - "has id " + latest.id() + " which is not in definition [" + relativePath + "]" - ); + throwLatestFailure(latest, "has id " + latest.id() + " which is not in definition [" + relativePath + "]"); } List baseIds = idsByBase.get(latest.id().base()); From 45235857742d2532c4dcbeead48c0bc49e6cfe0d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Sun, 17 Aug 2025 00:26:03 +0200 Subject: [PATCH 03/33] Mute org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT test {p0=search.vectors/45_knn_search_bit/Vector similarity with filter only} #133037 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 799f4672ade02..22e37979763a3 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -567,6 +567,9 @@ tests: - class: org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityEsqlIT method: testCrossClusterEnrichWithOnlyRemotePrivs issue: https://github.com/elastic/elasticsearch/issues/133031 +- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT + method: test {p0=search.vectors/45_knn_search_bit/Vector similarity with filter only} + issue: https://github.com/elastic/elasticsearch/issues/133037 # Examples: # From 67effbdf8049fbaff3af8ed1fb36fb5723cea7a5 Mon Sep 17 00:00:00 2001 
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Sun, 17 Aug 2025 08:24:59 +0200 Subject: [PATCH 04/33] Mute org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT test {p0=search.vectors/45_knn_search_bit/Vector rescoring has no effect for non-quantized vectors and provides same results as non-rescored knn} #133039 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 22e37979763a3..176019fd94e4d 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -570,6 +570,9 @@ tests: - class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT method: test {p0=search.vectors/45_knn_search_bit/Vector similarity with filter only} issue: https://github.com/elastic/elasticsearch/issues/133037 +- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT + method: test {p0=search.vectors/45_knn_search_bit/Vector rescoring has no effect for non-quantized vectors and provides same results as non-rescored knn} + issue: https://github.com/elastic/elasticsearch/issues/133039 # Examples: # From f1699d83cd94de3e4be1b5dba118fd40d8a04c1a Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 17 Aug 2025 19:55:13 -0700 Subject: [PATCH 05/33] Enable doc partitioning by default for time-series queries (#133038) With #132774, the overhead of running queries with DOC partitioning is small. While we might switch the default data partitioning to DOC for all queries in the future, this PR defaults data partitioning to DOC for time-series queries only to minimize any unexpected impact. 
Relates #132774 --- .../compute/lucene/DataPartitioning.java | 18 +++++++++++++++++- .../compute/lucene/LuceneSourceOperator.java | 3 ++- .../elasticsearch/compute/OperatorTests.java | 1 + .../lucene/LuceneQueryEvaluatorTests.java | 1 + .../lucene/LuceneSourceOperatorTests.java | 1 + .../ValueSourceReaderTypeConversionTests.java | 3 +++ .../read/ValuesSourceReaderOperatorTests.java | 3 +++ .../xpack/esql/action/LookupFromIndexIT.java | 5 +++-- .../planner/EsPhysicalOperationProviders.java | 1 + .../esql/planner/LocalExecutionPlanner.java | 9 +++++++++ 10 files changed, 41 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/DataPartitioning.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/DataPartitioning.java index 57aa49911146b..216cd895531b3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/DataPartitioning.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/DataPartitioning.java @@ -7,9 +7,11 @@ package org.elasticsearch.compute.lucene; +import org.apache.lucene.search.Query; import org.elasticsearch.compute.operator.Driver; import java.util.List; +import java.util.function.Function; /** * How we partition the data across {@link Driver}s. Each request forks into @@ -54,5 +56,19 @@ public enum DataPartitioning { * their own tasks. See {@link LuceneSliceQueue#nextSlice(LuceneSlice)}. 
* */ - DOC + DOC; + + @FunctionalInterface + public interface AutoStrategy { + Function pickStrategy(int limit); + + AutoStrategy DEFAULT = LuceneSourceOperator.Factory::autoStrategy; + AutoStrategy DEFAULT_TIME_SERIES = limit -> { + if (limit == LuceneOperator.NO_LIMIT) { + return q -> LuceneSliceQueue.PartitioningStrategy.DOC; + } else { + return DEFAULT.pickStrategy(limit); + } + }; + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java index 5201eede502df..5d0fc84727fea 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java @@ -68,6 +68,7 @@ public Factory( List contexts, Function> queryFunction, DataPartitioning dataPartitioning, + DataPartitioning.AutoStrategy autoStrategy, int taskConcurrency, int maxPageSize, int limit, @@ -77,7 +78,7 @@ public Factory( contexts, queryFunction, dataPartitioning, - autoStrategy(limit), + autoStrategy.pickStrategy(limit), taskConcurrency, limit, needsScore, diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java index 8185b045029b3..c247b4765548a 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java @@ -381,6 +381,7 @@ static LuceneOperator.Factory luceneOperatorFactory(IndexReader reader, List queryAndTags, randomFrom(DataPartitioning.values()), + DataPartitioning.AutoStrategy.DEFAULT, randomIntBetween(1, 10), randomPageSize(), limit, diff --git 
a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java index 2ef64623daa74..3879ab117d734 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java @@ -279,6 +279,7 @@ private static LuceneOperator.Factory luceneOperatorFactory(IndexReader reader, List.of(searchContext), ctx -> List.of(new LuceneSliceQueue.QueryAndTags(query, List.of())), randomFrom(DataPartitioning.values()), + DataPartitioning.AutoStrategy.DEFAULT, randomIntBetween(1, 10), randomPageSize(), LuceneOperator.NO_LIMIT, diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java index 91b8de1a08573..cc21d180113a0 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java @@ -224,6 +224,7 @@ private LuceneSourceOperator.Factory simple(DataPartitioning dataPartitioning, i List.of(ctx), queryFunction, dataPartitioning, + DataPartitioning.AutoStrategy.DEFAULT, taskConcurrency, maxPageSize, limit, diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java index 5a1f2ee7cc949..6de494fe26b6a 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java +++ 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java @@ -277,6 +277,7 @@ private SourceOperator simpleInput(DriverContext context, int size, int commitEv shardContexts, ctx -> List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())), DataPartitioning.SHARD, + DataPartitioning.AutoStrategy.DEFAULT, 1,// randomIntBetween(1, 10), pageSize, LuceneOperator.NO_LIMIT, @@ -1312,6 +1313,7 @@ public void testWithNulls() throws IOException { List.of(shardContext), ctx -> List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())), randomFrom(DataPartitioning.values()), + DataPartitioning.AutoStrategy.DEFAULT, randomIntBetween(1, 10), randomPageSize(), LuceneOperator.NO_LIMIT, @@ -1473,6 +1475,7 @@ public void testManyShards() throws IOException { contexts, ctx -> List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())), DataPartitioning.SHARD, + DataPartitioning.AutoStrategy.DEFAULT, randomIntBetween(1, 10), 1000, LuceneOperator.NO_LIMIT, diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index 19a645c146242..2dd0e7c4de41b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -195,6 +195,7 @@ private SourceOperator sourceOperator(DriverContext context, int pageSize) { List.of(new LuceneSourceOperatorTests.MockShardContext(reader, 0)), ctx -> List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())), DataPartitioning.SHARD, + DataPartitioning.AutoStrategy.DEFAULT, randomIntBetween(1, 10), pageSize, LuceneOperator.NO_LIMIT, @@ -1506,6 
+1507,7 @@ public void testWithNulls() throws IOException { List.of(new LuceneSourceOperatorTests.MockShardContext(reader, 0)), ctx -> List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())), randomFrom(DataPartitioning.values()), + DataPartitioning.AutoStrategy.DEFAULT, randomIntBetween(1, 10), randomPageSize(), LuceneOperator.NO_LIMIT, @@ -1755,6 +1757,7 @@ public void testManyShards() throws IOException { contexts, ctx -> List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())), DataPartitioning.SHARD, + DataPartitioning.AutoStrategy.DEFAULT, randomIntBetween(1, 10), 1000, LuceneOperator.NO_LIMIT, diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java index dc5e815a85697..72427a5afc9d6 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.action; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.ActionListener; @@ -25,6 +24,7 @@ import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.LongVector; import org.elasticsearch.compute.lucene.DataPartitioning; +import org.elasticsearch.compute.lucene.LuceneOperator; import org.elasticsearch.compute.lucene.LuceneSliceQueue; import org.elasticsearch.compute.lucene.LuceneSourceOperator; import org.elasticsearch.compute.lucene.ShardContext; @@ -280,9 +280,10 @@ private void runLookup(List keyTypes, PopulateIndices populateIndices) List.of(esqlContext), ctx -> List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), 
List.of())), DataPartitioning.SEGMENT, + DataPartitioning.AutoStrategy.DEFAULT, 1, 10000, - DocIdSetIterator.NO_MORE_DOCS, + LuceneOperator.NO_LIMIT, false // no scoring ); List fieldInfos = new ArrayList<>(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index e0b570267899b..d5101e6b8be7c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -292,6 +292,7 @@ public final PhysicalOperation sourcePhysicalOperation(EsQueryExec esQueryExec, shardContexts, querySupplier(esQueryExec.query()), context.queryPragmas().dataPartitioning(physicalSettings.defaultDataPartitioning()), + context.autoPartitioningStrategy().get(), context.queryPragmas().taskConcurrency(), context.pageSize(rowEstimatedSize), limit, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index ec1539fa3ae38..5ae15f4c0e844 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -20,6 +20,7 @@ import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.LocalCircuitBreaker; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.lucene.DataPartitioning; import org.elasticsearch.compute.lucene.LuceneOperator; import org.elasticsearch.compute.operator.ChangePointOperator; import org.elasticsearch.compute.operator.ColumnExtractOperator; @@ -117,6 +118,7 @@ import 
org.elasticsearch.xpack.esql.plan.physical.RrfScoreEvalExec; import org.elasticsearch.xpack.esql.plan.physical.SampleExec; import org.elasticsearch.xpack.esql.plan.physical.ShowExec; +import org.elasticsearch.xpack.esql.plan.physical.TimeSeriesAggregateExec; import org.elasticsearch.xpack.esql.plan.physical.TimeSeriesSourceExec; import org.elasticsearch.xpack.esql.plan.physical.TopNExec; import org.elasticsearch.xpack.esql.plan.physical.inference.CompletionExec; @@ -201,6 +203,7 @@ public LocalExecutionPlanner( * turn the given plan into a list of drivers to execute */ public LocalExecutionPlan plan(String description, FoldContext foldCtx, PhysicalPlan localPhysicalPlan) { + var context = new LocalExecutionPlannerContext( description, new ArrayList<>(), @@ -210,6 +213,11 @@ public LocalExecutionPlan plan(String description, FoldContext foldCtx, Physical blockFactory, foldCtx, settings, + new Holder<>( + localPhysicalPlan.anyMatch(p -> p instanceof TimeSeriesAggregateExec) + ? DataPartitioning.AutoStrategy.DEFAULT_TIME_SERIES + : DataPartitioning.AutoStrategy.DEFAULT + ), shardContexts ); @@ -1012,6 +1020,7 @@ public record LocalExecutionPlannerContext( BlockFactory blockFactory, FoldContext foldCtx, Settings settings, + Holder autoPartitioningStrategy, List shardContexts ) { void addDriverFactory(DriverFactory driverFactory) { From 3f3a7c1e2d5a50a3e1c97b47ddacf8128687c23e Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 18 Aug 2025 11:18:05 +0700 Subject: [PATCH 06/33] Streamline and optimize check to use decode or decode ordinals. (#133047) Comparing bitsPerOrd == -1 should be a little bit more optimal compared to maxOrd >= 0 (int vs long comparison). 
--- .../es819/ES819TSDBDocValuesProducer.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 6d709279902b4..2ef0a5aaa1660 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -1276,10 +1276,10 @@ public long longValue() throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - if (maxOrd >= 0) { - decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); - } else { + if (bitsPerOrd == -1) { decoder.decode(valuesData, currentBlock); + } else { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); } return currentBlock[blockInIndex]; } @@ -1347,7 +1347,7 @@ long lookAheadValueAt(int targetDoc) throws IOException { if (lookaheadBlockIndex + 1 != blockIndex) { lookaheadData.seek(indexReader.get(blockIndex)); } - if (maxOrd == -1L) { + if (bitsPerOrd == -1) { decoder.decode(lookaheadData, lookaheadBlock); } else { decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd); @@ -1417,10 +1417,10 @@ public long longValue() throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - if (maxOrd >= 0) { - decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); - } else { + if (bitsPerOrd == -1) { decoder.decode(valuesData, currentBlock); + } else { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); } } return currentBlock[blockInIndex]; @@ -1452,10 +1452,10 @@ long advance(long index) throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - if (bitsPerOrd >= 0) { - decoder.decodeOrdinals(valuesData, currentBlock, 
bitsPerOrd); - } else { + if (bitsPerOrd == -1) { decoder.decode(valuesData, currentBlock); + } else { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); } } return currentBlock[blockInIndex]; From 9089ab59689b0ef2307d7ad252d01d20a01c3848 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 17 Aug 2025 21:57:02 -0700 Subject: [PATCH 07/33] Fix update expiration for async query (#133021) Async queries in EQL and ES|QL do not create an initial response, and the current logic does not correctly handle expiration updates when the query has already completed. With initial response (no change): First, update the expiration in the async index, then update the task's expiration if the task still exists. Without initial response: First, try to update the task's expiration, then attempt to get the result from the task or async index. If the result is no longer available from the task, update the expiration in the async index before retrieving it (similar to the initial response case). This second step was introduced in this fix. Ideally, we should always create the initial response up front to unify the logic for both async_search and async_query, but this fix is preferred for now as it is more contained. When reviewing the code, I also found a race condition where async-get can return a NOT_FOUND error if the task completes but has not yet stored its result in the async index. This issue would also be resolved by storing an initial response up front. I will open a follow-up issue for it. 
Closes #130619 --- docs/changelog/133021.yaml | 6 ++ .../xpack/core/async/AsyncResultsService.java | 70 ++++++++------ .../core/async/AsyncResultsServiceTests.java | 16 ++-- .../esql/action/AsyncEsqlQueryActionIT.java | 91 +++++++++++++++++++ 4 files changed, 148 insertions(+), 35 deletions(-) create mode 100644 docs/changelog/133021.yaml diff --git a/docs/changelog/133021.yaml b/docs/changelog/133021.yaml new file mode 100644 index 0000000000000..f25dd723b4102 --- /dev/null +++ b/docs/changelog/133021.yaml @@ -0,0 +1,6 @@ +pr: 133021 +summary: Fix update expiration for async query +area: ES|QL +type: bug +issues: + - 130619 diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/async/AsyncResultsService.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/async/AsyncResultsService.java index 2236304d8c4d0..1a0254f7d8471 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/async/AsyncResultsService.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/async/AsyncResultsService.java @@ -12,6 +12,7 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.update.UpdateResponse; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.TriFunction; @@ -86,23 +87,10 @@ public void retrieveResult(GetAsyncResultRequest request, ActionListener 0) { - store.updateExpirationTime( - searchId.getDocId(), + updateExpirationTime( + searchId, expirationTime, - ActionListener.wrap(p -> getSearchResponseFromTask(searchId, request, nowInMillis, expirationTime, listener), exc -> { - RestStatus status = ExceptionsHelper.status(ExceptionsHelper.unwrapCause(exc)); - if (status != RestStatus.NOT_FOUND) { - logger.error( - () -> format("failed to update expiration time for async-search [%s]", 
searchId.getEncoded()), - exc - ); - listener.onFailure(exc); - } else { - // the async search document or its index is not found. - // That can happen if an invalid/deleted search id is provided. - listener.onFailure(new ResourceNotFoundException(searchId.getEncoded())); - } - }) + listener.delegateFailure((l, unused) -> getSearchResponseFromTask(searchId, request, nowInMillis, expirationTime, l)) ); } else { getSearchResponseFromTask(searchId, request, nowInMillis, expirationTime, listener); @@ -122,7 +110,7 @@ private void getSearchResponseFromTask( try { final Task task = store.getTaskAndCheckAuthentication(taskManager, searchId, asyncTaskClass); if (task == null || (updateInitialResultsInStore && task.isCancelled())) { - getSearchResponseFromIndex(searchId, request, nowInMillis, listener); + getSearchResponseFromIndexAndUpdateExpiration(searchId, request, nowInMillis, expirationTimeMillis, listener); return; } @@ -137,30 +125,40 @@ private void getSearchResponseFromTask( if (added == false) { // the task must have completed, since we cannot add a completion listener assert store.getTaskAndCheckAuthentication(taskManager, searchId, asyncTaskClass) == null; - getSearchResponseFromIndex(searchId, request, nowInMillis, listener); + getSearchResponseFromIndexAndUpdateExpiration(searchId, request, nowInMillis, expirationTimeMillis, listener); } } catch (Exception exc) { listener.onFailure(exc); } } - private void getSearchResponseFromIndex( + private void getSearchResponseFromIndexAndUpdateExpiration( AsyncExecutionId searchId, GetAsyncResultRequest request, long nowInMillis, - ActionListener listener + long expirationTime, + ActionListener outListener ) { - store.getResponse(searchId, true, listener.delegateFailure((l, response) -> { - try { - sendFinalResponse(request, response, nowInMillis, l); - } finally { - if (response instanceof StoredAsyncResponse storedAsyncResponse - && storedAsyncResponse.getResponse() instanceof RefCounted refCounted) { - 
refCounted.decRef(); + var updateListener = outListener.delegateFailure((listener, unused) -> { + store.getResponse(searchId, true, listener.delegateFailure((l, response) -> { + try { + sendFinalResponse(request, response, nowInMillis, l); + } finally { + if (response instanceof StoredAsyncResponse storedAsyncResponse + && storedAsyncResponse.getResponse() instanceof RefCounted refCounted) { + refCounted.decRef(); + } } - } - })); + })); + }); + // If updateInitialResultsInStore=false, we can't update expiration while the task is running since the document doesn't exist yet. + // So let's update the expiration here when the task has been completed. + if (updateInitialResultsInStore == false && expirationTime != -1) { + updateExpirationTime(searchId, expirationTime, updateListener.map(unused -> null)); + } else { + updateListener.onResponse(null); + } } private void sendFinalResponse(GetAsyncResultRequest request, Response response, long nowInMillis, ActionListener listener) { @@ -172,4 +170,18 @@ private void sendFinalResponse(GetAsyncResultRequest request, Response response, listener.onResponse(response); } + + private void updateExpirationTime(AsyncExecutionId searchId, long expirationTime, ActionListener listener) { + store.updateExpirationTime(searchId.getDocId(), expirationTime, listener.delegateResponse((l, e) -> { + RestStatus status = ExceptionsHelper.status(ExceptionsHelper.unwrapCause(e)); + if (status != RestStatus.NOT_FOUND) { + logger.error(() -> format("failed to update expiration time for async-search [%s]", searchId.getEncoded()), e); + l.onFailure(e); + } else { + // the async search document or its index is not found. + // That can happen if an invalid/deleted search id is provided. 
+ l.onFailure(new ResourceNotFoundException(searchId.getEncoded())); + } + })); + } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/async/AsyncResultsServiceTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/async/AsyncResultsServiceTests.java index 1c69a6a52951a..5e304530b064f 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/async/AsyncResultsServiceTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/async/AsyncResultsServiceTests.java @@ -231,8 +231,11 @@ public void testAssertExpirationPropagation() throws Exception { try { long startTime = System.currentTimeMillis(); task.setExpirationTime(startTime + TimeValue.timeValueMinutes(1).getMillis()); - - if (updateInitialResultsInStore) { + boolean taskCompleted = randomBoolean(); + if (taskCompleted) { + taskManager.unregister(task); + } + if (taskCompleted || updateInitialResultsInStore) { // we need to store initial result PlainActionFuture future = new PlainActionFuture<>(); indexService.createResponse( @@ -249,10 +252,11 @@ public void testAssertExpirationPropagation() throws Exception { // not waiting for completion, so should return immediately with timeout service.retrieveResult(new GetAsyncResultRequest(task.getExecutionId().getEncoded()).setKeepAlive(newKeepAlive), listener); listener.actionGet(TimeValue.timeValueSeconds(10)); - assertThat(task.getExpirationTime(), greaterThanOrEqualTo(startTime + newKeepAlive.getMillis())); - assertThat(task.getExpirationTime(), lessThanOrEqualTo(System.currentTimeMillis() + newKeepAlive.getMillis())); - - if (updateInitialResultsInStore) { + if (taskCompleted == false) { + assertThat(task.getExpirationTime(), greaterThanOrEqualTo(startTime + newKeepAlive.getMillis())); + assertThat(task.getExpirationTime(), lessThanOrEqualTo(System.currentTimeMillis() + newKeepAlive.getMillis())); + } + if (updateInitialResultsInStore || taskCompleted) { PlainActionFuture future = new 
PlainActionFuture<>(); indexService.getResponse(task.executionId, randomBoolean(), future); TestAsyncResponse response = future.actionGet(TimeValue.timeValueMinutes(10)); diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AsyncEsqlQueryActionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AsyncEsqlQueryActionIT.java index 34b94207c5a8d..2aa731eaa5d29 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AsyncEsqlQueryActionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AsyncEsqlQueryActionIT.java @@ -8,15 +8,21 @@ package org.elasticsearch.xpack.esql.action; import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.compute.operator.DriverTaskRunner; import org.elasticsearch.compute.operator.exchange.ExchangeService; import org.elasticsearch.core.TimeValue; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.TaskCancelledException; import org.elasticsearch.tasks.TaskInfo; +import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; +import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.core.async.AsyncExecutionId; +import org.elasticsearch.xpack.core.async.AsyncTaskIndexService; import org.elasticsearch.xpack.core.async.DeleteAsyncResultRequest; import org.elasticsearch.xpack.core.async.GetAsyncResultRequest; import org.elasticsearch.xpack.core.async.TransportDeleteAsyncResultAction; @@ -40,6 +46,7 @@ import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; 
+import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; @@ -260,6 +267,90 @@ private void testFinishingBeforeTimeout(boolean keepOnCompletion) { } } + public void testUpdateKeepAlive() throws Exception { + long nowInMillis = System.currentTimeMillis(); + TimeValue keepAlive = timeValueSeconds(between(30, 60)); + var request = EsqlQueryRequestBuilder.newAsyncEsqlQueryRequestBuilder(client()) + .query("from test | stats sum(pause_me)") + .pragmas(queryPragmas()) + .waitForCompletionTimeout(TimeValue.timeValueMillis(between(1, 10))) + .keepOnCompletion(randomBoolean()) + .keepAlive(keepAlive); + final String asyncId; + long currentExpiration; + try { + try (EsqlQueryResponse initialResponse = request.execute().actionGet(60, TimeUnit.SECONDS)) { + assertThat(initialResponse.isRunning(), is(true)); + assertTrue(initialResponse.asyncExecutionId().isPresent()); + asyncId = initialResponse.asyncExecutionId().get(); + } + currentExpiration = getExpirationFromTask(asyncId); + assertThat(currentExpiration, greaterThanOrEqualTo(nowInMillis + keepAlive.getMillis())); + // update the expiration while the task is still running + int iters = iterations(1, 5); + for (int i = 0; i < iters; i++) { + long extraKeepAlive = randomIntBetween(30, 60); + keepAlive = TimeValue.timeValueSeconds(keepAlive.seconds() + extraKeepAlive); + GetAsyncResultRequest getRequest = new GetAsyncResultRequest(asyncId).setKeepAlive(keepAlive); + try (var resp = client().execute(EsqlAsyncGetResultAction.INSTANCE, getRequest).actionGet()) { + assertThat(resp.asyncExecutionId(), isPresent()); + assertThat(resp.asyncExecutionId().get(), equalTo(asyncId)); + assertTrue(resp.isRunning()); + } + long updatedExpiration = getExpirationFromTask(asyncId); + assertThat(updatedExpiration, greaterThanOrEqualTo(currentExpiration + extraKeepAlive)); + assertThat(updatedExpiration, 
greaterThanOrEqualTo(nowInMillis + keepAlive.getMillis())); + currentExpiration = updatedExpiration; + } + } finally { + scriptPermits.release(numberOfDocs()); + } + // allow the query to complete, then update the expiration with the result is being stored in the async index + assertBusy(() -> { + GetAsyncResultRequest getRequest = new GetAsyncResultRequest(asyncId); + try (var resp = client().execute(EsqlAsyncGetResultAction.INSTANCE, getRequest).actionGet()) { + assertThat(resp.isRunning(), is(false)); + } + }); + // update the keepAlive after the query has completed + int iters = between(1, 5); + for (int i = 0; i < iters; i++) { + long extraKeepAlive = randomIntBetween(30, 60); + keepAlive = TimeValue.timeValueSeconds(keepAlive.seconds() + extraKeepAlive); + GetAsyncResultRequest getRequest = new GetAsyncResultRequest(asyncId).setKeepAlive(keepAlive); + try (var resp = client().execute(EsqlAsyncGetResultAction.INSTANCE, getRequest).actionGet()) { + assertThat(resp.isRunning(), is(false)); + } + long updatedExpiration = getExpirationFromDoc(asyncId); + assertThat(updatedExpiration, greaterThanOrEqualTo(currentExpiration + extraKeepAlive)); + assertThat(updatedExpiration, greaterThanOrEqualTo(nowInMillis + keepAlive.getMillis())); + currentExpiration = updatedExpiration; + } + } + + private static long getExpirationFromTask(String asyncId) { + List tasks = new ArrayList<>(); + for (TransportService ts : internalCluster().getInstances(TransportService.class)) { + for (CancellableTask task : ts.getTaskManager().getCancellableTasks().values()) { + if (task instanceof EsqlQueryTask queryTask) { + EsqlQueryResponse result = queryTask.getCurrentResult(); + if (result.isAsync() && result.asyncExecutionId().get().equals(asyncId)) { + tasks.add(queryTask); + } + } + } + } + assertThat(tasks, hasSize(1)); + return tasks.getFirst().getExpirationTimeMillis(); + } + + private static long getExpirationFromDoc(String asyncId) { + String docId = 
AsyncExecutionId.decode(asyncId).getDocId(); + GetResponse doc = client().prepareGet().setIndex(XPackPlugin.ASYNC_RESULTS_INDEX).setId(docId).get(); + assertTrue(doc.isExists()); + return ((Number) doc.getSource().get(AsyncTaskIndexService.EXPIRATION_TIME_FIELD)).longValue(); + } + private List getEsqlQueryTasks() throws Exception { List foundTasks = new ArrayList<>(); assertBusy(() -> { From a10d8e3c316d8327e34318327b756e332d5cac08 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 18 Aug 2025 12:37:49 +0700 Subject: [PATCH 08/33] Slightly improve TrackingPostingsInMemoryBytesCodec (#132905) Replace int hashmap by a counter in TrackingPostingsInMemoryBytesCodec and use int hashset to keep track of seen fields. --- .../TrackingPostingsInMemoryBytesCodec.java | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java b/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java index 92aebd83398ce..f9cd8e87a7758 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java @@ -22,7 +22,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.internal.hppc.IntIntHashMap; +import org.apache.lucene.internal.hppc.IntHashSet; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.util.FeatureFlag; @@ -63,22 +63,22 @@ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException static final class TrackingLengthFieldsConsumer extends FieldsConsumer { final SegmentWriteState state; final FieldsConsumer in; - final IntIntHashMap termsBytesPerField; + final IntHashSet seenFields; + final long[] totalBytes; 
TrackingLengthFieldsConsumer(SegmentWriteState state, FieldsConsumer in) { this.state = state; this.in = in; - this.termsBytesPerField = new IntIntHashMap(state.fieldInfos.size()); + this.totalBytes = new long[1]; + // Alternatively, we can consider using a FixedBitSet here and size to max(fieldNumber). + // This should be faster without worrying too much about memory usage. + this.seenFields = new IntHashSet(state.fieldInfos.size()); } @Override public void write(Fields fields, NormsProducer norms) throws IOException { - in.write(new TrackingLengthFields(fields, termsBytesPerField, state.fieldInfos), norms); - long totalBytes = 0; - for (int bytes : termsBytesPerField.values) { - totalBytes += bytes; - } - state.segmentInfo.putAttribute(IN_MEMORY_POSTINGS_BYTES_KEY, Long.toString(totalBytes)); + in.write(new TrackingLengthFields(fields, state.fieldInfos, seenFields, totalBytes), norms); + state.segmentInfo.putAttribute(IN_MEMORY_POSTINGS_BYTES_KEY, Long.toString(totalBytes[0])); } @Override @@ -88,13 +88,15 @@ public void close() throws IOException { } static final class TrackingLengthFields extends FilterLeafReader.FilterFields { - final IntIntHashMap termsBytesPerField; final FieldInfos fieldInfos; + final IntHashSet seenFields; + final long[] totalBytes; - TrackingLengthFields(Fields in, IntIntHashMap termsBytesPerField, FieldInfos fieldInfos) { + TrackingLengthFields(Fields in, FieldInfos fieldInfos, IntHashSet seenFields, long[] totalBytes) { super(in); - this.termsBytesPerField = termsBytesPerField; + this.seenFields = seenFields; this.fieldInfos = fieldInfos; + this.totalBytes = totalBytes; } @Override @@ -104,10 +106,14 @@ public Terms terms(String field) throws IOException { return null; } int fieldNum = fieldInfos.fieldInfo(field).number; - return new TrackingLengthTerms( - terms, - bytes -> termsBytesPerField.put(fieldNum, Math.max(termsBytesPerField.getOrDefault(fieldNum, 0), bytes)) - ); + if (seenFields.add(fieldNum)) { + return new 
TrackingLengthTerms(terms, bytes -> totalBytes[0] += bytes); + } else { + // As far as I know only when bloom filter for _id filter gets written this method gets invoked twice for the same field. + // So maybe we can get rid of the seenFields here? And just keep track of whether _id field has been seen? However, this + // is fragile and could make us vulnerable to tricky bugs in the future if this is no longer the case. + return terms; + } } } From 88c3bcd4aad5e1d86b91ffa441fd0f817159233d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Mon, 18 Aug 2025 08:36:45 +0200 Subject: [PATCH 09/33] Mute org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT test {p0=search.highlight/50_synthetic_source/text multi fvh source order} #133056 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 176019fd94e4d..97b10d08de5a3 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -573,6 +573,9 @@ tests: - class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT method: test {p0=search.vectors/45_knn_search_bit/Vector rescoring has no effect for non-quantized vectors and provides same results as non-rescored knn} issue: https://github.com/elastic/elasticsearch/issues/133039 +- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT + method: test {p0=search.highlight/50_synthetic_source/text multi fvh source order} + issue: https://github.com/elastic/elasticsearch/issues/133056 # Examples: # From 052a93d62603a9753f0b4ac0f6e0be8d31faf245 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 18 Aug 2025 14:07:40 +0700 Subject: [PATCH 10/33] Remove DocumentParserContext#removeLastIgnoredField(...) (#132644) This is no longer needed as offsets of leaf array fields are stored in a doc values offset field that is controlled by field mapper. 
--- .../index/mapper/DocumentParser.java | 12 - .../index/mapper/DocumentParserContext.java | 6 - .../SyntheticSourceRollingUpgradeIT.java | 246 ++++++++++++++++++ 3 files changed, 246 insertions(+), 18 deletions(-) create mode 100644 x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/SyntheticSourceRollingUpgradeIT.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 15e7ff88350b6..1a248f2dd501e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -702,8 +702,6 @@ private static void parseNonDynamicArray( boolean supportStoringArrayOffsets = mapper != null && mapper.supportStoringArrayOffsets(); String fullPath = context.path().pathAsText(arrayFieldName); - // Check if we need to record the array source. This only applies to synthetic source. 
- boolean canRemoveSingleLeafElement = false; if (context.canAddIgnoredField() && supportStoringArrayOffsets == false) { Mapper.SourceKeepMode mode = Mapper.SourceKeepMode.NONE; boolean objectWithFallbackSyntheticSource = false; @@ -721,13 +719,6 @@ private static void parseNonDynamicArray( } boolean copyToFieldHasValuesInDocument = context.isWithinCopyTo() == false && context.isCopyToDestinationField(fullPath); - canRemoveSingleLeafElement = mapper instanceof FieldMapper - && mode == Mapper.SourceKeepMode.ARRAYS - && context.inArrayScope() == false - && mapper.leafName().equals(NOOP_FIELD_MAPPER_NAME) == false - && fieldWithFallbackSyntheticSource == false - && copyToFieldHasValuesInDocument == false; - if (objectWithFallbackSyntheticSource || fieldWithFallbackSyntheticSource || fieldWithStoredArraySource @@ -776,9 +767,6 @@ private static void parseNonDynamicArray( && context.isImmediateParentAnArray()) { context.getOffSetContext().maybeRecordEmptyArray(mapper.getOffsetFieldName()); } - if (elements <= 1 && canRemoveSingleLeafElement) { - context.removeLastIgnoredField(fullPath); - } postProcessDynamicArrayMapping(context, lastFieldName); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index b77c0426c23d4..69faec23d2d48 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -335,12 +335,6 @@ public final void addIgnoredField(IgnoredSourceFieldMapper.NameValue values) { } } - final void removeLastIgnoredField(String name) { - if (ignoredFieldValues.isEmpty() == false && ignoredFieldValues.getLast().name().equals(name)) { - ignoredFieldValues.removeLast(); - } - } - /** * Return the collection of values for fields that have been ignored so far. 
*/ diff --git a/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/SyntheticSourceRollingUpgradeIT.java b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/SyntheticSourceRollingUpgradeIT.java new file mode 100644 index 0000000000000..182beee18f591 --- /dev/null +++ b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/SyntheticSourceRollingUpgradeIT.java @@ -0,0 +1,246 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.upgrades; + +import com.carrotsearch.randomizedtesting.annotations.Name; + +import org.elasticsearch.client.Request; +import org.elasticsearch.test.rest.ObjectPath; +import org.hamcrest.Matchers; + +import java.time.Instant; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.enableLogsdbByDefault; +import static org.elasticsearch.upgrades.LogsIndexModeRollingUpgradeIT.getWriteBackingIndex; +import static org.elasticsearch.upgrades.MatchOnlyTextRollingUpgradeIT.createTemplate; +import static org.elasticsearch.upgrades.MatchOnlyTextRollingUpgradeIT.formatInstant; +import static org.elasticsearch.upgrades.MatchOnlyTextRollingUpgradeIT.getIndexSettingsWithDefaults; +import static org.elasticsearch.upgrades.MatchOnlyTextRollingUpgradeIT.startTrial; +import static org.hamcrest.Matchers.both; +import static 
org.hamcrest.Matchers.emptyIterable; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; + +public class SyntheticSourceRollingUpgradeIT extends AbstractRollingUpgradeWithSecurityTestCase { + + static String BULK_ITEM_TEMPLATE = """ + {"@timestamp": "$now", "field1": "$field1", "field2": $field2, "field3": $field3, "field4": $field4} + """; + + private static final String TEMPLATE = """ + { + "mappings": { + "properties": { + "@timestamp" : { + "type": "date" + }, + "field1": { + "type": "keyword" + }, + "field2": { + "type": "keyword" + }, + "field3": { + "type": "long" + }, + "field4": { + "type": "long" + } + } + } + }"""; + + private static final Integer[] VALUES = new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + + public SyntheticSourceRollingUpgradeIT(@Name("upgradedNodes") int upgradedNodes) { + super(upgradedNodes); + } + + public void testIndexing() throws Exception { + String dataStreamName = "logs-bwc-test"; + if (isOldCluster()) { + startTrial(); + enableLogsdbByDefault(); + createTemplate(dataStreamName, getClass().getSimpleName().toLowerCase(Locale.ROOT), TEMPLATE); + + Instant startTime = Instant.now().minusSeconds(60 * 60); + bulkIndex(dataStreamName, 4, 1024, startTime); + + String firstBackingIndex = getWriteBackingIndex(client(), dataStreamName, 0); + var settings = (Map) getIndexSettingsWithDefaults(firstBackingIndex).get(firstBackingIndex); + assertThat(((Map) settings.get("settings")).get("index.mode"), equalTo("logsdb")); + assertThat(((Map) settings.get("defaults")).get("index.mapping.source.mode"), equalTo("SYNTHETIC")); + + ensureGreen(dataStreamName); + search(dataStreamName); + query(dataStreamName); + } else if (isMixedCluster()) { + Instant startTime = Instant.now().minusSeconds(60 * 30); + bulkIndex(dataStreamName, 4, 1024, startTime); + + ensureGreen(dataStreamName); + 
search(dataStreamName); + query(dataStreamName); + } else if (isUpgradedCluster()) { + ensureGreen(dataStreamName); + Instant startTime = Instant.now(); + bulkIndex(dataStreamName, 4, 1024, startTime); + search(dataStreamName); + query(dataStreamName); + + var forceMergeRequest = new Request("POST", "/" + dataStreamName + "/_forcemerge"); + forceMergeRequest.addParameter("max_num_segments", "1"); + assertOK(client().performRequest(forceMergeRequest)); + + ensureGreen(dataStreamName); + search(dataStreamName); + query(dataStreamName); + } + } + + static String bulkIndex(String dataStreamName, int numRequest, int numDocs, Instant startTime) throws Exception { + String firstIndex = null; + for (int i = 0; i < numRequest; i++) { + var bulkRequest = new Request("POST", "/" + dataStreamName + "/_bulk"); + StringBuilder requestBody = new StringBuilder(); + for (int j = 0; j < numDocs; j++) { + String field1 = Integer.toString(randomFrom(VALUES)); + var randomArray = randomArray(1, 3, Integer[]::new, () -> randomFrom(VALUES)); + String field2 = Arrays.stream(randomArray).map(s -> "\"" + s + "\"").collect(Collectors.joining(",")); + int field3 = randomFrom(VALUES); + String field4 = Arrays.stream(randomArray).map(String::valueOf).collect(Collectors.joining(",")); + + requestBody.append("{\"create\": {}}"); + requestBody.append('\n'); + requestBody.append( + BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime)) + .replace("$field1", field1) + .replace("$field2", "[" + field2 + "]") + .replace("$field3", Long.toString(field3)) + .replace("$field4", "[" + field4 + "]") + ); + requestBody.append('\n'); + + startTime = startTime.plusMillis(1); + } + bulkRequest.setJsonEntity(requestBody.toString()); + bulkRequest.addParameter("refresh", "true"); + var response = client().performRequest(bulkRequest); + assertOK(response); + var responseBody = entityAsMap(response); + assertThat("errors in response:\n " + responseBody, responseBody.get("errors"), equalTo(false)); + if 
(firstIndex == null) { + firstIndex = (String) ((Map) ((Map) ((List) responseBody.get("items")).get(0)).get("create")).get("_index"); + } + } + return firstIndex; + } + + void search(String dataStreamName) throws Exception { + var searchRequest = new Request("POST", "/" + dataStreamName + "/_search"); + searchRequest.addParameter("pretty", "true"); + searchRequest.setJsonEntity(""" + { + "size": 500 + } + """); + var response = client().performRequest(searchRequest); + assertOK(response); + var responseBody = entityAsMap(response); + logger.info("{}", responseBody); + assertThat(ObjectPath.evaluate(responseBody, "_shards.failed"), Matchers.equalTo(0)); + Integer totalCount = ObjectPath.evaluate(responseBody, "hits.total.value"); + assertThat(totalCount, greaterThanOrEqualTo(512)); + + Map firstSource = ObjectPath.evaluate(responseBody, "hits.hits.0._source"); + Integer field1 = Integer.valueOf((String) firstSource.get("field1")); + assertThat(field1, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + List field2 = (List) firstSource.get("field2"); + assertThat(field2, not(emptyIterable())); + for (var e : field2) { + Integer value = Integer.valueOf((String) e); + assertThat(value, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + Integer field3 = (Integer) firstSource.get("field3"); + assertThat(field3, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + List field4 = (List) firstSource.get("field4"); + assertThat(field4, not(emptyIterable())); + for (var e : field4) { + Integer value = (Integer) e; + assertThat(value, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + } + + void query(String dataStreamName) throws Exception { + var queryRequest = new Request("POST", "/_query"); + queryRequest.addParameter("pretty", "true"); + queryRequest.setJsonEntity(""" + { + "query": "FROM $ds | SORT @timestamp | KEEP field1,field2,field3,field4 | LIMIT 5" + } + """.replace("$ds", dataStreamName)); + var response = 
client().performRequest(queryRequest); + assertOK(response); + var responseBody = entityAsMap(response); + logger.info("{}", responseBody); + + String column1 = ObjectPath.evaluate(responseBody, "columns.0.name"); + String column2 = ObjectPath.evaluate(responseBody, "columns.1.name"); + String column3 = ObjectPath.evaluate(responseBody, "columns.2.name"); + String column4 = ObjectPath.evaluate(responseBody, "columns.3.name"); + assertThat(column1, equalTo("field1")); + assertThat(column2, equalTo("field2")); + assertThat(column3, equalTo("field3")); + assertThat(column4, equalTo("field4")); + + { + var field1 = Integer.valueOf(ObjectPath.evaluate(responseBody, "values.0.0")); + assertThat(field1, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + { + var object = ObjectPath.evaluate(responseBody, "values.0.1"); + if (object instanceof List field2) { + assertThat(field2, not(emptyIterable())); + for (var e : field2) { + Integer value = Integer.valueOf((String) e); + assertThat(value, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + } else { + Integer field2 = Integer.valueOf((String) object); + assertThat(field2, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + } + { + Integer field3 = ObjectPath.evaluate(responseBody, "values.0.2"); + assertThat(field3, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + { + var object = ObjectPath.evaluate(responseBody, "values.0.3"); + if (object instanceof List field4) { + assertThat(field4, not(emptyIterable())); + for (var e : field4) { + Integer value = (Integer) e; + assertThat(value, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + } else { + Integer field4 = (Integer) object; + assertThat(field4, both(greaterThanOrEqualTo(0)).and(lessThanOrEqualTo(9))); + } + } + } + +} From f371deb5d54bcd4ed9d35c782c5585bf1707deef Mon Sep 17 00:00:00 2001 From: Grzegorz Banasiak Date: Mon, 18 Aug 2025 09:41:23 +0200 Subject: [PATCH 11/33] Shorten metadata.name of PR 
benchmark pipeline (#133054) --- catalog-info.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalog-info.yaml b/catalog-info.yaml index 9e98f56811220..aaf1c4a7bed87 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -306,7 +306,7 @@ spec: apiVersion: backstage.io/v1alpha1 kind: Resource metadata: - name: buildkite-pipeline-elasticsearch-pull-request-performance-benchmark + name: buildkite-pipeline-elasticsearch-pull-request-perf-benchmark description: Elasticsearch pull request performance benchmark links: - title: Pipeline From 896e38339c48afe19e6aaa3466b3abc689f5c73a Mon Sep 17 00:00:00 2001 From: Szymon Bialkowski Date: Mon, 18 Aug 2025 10:08:11 +0200 Subject: [PATCH 12/33] Expose existing DLS cache x-pack usage statistics (#132845) - add already-tracked DLS cache stats into `/xpack/usage?filter_path=security.roles.dls` --- docs/changelog/132845.yaml | 5 ++ .../DocumentSubsetBitsetCache.java | 13 +++- .../DocumentSubsetBitsetCacheTests.java | 66 ++++++++++++++++++- 3 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 docs/changelog/132845.yaml diff --git a/docs/changelog/132845.yaml b/docs/changelog/132845.yaml new file mode 100644 index 0000000000000..ff15b91a9113a --- /dev/null +++ b/docs/changelog/132845.yaml @@ -0,0 +1,5 @@ +pr: 132845 +summary: Expose existing DLS cache x-pack usage statistics +area: Authorization +type: enhancement +issues: [] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java index 8bd64086a5c4e..1f0d2598d9928 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java @@ -40,6 +40,8 @@ 
import java.io.Closeable; import java.io.IOException; +import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -320,7 +322,16 @@ public static List> getSettings() { public Map usageStats() { final ByteSizeValue ram = ByteSizeValue.ofBytes(ramBytesUsed()); - return Map.of("count", entryCount(), "memory", ram.toString(), "memory_in_bytes", ram.getBytes()); + final Cache.Stats cacheStats = bitsetCache.stats(); + + final Map stats = new LinkedHashMap<>(); + stats.put("count", entryCount()); + stats.put("memory", ram.toString()); + stats.put("memory_in_bytes", ram.getBytes()); + stats.put("hits", cacheStats.getHits()); + stats.put("misses", cacheStats.getMisses()); + stats.put("evictions", cacheStats.getEvictions()); + return Collections.unmodifiableMap(stats); } private static final class BitsetCacheKey { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java index 0645ea8b43b16..19c48c182b85b 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java @@ -53,6 +53,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.IdentityHashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -62,8 +63,10 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; import static 
org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; @@ -396,9 +399,9 @@ public void testCacheUnderConcurrentAccess() throws Exception { cache.verifyInternalConsistency(); // Due to cache evictions, we must get more bitsets than fields - assertThat(uniqueBitSets.size(), Matchers.greaterThan(FIELD_COUNT)); + assertThat(uniqueBitSets.size(), greaterThan(FIELD_COUNT)); // Due to cache evictions, we must have seen more bitsets than the cache currently holds - assertThat(uniqueBitSets.size(), Matchers.greaterThan(cache.entryCount())); + assertThat(uniqueBitSets.size(), greaterThan(cache.entryCount())); // Even under concurrent pressure, the cache should hit the expected size assertThat(cache.entryCount(), is(maxCacheCount)); assertThat(cache.ramBytesUsed(), is(maxCacheBytes)); @@ -517,6 +520,64 @@ public void testEquivalentMatchAllDocsQuery() { assertFalse(DocumentSubsetBitsetCache.isEffectiveMatchAllDocsQuery(new TermQuery(new Term("term")))); } + public void testHitsMissesAndEvictionsStats() throws Exception { + // cache that will evict all-but-one element, to test evictions + final long maxCacheBytes = EXPECTED_BYTES_PER_BIT_SET + (EXPECTED_BYTES_PER_BIT_SET / 2); + final Settings settings = Settings.builder() + .put(DocumentSubsetBitsetCache.CACHE_SIZE_SETTING.getKey(), maxCacheBytes + "b") + .build(); + final DocumentSubsetBitsetCache cache = newCache(settings); + + final Supplier> emptyStatsSupplier = () -> { + final Map stats = new LinkedHashMap<>(); + stats.put("count", 0); + stats.put("memory", "0b"); + stats.put("memory_in_bytes", 0L); + stats.put("hits", 0L); + stats.put("misses", 0L); + stats.put("evictions", 0L); + return stats; + }; + + final Map expectedStats = emptyStatsSupplier.get(); + assertThat(cache.usageStats(), equalTo(expectedStats)); + + runTestOnIndex((searchExecutionContext, leafContext) -> { + // first lookup - miss + final Query query1 = QueryBuilders.termQuery("field-1", 
"value-1").toQuery(searchExecutionContext); + final BitSet bitSet1 = cache.getBitSet(query1, leafContext); + assertThat(bitSet1, notNullValue()); + expectedStats.put("count", 1); + expectedStats.put("misses", 1L); + expectedStats.put("memory", EXPECTED_BYTES_PER_BIT_SET + "b"); + expectedStats.put("memory_in_bytes", EXPECTED_BYTES_PER_BIT_SET); + assertThat(cache.usageStats(), equalTo(expectedStats)); + + // second same lookup - hit + final BitSet bitSet1Again = cache.getBitSet(query1, leafContext); + assertThat(bitSet1Again, sameInstance(bitSet1)); + expectedStats.put("hits", 1L); + assertThat(cache.usageStats(), equalTo(expectedStats)); + + // second query - miss, should evict the first one + final Query query2 = QueryBuilders.termQuery("field-2", "value-2").toQuery(searchExecutionContext); + final BitSet bitSet2 = cache.getBitSet(query2, leafContext); + assertThat(bitSet2, notNullValue()); + // surprisingly, the eviction callback can call `get` on the cache (asynchronously) which causes another miss (or hit) + // so this assertion is about the current state of the code, rather than the expected or desired state. 
+ // see https://github.com/elastic/elasticsearch/issues/132842 + expectedStats.put("misses", 3L); + expectedStats.put("evictions", 1L); + assertBusy(() -> { assertThat(cache.usageStats(), equalTo(expectedStats)); }, 200, TimeUnit.MILLISECONDS); + }); + + final Map finalStats = emptyStatsSupplier.get(); + finalStats.put("hits", 1L); + finalStats.put("misses", 3L); + finalStats.put("evictions", 2L); + assertThat(cache.usageStats(), equalTo(finalStats)); + } + private void runTestOnIndex(CheckedBiConsumer body) throws Exception { runTestOnIndices(1, ctx -> { final TestIndexContext indexContext = ctx.get(0); @@ -638,5 +699,4 @@ private void runTestOnIndices(int numberIndices, CheckedConsumer Date: Mon, 18 Aug 2025 11:07:45 +0200 Subject: [PATCH 13/33] Mute org.elasticsearch.upgrades.SyntheticSourceRollingUpgradeIT testIndexing {upgradedNodes=1} #133060 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 97b10d08de5a3..eaa2ac0374024 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -576,6 +576,9 @@ tests: - class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT method: test {p0=search.highlight/50_synthetic_source/text multi fvh source order} issue: https://github.com/elastic/elasticsearch/issues/133056 +- class: org.elasticsearch.upgrades.SyntheticSourceRollingUpgradeIT + method: testIndexing {upgradedNodes=1} + issue: https://github.com/elastic/elasticsearch/issues/133060 # Examples: # From 905372db5d9a2a536de8d8d63d8f14cebab48fee Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Mon, 18 Aug 2025 11:07:53 +0200 Subject: [PATCH 14/33] Mute org.elasticsearch.upgrades.SyntheticSourceRollingUpgradeIT testIndexing {upgradedNodes=0} #133061 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index eaa2ac0374024..b8284de09b161 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ 
-579,6 +579,9 @@ tests: - class: org.elasticsearch.upgrades.SyntheticSourceRollingUpgradeIT method: testIndexing {upgradedNodes=1} issue: https://github.com/elastic/elasticsearch/issues/133060 +- class: org.elasticsearch.upgrades.SyntheticSourceRollingUpgradeIT + method: testIndexing {upgradedNodes=0} + issue: https://github.com/elastic/elasticsearch/issues/133061 # Examples: # From b2bdea833c940490c85f782d881b13a7d4be5525 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 18 Aug 2025 16:49:01 +0700 Subject: [PATCH 15/33] Document index setting around source metadata field. (#127000) --- .../elasticsearch/index-settings/source.md | 24 +++++++++++++++++++ docs/reference/elasticsearch/toc.yml | 3 ++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 docs/reference/elasticsearch/index-settings/source.md diff --git a/docs/reference/elasticsearch/index-settings/source.md b/docs/reference/elasticsearch/index-settings/source.md new file mode 100644 index 0000000000000..53c2d10795e74 --- /dev/null +++ b/docs/reference/elasticsearch/index-settings/source.md @@ -0,0 +1,24 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/source-index-settings.html +navigation_title: Source settings +--- + +# Source index settings [source-index-settings] + +All settings around the _source metadata field. + +$$$source-mode$$$ + +`index.source.mode` +: (Static, string) The source mode for the index. Valid values are [`synthetic`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source), [`disabled`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#disable-source-field) or `stored`. Defaults to `stored`. The `stored` source mode always stores the source metadata field on disk. 
+ +$$$recovery-use_synthetic_source$$$ + +`index.recovery.use_synthetic_source` +: (Static, boolean) If synthetic source mode is used, whether the recovery source should also be synthesized instead of stored to disk. Defaults to `true`. This setting can only be configured if synthetic source mode is enabled. + +$$$synthetic-source-keep$$$ + +`index.mapping.synthetic_source_keep` +: (Static, string) Controls how to retain accuracy of fields at the index level. Valid values are `none` or `arrays`. This is a subset of [synthetic source keep mapping attribute](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-keep). Defaults to `arrays` if `index.mode` is `logsdb` or otherwise `none`. diff --git a/docs/reference/elasticsearch/toc.yml b/docs/reference/elasticsearch/toc.yml index 5a28f17beb243..0806a2437ee1d 100644 --- a/docs/reference/elasticsearch/toc.yml +++ b/docs/reference/elasticsearch/toc.yml @@ -56,6 +56,7 @@ toc: children: - file: index-settings/preloading-data-into-file-system-cache.md - file: index-settings/time-series.md + - file: index-settings/source.md - file: index-settings/translog.md - file: index-settings/pressure.md - file: index-settings/path.md @@ -214,4 +215,4 @@ toc: - file: command-line-tools/setup-passwords.md - file: command-line-tools/shard-tool.md - file: command-line-tools/syskeygen.md - - file: command-line-tools/users-command.md \ No newline at end of file + - file: command-line-tools/users-command.md From a654a3318fa76f210b158af1bd20c00327ea2d54 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Mon, 18 Aug 2025 12:24:48 +0200 Subject: [PATCH 16/33] Fix typos in plugin-management.md (#133062) --- docs/reference/elasticsearch-plugins/plugin-management.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch-plugins/plugin-management.md b/docs/reference/elasticsearch-plugins/plugin-management.md index 939362c1f5dc7..fb43f09020951 100644 ---
a/docs/reference/elasticsearch-plugins/plugin-management.md +++ b/docs/reference/elasticsearch-plugins/plugin-management.md @@ -116,7 +116,7 @@ If Elasticsearch was installed using the deb or rpm package then run `/usr/share For detailed instructions on installing, managing, and configuring plugins, see the following: -* [Intalling Plugings](./installation.md) +* [Installing Plugins](./installation.md) * [Custom URL or file system](./plugin-management-custom-url.md) * [Installing multiple plugins](./installing-multiple-plugins.md) * [Mandatory plugins](./mandatory-plugins.md) From e96297afadddb371fd8a11ec367a2df5f5392650 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 18 Aug 2025 17:48:14 +0700 Subject: [PATCH 17/33] Text field dynamic field tweak (#129332) Pass down actual index version when creating text field mapper builder when mapping dynamic field. Otherwise, bwc logic always has the wrong index version in mixed cluster scenario and dynamic mapping occurs. --- .../index/mapper/DynamicFieldsBuilder.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java b/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java index fc0b0d864547b..71c6a4e7a9fbc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java @@ -334,11 +334,17 @@ public boolean newDynamicStringField(DocumentParserContext context, String name) mapperBuilderContext ); } else { + var indexSettings = context.indexSettings(); return createDynamicField( - new TextFieldMapper.Builder(name, context.indexAnalyzers(), SourceFieldMapper.isSynthetic(context.indexSettings())) - .addMultiField( - new KeywordFieldMapper.Builder("keyword", context.indexSettings().getIndexVersionCreated()).ignoreAbove(256) - ), + new TextFieldMapper.Builder( + name, + 
indexSettings.getIndexVersionCreated(), + context.indexAnalyzers(), + SourceFieldMapper.isSynthetic(indexSettings), + false + ).addMultiField( + new KeywordFieldMapper.Builder("keyword", context.indexSettings().getIndexVersionCreated()).ignoreAbove(256) + ), context ); } From 499bba4ea52387efdc5fc6ba1973382b0864f715 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 18 Aug 2025 14:13:00 +0200 Subject: [PATCH 18/33] Adds BBQ documentation to the Reference content (#133066) --- .../elasticsearch/index-settings/bbq.md | 162 ++++++++++++++++++ docs/reference/elasticsearch/toc.yml | 1 + 2 files changed, 163 insertions(+) create mode 100644 docs/reference/elasticsearch/index-settings/bbq.md diff --git a/docs/reference/elasticsearch/index-settings/bbq.md b/docs/reference/elasticsearch/index-settings/bbq.md new file mode 100644 index 0000000000000..a53cd6e9c83c6 --- /dev/null +++ b/docs/reference/elasticsearch/index-settings/bbq.md @@ -0,0 +1,162 @@ +--- +navigation_title: Better Binary Quantization (BBQ) +applies_to: + stack: all + serverless: all +--- + +# Better Binary Quantization (BBQ) [bbq] + +Better Binary Quantization (BBQ) is an advanced vector quantization method, designed for large-scale similarity search. BBQ is a form of lossy compression for [`dense_vector` fields](https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/dense-vector) that enables efficient storage and retrieval of large numbers of vectors, while keeping results close to those from the original uncompressed vectors. + +BBQ offers significant improvements over scalar quantization by relying on optimized `bit` level computations to reduce memory usage and computational costs while maintaining high search relevance using pre-computed corrective factors. 
BBQ is designed to work in combination with [oversampling](#bbq-oversampling) and reranking, and is compatible with various [vector search algorithms](#bbq-vector-search-algorithms), such as [HNSW](#bbq-hnsw) and [brute force (flat)](#bbq-flat). + +## How BBQ works [bbq-how-it-works] + +BBQ retains the original vector’s dimensionality but transforms the datatype of the dimensions from the original `float32` to `bit`, effectively compressing each vector by 32x plus an additional 14 bytes of corrective data per vector. BBQ uses these pre-computed corrective factors as partial distance calculations to help realize impressively robust approximations of the original vector. + +Measuring vector similarity with BBQ vectors requires much less computing effort, allowing more candidates to be considered when using the HNSW algorithm. This often results in better ranking quality and improved relevance compared to the original `float32` vectors. + +## Supported vector search algorithms [bbq-vector-search-algorithms] + +BBQ currently supports two vector search algorithms, each suited to different scenarios. You can configure them by setting the dense vector field’s `index_type`. + +### `bbq_hnsw` [bbq-hnsw] + +When you set a dense vector field’s `index_options` parameter to `type: bbq_hnsw`, {{es}} uses the HNSW algorithm for fast [kNN search](https://www.elastic.co/docs/solutions/search/vector/knn) on compressed vectors. With the default [oversampling](#bbq-oversampling) applied, it delivers better cost efficiency, lower latency, and improved relevance ranking, making it the best choice for large-scale similarity search. + +:::{note} +Starting in version 9.1, `bbq_hnsw` is the default indexing method for new `dense_vector` fields with greater than 384 dimensions, so you typically don’t need to specify it explicitly when creating an index.
+ +Datasets with less than 384 dimensions may see less accuracy and incur a higher overhead cost related to the corrective factors, but we have observed some production datasets perform well even at fairly low dimensions including [tests on e5-small](https://www.elastic.co/search-labs/blog/better-binary-quantization-lucene-elasticsearch). +::: + +The following example creates an index with a `dense_vector` field configured to use the `bbq_hnsw` algorithm. + +```console +PUT bbq_hnsw-index +{ + "mappings": { + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": 64, + "index": true, + "index_options": { + "type": "bbq_hnsw" + } + } + } + } +} +``` + +To change an existing index to use `bbq_hnsw`, update the field mapping: + +```console +PUT bbq_hnsw-index/_mapping +{ + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": 64, + "index": true, + "index_options": { + "type": "bbq_hnsw" + } + } + } +} +``` + +After this change, all newly created segments will use the `bbq_hnsw` algorithm. As you add or update documents, the index will gradually convert to `bbq_hnsw`. + +To apply `bbq_hnsw` to all vectors at once, reindex them into a new index where the `index_options` parameter's `type` is set to `bbq_hnsw`: + +:::::{stepper} +::::{step} Create a destination index +```console +PUT my-index-bbq +{ + "mappings": { + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": 64, + "index": true, + "index_options": { + "type": "bbq_hnsw" + } + } + } + } +} +``` +:::: + +::::{step} Reindex the data +```console +POST _reindex +{ + "source": { "index": "my-index" }, <1> + "dest": { "index": "my-index-bbq" } +} +``` +1. The existing index to be reindexed into the newly created index with the `bbq_hnsw` algorithm. +:::: + +::::: + +### `bbq_flat` [bbq-flat] + +When you set a dense vector field’s `index_options` parameter to `type: bbq_flat`, {{es}} uses the BBQ algorithm without HNSW. 
This option generally requires fewer computing resources and works best when the number of vectors being searched is relatively low. + +The following example creates an index with a `dense_vector` field configured to use the `bbq_flat` algorithm. + +```console +PUT bbq_flat-index +{ + "mappings": { + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": 64, + "index": true, + "index_options": { + "type": "bbq_flat" + } + } + } + } +} +``` + +## Oversampling [bbq-oversampling] + +Oversampling is a technique used with BBQ searches to reduce the accuracy loss from compression. Compression lowers the memory footprint by over 95% and improves query latency, at the cost of decreased result accuracy. This decrease can be mitigated by oversampling during query time and reranking the top results using the full vector. + +When you run a kNN search on a BBQ-indexed field, {{es}} automatically retrieves more candidate vectors than the number of results you request. This oversampling improves accuracy by giving the system more vectors to re-rank using their full-precision values before returning the top results. + +```console +GET bbq-index/_search +{ + "knn": { + "field": "my_vector", + "query_vector": [0.12, -0.45, ...], + "k": 10, + "num_candidates": 100 + } +} +``` + +By default, oversampling is set to 3×, meaning if you request k:10, {{es}} retrieves 30 candidates for re-ranking. You don’t need to configure this behavior; it’s applied automatically for BBQ searches. + +:::{note} +You can change oversampling from the default 3× to another value. Refer to [Oversampling and rescoring for quantized vectors](https://www.elastic.co/docs/solutions/search/vector/knn#dense-vector-knn-search-rescoring) for details. 
+::: + +## Learn more [bbq-learn-more] + +- [Better Binary Quantization (BBQ) in Lucene and {{es}}](https://www.elastic.co/search-labs/blog/better-binary-quantization-lucene-elasticsearch) - Learn how BBQ works, its benefits, and how it reduces memory usage while preserving search accuracy. +- [Dense vector field type](https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/dense-vector) - Find code examples for using `bbq_hnsw` `index_type`. +- [kNN search](https://www.elastic.co/docs/solutions/search/vector/knn) - Learn about the search algorithm that BBQ works with. \ No newline at end of file diff --git a/docs/reference/elasticsearch/toc.yml b/docs/reference/elasticsearch/toc.yml index 0806a2437ee1d..9e534a54fa20a 100644 --- a/docs/reference/elasticsearch/toc.yml +++ b/docs/reference/elasticsearch/toc.yml @@ -37,6 +37,7 @@ toc: - file: index-settings/index.md children: - file: index-settings/serverless.md + - file: index-settings/bbq.md - file: index-settings/index-modules.md - file: index-settings/shard-allocation.md children: From 4b10b7a22b072e1b3bff8159d1126851bd5cc058 Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Mon, 18 Aug 2025 09:36:03 -0400 Subject: [PATCH 19/33] [ML] Disable child span for streaming tasks (#132945) There is the potential for a memory leak, depending on which thread handles the onComplete message. Currently, the child span does not add anything to debugging, so we will disable it until we can figure out a clean way to propagate the child span's context throughout the stream. In any case, it would be better to replace it entirely with a child span capturing the outbound service call and response for both streaming and non-streaming requests, so this may remain disabled in the long run anyway in favor of that child span. 
--- docs/changelog/132945.yaml | 5 +++++ .../xpack/inference/action/task/StreamingTaskManager.java | 2 +- .../inference/action/task/StreamingTaskManagerTests.java | 8 ++++---- 3 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 docs/changelog/132945.yaml diff --git a/docs/changelog/132945.yaml b/docs/changelog/132945.yaml new file mode 100644 index 0000000000000..a7ba515a02725 --- /dev/null +++ b/docs/changelog/132945.yaml @@ -0,0 +1,5 @@ +pr: 132945 +summary: Disable child span for streaming tasks +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManager.java index 8aa437c773608..3ddb9883ba007 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManager.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManager.java @@ -105,7 +105,7 @@ public Task createTask(long id, String type, String action, TaskId parentTaskId, flowTask.addListener(TaskBackedProcessor.this::cancelTask); return flowTask; } - }); + }, false); } } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManagerTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManagerTests.java index 8ca4a5f2aa309..ba070c2caa56b 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManagerTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/task/StreamingTaskManagerTests.java @@ -54,7 +54,7 @@ public void setUp() throws Exception { doAnswer(ans -> { TaskAwareRequest taskAwareRequest = ans.getArgument(2); return taskAwareRequest.createTask(1L, taskType, 
taskAction, TaskId.EMPTY_TASK_ID, Map.of()); - }).when(taskManager).register(any(), any(), any()); + }).when(taskManager).register(any(), any(), any(), eq(false)); } @After @@ -67,7 +67,7 @@ public void testSubscribeRegistersTask() { processor.subscribe(mock()); - verify(taskManager, only()).register(eq(taskType), eq(taskAction), any()); + verify(taskManager, only()).register(eq(taskType), eq(taskAction), any(), eq(false)); } public void testCancelPropagatesUpstreamAndDownstream() { @@ -77,7 +77,7 @@ public void testCancelPropagatesUpstreamAndDownstream() { var registeredTask = (CancellableTask) taskAwareRequest.createTask(1L, taskType, taskAction, TaskId.EMPTY_TASK_ID, Map.of()); task.set(registeredTask); return registeredTask; - }).when(taskManager).register(any(), any(), any()); + }).when(taskManager).register(any(), any(), any(), eq(false)); Flow.Subscriber downstream = mock(); Flow.Subscription upstream = mock(); @@ -173,7 +173,7 @@ public void testOnNextAfterCancelDoesNotForwardItem() { var registeredTask = (CancellableTask) taskAwareRequest.createTask(1L, taskType, taskAction, TaskId.EMPTY_TASK_ID, Map.of()); task.set(registeredTask); return registeredTask; - }).when(taskManager).register(any(), any(), any()); + }).when(taskManager).register(any(), any(), any(), eq(false)); var processor = streamingTaskManager.create(taskType, taskAction); var downstream = establishFlow(processor); From 754696b22da35bab8a6d8452239d978b1e63b84c Mon Sep 17 00:00:00 2001 From: Sean Zatz Date: Mon, 18 Aug 2025 10:06:35 -0400 Subject: [PATCH 20/33] Add index mode to resolve index response. (#132858) * Add index mode to resolve index response. 
* Add yaml rest test * Update docs/changelog/132858.yaml --------- Co-authored-by: elasticsearchmachine --- docs/changelog/132858.yaml | 5 ++ .../25_resolve_index_with_mode.yml | 74 +++++++++++++++++++ server/src/main/java/module-info.java | 3 +- .../org/elasticsearch/TransportVersions.java | 1 + .../indices/resolve/ResolveIndexAction.java | 31 +++++++- .../indices/resolve/ResolveIndexFeatures.java | 32 ++++++++ ...lasticsearch.features.FeatureSpecification | 1 + .../resolve/ResolveIndexResponseTests.java | 9 ++- .../indices/resolve/ResolveIndexTests.java | 44 +++++++---- .../ResolvedIndexSerializingTests.java | 66 +++++++++++++++++ .../TransportResolveClusterActionTests.java | 16 ++-- 11 files changed, 252 insertions(+), 30 deletions(-) create mode 100644 docs/changelog/132858.yaml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.resolve_index/25_resolve_index_with_mode.yml create mode 100644 server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexFeatures.java create mode 100644 server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolvedIndexSerializingTests.java diff --git a/docs/changelog/132858.yaml b/docs/changelog/132858.yaml new file mode 100644 index 0000000000000..73d08deb3a5b6 --- /dev/null +++ b/docs/changelog/132858.yaml @@ -0,0 +1,5 @@ +pr: 132858 +summary: Add index mode to resolve index response +area: Indices APIs +type: feature +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.resolve_index/25_resolve_index_with_mode.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.resolve_index/25_resolve_index_with_mode.yml new file mode 100644 index 0000000000000..8627597d79268 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.resolve_index/25_resolve_index_with_mode.yml @@ -0,0 +1,74 @@ +--- +setup: + - do: + indices.delete: + index: my-std-index + ignore_unavailable: true + - 
do: + indices.delete: + index: my-ts-index + ignore_unavailable: true + +# Only run this test if the cluster supports time series indexing. +# If your project uses a different feature flag name, adjust it here. +--- +"resolve index returns mode for standard and time_series indices": + - requires: + cluster_features: ["gte_v8.5.0", "resolve_index_returns_mode"] + reason: "Requires time series indexing support introduced in v8.5.0 & Node must support returning 'mode' in indices.resolve_index response" + + # Create a standard index + - do: + indices.create: + index: my-std-index + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + + # Create a time-series index + - do: + indices.create: + index: my-ts-index + body: + settings: + index.mode: time_series + number_of_shards: 1 + number_of_replicas: 0 + index.routing_path: ["host"] + mappings: + properties: + "@timestamp": + type: date + host: + type: keyword + time_series_dimension: true + metric: + type: keyword + value: + type: double + + # Resolve standard index and verify mode + - do: + indices.resolve_index: + name: my-std-index + - match: { indices.0.name: "my-std-index" } + - match: { indices.0.mode: "standard" } + + # Resolve time-series index and verify mode + - do: + indices.resolve_index: + name: my-ts-index + - match: { indices.0.name: "my-ts-index" } + - match: { indices.0.mode: "time_series" } + +--- +teardown: + - do: + indices.delete: + index: my-std-index + ignore_unavailable: true + - do: + indices.delete: + index: my-ts-index + ignore_unavailable: true diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index c3d6d2f4d97df..549c603b13980 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -434,7 +434,8 @@ org.elasticsearch.script.ScriptFeatures, org.elasticsearch.search.retriever.RetrieversFeatures, org.elasticsearch.action.admin.cluster.stats.ClusterStatsFeatures, - 
org.elasticsearch.ingest.IngestFeatures; + org.elasticsearch.ingest.IngestFeatures, + org.elasticsearch.action.admin.indices.resolve.ResolveIndexFeatures; uses org.elasticsearch.plugins.internal.SettingsExtension; uses RestExtension; diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index f5283510bd1c9..572407ef41ad5 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -365,6 +365,7 @@ static TransportVersion def(int id) { public static final TransportVersion SIMULATE_INGEST_MAPPING_MERGE_TYPE = def(9_138_0_00); public static final TransportVersion ESQL_LOOKUP_JOIN_ON_MANY_FIELDS = def(9_139_0_00); public static final TransportVersion SIMULATE_INGEST_EFFECTIVE_MAPPING = def(9_140_0_00); + public static final TransportVersion RESOLVE_INDEX_MODE_ADDED = def(9_141_0_00); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java index 67e19f0d4459a..a105a570864f4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexAction.java @@ -9,6 +9,7 @@ package org.elasticsearch.action.admin.indices.resolve; +import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.ActionResponse; @@ -39,6 +40,7 @@ import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.injection.guice.Inject; import 
org.elasticsearch.search.SearchService; import org.elasticsearch.tasks.Task; @@ -176,27 +178,35 @@ public static class ResolvedIndex extends ResolvedIndexAbstraction implements Wr static final ParseField ALIASES_FIELD = new ParseField("aliases"); static final ParseField ATTRIBUTES_FIELD = new ParseField("attributes"); static final ParseField DATA_STREAM_FIELD = new ParseField("data_stream"); + static final ParseField MODE_FIELD = new ParseField("mode"); private final String[] aliases; private final String[] attributes; private final String dataStream; + private final IndexMode mode; ResolvedIndex(StreamInput in) throws IOException { setName(in.readString()); this.aliases = in.readStringArray(); this.attributes = in.readStringArray(); this.dataStream = in.readOptionalString(); + if (in.getTransportVersion().onOrAfter(TransportVersions.RESOLVE_INDEX_MODE_ADDED)) { + this.mode = IndexMode.readFrom(in); + } else { + this.mode = null; + } } - ResolvedIndex(String name, String[] aliases, String[] attributes, @Nullable String dataStream) { + ResolvedIndex(String name, String[] aliases, String[] attributes, @Nullable String dataStream, IndexMode mode) { super(name); this.aliases = aliases; this.attributes = attributes; this.dataStream = dataStream; + this.mode = mode; } public ResolvedIndex copy(String newName) { - return new ResolvedIndex(newName, aliases, attributes, dataStream); + return new ResolvedIndex(newName, aliases, attributes, dataStream, mode); } public String[] getAliases() { @@ -211,12 +221,19 @@ public String getDataStream() { return dataStream; } + public IndexMode getMode() { + return mode; + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(getName()); out.writeStringArray(aliases); out.writeStringArray(attributes); out.writeOptionalString(dataStream); + if (out.getTransportVersion().onOrAfter(TransportVersions.RESOLVE_INDEX_MODE_ADDED)) { + IndexMode.writeTo(mode, out); + } } @Override @@ -230,6 +247,9 @@ public 
XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (Strings.isNullOrEmpty(dataStream) == false) { builder.field(DATA_STREAM_FIELD.getPreferredName(), dataStream); } + if (mode != null) { + builder.field(MODE_FIELD.getPreferredName(), mode.toString()); + } builder.endObject(); return builder; } @@ -242,12 +262,14 @@ public boolean equals(Object o) { return getName().equals(index.getName()) && Objects.equals(dataStream, index.dataStream) && Arrays.equals(aliases, index.aliases) - && Arrays.equals(attributes, index.attributes); + && Arrays.equals(attributes, index.attributes) + && Objects.equals(mode, index.mode); } @Override public int hashCode() { int result = Objects.hash(getName(), dataStream); + result = 31 * result + Objects.hashCode(mode); result = 31 * result + Arrays.hashCode(aliases); result = 31 * result + Arrays.hashCode(attributes); return result; @@ -639,7 +661,8 @@ private static void enrichIndexAbstraction( ia.getName(), aliasNames, attributes.stream().map(Enum::name).map(e -> e.toLowerCase(Locale.ROOT)).toArray(String[]::new), - ia.getParentDataStream() == null ? null : ia.getParentDataStream().getName() + ia.getParentDataStream() == null ? null : ia.getParentDataStream().getName(), + writeIndex.getIndexMode() == null ? IndexMode.STANDARD : writeIndex.getIndexMode() ) ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexFeatures.java b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexFeatures.java new file mode 100644 index 0000000000000..994f128484b66 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexFeatures.java @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.action.admin.indices.resolve; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +public class ResolveIndexFeatures implements FeatureSpecification { + + // Feature published by nodes that return "mode" in indices.resolve_index responses. + public static final NodeFeature RESOLVE_INDEX_RETURNS_MODE = new NodeFeature("resolve_index_returns_mode"); + + @Override + public Set getFeatures() { + return Set.of(RESOLVE_INDEX_RETURNS_MODE); + } + + @Override + public Set getTestFeatures() { + return Set.of(RESOLVE_INDEX_RETURNS_MODE); + } + +} diff --git a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification index 677a5a96891b5..42bf3c942daaf 100644 --- a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification +++ b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -20,3 +20,4 @@ org.elasticsearch.script.ScriptFeatures org.elasticsearch.cluster.routing.RoutingFeatures org.elasticsearch.action.admin.cluster.stats.ClusterStatsFeatures org.elasticsearch.ingest.IngestFeatures +org.elasticsearch.action.admin.indices.resolve.ResolveIndexFeatures diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexResponseTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexResponseTests.java index 5707101b5e5ef..184a5b8b18783 100644 --- 
a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexResponseTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.Response; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.AbstractXContentSerializingTestCase; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.XContentParser; @@ -28,6 +29,7 @@ import static org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.ResolvedIndex.ALIASES_FIELD; import static org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.ResolvedIndex.ATTRIBUTES_FIELD; import static org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.ResolvedIndex.DATA_STREAM_FIELD; +import static org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.ResolvedIndex.MODE_FIELD; import static org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.ResolvedIndexAbstraction.NAME_FIELD; import static org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.Response.DATA_STREAMS_FIELD; import static org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction.Response.INDICES_FIELD; @@ -76,8 +78,9 @@ private static ResolvedIndex createTestResolvedIndexInstance() { String[] aliases = randomStringArray(0, 5); String[] attributes = randomSubsetOf(List.of("open", "hidden", "frozen")).toArray(Strings.EMPTY_ARRAY); String dataStream = randomBoolean() ? 
randomAlphaOfLength(6) : null; + IndexMode mode = randomFrom(IndexMode.values()); - return new ResolvedIndex(name, aliases, attributes, dataStream); + return new ResolvedIndex(name, aliases, attributes, dataStream, mode); } private static ResolvedAlias createTestResolvedAliasInstance() { @@ -109,7 +112,8 @@ static String[] randomStringArray(int minLength, int maxLength) { (String) args[0], args[1] != null ? ((List) args[1]).toArray(Strings.EMPTY_ARRAY) : new String[0], ((List) args[2]).toArray(Strings.EMPTY_ARRAY), - (String) args[3] + (String) args[3], + IndexMode.fromString((String) args[4]) ) ); @SuppressWarnings("unchecked") @@ -133,6 +137,7 @@ static String[] randomStringArray(int minLength, int maxLength) { INDEX_PARSER.declareStringArray(ConstructingObjectParser.optionalConstructorArg(), ALIASES_FIELD); INDEX_PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), ATTRIBUTES_FIELD); INDEX_PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), DATA_STREAM_FIELD); + INDEX_PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), MODE_FIELD); ALIAS_PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME_FIELD); ALIAS_PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), INDICES_FIELD); RESPONSE_PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), (p, c) -> indexFromXContent(p), INDICES_FIELD); diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexTests.java index 576d6c2d03b35..8dbc50aae0eb7 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolveIndexTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.time.DateFormatter; import 
org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.indices.SystemIndexDescriptor; @@ -68,14 +69,22 @@ public class ResolveIndexTests extends ESTestCase { private final Object[][] indices = new Object[][] { - // name, isClosed, isHidden, isSystem, isFrozen, dataStream, aliases - { "logs-pgsql-prod-20200101", false, false, false, true, null, new String[] { "logs-pgsql-prod" } }, - { "logs-pgsql-prod-20200102", false, false, false, true, null, new String[] { "logs-pgsql-prod", "one-off-alias" } }, - { "logs-pgsql-prod-20200103", false, false, false, false, null, new String[] { "logs-pgsql-prod" } }, - { "logs-pgsql-test-20200101", true, false, false, false, null, new String[] { "logs-pgsql-test" } }, - { "logs-pgsql-test-20200102", false, false, false, false, null, new String[] { "logs-pgsql-test" } }, - { "logs-pgsql-test-20200103", false, false, false, false, null, new String[] { "logs-pgsql-test" } }, - { ".test-system-index", false, false, true, false, null, new String[] {} } }; + // name, isClosed, isHidden, isSystem, isFrozen, dataStream, aliases, mode + { "logs-pgsql-prod-20200101", false, false, false, true, null, new String[] { "logs-pgsql-prod" }, IndexMode.STANDARD }, + { + "logs-pgsql-prod-20200102", + false, + false, + false, + true, + null, + new String[] { "logs-pgsql-prod", "one-off-alias" }, + IndexMode.TIME_SERIES }, + { "logs-pgsql-prod-20200103", false, false, false, false, null, new String[] { "logs-pgsql-prod" }, IndexMode.STANDARD }, + { "logs-pgsql-test-20200101", true, false, false, false, null, new String[] { "logs-pgsql-test" }, IndexMode.STANDARD }, + { "logs-pgsql-test-20200102", false, false, false, false, null, new String[] { "logs-pgsql-test" }, IndexMode.STANDARD }, + { "logs-pgsql-test-20200103", false, false, false, false, null, new String[] { "logs-pgsql-test" }, 
IndexMode.STANDARD }, + { ".test-system-index", false, false, true, false, null, new String[] {}, IndexMode.STANDARD } }; private final Object[][] dataStreams = new Object[][] { // name, numBackingIndices @@ -234,8 +243,8 @@ public void testResolveHiddenProperlyWithDateMath() { String tomorrowSuffix = dateFormatter.format(now.plus(Duration.ofDays(1L))); Object[][] indices = new Object[][] { // name, isClosed, isHidden, isFrozen, dataStream, aliases - { "logs-pgsql-prod-" + todaySuffix, false, true, false, false, null, Strings.EMPTY_ARRAY }, - { "logs-pgsql-prod-" + tomorrowSuffix, false, true, false, false, null, Strings.EMPTY_ARRAY } }; + { "logs-pgsql-prod-" + todaySuffix, false, true, false, false, null, Strings.EMPTY_ARRAY, IndexMode.STANDARD }, + { "logs-pgsql-prod-" + tomorrowSuffix, false, true, false, false, null, Strings.EMPTY_ARRAY, IndexMode.STANDARD } }; final ProjectMetadata project = buildProjectMetadata(randomProjectIdOrDefault(), new Object[][] {}, indices).build(); String[] requestedIndex = new String[] { "" }; Set resolvedIndices = resolver.resolveExpressions( @@ -356,6 +365,7 @@ private void validateIndices(List resolvedIndices, String... 
expe assertThat(resolvedIndex.getAliases(), is(((String[]) indexInfo[6]))); assertThat(resolvedIndex.getAttributes(), is(flagsToAttributes(indexInfo))); assertThat(resolvedIndex.getDataStream(), equalTo((String) indexInfo[5])); + assertThat(resolvedIndex.getMode().toString(), equalTo(((IndexMode) indexInfo[7]).toString())); } } @@ -444,7 +454,8 @@ private ProjectMetadata.Builder buildProjectMetadata(ProjectId projectId, Object boolean hidden = (boolean) indexInfo[2]; boolean system = (boolean) indexInfo[3]; boolean frozen = (boolean) indexInfo[4]; - allIndices.add(createIndexMetadata(indexName, aliases, closed, hidden, system, frozen)); + IndexMode mode = (IndexMode) indexInfo[7]; + allIndices.add(createIndexMetadata(indexName, aliases, closed, hidden, system, frozen, mode)); } for (IndexMetadata index : allIndices) { @@ -460,12 +471,14 @@ private static IndexMetadata createIndexMetadata( boolean closed, boolean hidden, boolean system, - boolean frozen + boolean frozen, + IndexMode mode ) { Settings.Builder settingsBuilder = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) .put("index.hidden", hidden) - .put("index.frozen", frozen); + .put("index.frozen", frozen) + .put("index.mode", mode.toString()); IndexMetadata.Builder indexBuilder = IndexMetadata.builder(name) .settings(settingsBuilder) @@ -482,7 +495,7 @@ private static IndexMetadata createIndexMetadata( } private static IndexMetadata createIndexMetadata(String name, boolean hidden) { - return createIndexMetadata(name, Strings.EMPTY_ARRAY, false, true, false, false); + return createIndexMetadata(name, Strings.EMPTY_ARRAY, false, true, false, false, IndexMode.STANDARD); } private static Object[] findInfo(Object[][] indexSource, String indexName) { @@ -507,7 +520,8 @@ private Object[] findBackingIndexInfo(Object[][] dataStreamSource, String indexN false, false, dataStreamName, - Strings.EMPTY_ARRAY }; + Strings.EMPTY_ARRAY, + IndexMode.STANDARD }; } } } diff --git 
a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolvedIndexSerializingTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolvedIndexSerializingTests.java new file mode 100644 index 0000000000000..079a92b7a5222 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/ResolvedIndexSerializingTests.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.action.admin.indices.resolve; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.test.AbstractWireSerializingTestCase; + +import java.util.ArrayList; +import java.util.List; + +public class ResolvedIndexSerializingTests extends AbstractWireSerializingTestCase { + + @Override + protected Writeable.Reader instanceReader() { + return ResolveIndexAction.ResolvedIndex::new; + } + + @Override + protected ResolveIndexAction.ResolvedIndex mutateInstance(ResolveIndexAction.ResolvedIndex instance) { + String name = instance.getName(); + String[] aliases = instance.getAliases(); + String[] attributes = instance.getAttributes(); + String dataStream = instance.getDataStream(); + IndexMode mode = instance.getMode(); + mode = randomValueOtherThan(mode, () -> randomFrom(IndexMode.values())); + return new ResolveIndexAction.ResolvedIndex(name, aliases, attributes, dataStream, mode); + } + + @Override + protected ResolveIndexAction.ResolvedIndex createTestInstance() { + return createTestItem(); + } + + private static 
ResolveIndexAction.ResolvedIndex createTestItem() { + // Random index name + final String name = randomAlphaOfLengthBetween(5, 20); + + // Random aliases (possibly empty) + final String[] aliases = randomBoolean() + ? new String[0] + : randomArray(0, 4, String[]::new, () -> randomAlphaOfLengthBetween(3, 15)); + + // Attributes: always one of "open"/"closed", plus optional flags + final List attrs = new ArrayList<>(); + attrs.add(randomBoolean() ? "open" : "closed"); + if (randomBoolean()) attrs.add("hidden"); + if (randomBoolean()) attrs.add("system"); + if (randomBoolean()) attrs.add("frozen"); + final String[] attributes = attrs.toArray(new String[0]); + + final String dataStream = randomBoolean() ? randomAlphaOfLengthBetween(3, 15) : null; + + final IndexMode mode = randomFrom(IndexMode.values()); + + return new ResolveIndexAction.ResolvedIndex(name, aliases, attributes, dataStream, mode); + + } +} diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java index 824ad22b1af20..da1e940131d6d 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterActionTests.java @@ -104,30 +104,30 @@ public void testHasNonClosedMatchingIndex() { // as long as there is one non-closed index it should return true indices = new ArrayList<>(); - indices.add(new ResolveIndexAction.ResolvedIndex("foo", null, new String[] { "open" }, ".ds-foo")); + indices.add(new ResolveIndexAction.ResolvedIndex("foo", null, new String[] { "open" }, ".ds-foo", null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(true)); - indices.add(new ResolveIndexAction.ResolvedIndex("bar", null, new String[] { "system" }, ".ds-bar")); + 
indices.add(new ResolveIndexAction.ResolvedIndex("bar", null, new String[] { "system" }, ".ds-bar", null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(true)); - indices.add(new ResolveIndexAction.ResolvedIndex("baz", null, new String[] { "system", "open", "hidden" }, null)); + indices.add(new ResolveIndexAction.ResolvedIndex("baz", null, new String[] { "system", "open", "hidden" }, null, null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(true)); - indices.add(new ResolveIndexAction.ResolvedIndex("quux", null, new String[0], null)); + indices.add(new ResolveIndexAction.ResolvedIndex("quux", null, new String[0], null, null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(true)); - indices.add(new ResolveIndexAction.ResolvedIndex("wibble", null, new String[] { "system", "closed" }, null)); + indices.add(new ResolveIndexAction.ResolvedIndex("wibble", null, new String[] { "system", "closed" }, null, null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(true)); // if only closed indexes are present, should return false indices.clear(); - indices.add(new ResolveIndexAction.ResolvedIndex("wibble", null, new String[] { "system", "closed" }, null)); + indices.add(new ResolveIndexAction.ResolvedIndex("wibble", null, new String[] { "system", "closed" }, null, null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(false)); - indices.add(new ResolveIndexAction.ResolvedIndex("wobble", null, new String[] { "closed" }, null)); + indices.add(new ResolveIndexAction.ResolvedIndex("wobble", null, new String[] { "closed" }, null, null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(false)); // now add a non-closed index and should return true - indices.add(new ResolveIndexAction.ResolvedIndex("aaa", null, new String[] { "hidden" }, null)); + 
indices.add(new ResolveIndexAction.ResolvedIndex("aaa", null, new String[] { "hidden" }, null, null)); assertThat(TransportResolveClusterAction.hasNonClosedMatchingIndex(indices), equalTo(true)); } } From 110f62d1c882067e84ed3d28f46ab365e4cbb95e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Mon, 18 Aug 2025 16:30:47 +0200 Subject: [PATCH 21/33] Mute org.elasticsearch.xpack.test.rest.XPackRestIT test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} #133014 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index b8284de09b161..68fe2c4f6d9a5 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -582,6 +582,9 @@ tests: - class: org.elasticsearch.upgrades.SyntheticSourceRollingUpgradeIT method: testIndexing {upgradedNodes=0} issue: https://github.com/elastic/elasticsearch/issues/133061 +- class: org.elasticsearch.xpack.test.rest.XPackRestIT + method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} + issue: https://github.com/elastic/elasticsearch/issues/133014 # Examples: # From 61112c784434355d197ff607f7d74e35d80f1de7 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 18 Aug 2025 10:35:40 -0400 Subject: [PATCH 22/33] Esql dimension aware attributes (#131463) Relates to #128621 This PR adds more building blocks for working with dimensions in ESQL. Specifically, it makes the TSDB metadata available on attributes. 
--- .../_nightly/esql/QueryPlanningBenchmark.java | 3 +- .../compute/operator/EvalBenchmark.java | 26 ++++++-- .../xpack/esql/core/expression/Attribute.java | 5 ++ .../esql/core/expression/EmptyAttribute.java | 5 ++ .../esql/core/expression/FieldAttribute.java | 5 ++ .../core/expression/MetadataAttribute.java | 5 ++ .../core/expression/ReferenceAttribute.java | 5 ++ .../core/expression/UnresolvedAttribute.java | 5 ++ .../xpack/esql/core/type/DateEsField.java | 4 +- .../xpack/esql/core/type/EsField.java | 66 ++++++++++++++----- .../xpack/esql/core/type/KeywordEsField.java | 14 +--- .../esql/core/type/MultiTypeEsField.java | 13 ++-- .../PotentiallyUnmappedKeywordEsField.java | 3 +- .../xpack/esql/core/type/TextEsField.java | 8 --- .../xpack/esql/core/util/TestUtils.java | 4 +- .../xpack/esql/EsqlTestUtils.java | 4 +- .../elasticsearch/xpack/esql/LoadMapping.java | 8 +-- .../xpack/esql/analysis/Analyzer.java | 9 +-- .../esql/enrich/EnrichPolicyResolver.java | 3 +- .../esql/enrich/ResolvedEnrichPolicy.java | 9 ++- .../xpack/esql/plan/physical/EsQueryExec.java | 8 ++- .../xpack/esql/session/IndexResolver.java | 18 +++-- .../elasticsearch/xpack/esql/CsvTests.java | 8 ++- .../function/AbstractFunctionTestCase.java | 4 +- .../function/fulltext/KnnTests.java | 2 +- .../function/scalar/NamedExpressionTests.java | 6 +- .../function/scalar/nulls/CoalesceTests.java | 6 +- .../scalar/string/EndsWithStaticTests.java | 18 ++++- .../scalar/string/RepeatStaticTests.java | 2 +- .../scalar/string/ReplaceStaticTests.java | 2 +- .../scalar/string/StartsWithStaticTests.java | 18 ++++- .../operator/comparison/InStaticTests.java | 6 +- .../esql/index/EsIndexSerializationTests.java | 8 +-- .../AbstractLogicalPlanOptimizerTests.java | 5 +- .../LocalLogicalPlanOptimizerTests.java | 8 ++- .../LocalPhysicalPlanOptimizerTests.java | 4 +- .../optimizer/PhysicalPlanOptimizerTests.java | 33 +++++----- .../physical/local/PushTopNToSourceTests.java | 5 +- .../xpack/esql/planner/EvalMapperTests.java | 
6 +- .../planner/LocalExecutionPlannerTests.java | 12 +++- .../esql/plugin/ClusterRequestTests.java | 1 - .../esql/querydsl/query/MatchQueryTests.java | 4 +- .../esql/tree/EsqlNodeSubclassTests.java | 6 +- .../xpack/esql/type/DateEsFieldTests.java | 13 +++- .../xpack/esql/type/EsFieldTests.java | 9 ++- .../xpack/esql/type/KeywordEsFieldTests.java | 9 ++- .../esql/type/MultiTypeEsFieldTests.java | 11 ++-- .../xpack/esql/type/TextEsFieldTests.java | 9 ++- 48 files changed, 313 insertions(+), 132 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java index 3b4d445002073..8cbb2f7aff068 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java @@ -92,7 +92,8 @@ public void setup() { var fields = 10_000; var mapping = LinkedHashMap.newLinkedHashMap(fields); for (int i = 0; i < fields; i++) { - mapping.put("field" + i, new EsField("field-" + i, TEXT, emptyMap(), true)); + // We're creating a standard index, so none of these fields should be marked as dimensions. 
+ mapping.put("field" + i, new EsField("field-" + i, TEXT, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)); } var esIndex = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD)); diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java index 5bd003fe4271f..6357c0e9c396f 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java @@ -212,7 +212,7 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) { FieldAttribute timestamp = new FieldAttribute( Source.EMPTY, "timestamp", - new EsField("timestamp", DataType.DATETIME, Map.of(), true) + new EsField("timestamp", DataType.DATETIME, Map.of(), true, EsField.TimeSeriesFieldType.NONE) ); yield EvalMapper.toEvaluator( FOLD_CONTEXT, @@ -321,19 +321,35 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) { } private static FieldAttribute longField() { - return new FieldAttribute(Source.EMPTY, "long", new EsField("long", DataType.LONG, Map.of(), true)); + return new FieldAttribute( + Source.EMPTY, + "long", + new EsField("long", DataType.LONG, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ); } private static FieldAttribute doubleField() { - return new FieldAttribute(Source.EMPTY, "double", new EsField("double", DataType.DOUBLE, Map.of(), true)); + return new FieldAttribute( + Source.EMPTY, + "double", + new EsField("double", DataType.DOUBLE, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ); } private static FieldAttribute intField() { - return new FieldAttribute(Source.EMPTY, "int", new EsField("int", DataType.INTEGER, Map.of(), true)); + return new FieldAttribute( + Source.EMPTY, + "int", + new EsField("int", DataType.INTEGER, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ); 
} private static FieldAttribute keywordField() { - return new FieldAttribute(Source.EMPTY, "keyword", new EsField("keyword", DataType.KEYWORD, Map.of(), true)); + return new FieldAttribute( + Source.EMPTY, + "keyword", + new EsField("keyword", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ); } private static Configuration configuration() { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java index 6b700f0ee6a7f..28ea103164c79 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java @@ -149,4 +149,9 @@ public static boolean dataTypeEquals(List left, List right } return true; } + + /** + * @return true if the attribute represents a TSDB dimension type + */ + public abstract boolean isDimension(); } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/EmptyAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/EmptyAttribute.java index 1cacf74a8207c..cf6a696aabb7b 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/EmptyAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/EmptyAttribute.java @@ -44,6 +44,11 @@ protected String label() { return "e"; } + @Override + public boolean isDimension() { + return false; + } + @Override public boolean resolved() { return true; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java index 5a31be1ef5b70..e9cca5c79c4bf 100644 --- 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java @@ -216,6 +216,11 @@ protected String label() { return "f"; } + @Override + public boolean isDimension() { + return field.getTimeSeriesFieldType() == EsField.TimeSeriesFieldType.DIMENSION; + } + public EsField field() { return field; } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java index 34ff2cec2960a..07fe6b16b3bd1 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java @@ -120,6 +120,11 @@ protected String label() { return "m"; } + @Override + public boolean isDimension() { + return false; + } + @Override protected NodeInfo info() { return NodeInfo.create(this, MetadataAttribute::new, name(), dataType(), nullable(), id(), synthetic(), searchable); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java index 404cd75edd5e4..9e203f84b68d9 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java @@ -117,4 +117,9 @@ protected NodeInfo info() { protected String label() { return "r"; } + + @Override + public boolean isDimension() { + return false; + } } diff --git 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java index e73ee2f09458d..bcafeb17ac808 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java @@ -107,6 +107,11 @@ protected String label() { return UNRESOLVED_PREFIX; } + @Override + public boolean isDimension() { + return false; + } + @Override public String nodeString() { return toString(); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DateEsField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DateEsField.java index bd66519f2f5e2..7eab34f01a1c2 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DateEsField.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DateEsField.java @@ -20,8 +20,8 @@ */ public class DateEsField extends EsField { - public static DateEsField dateEsField(String name, Map properties, boolean hasDocValues) { - return new DateEsField(name, DataType.DATETIME, properties, hasDocValues, TimeSeriesFieldType.UNKNOWN); + public static DateEsField dateEsField(String name, Map properties, boolean hasDocValues, TimeSeriesFieldType tsType) { + return new DateEsField(name, DataType.DATETIME, properties, hasDocValues, tsType); } private DateEsField( diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java index daf26a7b5348f..87dadaed4da91 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java +++ 
b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.core.type; import org.elasticsearch.TransportVersions; +import org.elasticsearch.action.fieldcaps.IndexFieldCapabilities; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -31,10 +32,36 @@ public class EsField implements Writeable { * roles within the ESQL query processing pipeline. */ public enum TimeSeriesFieldType implements Writeable { - UNKNOWN(0), - NONE(1), - METRIC(2), - DIMENSION(3); + UNKNOWN(0) { + @Override + public TimeSeriesFieldType merge(TimeSeriesFieldType other) { + return other; + } + }, + NONE(1) { + @Override + public TimeSeriesFieldType merge(TimeSeriesFieldType other) { + return other; + } + }, + METRIC(2) { + @Override + public TimeSeriesFieldType merge(TimeSeriesFieldType other) { + if (other != DIMENSION) { + return METRIC; + } + throw new IllegalStateException("Time Series Metadata conflict. Cannot merge [" + other + "] with [METRIC]."); + } + }, + DIMENSION(3) { + @Override + public TimeSeriesFieldType merge(TimeSeriesFieldType other) { + if (other != METRIC) { + return DIMENSION; + } + throw new IllegalStateException("Time Series Metadata conflict. 
Cannot merge [" + other + "] with [DIMENSION]."); + } + }; private final int id; @@ -57,6 +84,19 @@ public static TimeSeriesFieldType readFromStream(StreamInput in) throws IOExcept default -> throw new IOException("Unexpected value for TimeSeriesFieldType: " + id); }; } + + public static TimeSeriesFieldType fromIndexFieldCapabilities(IndexFieldCapabilities capabilities) { + if (capabilities.isDimension()) { + assert capabilities.metricType() == null; + return DIMENSION; + } + if (capabilities.metricType() != null) { + return METRIC; + } + return NONE; + } + + public abstract TimeSeriesFieldType merge(TimeSeriesFieldType other); } private static Map> readers = Map.ofEntries( @@ -83,13 +123,8 @@ public static Reader getReader(String name) { private final Map properties; private final String name; private final boolean isAlias; - // Because the subclasses all reimplement serialization, this needs to be writeable from subclass constructors private final TimeSeriesFieldType timeSeriesFieldType; - public EsField(String name, DataType esDataType, Map properties, boolean aggregatable) { - this(name, esDataType, properties, aggregatable, false, TimeSeriesFieldType.UNKNOWN); - } - public EsField( String name, DataType esDataType, @@ -100,10 +135,6 @@ public EsField( this(name, esDataType, properties, aggregatable, false, timeSeriesFieldType); } - public EsField(String name, DataType esDataType, Map properties, boolean aggregatable, boolean isAlias) { - this(name, esDataType, properties, aggregatable, isAlias, TimeSeriesFieldType.UNKNOWN); - } - public EsField( String name, DataType esDataType, @@ -247,6 +278,10 @@ public Exact getExactInfo() { return Exact.EXACT_FIELD; } + public TimeSeriesFieldType getTimeSeriesFieldType() { + return timeSeriesFieldType; + } + @Override public String toString() { return name + "@" + esDataType.typeName() + "=" + properties; @@ -265,12 +300,13 @@ public boolean equals(Object o) { && isAlias == field.isAlias && esDataType == field.esDataType 
&& Objects.equals(name, field.name) - && Objects.equals(properties, field.properties); + && Objects.equals(properties, field.properties) + && Objects.equals(timeSeriesFieldType, field.timeSeriesFieldType); } @Override public int hashCode() { - return Objects.hash(esDataType, aggregatable, properties, name, isAlias); + return Objects.hash(esDataType, aggregatable, properties, name, isAlias, timeSeriesFieldType); } public static final class Exact { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/KeywordEsField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/KeywordEsField.java index e56aafa267400..85a348a7a87d6 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/KeywordEsField.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/KeywordEsField.java @@ -10,7 +10,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import java.io.IOException; -import java.util.Collections; import java.util.Map; import java.util.Objects; @@ -26,23 +25,16 @@ public class KeywordEsField extends EsField { private final int precision; private final boolean normalized; - public KeywordEsField(String name) { - this(name, Collections.emptyMap(), true, Short.MAX_VALUE, false); - } - - public KeywordEsField(String name, Map properties, boolean hasDocValues, int precision, boolean normalized) { - this(name, properties, hasDocValues, precision, normalized, false); - } - public KeywordEsField( String name, Map properties, boolean hasDocValues, int precision, boolean normalized, - boolean isAlias + boolean isAlias, + TimeSeriesFieldType timeSeriesFieldType ) { - this(name, KEYWORD, properties, hasDocValues, precision, normalized, isAlias, TimeSeriesFieldType.UNKNOWN); + this(name, KEYWORD, properties, hasDocValues, precision, normalized, isAlias, timeSeriesFieldType); } protected KeywordEsField( diff --git 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java index 8056580b13431..b26f1f060afc4 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java @@ -33,11 +33,6 @@ public class MultiTypeEsField extends EsField { private final Map indexToConversionExpressions; - public MultiTypeEsField(String name, DataType dataType, boolean aggregatable, Map indexToConversionExpressions) { - super(name, dataType, Map.of(), aggregatable); - this.indexToConversionExpressions = indexToConversionExpressions; - } - public MultiTypeEsField( String name, DataType dataType, @@ -99,7 +94,13 @@ public static MultiTypeEsField resolveFrom( indexToConversionExpressions.put(indexName, convertExpr); } } - return new MultiTypeEsField(invalidMappedField.getName(), resolvedDataType, false, indexToConversionExpressions); + return new MultiTypeEsField( + invalidMappedField.getName(), + resolvedDataType, + false, + indexToConversionExpressions, + invalidMappedField.getTimeSeriesFieldType() + ); } @Override diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java index 8672b6b61dee7..679e77ad2c636 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import java.io.IOException; +import java.util.Collections; /** * This class is used as a marker for 
fields that may be unmapped, where an unmapped field is a field which exists in the _source but is not @@ -17,7 +18,7 @@ */ public class PotentiallyUnmappedKeywordEsField extends KeywordEsField { public PotentiallyUnmappedKeywordEsField(String name) { - super(name); + super(name, Collections.emptyMap(), true, Short.MAX_VALUE, false, false, TimeSeriesFieldType.UNKNOWN); } public PotentiallyUnmappedKeywordEsField(StreamInput in) throws IOException { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/TextEsField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/TextEsField.java index 7c9508959dcae..bb05dbd4f9030 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/TextEsField.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/TextEsField.java @@ -25,14 +25,6 @@ */ public class TextEsField extends EsField { - public TextEsField(String name, Map properties, boolean hasDocValues) { - this(name, properties, hasDocValues, false); - } - - public TextEsField(String name, Map properties, boolean hasDocValues, boolean isAlias) { - super(name, TEXT, properties, hasDocValues, isAlias, TimeSeriesFieldType.UNKNOWN); - } - public TextEsField( String name, Map properties, diff --git a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java index 8f07d40bd0665..c86f704e62cb2 100644 --- a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java +++ b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java @@ -54,7 +54,7 @@ public static FieldAttribute fieldAttribute() { } public static FieldAttribute fieldAttribute(String name, DataType type) { - return new FieldAttribute(EMPTY, name, new EsField(name, type, emptyMap(), randomBoolean())); + 
return new FieldAttribute(EMPTY, name, new EsField(name, type, emptyMap(), randomBoolean(), EsField.TimeSeriesFieldType.NONE)); } public static FieldAttribute getFieldAttribute(String name) { @@ -62,7 +62,7 @@ public static FieldAttribute getFieldAttribute(String name) { } public static FieldAttribute getFieldAttribute(String name, DataType dataType) { - return new FieldAttribute(EMPTY, name, new EsField(name + "f", dataType, emptyMap(), true)); + return new FieldAttribute(EMPTY, name, new EsField(name + "f", dataType, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)); } /** diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 7f9f77509310e..49f6b907a850a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -205,7 +205,7 @@ public static FieldAttribute getFieldAttribute(String name) { } public static FieldAttribute getFieldAttribute(String name, DataType dataType) { - return new FieldAttribute(EMPTY, name, new EsField(name + "f", dataType, emptyMap(), true)); + return new FieldAttribute(EMPTY, name, new EsField(name + "f", dataType, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)); } public static FieldAttribute fieldAttribute() { @@ -213,7 +213,7 @@ public static FieldAttribute fieldAttribute() { } public static FieldAttribute fieldAttribute(String name, DataType type) { - return new FieldAttribute(EMPTY, name, new EsField(name, type, emptyMap(), randomBoolean())); + return new FieldAttribute(EMPTY, name, new EsField(name, type, emptyMap(), randomBoolean(), EsField.TimeSeriesFieldType.NONE)); } public static Literal of(Object value) { diff --git 
a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/LoadMapping.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/LoadMapping.java index 1146d7729a8d4..29f2f6ed4e927 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/LoadMapping.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/LoadMapping.java @@ -102,19 +102,19 @@ private static void walkMapping(String name, Object value, Map boolean docValues = boolSetting(content.get("doc_values"), esDataType.hasDocValues()); final EsField field; if (esDataType == TEXT) { - field = new TextEsField(name, properties, docValues); + field = new TextEsField(name, properties, docValues, false, EsField.TimeSeriesFieldType.NONE); } else if (esDataType == KEYWORD) { int length = intSetting(content.get("ignore_above"), Short.MAX_VALUE); boolean normalized = Strings.hasText(textSetting(content.get("normalizer"), null)); - field = new KeywordEsField(name, properties, docValues, length, normalized); + field = new KeywordEsField(name, properties, docValues, length, normalized, false, EsField.TimeSeriesFieldType.NONE); } else if (esDataType == DATETIME) { - field = DateEsField.dateEsField(name, properties, docValues); + field = DateEsField.dateEsField(name, properties, docValues, EsField.TimeSeriesFieldType.NONE); } else if (esDataType == UNSUPPORTED) { String type = content.get("type").toString(); field = new UnsupportedEsField(name, List.of(type), null, properties); propagateUnsupportedType(name, type, properties); } else { - field = new EsField(name, esDataType, properties, docValues); + field = new EsField(name, esDataType, properties, docValues, EsField.TimeSeriesFieldType.NONE); } mapping.put(name, field); } else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java 
index dd8b20412c2b8..b23f8e60fd88d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -311,7 +311,7 @@ private static void mappingAsAttributes(List list, Source source, Str // due to a bug also copy the field since the Attribute hierarchy extracts the data type // directly even if the data type is passed explicitly if (type != t.getDataType()) { - t = new EsField(t.getName(), type, t.getProperties(), t.isAggregatable(), t.isAlias()); + t = new EsField(t.getName(), type, t.getProperties(), t.isAggregatable(), t.isAlias(), t.getTimeSeriesFieldType()); } FieldAttribute attribute = t instanceof UnsupportedEsField uef @@ -453,7 +453,7 @@ private LocalRelation tableMapAsRelation(Source source, Map mapT String name = entry.getKey(); Column column = entry.getValue(); // create a fake ES field - alternative is to use a ReferenceAttribute - EsField field = new EsField(name, column.type(), Map.of(), false, false); + EsField field = new EsField(name, column.type(), Map.of(), false, false, EsField.TimeSeriesFieldType.UNKNOWN); attributes.add(new FieldAttribute(source, null, name, field)); // prepare the block for the supplier blocks[i++] = column.values(); @@ -1809,7 +1809,8 @@ private Expression resolveConvertFunction(ConvertFunction convert, List mergeLookupResults( DataType.fromTypeName(field.getDataType().typeName()), field.getProperties(), field.isAggregatable(), - field.isAlias() + field.isAlias(), + field.getTimeSeriesFieldType() ); EsField old = mappings.putIfAbsent(m.getKey(), field); if (old != null && old.getDataType().equals(field.getDataType()) == false) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/ResolvedEnrichPolicy.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/ResolvedEnrichPolicy.java index 64595e776a96e..a42881a13835d 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/ResolvedEnrichPolicy.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/ResolvedEnrichPolicy.java @@ -54,7 +54,14 @@ public void writeTo(StreamOutput out) throws IOException { * as though it were the base class. */ (o, v) -> { - var field = new EsField(v.getName(), v.getDataType(), v.getProperties(), v.isAggregatable(), v.isAlias()); + var field = new EsField( + v.getName(), + v.getDataType(), + v.getProperties(), + v.isAggregatable(), + v.isAlias(), + v.getTimeSeriesFieldType() + ); if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_15_2)) { field.writeTo(o); } else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java index 2e74c7153f77e..f29554081ed01 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java @@ -46,7 +46,13 @@ public class EsQueryExec extends LeafExec implements EstimatesRowSize { EsQueryExec::readFrom ); - public static final EsField DOC_ID_FIELD = new EsField("_doc", DataType.DOC_DATA_TYPE, Map.of(), false); + public static final EsField DOC_ID_FIELD = new EsField( + "_doc", + DataType.DOC_DATA_TYPE, + Map.of(), + false, + EsField.TimeSeriesFieldType.NONE + ); public static final List NO_SORTS = List.of(); // only exists to mimic older serialization, but we no longer serialize sorts private final String indexPattern; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index d72f3ef0529ad..2f44db243b85c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -98,6 +98,7 @@ public static IndexResolution mergedMappings(String indexPattern, FieldCapabilit return IndexResolution.notFound(indexPattern); } + // For each field name, store a list of the field caps responses from each index Map> fieldsCaps = collectFieldCaps(fieldCapsResponse); // Build hierarchical fields - it's easier to do it in sorted order so the object fields come first. @@ -119,7 +120,8 @@ public static IndexResolution mergedMappings(String indexPattern, FieldCapabilit String parent = name.substring(0, nextDot); EsField obj = fields.get(parent); if (obj == null) { - obj = new EsField(parent, OBJECT, new HashMap<>(), false, true); + // Object fields can't be dimensions, so we can safely hard code that here + obj = new EsField(parent, OBJECT, new HashMap<>(), false, true, EsField.TimeSeriesFieldType.NONE); isAlias = true; fields.put(parent, obj); } else if (firstUnsupportedParent == null && obj instanceof UnsupportedEsField unsupportedParent) { @@ -197,11 +199,17 @@ private static EsField createField( List rest = fcs.subList(1, fcs.size()); DataType type = EsqlDataTypeRegistry.INSTANCE.fromEs(first.type(), first.metricType()); boolean aggregatable = first.isAggregatable(); + EsField.TimeSeriesFieldType timeSeriesFieldType = EsField.TimeSeriesFieldType.UNKNOWN; if (rest.isEmpty() == false) { for (IndexFieldCapabilities fc : rest) { if (first.metricType() != fc.metricType()) { return conflictingMetricTypes(name, fullName, fieldCapsResponse); } + try { + timeSeriesFieldType = timeSeriesFieldType.merge(EsField.TimeSeriesFieldType.fromIndexFieldCapabilities(fc)); + } catch (IllegalStateException e) { + return new InvalidMappedField(name, e.getMessage()); + } } for (IndexFieldCapabilities fc : rest) { if (type != EsqlDataTypeRegistry.INSTANCE.fromEs(fc.type(), fc.metricType())) { @@ -216,22 +224,22 @@ private static EsField createField( // TODO I think we only care
about unmapped fields if we're aggregating on them. do we even then? if (type == TEXT) { - return new TextEsField(name, new HashMap<>(), false, isAlias); + return new TextEsField(name, new HashMap<>(), false, isAlias, timeSeriesFieldType); } if (type == KEYWORD) { int length = Short.MAX_VALUE; // TODO: to check whether isSearchable/isAggregateable takes into account the presence of the normalizer boolean normalized = false; - return new KeywordEsField(name, new HashMap<>(), aggregatable, length, normalized, isAlias); + return new KeywordEsField(name, new HashMap<>(), aggregatable, length, normalized, isAlias, timeSeriesFieldType); } if (type == DATETIME) { - return DateEsField.dateEsField(name, new HashMap<>(), aggregatable); + return DateEsField.dateEsField(name, new HashMap<>(), aggregatable, timeSeriesFieldType); } if (type == UNSUPPORTED) { return unsupported(name, first); } - return new EsField(name, type, new HashMap<>(), aggregatable, isAlias); + return new EsField(name, type, new HashMap<>(), aggregatable, isAlias, timeSeriesFieldType); } private static UnsupportedEsField unsupported(String name, IndexFieldCapabilities fc) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index d149fb012a14b..869a851a1fb34 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -427,7 +427,13 @@ private static Map createMappingForIndex(CsvTestsDataLoader.Tes if (mapping.containsKey(entry.getKey())) { DataType dataType = DataType.fromTypeName(entry.getValue()); EsField field = mapping.get(entry.getKey()); - EsField editedField = new EsField(field.getName(), dataType, field.getProperties(), field.isAggregatable()); + EsField editedField = new EsField( + field.getName(), + dataType, + field.getProperties(), + field.isAggregatable(), + 
field.getTimeSeriesFieldType() + ); mapping.put(entry.getKey(), editedField); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java index 00f20b9376a6f..be3d70527c3f4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java @@ -447,7 +447,7 @@ public static Stream validFunctionParameters() { * Build an {@link Attribute} that loads a field. */ public static FieldAttribute field(String name, DataType type) { - return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true)); + return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true, EsField.TimeSeriesFieldType.NONE)); } /** @@ -456,7 +456,7 @@ public static FieldAttribute field(String name, DataType type) { public static Expression deepCopyOfField(String name, DataType type) { return new DeepCopy( Source.synthetic(name), - new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true)) + new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index 9bd4896350ca7..002c519b001f8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -67,7 +67,7 @@ private static List testCaseSuppliers() { 
new FieldAttribute( Source.EMPTY, randomIdentifier(), - new EsField(randomIdentifier(), DENSE_VECTOR, Map.of(), false) + new EsField(randomIdentifier(), DENSE_VECTOR, Map.of(), false, EsField.TimeSeriesFieldType.NONE) ), DENSE_VECTOR, "dense_vector field" diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/NamedExpressionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/NamedExpressionTests.java index 06e60fc437df0..00afd4947c890 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/NamedExpressionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/NamedExpressionTests.java @@ -53,7 +53,11 @@ public void testArithmeticFunctionName() { } public void testNameForArithmeticFunctionAppliedOnTableColumn() { - FieldAttribute fa = new FieldAttribute(EMPTY, "myField", new EsField("myESField", DataType.INTEGER, emptyMap(), true)); + FieldAttribute fa = new FieldAttribute( + EMPTY, + "myField", + new EsField("myESField", DataType.INTEGER, emptyMap(), true, EsField.TimeSeriesFieldType.NONE) + ); String e = "myField + 10"; Add add = new Add(s(e), fa, l(10)); assertEquals(e, add.sourceText()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java index 1235a175294af..e7d34e17b44c8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java @@ -205,7 +205,11 @@ protected Coalesce build(Source source, List args) { public void testCoalesceIsLazy() { List sub = new ArrayList<>(testCase.getDataAsFields()); - 
FieldAttribute evil = new FieldAttribute(Source.EMPTY, "evil", new EsField("evil", sub.get(0).dataType(), Map.of(), true)); + FieldAttribute evil = new FieldAttribute( + Source.EMPTY, + "evil", + new EsField("evil", sub.get(0).dataType(), Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ); sub.add(evil); Coalesce exp = build(Source.EMPTY, sub); Layout.Builder builder = new Layout.Builder(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWithStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWithStaticTests.java index ddde306deed7a..7d8ab4f65b498 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWithStaticTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWithStaticTests.java @@ -33,8 +33,16 @@ public void testLuceneQuery_AllLiterals_NonTranslatable() { public void testLuceneQuery_NonFoldableSuffix_NonTranslatable() { EndsWith function = new EndsWith( Source.EMPTY, - new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)), - new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)) + new FieldAttribute( + Source.EMPTY, + "field", + new EsField("field", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ), + new FieldAttribute( + Source.EMPTY, + "field", + new EsField("suffix", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ) ); assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(TranslationAware.Translatable.NO)); @@ -43,7 +51,11 @@ public void testLuceneQuery_NonFoldableSuffix_NonTranslatable() { public void testLuceneQuery_NonFoldableSuffix_Translatable() { EndsWith function = new EndsWith( Source.EMPTY, - new FieldAttribute(Source.EMPTY, "field", new 
EsField("suffix", DataType.KEYWORD, Map.of(), true)), + new FieldAttribute( + Source.EMPTY, + "field", + new EsField("suffix", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ), Literal.keyword(Source.EMPTY, "a*b?c\\") ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java index 95db9daa21283..539e285b485cf 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java @@ -83,7 +83,7 @@ private static Page row(List values) { } private static FieldAttribute field(String name, DataType type) { - return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true)); + return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true, EsField.TimeSeriesFieldType.NONE)); } private DriverContext driverContext() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceStaticTests.java index cac4b5acfa320..c9796e3bc34bb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceStaticTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceStaticTests.java @@ -120,7 +120,7 @@ private static Page row(List values) { } private static FieldAttribute field(String name, DataType type) { - return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true)); + return new 
FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true, EsField.TimeSeriesFieldType.NONE)); } private DriverContext driverContext() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWithStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWithStaticTests.java index 105ce6a9e4142..9c206908d58c5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWithStaticTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWithStaticTests.java @@ -33,8 +33,16 @@ public void testLuceneQuery_AllLiterals_NonTranslatable() { public void testLuceneQuery_NonFoldablePrefix_NonTranslatable() { var function = new StartsWith( Source.EMPTY, - new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)), - new FieldAttribute(Source.EMPTY, "field", new EsField("prefix", DataType.KEYWORD, Map.of(), true)) + new FieldAttribute( + Source.EMPTY, + "field", + new EsField("field", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ), + new FieldAttribute( + Source.EMPTY, + "field", + new EsField("prefix", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ) ); assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(TranslationAware.Translatable.NO)); @@ -43,7 +51,11 @@ public void testLuceneQuery_NonFoldablePrefix_NonTranslatable() { public void testLuceneQuery_NonFoldablePrefix_Translatable() { var function = new StartsWith( Source.EMPTY, - new FieldAttribute(Source.EMPTY, "field", new EsField("prefix", DataType.KEYWORD, Map.of(), true)), + new FieldAttribute( + Source.EMPTY, + "field", + new EsField("prefix", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ), Literal.keyword(Source.EMPTY, 
"a*b?c\\") ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InStaticTests.java index b2fa9f4221769..48bf4f6dbb6fc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InStaticTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InStaticTests.java @@ -59,7 +59,11 @@ public void testHandleNullsOnRightValue() { public void testConvertedNull() { In in = new In( EMPTY, - new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)), + new FieldAttribute( + Source.EMPTY, + "field", + new EsField("suffix", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ), Arrays.asList(ONE, new Literal(Source.EMPTY, null, randomFrom(DataType.types())), THREE) ); var query = in.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexSerializationTests.java index 05a08cc1402c1..22cdef70182cc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexSerializationTests.java @@ -115,11 +115,11 @@ public static EsIndex indexWithManyConflicts(boolean withParent) { } for (int i = 0; i < nonConflictingCount; i++) { String name = String.format(Locale.ROOT, "blah.blah.blah.blah.blah.blah.nonconflict.name%04d", i); - fields.put(name, new EsField(name, DataType.KEYWORD, Map.of(), true)); + fields.put(name, new EsField(name, DataType.KEYWORD, Map.of(), true, 
EsField.TimeSeriesFieldType.NONE)); } if (withParent) { - EsField parent = new EsField("parent", DataType.OBJECT, Map.copyOf(fields), false); + EsField parent = new EsField("parent", DataType.OBJECT, Map.copyOf(fields), false, EsField.TimeSeriesFieldType.NONE); fields.put("parent", parent); } @@ -199,7 +199,7 @@ private static EsField fieldWithRecursiveChildren(int depth, int childrenPerLeve if (depth == 1) { for (int i = 0; i < childrenPerLevel; i++) { childName = "leaf" + i; - children.put(childName, new EsField(childName, DataType.KEYWORD, Map.of(), true)); + children.put(childName, new EsField(childName, DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)); } } else { for (int i = 0; i < childrenPerLevel; i++) { @@ -208,7 +208,7 @@ private static EsField fieldWithRecursiveChildren(int depth, int childrenPerLeve } } - return new EsField(name, DataType.OBJECT, children, false); + return new EsField(name, DataType.OBJECT, children, false, EsField.TimeSeriesFieldType.NONE); } /** diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java index 53efa26043226..ee3ea8112e738 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java @@ -151,7 +151,10 @@ public static void init() { ); var multiIndexMapping = loadMapping("mapping-basic.json"); - multiIndexMapping.put("partial_type_keyword", new EsField("partial_type_keyword", KEYWORD, emptyMap(), true)); + multiIndexMapping.put( + "partial_type_keyword", + new EsField("partial_type_keyword", KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE) + ); var multiIndex = IndexResolution.valid( new EsIndex( "multi_index", diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java index 5c26c4eb0e1aa..bc19252d35b0d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java @@ -350,7 +350,11 @@ public void testMissingFieldInNewCommand() { new MockFieldAttributeCommand( EMPTY, new Row(EMPTY, List.of()), - new FieldAttribute(EMPTY, "last_name", new EsField("last_name", DataType.KEYWORD, Map.of(), true)) + new FieldAttribute( + EMPTY, + "last_name", + new EsField("last_name", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ) ), testStats ); @@ -504,7 +508,7 @@ public void testSparseDocument() throws Exception { Map large = Maps.newLinkedHashMapWithExpectedSize(size); for (int i = 0; i < size; i++) { var name = String.format(Locale.ROOT, "field%03d", i); - large.put(name, new EsField(name, DataType.INTEGER, emptyMap(), true, false)); + large.put(name, new EsField(name, DataType.INTEGER, emptyMap(), true, false, EsField.TimeSeriesFieldType.NONE)); } SearchStats searchStats = statsForExistingField("field000", "field001", "field002", "field003", "field004"); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index cd6371e4d4d5e..465226e8b5bf9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -224,8 +224,8 @@ public void init() { List.of("a", "b"), Map.of("", "idx"), 
Map.ofEntries( - Map.entry("a", new EsField("a", DataType.INTEGER, Map.of(), true)), - Map.entry("b", new EsField("b", DataType.LONG, Map.of(), true)) + Map.entry("a", new EsField("a", DataType.INTEGER, Map.of(), true, EsField.TimeSeriesFieldType.NONE)), + Map.entry("b", new EsField("b", DataType.LONG, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ) ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 5ff6687145798..9e255d9322e73 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -411,8 +411,8 @@ private static EnrichResolution setupEnrichResolution() { List.of("a", "b"), Map.of("", "idx"), Map.ofEntries( - Map.entry("a", new EsField("a", DataType.INTEGER, Map.of(), true)), - Map.entry("b", new EsField("b", DataType.LONG, Map.of(), true)) + Map.entry("a", new EsField("a", DataType.INTEGER, Map.of(), true, EsField.TimeSeriesFieldType.NONE)), + Map.entry("b", new EsField("b", DataType.LONG, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ) ); @@ -425,10 +425,13 @@ private static EnrichResolution setupEnrichResolution() { List.of("city", "airport", "region", "city_boundary"), Map.of("", "airport_city_boundaries"), Map.ofEntries( - Map.entry("city", new EsField("city", DataType.KEYWORD, Map.of(), true)), - Map.entry("airport", new EsField("airport", DataType.TEXT, Map.of(), false)), - Map.entry("region", new EsField("region", DataType.TEXT, Map.of(), false)), - Map.entry("city_boundary", new EsField("city_boundary", DataType.GEO_SHAPE, Map.of(), false)) + Map.entry("city", new EsField("city", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)), + Map.entry("airport", new EsField("airport", 
DataType.TEXT, Map.of(), false, EsField.TimeSeriesFieldType.NONE)), + Map.entry("region", new EsField("region", DataType.TEXT, Map.of(), false, EsField.TimeSeriesFieldType.NONE)), + Map.entry( + "city_boundary", + new EsField("city_boundary", DataType.GEO_SHAPE, Map.of(), false, EsField.TimeSeriesFieldType.NONE) + ) ) ) ); @@ -440,7 +443,7 @@ private static EnrichResolution setupEnrichResolution() { EnrichPolicy.MATCH_TYPE, List.of("department"), Map.of("", ".enrich-departments-1", "cluster_1", ".enrich-departments-2"), - Map.of("department", new EsField("department", DataType.KEYWORD, Map.of(), true)) + Map.of("department", new EsField("department", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ); enrichResolution.addResolvedPolicy( @@ -451,7 +454,7 @@ private static EnrichResolution setupEnrichResolution() { EnrichPolicy.MATCH_TYPE, List.of("department"), Map.of("", ".enrich-departments-3"), - Map.of("department", new EsField("department", DataType.KEYWORD, Map.of(), true)) + Map.of("department", new EsField("department", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ); enrichResolution.addResolvedPolicy( @@ -462,7 +465,7 @@ private static EnrichResolution setupEnrichResolution() { EnrichPolicy.MATCH_TYPE, List.of("department"), Map.of("cluster_1", ".enrich-departments-2"), - Map.of("department", new EsField("department", DataType.KEYWORD, Map.of(), true)) + Map.of("department", new EsField("department", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ); enrichResolution.addResolvedPolicy( @@ -473,7 +476,7 @@ private static EnrichResolution setupEnrichResolution() { EnrichPolicy.MATCH_TYPE, List.of("supervisor"), Map.of("", ".enrich-supervisors-a", "cluster_1", ".enrich-supervisors-b"), - Map.of("supervisor", new EsField("supervisor", DataType.KEYWORD, Map.of(), true)) + Map.of("supervisor", new EsField("supervisor", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ); 
enrichResolution.addResolvedPolicy( @@ -484,7 +487,7 @@ private static EnrichResolution setupEnrichResolution() { EnrichPolicy.MATCH_TYPE, List.of("supervisor"), Map.of("", ".enrich-supervisors-c"), - Map.of("supervisor", new EsField("supervisor", DataType.KEYWORD, Map.of(), true)) + Map.of("supervisor", new EsField("supervisor", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ); enrichResolution.addResolvedPolicy( @@ -495,7 +498,7 @@ private static EnrichResolution setupEnrichResolution() { EnrichPolicy.MATCH_TYPE, List.of("supervisor"), Map.of("cluster_1", ".enrich-supervisors-b"), - Map.of("supervisor", new EsField("supervisor", DataType.KEYWORD, Map.of(), true)) + Map.of("supervisor", new EsField("supervisor", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE)) ) ); return enrichResolution; @@ -2994,9 +2997,9 @@ public void testProjectAwayColumns() { "test", Map.of( "some_field1", - new EsField("some_field1", DataType.KEYWORD, Map.of(), true), + new EsField("some_field1", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE), "some_field2", - new EsField("some_field2", DataType.KEYWORD, Map.of(), true) + new EsField("some_field2", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE) ) ), IndexMode.STANDARD @@ -7871,7 +7874,7 @@ private Map fields(Collection fieldNames) { Map fields = new HashMap<>(); for (String fieldName : fieldNames) { - fields.put(fieldName, new EsField(fieldName, DataType.KEYWORD, Map.of(), false)); + fields.put(fieldName, new EsField(fieldName, DataType.KEYWORD, Map.of(), false, EsField.TimeSeriesFieldType.NONE)); } return fields; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java index 00f8dfb9aaacc..15fc4d75f4f13 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java @@ -505,7 +505,10 @@ private static void addSortableFieldAttributes(Map field } private static void addFieldAttribute(Map fields, String name, DataType type) { - fields.put(name, new FieldAttribute(Source.EMPTY, name, new EsField(name, type, new HashMap<>(), true))); + fields.put( + name, + new FieldAttribute(Source.EMPTY, name, new EsField(name, type, new HashMap<>(), true, EsField.TimeSeriesFieldType.NONE)) + ); } static TestPhysicalPlanBuilder from(String index) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/EvalMapperTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/EvalMapperTests.java index 3ea56d6d15abd..21bbedd0313f6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/EvalMapperTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/EvalMapperTests.java @@ -161,7 +161,11 @@ public void testExpressionSerialization() { } private static FieldAttribute field(String name, DataType type) { - return new FieldAttribute(Source.EMPTY, name, new EsField(name, type, Collections.emptyMap(), false)); + return new FieldAttribute( + Source.EMPTY, + name, + new EsField(name, type, Collections.emptyMap(), false, EsField.TimeSeriesFieldType.NONE) + ); } static DriverContext driverContext() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java index b56f4a3a4898b..f820ee636f630 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java @@ -150,7 +150,11 @@ public void testLuceneSourceOperatorHugeRowSize() throws IOException { public void testLuceneTopNSourceOperator() throws IOException { int estimatedRowSize = randomEstimatedRowSize(estimatedRowSizeIsHuge); - FieldAttribute sortField = new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.INTEGER, Map.of(), true)); + FieldAttribute sortField = new FieldAttribute( + Source.EMPTY, + "field", + new EsField("field", DataType.INTEGER, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ); EsQueryExec.FieldSort sort = new EsQueryExec.FieldSort(sortField, Order.OrderDirection.ASC, Order.NullsPosition.LAST); Literal limit = new Literal(Source.EMPTY, 10, DataType.INTEGER); LocalExecutionPlanner.LocalExecutionPlan plan = planner().plan( @@ -177,7 +181,11 @@ public void testLuceneTopNSourceOperator() throws IOException { public void testLuceneTopNSourceOperatorDistanceSort() throws IOException { int estimatedRowSize = randomEstimatedRowSize(estimatedRowSizeIsHuge); - FieldAttribute sortField = new FieldAttribute(Source.EMPTY, "point", new EsField("point", DataType.GEO_POINT, Map.of(), true)); + FieldAttribute sortField = new FieldAttribute( + Source.EMPTY, + "point", + new EsField("point", DataType.GEO_POINT, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + ); EsQueryExec.GeoDistanceSort sort = new EsQueryExec.GeoDistanceSort(sortField, Order.OrderDirection.ASC, 1, -1); Literal limit = new Literal(Source.EMPTY, 10, DataType.INTEGER); LocalExecutionPlanner.LocalExecutionPlan plan = planner().plan( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java index cba3c3a7556e0..17d5407c3c468 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java @@ -165,7 +165,6 @@ public void testFallbackIndicesOptions() throws Exception { assertThat(cloned.clusterAlias(), equalTo(request.clusterAlias())); assertThat(cloned.sessionId(), equalTo(request.sessionId())); RemoteClusterPlan plan = cloned.remoteClusterPlan(); - assertThat(plan.plan(), equalTo(request.remoteClusterPlan().plan())); assertThat(plan.targetIndices(), equalTo(request.remoteClusterPlan().targetIndices())); OriginalIndices originalIndices = plan.originalIndices(); assertThat(originalIndices.indices(), equalTo(request.remoteClusterPlan().originalIndices().indices())); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/MatchQueryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/MatchQueryTests.java index bf3f3baa0b634..2df727f9bc000 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/MatchQueryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/MatchQueryTests.java @@ -65,14 +65,14 @@ public void testQueryBuilding() { private static MatchQueryBuilder getBuilder(Map options) { final Source source = new Source(1, 1, StringUtils.EMPTY); - FieldAttribute fa = new FieldAttribute(EMPTY, "a", new EsField("af", KEYWORD, emptyMap(), true)); + FieldAttribute fa = new FieldAttribute(EMPTY, "a", new EsField("af", KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)); final MatchQuery mmq = new MatchQuery(source, "eggplant", "foo", options); return (MatchQueryBuilder) mmq.asBuilder(); } public void testToString() { final Source source = new Source(1, 1, StringUtils.EMPTY); - FieldAttribute fa = new FieldAttribute(EMPTY, "a", new EsField("af", KEYWORD, emptyMap(), true)); + FieldAttribute fa = new 
FieldAttribute(EMPTY, "a", new EsField("af", KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)); final MatchQuery mmq = new MatchQuery(source, "eggplant", "foo"); assertEquals("MatchQuery@1:2[eggplant:foo]", mmq.toString()); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java index 716ae56d85d7a..e24e6adc2becb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java @@ -733,7 +733,11 @@ static EsRelation randomEsRelation() { } static FieldAttribute field(String name, DataType type) { - return new FieldAttribute(Source.EMPTY, name, new EsField(name, type, Collections.emptyMap(), false)); + return new FieldAttribute( + Source.EMPTY, + name, + new EsField(name, type, Collections.emptyMap(), false, EsField.TimeSeriesFieldType.NONE) + ); } public static Set> subclassesOf(Class clazz) throws IOException { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DateEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DateEsFieldTests.java index bf0494d5fd043..a7cf5b499e7ee 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DateEsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DateEsFieldTests.java @@ -14,7 +14,12 @@ public class DateEsFieldTests extends AbstractEsFieldTypeTests { static DateEsField randomDateEsField(int maxPropertiesDepth) { - return DateEsField.dateEsField(randomAlphaOfLength(5), randomProperties(maxPropertiesDepth), randomBoolean()); + return DateEsField.dateEsField( + randomAlphaOfLength(5), + randomProperties(maxPropertiesDepth), + randomBoolean(), + randomFrom(EsField.TimeSeriesFieldType.values()) + ); } @Override @@ 
-27,12 +32,14 @@ protected DateEsField mutate(DateEsField instance) { String name = instance.getName(); Map properties = instance.getProperties(); boolean aggregatable = instance.isAggregatable(); - switch (between(0, 2)) { + EsField.TimeSeriesFieldType tsType = instance.getTimeSeriesFieldType(); + switch (between(0, 3)) { case 0 -> name = randomAlphaOfLength(name.length() + 1); case 1 -> properties = randomValueOtherThan(properties, () -> randomProperties(4)); case 2 -> aggregatable = false == aggregatable; + case 3 -> tsType = randomValueOtherThan(tsType, () -> randomFrom(EsField.TimeSeriesFieldType.values())); default -> throw new IllegalArgumentException(); } - return DateEsField.dateEsField(name, properties, aggregatable); + return DateEsField.dateEsField(name, properties, aggregatable, tsType); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsFieldTests.java index 18e0405a65892..c29cbde8a7877 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsFieldTests.java @@ -19,7 +19,8 @@ public static EsField randomEsField(int maxPropertiesDepth) { Map properties = randomProperties(maxPropertiesDepth); boolean aggregatable = randomBoolean(); boolean isAlias = randomBoolean(); - return new EsField(name, esDataType, properties, aggregatable, isAlias); + EsField.TimeSeriesFieldType tsType = randomFrom(EsField.TimeSeriesFieldType.values()); + return new EsField(name, esDataType, properties, aggregatable, isAlias, tsType); } @Override @@ -34,14 +35,16 @@ protected EsField mutate(EsField instance) { Map properties = instance.getProperties(); boolean aggregatable = instance.isAggregatable(); boolean isAlias = instance.isAlias(); - switch (between(0, 4)) { + EsField.TimeSeriesFieldType tsType = instance.getTimeSeriesFieldType(); + switch 
(between(0, 5)) { case 0 -> name = randomAlphaOfLength(name.length() + 1); case 1 -> esDataType = randomValueOtherThan(esDataType, () -> randomFrom(DataType.types())); case 2 -> properties = randomValueOtherThan(properties, () -> randomProperties(4)); case 3 -> aggregatable = false == aggregatable; case 4 -> isAlias = false == isAlias; + case 5 -> tsType = randomValueOtherThan(tsType, () -> randomFrom(EsField.TimeSeriesFieldType.values())); default -> throw new IllegalArgumentException(); } - return new EsField(name, esDataType, properties, aggregatable, isAlias); + return new EsField(name, esDataType, properties, aggregatable, isAlias, tsType); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/KeywordEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/KeywordEsFieldTests.java index ef04f0e27c096..9441b2e896f10 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/KeywordEsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/KeywordEsFieldTests.java @@ -21,7 +21,8 @@ static KeywordEsField randomKeywordEsField(int maxPropertiesDepth) { int precision = randomInt(); boolean normalized = randomBoolean(); boolean isAlias = randomBoolean(); - return new KeywordEsField(name, properties, hasDocValues, precision, normalized, isAlias); + EsField.TimeSeriesFieldType tsType = randomFrom(EsField.TimeSeriesFieldType.values()); + return new KeywordEsField(name, properties, hasDocValues, precision, normalized, isAlias, tsType); } @Override @@ -37,15 +38,17 @@ protected KeywordEsField mutate(KeywordEsField instance) { int precision = instance.getPrecision(); boolean normalized = instance.getNormalized(); boolean isAlias = instance.isAlias(); - switch (between(0, 5)) { + EsField.TimeSeriesFieldType tsType = instance.getTimeSeriesFieldType(); + switch (between(0, 6)) { case 0 -> name = randomAlphaOfLength(name.length() + 1); case 1 -> properties = 
randomValueOtherThan(properties, () -> randomProperties(4)); case 2 -> hasDocValues = false == hasDocValues; case 3 -> precision = randomValueOtherThan(precision, ESTestCase::randomInt); case 4 -> normalized = false == normalized; case 5 -> isAlias = false == isAlias; + case 6 -> tsType = randomValueOtherThan(tsType, () -> randomFrom(EsField.TimeSeriesFieldType.values())); default -> throw new IllegalArgumentException(); } - return new KeywordEsField(name, properties, hasDocValues, precision, normalized, isAlias); + return new KeywordEsField(name, properties, hasDocValues, precision, normalized, isAlias, tsType); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java index 154605a199d22..f64d2417aff1d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java @@ -72,7 +72,8 @@ protected MultiTypeEsField createTestInstance() { DataType dataType = randomFrom(types()); DataType toType = toString ? 
DataType.KEYWORD : dataType; Map indexToConvertExpressions = randomConvertExpressions(name, toString, dataType); - return new MultiTypeEsField(name, toType, false, indexToConvertExpressions); + EsField.TimeSeriesFieldType tsType = randomFrom(EsField.TimeSeriesFieldType.values()); + return new MultiTypeEsField(name, toType, false, indexToConvertExpressions, tsType); } @Override @@ -80,13 +81,15 @@ protected MultiTypeEsField mutateInstance(MultiTypeEsField instance) throws IOEx String name = instance.getName(); DataType dataType = instance.getDataType(); Map indexToConvertExpressions = instance.getIndexToConversionExpressions(); - switch (between(0, 2)) { + EsField.TimeSeriesFieldType tsType = instance.getTimeSeriesFieldType(); + switch (between(0, 3)) { case 0 -> name = randomAlphaOfLength(name.length() + 1); case 1 -> dataType = randomValueOtherThan(dataType, () -> randomFrom(DataType.types())); case 2 -> indexToConvertExpressions = mutateConvertExpressions(name, dataType, indexToConvertExpressions); + case 3 -> tsType = randomValueOtherThan(tsType, () -> randomFrom(EsField.TimeSeriesFieldType.values())); default -> throw new IllegalArgumentException(); } - return new MultiTypeEsField(name, dataType, false, indexToConvertExpressions); + return new MultiTypeEsField(name, dataType, false, indexToConvertExpressions, tsType); } @Override @@ -169,6 +172,6 @@ private static Expression testConvertExpression(String name, DataType fromType, } private static FieldAttribute fieldAttribute(String name, DataType dataType) { - return new FieldAttribute(Source.EMPTY, name, new EsField(name, dataType, Map.of(), true)); + return new FieldAttribute(Source.EMPTY, name, new EsField(name, dataType, Map.of(), true, EsField.TimeSeriesFieldType.NONE)); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/TextEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/TextEsFieldTests.java index 9af3b7376f2b2..cc2270d94ddfd 100644 
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/TextEsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/TextEsFieldTests.java @@ -18,7 +18,8 @@ static TextEsField randomTextEsField(int maxPropertiesDepth) { Map properties = randomProperties(maxPropertiesDepth); boolean hasDocValues = randomBoolean(); boolean isAlias = randomBoolean(); - return new TextEsField(name, properties, hasDocValues, isAlias); + EsField.TimeSeriesFieldType tsType = randomFrom(EsField.TimeSeriesFieldType.values()); + return new TextEsField(name, properties, hasDocValues, isAlias, tsType); } @Override @@ -32,13 +33,15 @@ protected TextEsField mutate(TextEsField instance) { Map properties = instance.getProperties(); boolean hasDocValues = instance.isAggregatable(); boolean isAlias = instance.isAlias(); - switch (between(0, 3)) { + EsField.TimeSeriesFieldType tsType = instance.getTimeSeriesFieldType(); + switch (between(0, 4)) { case 0 -> name = randomAlphaOfLength(name.length() + 1); case 1 -> properties = randomValueOtherThan(properties, () -> randomProperties(4)); case 2 -> hasDocValues = false == hasDocValues; case 3 -> isAlias = false == isAlias; + case 4 -> tsType = randomValueOtherThan(tsType, () -> randomFrom(EsField.TimeSeriesFieldType.values())); default -> throw new IllegalArgumentException(); } - return new TextEsField(name, properties, hasDocValues, isAlias); + return new TextEsField(name, properties, hasDocValues, isAlias, tsType); } } From d183b39aedaa279a41ea37d0d90a0364998aaf11 Mon Sep 17 00:00:00 2001 From: Ben Chaplin Date: Mon, 18 Aug 2025 11:13:23 -0400 Subject: [PATCH 23/33] Update TasksIT for batched execution (#132762) --- .../action/admin/cluster/node/tasks/TasksIT.java | 12 ++++++++---- .../search/SearchQueryThenFetchAsyncAction.java | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/node/tasks/TasksIT.java 
b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/node/tasks/TasksIT.java index b2ba1d34e3280..f9651c71ecf13 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/node/tasks/TasksIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/node/tasks/TasksIT.java @@ -41,7 +41,6 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.search.SearchService; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.tasks.RemovedTaskListener; import org.elasticsearch.tasks.Task; @@ -82,6 +81,8 @@ import static java.util.Collections.singleton; import static org.elasticsearch.action.admin.cluster.node.tasks.TestTaskPlugin.TEST_TASK_ACTION; import static org.elasticsearch.action.admin.cluster.node.tasks.TestTaskPlugin.UNBLOCK_TASK_ACTION; +import static org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction.NODE_SEARCH_ACTION_NAME; +import static org.elasticsearch.action.search.SearchTransportService.FREE_CONTEXT_SCROLL_ACTION_NAME; import static org.elasticsearch.core.TimeValue.timeValueMillis; import static org.elasticsearch.core.TimeValue.timeValueSeconds; import static org.elasticsearch.http.HttpTransportSettings.SETTING_HTTP_MAX_HEADER_SIZE; @@ -353,8 +354,6 @@ public void testTransportBulkTasks() { } public void testSearchTaskDescriptions() { - // TODO: enhance this test to also check the tasks created by batched query execution - updateClusterSettings(Settings.builder().put(SearchService.BATCHED_QUERY_PHASE.getKey(), false)); registerTaskManagerListeners(TransportSearchAction.TYPE.name()); // main task registerTaskManagerListeners(TransportSearchAction.TYPE.name() + "[*]"); // shard task createIndex("test"); @@ -380,6 +379,11 @@ public void testSearchTaskDescriptions() { // check that if we have any shard-level 
requests they all have non-zero length description List shardTasks = findEvents(TransportSearchAction.TYPE.name() + "[*]", Tuple::v1); for (TaskInfo taskInfo : shardTasks) { + // During batched query execution, if a partial reduction was done on the data node, a task will be created to free the reader. + // These tasks don't have descriptions or parent tasks, so they're ignored for this test. + if (taskInfo.action().equals(FREE_CONTEXT_SCROLL_ACTION_NAME)) { + continue; + } assertThat(taskInfo.parentTaskId(), notNullValue()); assertEquals(mainTask.get(0).taskId(), taskInfo.parentTaskId()); assertTaskHeaders(taskInfo); @@ -396,12 +400,12 @@ public void testSearchTaskDescriptions() { taskInfo.description(), Regex.simpleMatch("id[*], size[1], lastEmittedDoc[null]", taskInfo.description()) ); + case NODE_SEARCH_ACTION_NAME -> assertEquals("NodeQueryRequest", taskInfo.description()); default -> fail("Unexpected action [" + taskInfo.action() + "] with description [" + taskInfo.description() + "]"); } // assert that all task descriptions have non-zero length assertThat(taskInfo.description().length(), greaterThan(0)); } - updateClusterSettings(Settings.builder().putNull(SearchService.BATCHED_QUERY_PHASE.getKey())); } public void testSearchTaskHeaderLimit() { diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java index 8d763698c63c0..b2d5693762dca 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java @@ -552,7 +552,7 @@ private void onNodeQueryFailure(Exception e, NodeQueryRequest request, CanMatchP } } - private static final String NODE_SEARCH_ACTION_NAME = "indices:data/read/search[query][n]"; + public static final String NODE_SEARCH_ACTION_NAME = "indices:data/read/search[query][n]"; static 
void registerNodeSearchAction( SearchTransportService searchTransportService, From 190757e37fea0dffaa0b0dc18ca3f6e2ea3647d0 Mon Sep 17 00:00:00 2001 From: Larry Gregory Date: Mon, 18 Aug 2025 11:48:06 -0400 Subject: [PATCH 24/33] Change reporting_user role to leverage reserved kibana privileges (#132766) * Change reporting_user role to leverage reserved kibana privileges * [CI] Auto commit changes from spotless * Mark reporting_user role as deprecated * Update docs/changelog/132766.yaml * Update release notes --------- Co-authored-by: elasticsearchmachine --- docs/changelog/132766.yaml | 10 ++++++++++ .../authz/store/ReservedRolesStore.java | 18 ++++-------------- .../authz/store/ReservedRolesStoreTests.java | 13 ++----------- 3 files changed, 16 insertions(+), 25 deletions(-) create mode 100644 docs/changelog/132766.yaml diff --git a/docs/changelog/132766.yaml b/docs/changelog/132766.yaml new file mode 100644 index 0000000000000..0744c1c280632 --- /dev/null +++ b/docs/changelog/132766.yaml @@ -0,0 +1,10 @@ +pr: 132766 +summary: Change `reporting_user` role to leverage reserved kibana privileges +area: Authorization +type: deprecation +issues: [] +deprecation: + title: Deprecate the built-in `reporting_user` role. + area: Authorization + details: The `reporting_user` role is deprecated. Administrators should manage access to Kibana's reporting features via custom roles which grant the necessary privileges. + impact: This role will be removed in a future version. Administrators should migrate to custom roles to avoid interruption. 
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java index e7a8b4e14707e..dc3db9d5c88df 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @@ -328,23 +328,13 @@ private static Map initializeReservedRoles() { null, new RoleDescriptor.ApplicationResourcePrivileges[] { RoleDescriptor.ApplicationResourcePrivileges.builder() - .application("kibana-.kibana") + .application("kibana-*") .resources("*") - .privileges( - "feature_discover.minimal_read", - "feature_discover.generate_report", - "feature_dashboard.minimal_read", - "feature_dashboard.generate_report", - "feature_dashboard.download_csv_report", - "feature_canvas.minimal_read", - "feature_canvas.generate_report", - "feature_visualize.minimal_read", - "feature_visualize.generate_report" - ) + .privileges("reserved_reporting_user") .build() }, null, null, - MetadataUtils.DEFAULT_RESERVED_METADATA, + MetadataUtils.getDeprecatedReservedMetadata("Please grant access via Kibana privileges instead."), null, null, null, @@ -353,7 +343,7 @@ private static Map initializeReservedRoles() { + "including generating and downloading reports. " + "This role implicitly grants access to all Kibana reporting features, " + "with each user having access only to their own reports. Note that reporting users should also be assigned " - + "additional roles that grant read access to the indices that will be used to generate reports." + + "additional roles that grant read access to Kibana, and the indices that will be used to generate reports." 
) ), entry(KibanaSystemUser.ROLE_NAME, kibanaSystemRoleDescriptor(KibanaSystemUser.ROLE_NAME)), diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java index 34dd684e83166..acf4c423edf04 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java @@ -2769,20 +2769,11 @@ public void testReportingUserRole() { RoleDescriptor roleDescriptor = ReservedRolesStore.roleDescriptor("reporting_user"); assertNotNull(roleDescriptor); assertThat(roleDescriptor.getMetadata(), hasEntry("_reserved", true)); + assertThat(roleDescriptor.getMetadata(), hasEntry("_deprecated", true)); final String applicationName = "kibana-.kibana"; - final Set applicationPrivilegeNames = Set.of( - "feature_discover.minimal_read", - "feature_discover.generate_report", - "feature_dashboard.minimal_read", - "feature_dashboard.generate_report", - "feature_dashboard.download_csv_report", - "feature_canvas.minimal_read", - "feature_canvas.generate_report", - "feature_visualize.minimal_read", - "feature_visualize.generate_report" - ); + final Set applicationPrivilegeNames = Set.of("reserved_reporting_user"); final Set allowedApplicationActionPatterns = Set.of( "login:", From 3d141fecac8bf8e2390b74f7c1e260f9557360e7 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Mon, 18 Aug 2025 08:49:25 -0700 Subject: [PATCH 25/33] Expose APIs needed by flush during translog replay (#132960) --- .../java/org/elasticsearch/index/engine/InternalEngine.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java 
b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index db4686bb4faf1..06b94192f0f2a 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -620,6 +620,10 @@ public void skipTranslogRecovery() { pendingTranslogRecovery.set(false); // we are good - now we can commit } + protected boolean pendingTranslogRecovery() { + return pendingTranslogRecovery.get(); + } + private void recoverFromTranslogInternal( TranslogRecoveryRunner translogRecoveryRunner, long recoverUpToSeqNo, @@ -3133,7 +3137,7 @@ protected Map getCommitExtraUserData(final long localCheckpoint) return Collections.emptyMap(); } - final void ensureCanFlush() { + protected void ensureCanFlush() { // translog recovery happens after the engine is fully constructed. // If we are in this stage we have to prevent flushes from this // engine otherwise we might loose documents if the flush succeeds From 0ba4a31957a36932cc8fff44ca7f916a1ac4667f Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 18 Aug 2025 17:18:51 +0100 Subject: [PATCH 26/33] Enable `exclude_source_vectors` by default for new indices (#131907) This commit sets `index.mapping.exclude_source_vectors` to `true` by default for newly created indices. When enabled, vector fields (`dense_vector`, `sparse_vector`, `rank_vector`) are excluded from `_source` on disk and are not returned in API responses unless explicitly requested. The change improves indexing performance, reduces storage size, and avoids unnecessary payload bloat in responses. Vector values continue to be rehydrated transparently for partial updates, reindex, and recovery. Existing indices are not affected and continue to store vectors in `_source` by default. 
--- docs/changelog/131907.yaml | 26 ++++++ .../mapping-reference/dense-vector.md | 91 +++++++++++++++++-- .../mapping-reference/rank-vectors.md | 74 ++++++++++++++- .../mapping-reference/sparse-vector.md | 82 +++++++++++++++-- .../reindex/ReindexBasicTests.java | 7 +- .../reindex/UpdateByQueryBasicTests.java | 4 +- rest-api-spec/build.gradle | 4 + .../test/get/100_synthetic_source.yml | 14 +++ .../240_source_synthetic_dense_vectors.yml | 4 +- .../250_source_synthetic_sparse_vectors.yml | 4 +- .../test/search.vectors/90_sparse_vector.yml | 52 ++++++++--- .../elasticsearch/search/query/ExistsIT.java | 6 +- .../common/settings/IndexScopedSettings.java | 6 +- .../elasticsearch/index/IndexSettings.java | 10 +- .../elasticsearch/index/IndexVersions.java | 1 + .../engine/TranslogOperationAsserter.java | 8 +- .../index/get/ShardGetService.java | 4 +- .../index/mapper/DocumentParser.java | 2 +- .../vectors/DenseVectorFieldMapper.java | 24 ++--- .../vectors/SparseVectorFieldMapper.java | 85 ++++------------- .../index/translog/Translog.java | 55 ++++++++--- .../action/search/SearchCapabilities.java | 8 +- .../vectors/DenseVectorFieldMapperTests.java | 17 +--- .../vectors/SparseVectorFieldMapperTests.java | 15 +-- .../SyntheticVectorFieldsRecoveryTests.java | 4 +- .../SyntheticVectorsMapperTestCase.java | 9 +- .../index/shard/ShardGetServiceTests.java | 15 ++- x-pack/plugin/build.gradle | 2 + .../ml/qa/ml-with-security/build.gradle | 2 +- .../ml/integration/TextEmbeddingQueryIT.java | 31 ++++++- .../ml/integration/TextExpansionQueryIT.java | 3 + ...rrf_retriever_search_api_compatibility.yml | 12 ++- .../xpack/rank/vectors/RankVectorsPlugin.java | 4 +- .../mapper/RankVectorsFieldMapper.java | 20 ++-- .../test/ml/sparse_vector_search.yml | 29 ++++-- .../rank_vectors_synthetic_vectors.yml | 4 +- 36 files changed, 503 insertions(+), 235 deletions(-) create mode 100644 docs/changelog/131907.yaml diff --git a/docs/changelog/131907.yaml b/docs/changelog/131907.yaml new file mode 
100644 index 0000000000000..6e8db5dbb272f --- /dev/null +++ b/docs/changelog/131907.yaml @@ -0,0 +1,26 @@ +pr: 131907 +summary: Enable `exclude_source_vectors` by default for new indices +area: Vector Search +type: breaking +issues: [] +breaking: + title: Enable `exclude_source_vectors` by default for new indices + area: Search + details: |- + The `exclude_source_vectors` setting is now enabled by default for newly created indices. + This means that vector fields (e.g., `dense_vector`) are no longer stored in the `_source` field + by default, although they remain fully accessible through search and retrieval operations. + + Instead of being persisted in `_source`, vectors are now rehydrated on demand from the underlying + index structures when needed. This reduces index size and improves performance for typical vector + search workloads where the original vector values do not need to be part of the `_source`. + + If your use case requires vector fields to be stored in `_source`, you can disable this behavior by + setting `exclude_source_vectors: false` at index creation time. + impact: |- + Vector fields will no longer be stored in `_source` by default for new indices. Applications or tools + that expect to see vector fields in `_source` (for raw document inspection) + may need to be updated or configured to explicitly retain vectors using `exclude_source_vectors: false`. + + Retrieval of vector fields via search or the `_source` API remains fully supported. + notable: true diff --git a/docs/reference/elasticsearch/mapping-reference/dense-vector.md b/docs/reference/elasticsearch/mapping-reference/dense-vector.md index 2cd1613118edb..0dc84889d4799 100644 --- a/docs/reference/elasticsearch/mapping-reference/dense-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/dense-vector.md @@ -102,6 +102,81 @@ PUT my-index-2 {{es}} uses the [HNSW algorithm](https://arxiv.org/abs/1603.09320) to support efficient kNN search. 
Like most kNN algorithms, HNSW is an approximate method that sacrifices result accuracy for improved speed. +## Accessing `dense_vector` fields in search responses +```{applies_to} +stack: ga 9.2 +serverless: ga +``` + +By default, `dense_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs. +This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output. + +To retrieve vector values explicitly, you can use: + +* The `fields` option to request specific vector fields directly: + +```console +POST my-index-2/_search +{ + "fields": ["my_vector"] +} +``` + +- The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses: + +```console +POST my-index-2/_search +{ + "_source": { + "exclude_vectors": false + } +} +``` + +### Storage behavior and `_source` + +By default, `dense_vector` fields are **not stored in `_source`** on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`. +This setting is enabled by default for newly created indices and can only be set at index creation time. + +When enabled: + +* `dense_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual. +* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format. + +This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed. + +### Rehydration and precision + +When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. 
Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality. + +### Storing original vectors in `_source` + +If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`: + +```console +PUT my-index-include-vectors +{ + "settings": { + "index.mapping.exclude_source_vectors": false + }, + "mappings": { + "properties": { + "my_vector": { + "type": "dense_vector" + } + } + } +} +``` + +When this setting is disabled: + +* `dense_vector` fields are stored as part of the `_source`, exactly as indexed. +* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage. +* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`. + +This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values. + ## Automatically quantize vectors for kNN search [dense-vector-quantization] The `dense_vector` type supports quantization to reduce the memory footprint required when [searching](docs-content://solutions/search/vector/knn.md#approximate-knn) `float` vectors. The three following quantization strategies are supported: @@ -266,16 +341,16 @@ $$$dense-vector-index-options$$$ `type` : (Required, string) The type of kNN algorithm to use. Can be either any of: * `hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) for scalable approximate kNN search. This supports all `element_type` values. - * `int8_hnsw` - The default index type for some float vectors: - - * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions. 
+ * `int8_hnsw` - The default index type for some float vectors: + + * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions. * {applies_to}`stack: ga 9.0` Default for float all vectors. - + This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 4x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization). * `int4_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 8x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization). * `bbq_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically binary quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 32x at the cost of accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization). - - {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions. + + {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions. * `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values. * `int8_flat` - This utilizes a brute-force search algorithm in addition to automatically scalar quantization. Only supports `element_type` of `float`. * `int4_flat` - This utilizes a brute-force search algorithm in addition to automatically half-byte scalar quantization. Only supports `element_type` of `float`. 
@@ -295,8 +370,8 @@ $$$dense-vector-index-options$$$ : (Optional, object) An optional section that configures automatic vector rescoring on knn queries for the given field. Only applicable to quantized index types. :::::{dropdown} Properties of rescore_vector `oversample` -: (required, float) The amount to oversample the search results by. This value should be one of the following: - * Greater than `1.0` and less than `10.0` +: (required, float) The amount to oversample the search results by. This value should be one of the following: + * Greater than `1.0` and less than `10.0` * Exactly `0` to indicate no oversampling and rescoring should occur {applies_to}`stack: ga 9.1` : The higher the value, the more vectors will be gathered and rescored with the raw values per shard. : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead. diff --git a/docs/reference/elasticsearch/mapping-reference/rank-vectors.md b/docs/reference/elasticsearch/mapping-reference/rank-vectors.md index 20683e89b164c..2c22dd08ae1b0 100644 --- a/docs/reference/elasticsearch/mapping-reference/rank-vectors.md +++ b/docs/reference/elasticsearch/mapping-reference/rank-vectors.md @@ -108,11 +108,81 @@ $$$rank-vectors-element-type$$$ `dims` : (Optional, integer) Number of vector dimensions. Can’t exceed `4096`. If `dims` is not specified, it will be set to the length of the first vector added to the field. +## Accessing `rank_vectors` fields in search responses +```{applies_to} +stack: ga 9.2 +serverless: ga +``` + +By default, `rank_vectors` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs. +This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+ +To retrieve vector values explicitly, you can use: + +* The `fields` option to request specific vector fields directly: + +```console +POST my-index-2/_search +{ + "fields": ["my_vector"] +} +``` + +- The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses: + +```console +POST my-index-2/_search +{ + "_source": { + "exclude_vectors": false + } +} +``` + +### Storage behavior and `_source` + +By default, `rank_vectors` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`. +This setting is enabled by default for newly created indices and can only be set at index creation time. + +When enabled: + +* `rank_vectors` fields are removed from `_source` and the rest of the `_source` is stored as usual. +* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format. + +This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed. + +### Rehydration and precision + +When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality. 
+ +### Storing original vectors in `_source` + +If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`: + +```console +PUT my-index-include-vectors +{ + "settings": { + "index.mapping.exclude_source_vectors": false + }, + "mappings": { + "properties": { + "my_vector": { + "type": "rank_vectors", + "dims": 128 + } + } + } +} +``` -## Synthetic `_source` [rank-vectors-synthetic-source] +When this setting is disabled: -`rank_vectors` fields support [synthetic `_source`](mapping-source-field.md#synthetic-source) . +* `rank_vectors` fields are stored as part of the `_source`, exactly as indexed. +* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage. +* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`. +This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values. ## Scoring with rank vectors [rank-vectors-scoring] diff --git a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md index 3a65ea4fc5ff8..5cb009ddcf302 100644 --- a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md @@ -57,12 +57,6 @@ See [semantic search with ELSER](docs-content://solutions/search/semantic-search The following parameters are accepted by `sparse_vector` fields: -[store](/reference/elasticsearch/mapping-reference/mapping-store.md) -: Indicates whether the field value should be stored and retrievable independently of the [_source](/reference/elasticsearch/mapping-reference/mapping-source-field.md) field. Accepted values: true or false (default). 
The field’s data is stored using term vectors, a disk-efficient structure compared to the original JSON input. The input map can be retrieved during a search request via the [`fields` parameter](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#search-fields-param). To benefit from reduced disk usage, you must either: - - * Exclude the field from [_source](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#source-filtering). - * Use [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source). - index_options {applies_to}`stack: ga 9.1` : (Optional, object) You can set index options for your `sparse_vector` field to determine if you should prune tokens, and the parameter configurations for the token pruning. If pruning options are not set in your [`sparse_vector` query](/reference/query-languages/query-dsl/query-dsl-sparse-vector-query.md), Elasticsearch will use the default options configured for the field, if any. @@ -96,6 +90,82 @@ This ensures that: * The tokens that are kept are frequent enough and have significant scoring. * Very infrequent tokens that may not have as high of a score are removed. +## Accessing `sparse_vector` fields in search responses +```{applies_to} +stack: ga 9.2 +serverless: ga +``` + +By default, `sparse_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs. +This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+ +To retrieve vector values explicitly, you can use: + +* The `fields` option to request specific vector fields directly: + +```console +POST my-index-2/_search +{ + "fields": ["my_vector"] +} +``` + +- The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses: + +```console +POST my-index-2/_search +{ + "_source": { + "exclude_vectors": false + } +} +``` + +### Storage behavior and `_source` + +By default, `sparse_vector` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`. +This setting is enabled by default for newly created indices and can only be set at index creation time. + +When enabled: + +* `sparse_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual. +* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format. + +This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed. + +### Rehydration and precision + +When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. +Internally, vectors are stored as floats with 9 significant bits for the precision, so the rehydrated values will have reduced precision. +This lossy representation is intended to save space while preserving search quality. 
+ +### Storing original vectors in `_source` + +If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`: + +```console +PUT my-index-include-vectors +{ + "settings": { + "index.mapping.exclude_source_vectors": false + }, + "mappings": { + "properties": { + "my_vector": { + "type": "sparse_vector" + } + } + } +} +``` + +When this setting is disabled: + +* `sparse_vector` fields are stored as part of the `_source`, exactly as indexed. +* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage. +* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`. + +This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values. ## Multi-value sparse vectors [index-multi-value-sparse-vectors] diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java index 96c7ef49f6956..92aa897bf6287 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java @@ -23,7 +23,6 @@ import java.util.Map; import java.util.stream.Collectors; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; @@ -182,14 +181,13 @@ public void testReindexFromComplexDateMathIndexName() throws Exception { } public void testReindexIncludeVectors() throws Exception { - assumeTrue("This test requires synthetic vectors to be enabled", 
SYNTHETIC_VECTORS); var resp1 = prepareCreate("test").setSettings( - Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build() ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); assertAcked(resp1); var resp2 = prepareCreate("test_reindex").setSettings( - Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build() ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); assertAcked(resp2); @@ -237,5 +235,4 @@ public void testReindexIncludeVectors() throws Exception { searchResponse.decRef(); } } - } diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java index 33c80e9138d28..2ff1a258afb1d 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java @@ -24,7 +24,6 @@ import java.util.Map; import java.util.stream.Collectors; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; @@ -158,9 +157,8 @@ public void testMissingSources() { } public void testUpdateByQueryIncludeVectors() throws Exception { - assumeTrue("This test requires synthetic vectors to be enabled", SYNTHETIC_VECTORS); var resp1 = prepareCreate("test").setSettings( - 
Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build() ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); assertAcked(resp1); diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index f2142e8ba1c8d..c11259b202002 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -90,6 +90,10 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task -> task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively") task.skipTest("cluster.info/30_info_thread_pool/Cluster HTTP Info", "The search_throttled thread pool has been removed") task.skipTest("synonyms/80_synonyms_from_index/Fail loading synonyms from index if synonyms_set doesn't exist", "Synonyms do no longer fail if the synonyms_set doesn't exist") + task.skipTest("get/100_synthetic_source/indexed dense vectors", "Vectors are not returned by default") + task.skipTest("get/100_synthetic_source/non-indexed dense vectors", "Vectors are not returned by default") + task.skipTest("search.vectors/90_sparse_vector/stored sparse_vector synthetic source", "Vectors are not returned by default") + task.skipTest("search.vectors/90_sparse_vector/sparse_vector synthetic source", "Vectors are not returned by default") task.skipTest("update/100_synthetic_source/keyword", "synthetic recovery source means _recovery_source field will not be present") task.skipTest("update/100_synthetic_source/stored text", "synthetic recovery source means _recovery_source field will not be present") }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml index 
a0061272a2c23..91a1d1bf9ef40 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml @@ -427,6 +427,11 @@ indexed dense vectors: - requires: cluster_features: ["gte_v8.5.0"] reason: introduced in 8.5.0 + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -457,6 +462,8 @@ indexed dense vectors: get: index: test id: 1 + _source_exclude_vectors: false + - match: {_index: "test"} - match: {_id: "1"} - match: {_version: 1} @@ -472,6 +479,11 @@ non-indexed dense vectors: - requires: cluster_features: ["gte_v8.5.0"] reason: introduced in 8.5.0 + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -501,6 +513,8 @@ non-indexed dense vectors: get: index: test id: 1 + _source_exclude_vectors: false + - match: {_index: "test"} - match: {_id: "1"} - match: {_version: 1} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml index 414f6cfdad645..68f8c868b4e7e 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml @@ -5,7 +5,7 @@ setup: capabilities: - method: GET path: /_search - capabilities: [ synthetic_vectors_setting ] + capabilities: [ exclude_source_vectors_setting ] - skip: features: "headers" @@ -13,8 +13,6 @@ setup: indices.create: index: test body: - settings: - index.mapping.synthetic_vectors: true mappings: properties: name: diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml index 53f0cd33da7d3..8397b48866204 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml @@ -5,7 +5,7 @@ setup: capabilities: - method: GET path: /_search - capabilities: [ synthetic_vectors_setting ] + capabilities: [ exclude_source_vectors_setting ] - skip: features: "headers" @@ -13,8 +13,6 @@ setup: indices.create: index: test body: - settings: - index.mapping.synthetic_vectors: true mappings: properties: name: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml index 0b65a69bf500e..b521b2866f9c5 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml @@ -387,8 +387,13 @@ "sparse_vector synthetic source": - requires: - cluster_features: [ "mapper.source.mode_from_index_setting" ] + cluster_features: [ "mapper.source.mode_from_index_setting"] reason: "Source mode configured through index setting" + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_vectors_param, exclude_source_vectors_setting ] - do: indices.create: @@ -402,6 +407,18 @@ ml.tokens: type: sparse_vector + - do: + indices.create: + index: test_include_vectors + body: + settings: + index: + mapping.exclude_source_vectors: false + mappings: + properties: + ml.tokens: + type: sparse_vector + - match: { 
acknowledged: true } - do: @@ -421,7 +438,7 @@ - do: index: - index: test + index: test_include_vectors id: "2" body: ml: @@ -431,7 +448,7 @@ - do: index: - index: test + index: test_include_vectors id: "3" body: ml: @@ -446,20 +463,17 @@ get: index: test id: "1" + _source_exclude_vectors: false - - match: - _source: - ml: - tokens: - running: 2.4097164 - good: 2.170997 - run: 2.052153 - race: 1.4575411 - for: 1.1908325 + - close_to: { _source.ml.tokens.running: { value: 2.4097164, error: 0.01 } } + - close_to: { _source.ml.tokens.good: { value: 2.170997, error: 0.01 } } + - close_to: { _source.ml.tokens.run: { value: 2.052153, error: 0.01 } } + - close_to: { _source.ml.tokens.race: { value: 1.4575411, error: 0.01 } } + - close_to: { _source.ml.tokens.for: { value: 1.1908325, error: 0.01 } } - do: get: - index: test + index: test_include_vectors id: "2" - match: @@ -467,7 +481,7 @@ - do: get: - index: test + index: test_include_vectors id: "3" - match: @@ -527,8 +541,14 @@ "stored sparse_vector synthetic source": - requires: - cluster_features: [ "mapper.source.mode_from_index_setting", "mapper.sparse_vector.store_support" ] reason: "sparse_vector supports store parameter" + cluster_features: [ "mapper.source.mode_from_index_setting", "mapper.sparse_vector.store_support" ] + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_vectors_param ] + - do: indices.create: @@ -567,6 +587,8 @@ search: index: test body: + _source: + exclude_vectors: false fields: [ "ml.tokens" ] - match: diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java index 26b040e2309c2..bb48e6c70b4a4 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java @@ -88,9 +88,9 @@ public void 
testExists() throws Exception { // object fields singletonMap("bar", barObject), singletonMap("bar", singletonMap("baz", 42)), - // sparse_vector field empty - singletonMap("vec", emptyMap()), - // sparse_vector field non-empty + // sparse_vector field + singletonMap("vec", singletonMap("6", 100)), + // sparse_vector field singletonMap("vec", singletonMap("1", 100)), // empty doc emptyMap() }; diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 9f4c5b80ccf23..93ddb5d3fc485 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -49,8 +49,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; - /** * Encapsulates all valid index level settings. * @see Property#IndexScope @@ -243,9 +241,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { if (IndexSettings.DOC_VALUES_SKIPPER) { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } - if (SYNTHETIC_VECTORS) { - settings.add(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING); - } + settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index cd78d4323f44b..a6335ca6666b0 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -848,12 +848,12 @@ private static String getIgnoreAboveDefaultValue(final Settings settings) { Property.Final ); - public static final boolean SYNTHETIC_VECTORS = new FeatureFlag("mapping_synthetic_vectors").isEnabled(); - public 
static final Setting INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING = Setting.boolSetting( - "index.mapping.synthetic_vectors", - false, + public static final Setting INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING = Setting.boolSetting( + "index.mapping.exclude_source_vectors", + settings -> String.valueOf(SETTING_INDEX_VERSION_CREATED.get(settings).onOrAfter(IndexVersions.EXCLUDE_SOURCE_VECTORS_DEFAULT)), Property.IndexScope, - Property.Final + Property.Final, + Property.ServerlessPublic ); private final Index index; diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 57fcc2bc763be..221bc9264b100 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -181,6 +181,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2); public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2); public static final IndexVersion IGNORED_SOURCE_FIELDS_PER_ENTRY_WITH_FF = def(9_034_0_00, Version.LUCENE_10_2_2); + public static final IndexVersion EXCLUDE_SOURCE_VECTORS_DEFAULT = def(9_035_0_00, Version.LUCENE_10_2_2); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java b/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java index 2cd3a9f755ffb..0898711aee809 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java +++ b/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java @@ -41,8 +41,10 @@ public boolean assertSameIndexOperation(Translog.Index o1, Translog.Index o2) th if (engineConfig.getIndexSettings().isRecoverySourceSyntheticEnabled() || engineConfig.getMapperService().mappingLookup().inferenceFields().isEmpty() == false || engineConfig.getMapperService().mappingLookup().syntheticVectorFields().isEmpty() == false) { - return super.assertSameIndexOperation(synthesizeSource(engineConfig, o1), o2) - || super.assertSameIndexOperation(o1, synthesizeSource(engineConfig, o2)); + // for synthetic source and synthetic fields, we check that the resulting source map is equivalent + // since ordering might not be preserved. 
+ return Translog.Index.equalsWithoutAutoGeneratedTimestamp(synthesizeSource(engineConfig, o1), o2, false) + || Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, synthesizeSource(engineConfig, o2), false); } return false; } @@ -99,6 +101,6 @@ static Translog.Snapshot newSnapshot(EngineConfig engineConfig, Translog.Index o } public boolean assertSameIndexOperation(Translog.Index o1, Translog.Index o2) throws IOException { - return Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, o2); + return Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, o2, true); } } diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index ec0ad1acd917f..6688523bfe668 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -60,7 +60,7 @@ import java.util.function.Function; import java.util.stream.Collectors; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; @@ -418,7 +418,7 @@ private GetResult innerGetFetch( */ public static boolean shouldExcludeVectorsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { if (fetchSourceContext == null || fetchSourceContext.excludeVectors() == null) { - return INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(indexSettings.getSettings()); + return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings()); } return fetchSourceContext.excludeVectors(); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java 
b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 1a248f2dd501e..87017a24765dc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -794,7 +794,7 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder( fieldName, context.indexSettings().getIndexVersionCreated(), - IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings()) + IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(context.indexSettings().getSettings()) ); builder.dimensions(mappers.size()); DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index cde64f54c80d5..9c5d28a4942d3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -117,7 +117,7 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_VERSION_CREATED; import static org.elasticsearch.common.Strings.format; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; @@ -255,9 +255,9 @@ public static class Builder extends FieldMapper.Builder { private final 
Parameter> meta = Parameter.metaParam(); final IndexVersion indexVersionCreated; - final boolean isSyntheticVector; + final boolean isExcludeSourceVectors; - public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) { + public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) { super(name); this.indexVersionCreated = indexVersionCreated; // This is defined as updatable because it can be updated once, from [null] to a valid dim size, @@ -289,7 +289,7 @@ public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheti } } }); - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION); final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); final boolean defaultBBQ8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW); @@ -431,7 +431,7 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { // Validate again here because the dimensions or element type could have been set programmatically, // which affects index option validity validate(); - boolean isSyntheticVectorFinal = (context.isSourceSynthetic() == false) && indexed.getValue() && isSyntheticVector; + boolean isExcludeSourceVectorsFinal = context.isSourceSynthetic() == false && indexed.getValue() && isExcludeSourceVectors; return new DenseVectorFieldMapper( leafName(), new DenseVectorFieldType( @@ -448,7 +448,7 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { builderParams(this, context), indexOptions.getValue(), indexVersionCreated, - isSyntheticVectorFinal + isExcludeSourceVectorsFinal ); } } @@ -2391,7 +2391,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws (n, c) -> new Builder( n, c.getIndexSettings().getIndexVersionCreated(), - 
INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) ), notInMultiFields(CONTENT_TYPE) ); @@ -2850,7 +2850,7 @@ public List fetchValues(Source source, int doc, List ignoredValu private final DenseVectorIndexOptions indexOptions; private final IndexVersion indexCreatedVersion; - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; private DenseVectorFieldMapper( String simpleName, @@ -2858,12 +2858,12 @@ private DenseVectorFieldMapper( BuilderParams params, DenseVectorIndexOptions indexOptions, IndexVersion indexCreatedVersion, - boolean isSyntheticVector + boolean isExcludeSourceVectorsFinal ) { super(simpleName, mappedFieldType, params); this.indexOptions = indexOptions; this.indexCreatedVersion = indexCreatedVersion; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectorsFinal; } @Override @@ -2985,7 +2985,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, isSyntheticVector).init(this); + return new Builder(leafName(), indexCreatedVersion, isExcludeSourceVectors).init(this); } private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { @@ -3041,7 +3041,7 @@ public String toString() { @Override public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { - if (isSyntheticVector) { + if (isExcludeSourceVectors) { var syntheticField = new IndexedSyntheticFieldLoader(indexCreatedVersion, fieldType().similarity); return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyVectorAsList); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 
a91c84405b295..ef76540525898 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -12,11 +12,8 @@ import org.apache.lucene.document.FeatureField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -42,8 +39,6 @@ import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.inference.WeightedToken; import org.elasticsearch.inference.WeightedTokensUtils; -import org.elasticsearch.search.fetch.StoredFieldsSpec; -import org.elasticsearch.search.lookup.Source; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.DeprecationHandler; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -62,7 +57,7 @@ import java.util.Objects; import java.util.stream.Stream; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -94,8 +89,7 @@ private static SparseVectorFieldMapper toType(FieldMapper in) { public static class Builder extends FieldMapper.Builder { private final IndexVersion indexVersionCreated; - - private final Parameter stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false); + private final Parameter stored; private final 
Parameter> meta = Parameter.metaParam(); private final Parameter indexOptions = new Parameter<>( SPARSE_VECTOR_INDEX_OPTIONS, @@ -107,12 +101,13 @@ public static class Builder extends FieldMapper.Builder { Objects::toString ).acceptsNull().setSerializerCheck(this::indexOptionsSerializerCheck); - private boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; - public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) { + public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) { super(name); + this.stored = Parameter.boolParam("store", false, m -> toType(m).fieldType().isStored(), () -> isExcludeSourceVectors); this.indexVersionCreated = indexVersionCreated; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; } public Builder setStored(boolean value) { @@ -132,19 +127,18 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) { builderIndexOptions = SparseVectorIndexOptions.getDefaultIndexOptions(indexVersionCreated); } - final boolean syntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector; - final boolean storedFinal = stored.getValue() || syntheticVectorFinal; + final boolean isExcludeSourceVectorsFinal = isExcludeSourceVectors && context.isSourceSynthetic() == false && stored.get(); return new SparseVectorFieldMapper( leafName(), new SparseVectorFieldType( indexVersionCreated, context.buildFullName(leafName()), - storedFinal, + stored.get(), meta.getValue(), builderIndexOptions ), builderParams(this, context), - syntheticVectorFinal + isExcludeSourceVectorsFinal ); } @@ -206,7 +200,7 @@ private static SparseVectorIndexOptions parseIndexOptions(MappingParserContext c return new Builder( n, c.indexVersionCreated(), - INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) ); 
}, notInMultiFields(CONTENT_TYPE)); @@ -251,9 +245,6 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - if (isStored()) { - return new SparseVectorValueFetcher(name()); - } return SourceValueFetcher.identity(name(), context, format); } @@ -313,16 +304,17 @@ private static String indexedValueForSearch(Object value) { } } - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; private SparseVectorFieldMapper( String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, - boolean isSyntheticVector + boolean isExcludeSourceVectors ) { super(simpleName, mappedFieldType, builderParams); - this.isSyntheticVector = isSyntheticVector; + assert isExcludeSourceVectors == false || fieldType().isStored(); + this.isExcludeSourceVectors = isExcludeSourceVectors; } @Override @@ -335,7 +327,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { @Override public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { - if (isSyntheticVector) { + if (isExcludeSourceVectors) { var syntheticField = new SparseVectorSyntheticFieldLoader(fullPath(), leafName()); return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyAsMap); } @@ -349,7 +341,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), this.fieldType().indexVersionCreated, this.isSyntheticVector).init(this); + return new Builder(leafName(), this.fieldType().indexVersionCreated, this.isExcludeSourceVectors).init(this); } @Override @@ -433,51 +425,6 @@ private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion ind || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)); } - private static class SparseVectorValueFetcher implements ValueFetcher { - private final String 
fieldName; - private TermVectors termVectors; - - private SparseVectorValueFetcher(String fieldName) { - this.fieldName = fieldName; - } - - @Override - public void setNextReader(LeafReaderContext context) { - try { - termVectors = context.reader().termVectors(); - } catch (IOException exc) { - throw new UncheckedIOException(exc); - } - } - - @Override - public List fetchValues(Source source, int doc, List ignoredValues) throws IOException { - if (termVectors == null) { - return List.of(); - } - var terms = termVectors.get(doc, fieldName); - if (terms == null) { - return List.of(); - } - - var termsEnum = terms.iterator(); - PostingsEnum postingsScratch = null; - Map result = new LinkedHashMap<>(); - while (termsEnum.next() != null) { - postingsScratch = termsEnum.postings(postingsScratch); - postingsScratch.nextDoc(); - result.put(termsEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(postingsScratch.freq())); - assert postingsScratch.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; - } - return List.of(result); - } - - @Override - public StoredFieldsSpec storedFieldsSpec() { - return StoredFieldsSpec.NO_REQUIREMENTS; - } - } - private static class SparseVectorSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { private final String fullPath; private final String leafName; diff --git a/server/src/main/java/org/elasticsearch/index/translog/Translog.java b/server/src/main/java/org/elasticsearch/index/translog/Translog.java index b1a203616b120..6e83a684cfa82 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/Translog.java +++ b/server/src/main/java/org/elasticsearch/index/translog/Translog.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.Nullable; import 
org.elasticsearch.core.Releasable; @@ -35,6 +36,8 @@ import org.elasticsearch.index.shard.AbstractIndexShardComponent; import org.elasticsearch.index.shard.IndexShardComponent; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.xcontent.XContentParserConfiguration; import java.io.Closeable; import java.io.EOFException; @@ -1226,9 +1229,9 @@ public Type opType() { @Override public long estimateSize() { return (2 * id.length()) + source.length() + (routing != null ? 2 * routing.length() : 0) + (4 * Long.BYTES); // timestamp, - // seq_no, - // primary_term, - // and version + // seq_no, + // primary_term, + // and version } public String id() { @@ -1275,7 +1278,7 @@ public boolean equals(Object o) { } Index other = (Index) o; - return autoGeneratedIdTimestamp == other.autoGeneratedIdTimestamp && equalsWithoutAutoGeneratedTimestamp(this, other); + return autoGeneratedIdTimestamp == other.autoGeneratedIdTimestamp && equalsWithoutAutoGeneratedTimestamp(this, other, true); } @Override @@ -1311,15 +1314,43 @@ public long getAutoGeneratedIdTimestamp() { return autoGeneratedIdTimestamp; } - public static boolean equalsWithoutAutoGeneratedTimestamp(Translog.Index o1, Translog.Index o2) { - return o1.version == o2.version - && o1.seqNo == o2.seqNo - && o1.primaryTerm == o2.primaryTerm - && o1.id.equals(o2.id) - && o1.source.equals(o2.source) - && Objects.equals(o1.routing, o2.routing); - } + public static boolean equalsWithoutAutoGeneratedTimestamp(Translog.Index o1, Translog.Index o2, boolean checkSourceBytes) { + if (o1.version != o2.version + || o1.seqNo != o2.seqNo + || o1.primaryTerm != o2.primaryTerm + || o1.id.equals(o2.id) == false + || Objects.equals(o1.routing, o2.routing) == false) { + return false; + } + + if (checkSourceBytes) { + return o1.source.equals(o2.source); + } + var s1 = Source.fromBytes(o1.source); + var s2 = Source.fromBytes(o2.source); + try ( + var actualParser = 
XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + s1.internalSourceRef(), + s1.sourceContentType() + ) + ) { + var actualMap = actualParser.map(); + try ( + var expectedParser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + s2.internalSourceRef(), + s2.sourceContentType() + ) + ) { + var expectedMap = expectedParser.map(); + return expectedMap.equals(actualMap); + } + } catch (IOException exc) { + return false; + } + } } public static final class Delete extends Operation { diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index 6b44b787d1dbf..82410a76a8c75 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -12,8 +12,6 @@ import java.util.HashSet; import java.util.Set; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; - /** * A {@link Set} of "capabilities" supported by the {@link RestSearchAction}. 
*/ @@ -55,10 +53,10 @@ private SearchCapabilities() {} private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param"; private static final String DENSE_VECTOR_UPDATABLE_BBQ = "dense_vector_updatable_bbq"; private static final String FIELD_EXISTS_QUERY_FOR_TEXT_FIELDS_NO_INDEX_OR_DV = "field_exists_query_for_text_fields_no_index_or_dv"; - private static final String SYNTHETIC_VECTORS_SETTING = "synthetic_vectors_setting"; private static final String UPDATE_FIELD_TO_BBQ_DISK = "update_field_to_bbq_disk"; private static final String KNN_FILTER_ON_NESTED_FIELDS_CAPABILITY = "knn_filter_on_nested_fields"; private static final String BUCKET_SCRIPT_PARENT_MULTI_BUCKET_ERROR = "bucket_script_parent_multi_bucket_error"; + private static final String EXCLUDE_SOURCE_VECTORS_SETTING = "exclude_source_vectors_setting"; public static final Set CAPABILITIES; static { @@ -86,9 +84,7 @@ private SearchCapabilities() {} capabilities.add(UPDATE_FIELD_TO_BBQ_DISK); capabilities.add(KNN_FILTER_ON_NESTED_FIELDS_CAPABILITY); capabilities.add(BUCKET_SCRIPT_PARENT_MULTI_BUCKET_ERROR); - if (SYNTHETIC_VECTORS) { - capabilities.add(SYNTHETIC_VECTORS_SETTING); - } + capabilities.add(EXCLUDE_SOURCE_VECTORS_SETTING); CAPABILITIES = Set.copyOf(capabilities); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 09d1ad47a1083..ad5f5bdc1eedb 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -2479,22 +2479,7 @@ protected Object generateRandomInputValue(MappedFieldType ft) { DenseVectorFieldType vectorFieldType = (DenseVectorFieldType) ft; return switch (vectorFieldType.getElementType()) { case BYTE -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions()); - case FLOAT 
-> { - float[] floats = new float[vectorFieldType.getVectorDimensions()]; - float magnitude = 0; - for (int i = 0; i < floats.length; i++) { - float f = randomFloat(); - floats[i] = f; - magnitude += f * f; - } - magnitude = (float) Math.sqrt(magnitude); - if (VectorSimilarity.DOT_PRODUCT.equals(vectorFieldType.getSimilarity())) { - for (int i = 0; i < floats.length; i++) { - floats[i] /= magnitude; - } - } - yield floats; - } + case FLOAT -> randomNormalizedVector(vectorFieldType.getVectorDimensions()); case BIT -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions() / 8); }; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 3d8eb1b454a56..45fe7a16048a6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -118,7 +118,7 @@ protected void minimalFieldMappingPreviousIndexDefaultsIncluded(XContentBuilder protected void minimalMappingWithExplicitDefaults(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); - b.field("store", false); + b.field("store", true); b.startObject("meta"); b.endObject(); @@ -421,7 +421,7 @@ public void testStoreIsNotUpdateable() throws IOException { .startObject("properties") .startObject("field") .field("type", "sparse_vector") - .field("store", true) + .field("store", false) .endObject() .endObject() .endObject() @@ -474,23 +474,16 @@ protected boolean allowsNullValues() { @Override protected SyntheticSourceSupport syntheticSourceSupport(boolean syntheticSource) { - boolean withStore = randomBoolean(); return new SyntheticSourceSupport() { @Override public boolean preservesExactSource() { - return withStore == false; + return false; } @Override public SyntheticSourceExample example(int maxValues) { 
var sample = getSampleValueForDocument(); - return new SyntheticSourceExample(sample, sample, b -> { - if (withStore) { - minimalStoreMapping(b); - } else { - minimalMapping(b); - } - }); + return new SyntheticSourceExample(sample, sample, b -> { minimalMapping(b); }); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java index 138d138b741e5..3718dae1c7d3e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java @@ -38,7 +38,6 @@ import java.util.ArrayList; import java.util.List; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.hamcrest.Matchers.equalTo; public class SyntheticVectorFieldsRecoveryTests extends EngineTestCase { @@ -69,7 +68,7 @@ protected Settings indexSettings() { builder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SourceFieldMapper.Mode.SYNTHETIC.name()); builder.put(IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE_SETTING.getKey(), useSyntheticRecovery); } - builder.put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true); + builder.put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true); return builder.build(); } @@ -113,7 +112,6 @@ protected String defaultMapping() { } public void testSnapshotRecovery() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); List expectedOperations = new ArrayList<>(); int size = randomIntBetween(10, 50); for (int i = 0; i < size; i++) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java index ebb4fe788fea3..f7a23383f4e92 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java @@ -26,12 +26,10 @@ import java.io.IOException; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase { public void testSyntheticVectorsMinimalValidDocument() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, false, false, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -39,7 +37,6 @@ public void testSyntheticVectorsMinimalValidDocument() throws IOException { } public void testSyntheticVectorsFullDocument() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, true, true, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -47,7 +44,6 @@ public void testSyntheticVectorsFullDocument() throws IOException { } public void testSyntheticVectorsWithUnmappedFields() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, true, true, true); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -55,7 +51,6 @@ public void testSyntheticVectorsWithUnmappedFields() throws IOException { } public void 
testSyntheticVectorsMissingRootFields() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, false, false, false, false, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -63,7 +58,6 @@ public void testSyntheticVectorsMissingRootFields() throws IOException { } public void testSyntheticVectorsPartialNestedContent() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, true, false, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -71,7 +65,6 @@ public void testSyntheticVectorsPartialNestedContent() throws IOException { } public void testFlatPathDocument() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDocWithFlatPath(type); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -248,7 +241,7 @@ private BytesReference generateRandomDocWithFlatPath(XContentType xContentType) } private void assertSyntheticVectors(String mapping, BytesReference source, XContentType xContentType) throws IOException { - var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build(); + var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build(); MapperService mapperService = createMapperService(settings, mapping); var parsedDoc = mapperService.documentMapper().parse(new SourceToParse("0", source, xContentType)); try (var directory = newDirectory()) { diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java 
b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index 41f70541dbf1b..385f333f0b020 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -36,7 +36,6 @@ import java.util.Arrays; import java.util.function.LongSupplier; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.equalTo; @@ -138,11 +137,19 @@ public void testGetFromTranslogWithDenseVector() throws IOException { "foo": "foo" } """, Arrays.toString(vector)); - runGetFromTranslogWithOptions(docToIndex, "\"enabled\": true", null, docToIndex, "\"text\"", "foo", "\"dense_vector\"", false); + runGetFromTranslogWithOptions( + docToIndex, + "\"enabled\": true", + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), false).build(), + docToIndex, + "\"text\"", + "foo", + "\"dense_vector\"", + false + ); } public void testGetFromTranslogWithSyntheticVector() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); float[] vector = new float[2048]; for (int i = 0; i < vector.length; i++) { vector[i] = randomByte(); @@ -156,7 +163,7 @@ public void testGetFromTranslogWithSyntheticVector() throws IOException { runGetFromTranslogWithOptions( docToIndex, "\"enabled\": true", - Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build(), + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build(), docToIndex, "\"text\"", "foo", diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 7057fc41d834c..1e89582ba87e9 100644 --- a/x-pack/plugin/build.gradle +++ 
b/x-pack/plugin/build.gradle @@ -139,6 +139,8 @@ tasks.named("yamlRestCompatTestTransform").configure({ task -> task.skipTest("esql/192_lookup_join_on_aliases/alias-pattern-multiple", "Error message changed") task.skipTest("esql/192_lookup_join_on_aliases/fails when alias or pattern resolves to multiple", "Error message changed") task.skipTest("esql/10_basic/Test wrong LIMIT parameter", "Error message changed") + task.skipTest("ml/sparse_vector_search/Search on a sparse_vector field with dots in the field names", "Vectors are no longer returned by default") + task.skipTest("ml/sparse_vector_search/Search on a nested sparse_vector field with dots in the field names and conflicting child fields", "Vectors are no longer returned by default") task.skipTest("esql/190_lookup_join/lookup-no-key-only-key", "Requires the fix") }) diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index d18f6da13cad2..97407b882651b 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -8,7 +8,7 @@ dependencies { // bring in machine learning rest test suite restResources { restApi { - include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get', 'count', 'ingest', 'bulk', 'ml', 'cat' + include '_common', 'capabilities', 'cluster', 'nodes', 'indices', 'index', 'search', 'get', 'count', 'ingest', 'bulk', 'ml', 'cat' } restTests { includeXpack 'ml' diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java index 620819a8898dd..e21fc7f9be1ab 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java +++ 
b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java @@ -21,6 +21,7 @@ import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; /** @@ -100,6 +101,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { private static final String TOP_LEVEL_KNN_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "%s", "k": 5, @@ -114,6 +118,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String TOP_LEVEL_KNN_FILTER_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "%s", "k": 5, @@ -129,6 +136,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String TOP_LEVEL_KNN_HYBRID_ALL = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "embedding", "k": 3, @@ -146,6 +156,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String TOP_LEVEL_KNN_HYBRID_MATCH = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "embedding", "k": 3, @@ -163,6 +176,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { private static final String QUERY_DSL_KNN_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "query": { "knn" : { "field": "%s", @@ -178,6 +194,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String QUERY_DSL_KNN_FILTER_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "query": { "knn" : { "field": "%s", @@ -194,6 +213,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String QUERY_DSL_KNN_HYBRID_ALL = """ { + "_source": { + "exclude_vectors": false + }, "query": { "bool": { 
"should": [ @@ -220,6 +242,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String QUERY_DSL_KNN_HYBRID_MATCH = """ { + "_source": { + "exclude_vectors": false + }, "query": { "bool": { "should": [ @@ -554,7 +579,11 @@ public void testModelWithPrefixStrings() throws IOException { // The top hit should have the search prefix assertEquals(searchPrefix + "my words", sourceText); List foundEmbedding = (List) MapHelper.dig("_source.embedding", topHit); - assertEquals(embeddings.get(0), foundEmbedding); + var expectedEmbeddings = embeddings.get(0); + assertThat(foundEmbedding.size(), equalTo(expectedEmbeddings.size())); + for (int i = 0; i < foundEmbedding.size(); i++) { + assertEquals(expectedEmbeddings.get(i), foundEmbedding.get(i), 0.01f); + } } } diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java index f1e8c9a67df44..58108b2c70b38 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java @@ -276,6 +276,9 @@ protected Response textExpansionSearch(String index, String modelText, String mo request.setJsonEntity(Strings.format(""" { + "_source": { + "exclude_vectors": false + }, "query": { "text_expansion": { "%s": { diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml index 01d645fbfb4f5..89d4cb74a6210 100644 --- 
a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml @@ -980,19 +980,25 @@ setup: --- "rrf retriever with inner_hits for sub-retriever": + - skip: + features: [ "headers" ] - requires: capabilities: - method: POST path: /_search - capabilities: [ nested_retriever_inner_hits_support ] + capabilities: [ nested_retriever_inner_hits_support, exclude_source_vectors_setting ] test_runner_features: capabilities reason: "Support for propagating nested retrievers' inner hits to the top-level compound retriever is required" - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json search: - _source: false index: test body: + _source: + exclude_vectors: false retriever: rrf: retrievers: [ @@ -1058,7 +1064,7 @@ setup: - match: { hits.hits.0.inner_hits.nested_data_field.hits.total.value: 1 } - match: { hits.hits.0.inner_hits.nested_data_field.hits.hits.0.fields.nested_inner_hits.0.data.0: foo } - match: { hits.hits.0.inner_hits.nested_vector_field.hits.total.value: 1 } - - match: { hits.hits.0.inner_hits.nested_vector_field.hits.hits.0.fields.nested_inner_hits.0.paragraph_id: [ 1 ] } + - match: { hits.hits.0.inner_hits.nested_vector_field.hits.hits.0.fields.nested_inner_hits.0.paragraph_id: [ 1.0 ] } - match: { hits.hits.1.inner_hits.nested_data_field.hits.total.value: 3 } - match: { hits.hits.1.inner_hits.nested_data_field.hits.hits.0.fields.nested_inner_hits.0.data.0: bar } diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java index dd38367125692..cf302a4bebe86 100644 --- 
a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java +++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java @@ -20,7 +20,7 @@ import java.util.Map; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.mapper.FieldMapper.notInMultiFields; import static org.elasticsearch.xpack.rank.vectors.mapper.RankVectorsFieldMapper.CONTENT_TYPE; @@ -41,7 +41,7 @@ public Map getMappers() { n, c.indexVersionCreated(), getLicenseState(), - INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) ); }, notInMultiFields(CONTENT_TYPE))); } diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java index a79fb4f304f6a..e2f314abc553f 100644 --- a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java +++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java @@ -113,13 +113,13 @@ public static class Builder extends FieldMapper.Builder { private final IndexVersion indexCreatedVersion; private final XPackLicenseState licenseState; - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; - public Builder(String name, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, boolean isSyntheticVector) { + public Builder(String name, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, boolean isExcludeSourceVectors) { super(name); 
this.indexCreatedVersion = indexCreatedVersion; this.licenseState = licenseState; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; } public Builder dimensions(int dimensions) { @@ -141,7 +141,7 @@ public RankVectorsFieldMapper build(MapperBuilderContext context) { // Validate again here because the dimensions or element type could have been set programmatically, // which affects index option validity validate(); - boolean isSyntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector; + boolean isExcludeSourceVectorsFinal = context.isSourceSynthetic() == false && isExcludeSourceVectors; return new RankVectorsFieldMapper( leafName(), new RankVectorsFieldType( @@ -154,7 +154,7 @@ public RankVectorsFieldMapper build(MapperBuilderContext context) { builderParams(this, context), indexCreatedVersion, licenseState, - isSyntheticVectorFinal + isExcludeSourceVectorsFinal ); } } @@ -252,7 +252,7 @@ DenseVectorFieldMapper.ElementType getElementType() { private final IndexVersion indexCreatedVersion; private final XPackLicenseState licenseState; - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; private RankVectorsFieldMapper( String simpleName, @@ -260,12 +260,12 @@ private RankVectorsFieldMapper( BuilderParams params, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, - boolean isSyntheticVector + boolean isExcludeSourceVectors ) { super(simpleName, fieldType, params); this.indexCreatedVersion = indexCreatedVersion; this.licenseState = licenseState; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; } @Override @@ -396,7 +396,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, licenseState, isSyntheticVector).init(this); + return new Builder(leafName(), indexCreatedVersion, licenseState, 
isExcludeSourceVectors).init(this); } @Override @@ -406,7 +406,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { @Override public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { - if (isSyntheticVector) { + if (isExcludeSourceVectors) { var syntheticField = new DocValuesSyntheticFieldLoader(); return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyVectorsAsList); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 408ddd1ec50c6..83f62f4382431 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -372,6 +372,11 @@ teardown: - requires: cluster_features: [ "gte_v8.16.0" ] reason: dots in field names allowed starting in in 8.16.0 + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -409,17 +414,14 @@ teardown: get: index: index-with-sparse-vector2 id: "has-dots" + _source_exclude_vectors: false - - match: - _source: - ml: - tokens: - running: 2.4097164 - good: 2.170997 - run: 2.052153 - race: 1.4575411 - for: 1.1908325 - 5.0k: 2.489943 + - close_to: { _source.ml.tokens.running: { value: 2.4097164, error: 0.01 } } + - close_to: { _source.ml.tokens.good: { value: 2.170997, error: 0.01 } } + - close_to: { _source.ml.tokens.run: { value: 2.052153, error: 0.01 } } + - close_to: { _source.ml.tokens.race: { value: 1.4575411, error: 0.01 } } + - close_to: { _source.ml.tokens.for: { value: 1.1908325, error: 0.01 } } + - close_to: { _source.ml.tokens.5\\.0k: { value: 2.489943, error: 0.01 } } - do: search: @@ -439,6 +441,11 @@ teardown: - requires: cluster_features: [ "gte_v8.16.0" ] reason: dots in field names 
allowed starting in in 8.16.0 + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -479,6 +486,7 @@ teardown: get: index: index-with-sparse-vector3 id: "parent-foo" + _source_exclude_vectors: false - match: _source: @@ -491,6 +499,7 @@ teardown: get: index: index-with-sparse-vector3 id: "parent-foo-bar" + _source_exclude_vectors: false - match: _source: diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml index c0df9d6a79d38..b39325d5147a8 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml @@ -6,7 +6,7 @@ setup: capabilities: - method: GET path: /_search - capabilities: [ synthetic_vectors_setting ] + capabilities: [ exclude_source_vectors_setting ] - skip: features: "headers" @@ -14,8 +14,6 @@ setup: indices.create: index: test body: - settings: - index.mapping.synthetic_vectors: true mappings: properties: name: From ca2b79d283e813dbbd7de2f3d44c949d6a62195f Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Mon, 18 Aug 2025 09:32:30 -0700 Subject: [PATCH 27/33] migrate ml_rollover_legacy_indices transport version (#133008) --- .../src/main/java/org/elasticsearch/TransportVersions.java | 1 - .../definitions/named/ml_rollover_legacy_indices.csv | 1 + .../org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java | 5 +++-- .../java/org/elasticsearch/xpack/ml/MlIndexRollover.java | 5 +++-- 4 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 server/src/main/resources/transport/definitions/named/ml_rollover_legacy_indices.csv diff --git 
a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 572407ef41ad5..5a026b6e1660b 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -132,7 +132,6 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_SKIP_ES_INDEX_SERIALIZATION = def(8_827_0_00); public static final TransportVersion ADD_INDEX_BLOCK_TWO_PHASE = def(8_828_0_00); public static final TransportVersion RESOLVE_CLUSTER_NO_INDEX_EXPRESSION = def(8_829_0_00); - public static final TransportVersion ML_ROLLOVER_LEGACY_INDICES = def(8_830_0_00); public static final TransportVersion ADD_INCLUDE_FAILURE_INDICES_OPTION = def(8_831_0_00); public static final TransportVersion ESQL_RESPONSE_PARTIAL = def(8_832_0_00); public static final TransportVersion RANK_DOC_OPTIONAL_METADATA_FOR_EXPLAIN = def(8_833_0_00); diff --git a/server/src/main/resources/transport/definitions/named/ml_rollover_legacy_indices.csv b/server/src/main/resources/transport/definitions/named/ml_rollover_legacy_indices.csv new file mode 100644 index 0000000000000..2b049c72d4950 --- /dev/null +++ b/server/src/main/resources/transport/definitions/named/ml_rollover_legacy_indices.csv @@ -0,0 +1 @@ +8830000 diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java index 26e0246312e1c..fb20a22a258d5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAnomaliesIndexUpdate.java @@ -11,7 +11,6 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ResourceAlreadyExistsException; import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; import 
org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequestBuilder; @@ -53,6 +52,8 @@ public class MlAnomaliesIndexUpdate implements MlAutoUpdateService.UpdateAction private static final Logger logger = LogManager.getLogger(MlAnomaliesIndexUpdate.class); + private static final TransportVersion ML_ROLLOVER_LEGACY_INDICES = TransportVersion.fromName("ml_rollover_legacy_indices"); + private final IndexNameExpressionResolver expressionResolver; private final OriginSettingClient client; @@ -65,7 +66,7 @@ public MlAnomaliesIndexUpdate(IndexNameExpressionResolver expressionResolver, Cl public boolean isMinTransportVersionSupported(TransportVersion minTransportVersion) { // Automatic rollover does not require any new features // but wait for all nodes to be upgraded anyway - return minTransportVersion.onOrAfter(TransportVersions.ML_ROLLOVER_LEGACY_INDICES); + return minTransportVersion.supports(ML_ROLLOVER_LEGACY_INDICES); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java index 05eefe174dbab..ba47fa5558cf9 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlIndexRollover.java @@ -10,7 +10,6 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.indices.rollover.RolloverRequest; import org.elasticsearch.action.support.IndicesOptions; @@ -44,6 +43,8 @@ public class MlIndexRollover implements MlAutoUpdateService.UpdateAction { private static final Logger logger = 
LogManager.getLogger(MlIndexRollover.class); + private static final TransportVersion ML_ROLLOVER_LEGACY_INDICES = TransportVersion.fromName("ml_rollover_legacy_indices"); + public record IndexPatternAndAlias(String indexPattern, String alias) {} private final IndexNameExpressionResolver expressionResolver; @@ -60,7 +61,7 @@ public MlIndexRollover(List indicesToRollover, IndexNameEx public boolean isMinTransportVersionSupported(TransportVersion minTransportVersion) { // Wait for all nodes to be upgraded to ensure that the // newly created index will be of the latest version. - return minTransportVersion.onOrAfter(TransportVersions.ML_ROLLOVER_LEGACY_INDICES); + return minTransportVersion.supports(ML_ROLLOVER_LEGACY_INDICES); } @Override From 09b6d9cbc894d759c5a597f1698634570e32da59 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Mon, 18 Aug 2025 10:57:12 -0700 Subject: [PATCH 28/33] address feedback --- .../TransportVersionResourcesService.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java index 5e234aa38ed7f..ad28610406538 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java @@ -32,6 +32,22 @@ import javax.inject.Inject; +/** + * An encapsulation of operations on transport version resources. + * + *

These are resource files to describe transport versions that will be loaded at Elasticsearch runtime. They exist + * as jar resource files at runtime, and as a directory of resources at build time. + * + *

The layout of the transport version resources are as follows: + *

    + *
  • /transport/definitions/named/ + * - Definitions that can be looked up by name. The name is the filename before the .csv suffix.
  • + *
  • /transport/definitions/unreferenced/ + * - Definitions which contain ids that are known at runtime, but cannot be looked up by name.
  • + *
  • /transport/latest/ + * - The latest transport version definition for each release branch.
  • + *
+ */ public abstract class TransportVersionResourcesService implements BuildService { public interface Parameters extends BuildServiceParameters { @@ -168,7 +184,7 @@ private Set getChangedResources() { return changedResources.get(); } - // Read a trasnport version resource from the main branch, or return null if it doesn't exist on main + // Read a transport version resource from the main branch, or return null if it doesn't exist on main private T getMainFile(String resourcePath, BiFunction parser) { if (getMainResources().contains(resourcePath) == false) { return null; From e6cec60ab2c9c267e3a01bdd800d07f6016cd76a Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Mon, 18 Aug 2025 11:01:45 -0700 Subject: [PATCH 29/33] reword --- .../internal/transport/TransportVersionResourcesService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java index ad28610406538..2dcd595e7fb8d 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java @@ -76,7 +76,7 @@ public TransportVersionResourcesService(Parameters params) { } /** - * Return the transport version resources directory for this repository. + * Return the directory for this repository which contains transport version resources. * This should be an input to any tasks reading resources from this service. 
*/ Path getResourcesDir() { From 2874ff0a093c8b103e6312faffcd8acc13ab6974 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 18 Aug 2025 20:10:54 +0000 Subject: [PATCH 30/33] [CI] Auto commit changes from spotless --- .../transport/ValidateTransportVersionResourcesTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java index 98b62f3c8e94d..e8f474a72867c 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java @@ -68,7 +68,7 @@ public void validateTransportVersions() throws IOException { for (var definition : definitions.values()) { validateDefinition(definition, referencedNames); } - + for (var entry : idsByBase.entrySet()) { validateBase(entry.getKey(), entry.getValue()); } From e5f51010374bd023794a1d45bd6f806b129feca9 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 19 Aug 2025 09:46:00 -0700 Subject: [PATCH 31/33] fix --- .../TransportVersionResourcesPlugin.java | 5 ++-- .../TransportVersionResourcesService.java | 27 +++++++++---------- ...ValidateTransportVersionResourcesTask.java | 2 +- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java index 2647a188c638d..4804b810e1d59 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java +++ 
b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesPlugin.java @@ -31,10 +31,9 @@ public void apply(Project project) { project.getGradle() .getSharedServices() .registerIfAbsent("transportVersionResources", TransportVersionResourcesService.class, spec -> { - Directory transportResources = project.getLayout().getProjectDirectory().dir("src/main/resources/transport"); - spec.getParameters().getResourcesDirectory().set(transportResources); + Directory transportResources = project.getLayout().getProjectDirectory().dir("src/main/resources/" + resourceRoot); + spec.getParameters().getTransportResourcesDirectory().set(transportResources); spec.getParameters().getRootDirectory().set(project.getRootProject().getRootDir()); - spec.getParameters().getResourceRoot().set(resourceRoot); }); DependencyHandler depsHandler = project.getDependencies(); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java index 98385bf338222..2158f8620ef07 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java @@ -10,7 +10,6 @@ package org.elasticsearch.gradle.internal.transport; import org.gradle.api.file.DirectoryProperty; -import org.gradle.api.provider.Property; import org.gradle.api.services.BuildService; import org.gradle.api.services.BuildServiceParameters; import org.gradle.process.ExecOperations; @@ -52,11 +51,9 @@ public abstract class TransportVersionResourcesService implements BuildService { public interface Parameters extends BuildServiceParameters { - DirectoryProperty getResourcesDirectory(); + DirectoryProperty getTransportResourcesDirectory(); 
DirectoryProperty getRootDirectory(); - - Property getResourceRoot(); } @Inject @@ -67,14 +64,14 @@ public interface Parameters extends BuildServiceParameters { private static final Path UNREFERENCED_DIR = DEFINITIONS_DIR.resolve("unreferenced"); private static final Path LATEST_DIR = Path.of("latest"); - private final Path resourcesDir; + private final Path transportResourcesDir; private final Path rootDir; private final AtomicReference> mainResources = new AtomicReference<>(null); private final AtomicReference> changedResources = new AtomicReference<>(null); @Inject public TransportVersionResourcesService(Parameters params) { - this.resourcesDir = params.getResourcesDirectory().get().getAsFile().toPath(); + this.transportResourcesDir = params.getTransportResourcesDirectory().get().getAsFile().toPath(); this.rootDir = params.getRootDirectory().get().getAsFile().toPath(); } @@ -82,8 +79,8 @@ public TransportVersionResourcesService(Parameters params) { * Return the directory for this repository which contains transport version resources. * This should be an input to any tasks reading resources from this service. */ - Path getResourcesDir() { - return resourcesDir; + Path getTransportResourcesDir() { + return transportResourcesDir; } /** @@ -91,7 +88,7 @@ Path getResourcesDir() { * This should be an input to any tasks that only read definitions from this service. 
*/ Path getDefinitionsDir() { - return resourcesDir.resolve(DEFINITIONS_DIR); + return transportResourcesDir.resolve(DEFINITIONS_DIR); } // return the path, relative to the resources dir, of a named definition @@ -104,7 +101,7 @@ Map getNamedDefinitions() throws IOException Map definitions = new HashMap<>(); // temporarily include unreferenced in named until validation understands the distinction for (var dir : List.of(NAMED_DIR, UNREFERENCED_DIR)) { - try (var definitionsStream = Files.list(resourcesDir.resolve(dir))) { + try (var definitionsStream = Files.list(transportResourcesDir.resolve(dir))) { for (var definitionFile : definitionsStream.toList()) { String contents = Files.readString(definitionFile, StandardCharsets.UTF_8).strip(); var definition = TransportVersionDefinition.fromString(definitionFile.getFileName().toString(), contents); @@ -123,18 +120,18 @@ TransportVersionDefinition getNamedDefinitionFromMain(String name) { /** Test whether the given named definition exists */ boolean namedDefinitionExists(String name) { - return Files.exists(resourcesDir.resolve(getNamedDefinitionRelativePath(name))); + return Files.exists(transportResourcesDir.resolve(getNamedDefinitionRelativePath(name))); } /** Return the path within the repository of the given named definition */ Path getRepositoryPath(TransportVersionDefinition definition) { - return rootDir.relativize(resourcesDir.resolve(getNamedDefinitionRelativePath(definition.name()))); + return rootDir.relativize(transportResourcesDir.resolve(getNamedDefinitionRelativePath(definition.name()))); } /** Read all latest files and return them mapped by their release branch */ Map getLatestByReleaseBranch() throws IOException { Map latests = new HashMap<>(); - try (var stream = Files.list(resourcesDir.resolve(LATEST_DIR))) { + try (var stream = Files.list(transportResourcesDir.resolve(LATEST_DIR))) { for (var latestFile : stream.toList()) { String contents = Files.readString(latestFile, 
StandardCharsets.UTF_8).strip(); var latest = TransportVersionLatest.fromString(latestFile.getFileName().toString(), contents); @@ -152,7 +149,7 @@ TransportVersionLatest getLatestFromMain(String releaseBranch) { /** Return the path within the repository of the given latest */ Path getRepositoryPath(TransportVersionLatest latest) { - return rootDir.relativize(resourcesDir.resolve(getLatestRelativePath(latest.branch()))); + return rootDir.relativize(transportResourcesDir.resolve(getLatestRelativePath(latest.branch()))); } private Path getLatestRelativePath(String releaseBranch) { @@ -201,7 +198,7 @@ private String gitCommand(String... args) { ByteArrayOutputStream stdout = new ByteArrayOutputStream(); List command = new ArrayList<>(); - Collections.addAll(command, "git", "-C", getResourcesDir().toString()); + Collections.addAll(command, "git", "-C", getTransportResourcesDir().toString()); Collections.addAll(command, args); ExecResult result = getExecOperations().exec(spec -> { diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java index e8f474a72867c..0b4f15eb5ccaa 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java @@ -44,7 +44,7 @@ public abstract class ValidateTransportVersionResourcesTask extends DefaultTask @Optional @PathSensitive(PathSensitivity.RELATIVE) public Path getResourcesDir() { - return getResources().get().getResourcesDir(); + return getResources().get().getTransportResourcesDir(); } @InputFiles From c67a16460088d2db8c5a1908b8b27e6d80043e80 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 19 Aug 2025 12:04:29 -0700 Subject: [PATCH 32/33] 
skip non-existing --- .../internal/transport/TransportVersionResourcesService.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java index 2158f8620ef07..8332f13477ce7 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java @@ -101,6 +101,9 @@ Map getNamedDefinitions() throws IOException Map definitions = new HashMap<>(); // temporarily include unreferenced in named until validation understands the distinction for (var dir : List.of(NAMED_DIR, UNREFERENCED_DIR)) { + if (Files.exists(dir) == false) { + continue; + } try (var definitionsStream = Files.list(transportResourcesDir.resolve(dir))) { for (var definitionFile : definitionsStream.toList()) { String contents = Files.readString(definitionFile, StandardCharsets.UTF_8).strip(); From ac9ce1b99e5712ce42e2493d218441b3a6e22de9 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 19 Aug 2025 12:52:51 -0700 Subject: [PATCH 33/33] use correct path --- .../internal/transport/TransportVersionResourcesService.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java index 8332f13477ce7..7101e8d9b8f18 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java +++ 
b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/TransportVersionResourcesService.java @@ -101,10 +101,11 @@ Map getNamedDefinitions() throws IOException Map definitions = new HashMap<>(); // temporarily include unreferenced in named until validation understands the distinction for (var dir : List.of(NAMED_DIR, UNREFERENCED_DIR)) { - if (Files.exists(dir) == false) { + Path path = transportResourcesDir.resolve(dir); + if (Files.isDirectory(path) == false) { continue; } - try (var definitionsStream = Files.list(transportResourcesDir.resolve(dir))) { + try (var definitionsStream = Files.list(path)) { for (var definitionFile : definitionsStream.toList()) { String contents = Files.readString(definitionFile, StandardCharsets.UTF_8).strip(); var definition = TransportVersionDefinition.fromString(definitionFile.getFileName().toString(), contents);