Skip to content

Commit

Permalink
Automatically map floats as dense vector (#98512)
Browse files Browse the repository at this point in the history
  • Loading branch information
kderusso committed Sep 6, 2023
1 parent 1925712 commit 258d0cb
Show file tree
Hide file tree
Showing 10 changed files with 729 additions and 40 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/98512.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 98512
summary: Automatically map float arrays of lengths 128 - 2048 as dense_vector
area: Application
type: feature
issues:
- 97532
8 changes: 6 additions & 2 deletions docs/reference/mapping/types/dense-vector.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ In many cases, a brute-force kNN search is not efficient enough. For this
reason, the `dense_vector` type supports indexing vectors into a specialized
data structure to support fast kNN retrieval through the <<search-api-knn, `knn` option>> in the search API

Unmapped array fields of float elements with size between 128 and 2048 are dynamically mapped as `dense_vector` with a default similarity of `cosine`.
You can override the default similarity by explicitly mapping the field as `dense_vector` with the desired similarity.

Indexing is enabled by default for dense vector fields.
When indexing is enabled, you can define the vector similarity to use in kNN search:

Expand Down Expand Up @@ -128,8 +131,9 @@ trade off of lower precision. Vectors using `byte` require dimensions with
integer values between -128 and 127, inclusive, for both indexing and searching.

`dims`::
(Required, integer)
Number of vector dimensions. Can't exceed `2048`.
(Optional, integer)
Number of vector dimensions. Can't exceed `2048`. If `dims` is not specified,
it will be set to the length of the first vector added to the field.

`index`::
(Optional, Boolean)
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@

import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_NESTED_FIELDS_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
import static org.hamcrest.Matchers.containsString;
Expand Down Expand Up @@ -662,4 +663,32 @@ public void testSubobjectsFalse() throws Exception {
assertNotNull(properties.get("time.max"));
});
}

/**
* Indexes float arrays into fields that live under sub-objects: one into an explicitly mapped {@code dense_vector}
* field that declares no {@code dims}, and one into an unmapped field of a dynamic object.
* The test makes no assertions on the resulting mappings; it passes when index creation is acknowledged and both
* index requests complete without throwing.
*/
public void testKnnSubObject() throws Exception {
// "obj" declares no properties, so "obj.vector" is unmapped when the second document arrives;
// "mapped_obj.vector" is declared up front as dense_vector without dims.
assertAcked(indicesAdmin().prepareCreate("test").setMapping("""
{
"properties": {
"obj": {
"type": "object",
"dynamic": "true"
},
"mapped_obj": {
"type": "object",
"dynamic": "true",
"properties": {
"vector": {
"type": "dense_vector"
}
}
}
}
}""").get());

// Three random doubles between 0.0 and 5.0 for the explicitly mapped dense_vector field.
client().index(new IndexRequest("test").source("mapped_obj.vector", Randomness.get().doubles(3, 0.0, 5.0).toArray())).get();

// MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING random doubles for the unmapped field under the dynamic object.
// NOTE(review): presumably long enough to trigger the dynamic dense_vector mapping - confirm against DocumentParser.
client().index(
new IndexRequest("test").source("obj.vector", Randomness.get().doubles(MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING, 0.0, 5.0).toArray())
).get();

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.plugins.internal.DocumentParsingObserver;
Expand All @@ -40,11 +41,16 @@
import java.util.function.Consumer;
import java.util.function.Supplier;

import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING;

/**
* A parser for documents
*/
public final class DocumentParser {

public static final IndexVersion DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION = IndexVersion.V_8_11_0;

private final XContentParserConfiguration parserConfiguration;
private final Supplier<DocumentParsingObserver> documentParsingObserverSupplier;
private final MappingParserContext mappingParserContext;
Expand Down Expand Up @@ -244,9 +250,8 @@ static Mapping createDynamicUpdate(DocumentParserContext context) {
return null;
}
RootObjectMapper.Builder rootBuilder = context.updateRoot();
for (Mapper mapper : context.getDynamicMappers()) {
rootBuilder.addDynamic(mapper.name(), null, mapper, context);
}
context.getDynamicMappers().forEach(mapper -> rootBuilder.addDynamic(mapper.name(), null, mapper, context));

for (RuntimeField runtimeField : context.getDynamicRuntimeFields()) {
rootBuilder.addRuntimeField(runtimeField);
}
Expand Down Expand Up @@ -588,6 +593,33 @@ private static void parseNonDynamicArray(DocumentParserContext context, final St
parseValue(context, lastFieldName);
}
}
postProcessDynamicArrayMapping(context, lastFieldName);
}

/**
 * Re-maps a dynamically mapped array as {@code dense_vector} once the whole array has been parsed.
 * <p>
 * The re-mapping only happens for indices created on or after {@link #DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION}, and only
 * when all of the following hold for the field's full name:
 * <ul>
 *   <li>dynamic mappers were registered for it while parsing the array;</li>
 *   <li>the context does not report it as applied from a template or as a copy_to field;</li>
 *   <li>the number of registered mappers is between {@code MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING} and {@code MAX_DIMS_COUNT},
 *       inclusive;</li>
 *   <li>every registered mapper is a {@link NumberFieldMapper} whose type name is {@code float}.</li>
 * </ul>
 * When the criteria are met, the per-element mappers are replaced by a single {@link DenseVectorFieldMapper}.
 *
 * @param context   the parser context holding the dynamic mappers collected so far
 * @param fieldName the leaf name of the array field; the full name is derived from the dynamic mapper builder context
 */
private static void postProcessDynamicArrayMapping(DocumentParserContext context, String fieldName) {
    if (context.indexSettings().getIndexVersionCreated().onOrAfter(DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION) == false) {
        // indices created before the feature was introduced keep their per-element float mapping
        return;
    }

    final MapperBuilderContext builderContext = context.createDynamicMapperBuilderContext();
    final String qualifiedName = builderContext.buildFullName(fieldName);

    final List<Mapper> candidates = context.getDynamicMappers(qualifiedName);
    if (candidates == null) {
        return;
    }
    if (context.isFieldAppliedFromTemplate(qualifiedName) || context.isCopyToField(qualifiedName)) {
        return;
    }

    final int elementCount = candidates.size();
    if (elementCount < MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING || elementCount > MAX_DIMS_COUNT) {
        return;
    }

    for (Mapper candidate : candidates) {
        final boolean isFloat = candidate instanceof NumberFieldMapper && "float".equals(candidate.typeName());
        if (isFloat == false) {
            // a single non-float element disqualifies the whole array
            return;
        }
    }

    final DenseVectorFieldMapper vectorMapper = new DenseVectorFieldMapper.Builder(
        fieldName,
        context.indexSettings().getIndexVersionCreated()
    ).build(builderContext);
    context.updateDynamicMappers(qualifiedName, List.of(vectorMapper));
}

private static void throwEOFOnParseArray(String arrayFieldName, DocumentParserContext context) {
Expand Down Expand Up @@ -677,6 +709,7 @@ private static void parseCopyFields(DocumentParserContext context, List<String>
assert targetDoc != null;
final DocumentParserContext copyToContext = context.createCopyToContext(field, targetDoc);
innerParseObject(copyToContext);
context.markFieldAsCopyTo(field);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ protected void addDoc(LuceneDocument doc) {
private final MappingParserContext mappingParserContext;
private final SourceToParse sourceToParse;
private final Set<String> ignoredFields;
private final List<Mapper> dynamicMappers;
private final Map<String, List<Mapper>> dynamicMappers;
private final Set<String> newFieldsSeen;
private final Map<String, ObjectMapper> dynamicObjectMappers;
private final List<RuntimeField> dynamicRuntimeFields;
Expand All @@ -94,13 +94,15 @@ protected void addDoc(LuceneDocument doc) {
private String id;
private Field version;
private SeqNoFieldMapper.SequenceIDFields seqID;
private final Set<String> fieldsAppliedFromTemplates;
private final Set<String> copyToFields;

private DocumentParserContext(
MappingLookup mappingLookup,
MappingParserContext mappingParserContext,
SourceToParse sourceToParse,
Set<String> ignoreFields,
List<Mapper> dynamicMappers,
Map<String, List<Mapper>> dynamicMappers,
Set<String> newFieldsSeen,
Map<String, ObjectMapper> dynamicObjectMappers,
List<RuntimeField> dynamicRuntimeFields,
Expand All @@ -109,7 +111,9 @@ private DocumentParserContext(
SeqNoFieldMapper.SequenceIDFields seqID,
DocumentDimensions dimensions,
ObjectMapper parent,
ObjectMapper.Dynamic dynamic
ObjectMapper.Dynamic dynamic,
Set<String> fieldsAppliedFromTemplates,
Set<String> copyToFields
) {
this.mappingLookup = mappingLookup;
this.mappingParserContext = mappingParserContext;
Expand All @@ -125,6 +129,8 @@ private DocumentParserContext(
this.dimensions = dimensions;
this.parent = parent;
this.dynamic = dynamic;
this.fieldsAppliedFromTemplates = fieldsAppliedFromTemplates;
this.copyToFields = copyToFields;
}

private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, DocumentParserContext in) {
Expand All @@ -142,7 +148,9 @@ private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic,
in.seqID,
in.dimensions,
parent,
dynamic
dynamic,
in.fieldsAppliedFromTemplates,
in.copyToFields
);
}

Expand All @@ -158,7 +166,7 @@ protected DocumentParserContext(
mappingParserContext,
source,
new HashSet<>(),
new ArrayList<>(),
new HashMap<>(),
new HashSet<>(),
new HashMap<>(),
new ArrayList<>(),
Expand All @@ -167,7 +175,9 @@ protected DocumentParserContext(
null,
DocumentDimensions.fromIndexSettings(mappingParserContext.getIndexSettings()),
parent,
dynamic
dynamic,
new HashSet<>(),
new HashSet<>()
);
}

Expand Down Expand Up @@ -275,6 +285,22 @@ public ObjectMapper.Dynamic dynamic() {
return dynamic;
}

/**
* Records the given field name in the set of fields whose mapping was applied from a template.
* NOTE(review): lookups are exact string matches, so the name recorded here must use the same form (leaf name vs. full
* dotted path) as the name later passed to {@code isFieldAppliedFromTemplate} - confirm at the call sites.
* @param fieldName name of the field to record
*/
public void markFieldAsAppliedFromTemplate(String fieldName) {
fieldsAppliedFromTemplates.add(fieldName);
}

/**
* Tells whether the given name was previously recorded as a field applied from a template.
* @param name field name to look up; matched exactly against the recorded names
* @return {@code true} if the name has been recorded, {@code false} otherwise
*/
public boolean isFieldAppliedFromTemplate(String name) {
return fieldsAppliedFromTemplates.contains(name);
}

/**
* Records the given field name in the set of copy_to fields.
* @param fieldName name of the field to record
*/
public void markFieldAsCopyTo(String fieldName) {
copyToFields.add(fieldName);
}

/**
* Tells whether the given name was previously recorded as a copy_to field.
* @param name field name to look up; matched exactly against the recorded names
* @return {@code true} if the name has been recorded, {@code false} otherwise
*/
public boolean isCopyToField(String name) {
return copyToFields.contains(name);
}

/**
* Add a new mapper dynamically created while parsing.
*/
Expand All @@ -283,6 +309,7 @@ public final void addDynamicMapper(Mapper mapper) {
if (mapper instanceof ObjectMapper) {
MappingLookup.checkObjectDepthLimit(indexSettings().getMappingDepthLimit(), mapper.name());
}

// eagerly check field name limit here to avoid OOM errors
// only check fields that are not already mapped or tracked in order to avoid hitting field limit too early via double-counting
// note that existing fields can also receive dynamic mapping updates (e.g. constant_keyword to fix the value)
Expand All @@ -302,23 +329,39 @@ public final void addDynamicMapper(Mapper mapper) {
addDynamicMapper(submapper);
}
}

// TODO we may want to stop adding object mappers to the dynamic mappers list: most times they will be mapped when parsing their
// sub-fields (see ObjectMapper.Builder#addDynamic), which causes extra work as the two variants of the same object field
// will be merged together when creating the final dynamic update. The only cases where object fields need extra treatment are
// dynamically mapped objects when the incoming document defines no sub-fields in them:
// 1) by default, they would be empty containers in the mappings, is it then important to map them?
// 2) they can be the result of applying a dynamic template which may define sub-fields or set dynamic, enabled or subobjects.
dynamicMappers.add(mapper);
dynamicMappers.computeIfAbsent(mapper.name(), k -> new ArrayList<>()).add(mapper);
}

/**
* Get dynamic mappers created as a result of parsing an incoming document. Responsible for exposing all the newly created
* fields that need to be merged into the existing mappings. Used to create the required mapping update at the end of document parsing.
* Consists of a flat set of {@link Mapper}s that will need to be added to their respective parent {@link ObjectMapper}s in order
* Consists of all {@link Mapper}s that will need to be added to their respective parent {@link ObjectMapper}s in order
* to become part of the resulting dynamic mapping update.
*/
public final List<Mapper> getDynamicMappers() {
return dynamicMappers;
return dynamicMappers.values().stream().flatMap(List::stream).toList();
}

/**
* Returns the dynamically created {@link Mapper}s registered under the given field name, which will need to be added to their
* respective parent {@link ObjectMapper}s in order to become part of the resulting dynamic mapping update.
* @param fieldName Full field name with dot-notation.
* @return List of Mappers, or {@code null} if no dynamic mappers are registered under that name
*/
public final List<Mapper> getDynamicMappers(String fieldName) {
return dynamicMappers.get(fieldName);
}

/**
 * Replaces the dynamic mappers tracked under the given name: the existing entry is dropped, then each supplied mapper is
 * registered again through {@link #addDynamicMapper(Mapper)}.
 *
 * @param name    key of the entry to discard
 * @param mappers replacement mappers, registered in iteration order
 */
public void updateDynamicMappers(String name, List<Mapper> mappers) {
    dynamicMappers.remove(name);
    for (Mapper replacement : mappers) {
        addDynamicMapper(replacement);
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@ private static void createDynamicField(
DateFormatter dateFormatter,
CheckedRunnable<IOException> dynamicFieldStrategy
) throws IOException {
if (applyMatchingTemplate(context, name, matchType, dateFormatter) == false) {
if (applyMatchingTemplate(context, name, matchType, dateFormatter)) {
context.markFieldAsAppliedFromTemplate(name);
} else {
dynamicFieldStrategy.run();
}
}
Expand Down

0 comments on commit 258d0cb

Please sign in to comment.