diff --git a/docs/changelog/98996.yaml b/docs/changelog/98996.yaml new file mode 100644 index 0000000000000..1f1bdd35ff643 --- /dev/null +++ b/docs/changelog/98996.yaml @@ -0,0 +1,5 @@ +pr: 98996 +summary: Reintroduce `sparse_vector` mapping +area: Mapping +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml new file mode 100644 index 0000000000000..8e88111ad45be --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml @@ -0,0 +1,143 @@ +--- +"Indexing and searching sparse vectors": + + - skip: + version: " - 8.10.99" + reason: "sparse_vector field type reintroduced in 8.11" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + + - match: { acknowledged: true } + + - do: + index: + index: test + id: "1" + body: + text: "running is good for you" + ml: + tokens: + running: 2.4097164 + good: 2.170997 + run: 2.052153 + race: 1.4575411 + for: 1.1908325 + runner: 1.1803857 + exercise: 1.1652642 + you: 0.9654308 + training: 0.94999343 + sports: 0.93650943 + fitness: 0.83129317 + best: 0.820365 + bad: 0.7385934 + health: 0.7098149 + marathon: 0.61555296 + gym: 0.5652374 + + - match: { result: "created" } + + - do: + index: + index: test + id: "2" + body: + text: "walking is a healthy exercise" + ml: + tokens: + walking: 2.4797723 + exercise: 2.074234 + healthy: 1.971596 + walk: 1.6458614 + health: 1.5291847 + walker: 1.4736869 + activity: 1.0793462 + good: 1.0597849 + fitness: 0.91855437 + training: 0.86342937 + movement: 0.7657065 + normal: 0.6694081 + foot: 0.5892523 + physical: 0.4926789 + + - match: { result: "created" } + + - do: + indices.refresh: { } + + - do: + search: + index: test + body: + query: + bool: + 
should: + - term: + ml.tokens: + value: "walk" + boost: 1.9790847 + - term: + ml.tokens: + value: "walking" + boost: 1.7092685 + - term: + ml.tokens: + value: "exercise" + boost: 0.84076905 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "1" } + +--- +"Sparse vector in 7.x": + - skip: + features: allowed_warnings + version: "8.0.0 - " + reason: "sparse_vector field type supported in 7.x" + - do: + allowed_warnings: + - "The [sparse_vector] field type is deprecated and will be removed in 8.0." + - "[sparse_vector] field type in old 7.x indices is allowed to contain [sparse_vector] fields, but they cannot be indexed or searched." + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + + - match: { acknowledged: true } + +--- +"Sparse vector in 8.x": + - skip: + version: " - 7.99.99, 8.11.0 - " + reason: "sparse_vector field type not supported in 8.x until 8.11.0" + - do: + catch: /The \[sparse_vector\] field type is no longer supported/ + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersion.java b/server/src/main/java/org/elasticsearch/index/IndexVersion.java index bf497712b712e..34f415e46462a 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersion.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersion.java @@ -122,6 +122,7 @@ private static IndexVersion registerIndexVersion(int id, Version luceneVersion, * Detached index versions added below here. 
*/ public static final IndexVersion V_8_500_000 = registerIndexVersion(8_500_000, Version.LUCENE_9_7_0, "bf656f5e-5808-4eee-bf8a-e2bf6736ff55"); + public static final IndexVersion V_8_500_001 = registerIndexVersion(8_500_001, Version.LUCENE_9_7_0, "45045a5a-fc57-4462-89f6-6bc04cda6015"); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ @@ -149,7 +150,7 @@ private static IndexVersion registerIndexVersion(int id, Version luceneVersion, */ private static class CurrentHolder { - private static final IndexVersion CURRENT = findCurrent(V_8_500_000); + private static final IndexVersion CURRENT = findCurrent(V_8_500_001); // finds the pluggable current version, or uses the given fallback private static IndexVersion findCurrent(IndexVersion fallback) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index ba45a700eebb5..082c2d898e637 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -8,41 +8,48 @@ package org.elasticsearch.index.mapper.vectors; +import org.apache.lucene.document.FeatureField; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.logging.DeprecationCategory; -import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.fielddata.FieldDataContext; +import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; import 
org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.xcontent.XContentParser.Token; -import java.time.ZoneId; +import java.io.IOException; import java.util.Map; +import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST; + /** - * A {@link FieldMapper} for indexing a sparse vector of floats. - * - * @deprecated The sparse_vector type was deprecated in 7.x and removed in 8.0. This mapper - * definition only exists so that 7.x indices can be read without error. - * - * TODO: remove in 9.0. + * A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse + * vector of features. */ -@Deprecated public class SparseVectorFieldMapper extends FieldMapper { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(SparseVectorFieldMapper.class); - static final String ERROR_MESSAGE = "The [sparse_vector] field type is no longer supported."; - static final String ERROR_MESSAGE_7X = "The [sparse_vector] field type is no longer supported. 
Old 7.x indices are allowed to " + "contain [sparse_vector] fields, but they cannot be indexed or searched."; + public static final String CONTENT_TYPE = "sparse_vector"; + static final String ERROR_MESSAGE_7X = "[sparse_vector] field type in old 7.x indices is allowed to " + + "contain [sparse_vector] fields, but they cannot be indexed or searched."; + static final String ERROR_MESSAGE_8X = "The [sparse_vector] field type is not supported from 8.0 to 8.10 versions."; + static final IndexVersion PREVIOUS_SPARSE_VECTOR_INDEX_VERSION = IndexVersion.V_8_0_0; + + static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersion.V_8_500_001; + public static class Builder extends FieldMapper.Builder { - final Parameter<Map<String, String>> meta = Parameter.metaParam(); + private final Parameter<Map<String, String>> meta = Parameter.metaParam(); public Builder(String name) { super(name); @@ -65,18 +72,19 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) { } public static final TypeParser PARSER = new TypeParser((n, c) -> { - if (c.indexVersionCreated().onOrAfter(IndexVersion.V_8_0_0)) { - throw new IllegalArgumentException(ERROR_MESSAGE); - } else { + if (c.indexVersionCreated().before(PREVIOUS_SPARSE_VECTOR_INDEX_VERSION)) { deprecationLogger.warn(DeprecationCategory.MAPPINGS, "sparse_vector", ERROR_MESSAGE_7X); - return new Builder(n); + } else if (c.indexVersionCreated().before(NEW_SPARSE_VECTOR_INDEX_VERSION)) { + throw new IllegalArgumentException(ERROR_MESSAGE_8X); } - }); + + return new Builder(n); + }, notInMultiFields(CONTENT_TYPE)); public static final class SparseVectorFieldType extends MappedFieldType { public SparseVectorFieldType(String name, Map<String, String> meta) { - super(name, false, false, false, TextSearchInfo.NONE, meta); + super(name, true, false, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); } @Override @@ -85,28 +93,45 @@ public String typeName() { } @Override - public DocValueFormat docValueFormat(String format, ZoneId timeZone) { - throw new 
UnsupportedOperationException(ERROR_MESSAGE_7X); + public Query existsQuery(SearchExecutionContext context) { + throw new IllegalArgumentException("[sparse_vector] fields do not support [exists] queries"); } @Override - public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException(ERROR_MESSAGE_7X); + public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) { + throw new IllegalArgumentException("[sparse_vector] fields do not support sorting, scripting or aggregating"); } @Override - public Query existsQuery(SearchExecutionContext context) { - throw new UnsupportedOperationException(ERROR_MESSAGE_7X); + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + return SourceValueFetcher.identity(name(), context, format); } @Override public Query termQuery(Object value, SearchExecutionContext context) { - throw new UnsupportedOperationException(ERROR_MESSAGE_7X); + return FeatureField.newLinearQuery(name(), indexedValueForSearch(value), DEFAULT_BOOST); + } + + private static String indexedValueForSearch(Object value) { + if (value instanceof BytesRef) { + return ((BytesRef) value).utf8ToString(); + } + return value.toString(); } } private SparseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo) { - super(simpleName, mappedFieldType, multiFields, copyTo); + super(simpleName, mappedFieldType, multiFields, copyTo, false, null); + } + + @Override + public Map<String, NamedAnalyzer> indexAnalyzers() { + return Map.of(mappedFieldType.name(), Lucene.KEYWORD_ANALYZER); + } + + @Override + public FieldMapper.Builder getMergeBuilder() { + return new Builder(simpleName()).init(this); } @Override @@ -115,13 +140,67 @@ public SparseVectorFieldType fieldType() { } @Override - public void parse(DocumentParserContext context) { - throw new UnsupportedOperationException(ERROR_MESSAGE_7X); + protected boolean supportsParsingObject() { + 
return true; + } + + @Override + public void parse(DocumentParserContext context) throws IOException { + + // No support for indexing / searching 7.x sparse_vector field types + if (context.indexSettings().getIndexVersionCreated().before(PREVIOUS_SPARSE_VECTOR_INDEX_VERSION)) { + throw new UnsupportedOperationException(ERROR_MESSAGE_7X); + } else if (context.indexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR_INDEX_VERSION)) { + throw new UnsupportedOperationException(ERROR_MESSAGE_8X); + } + + if (context.parser().currentToken() != Token.START_OBJECT) { + throw new IllegalArgumentException( + "[sparse_vector] fields must be json objects, expected a START_OBJECT but got: " + context.parser().currentToken() + ); + } + + String feature = null; + try { + // make sure that we don't expand dots in field names while parsing + context.path().setWithinLeafObject(true); + for (Token token = context.parser().nextToken(); token != Token.END_OBJECT; token = context.parser().nextToken()) { + if (token == Token.FIELD_NAME) { + feature = context.parser().currentName(); + if (feature.contains(".")) { + throw new IllegalArgumentException( + "[sparse_vector] fields do not support dots in feature names but found [" + feature + "]" + ); + } + } else if (token == Token.VALUE_NULL) { + // ignore feature, this is consistent with numeric fields + } else if (token == Token.VALUE_NUMBER || token == Token.VALUE_STRING) { + final String key = name() + "." 
+ feature; + float value = context.parser().floatValue(true); + if (context.doc().getByKey(key) != null) { + throw new IllegalArgumentException( + "[sparse_vector] fields do not support indexing multiple values for the same feature [" + + key + + "] in the same document" + ); + } + context.doc().addWithKey(key, new FeatureField(name(), feature, value)); + } else { + throw new IllegalArgumentException( + "[sparse_vector] fields take hashes that map a feature to a strictly positive " + + "float, but got unexpected token " + + token + ); + } + } + } finally { + context.path().setWithinLeafObject(false); + } } @Override protected void parseCreateField(DocumentParserContext context) { - throw new IllegalStateException("parse is implemented directly"); + throw new AssertionError("parse is implemented directly"); } @Override @@ -129,8 +208,4 @@ protected String contentType() { return CONTENT_TYPE; } - @Override - public FieldMapper.Builder getMergeBuilder() { - return new Builder(simpleName()).init(this); - } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 9ad906c31c74a..9ea63325ef3ad 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -8,93 +8,218 @@ package org.elasticsearch.index.mapper.vectors; -import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute; +import org.apache.lucene.document.FeatureField; +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.compress.CompressedXContent; -import 
org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperTestCase; +import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; -import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.index.IndexVersionUtils; +import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; +import org.hamcrest.Matchers; +import org.junit.AssumptionViolatedException; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION; +import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION; import static org.hamcrest.Matchers.containsString; -public class SparseVectorFieldMapperTests extends ESSingleNodeTestCase { +public class SparseVectorFieldMapperTests extends MapperTestCase { + + @Override + protected Object getSampleValueForDocument() { + return Map.of("ten", 10, "twenty", 20); + } + + @Override + protected Object getSampleObjectForDocument() { + return getSampleValueForDocument(); + } + + @Override + protected void assertExistsQuery(MapperService mapperService) { + IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> super.assertExistsQuery(mapperService)); + assertEquals("[sparse_vector] fields do not support [exists] 
queries", iae.getMessage()); + } - // this allows to set indexVersion as it is a private setting @Override - protected boolean forbidPrivateIndexSettings() { + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + } + + @Override + protected boolean supportsStoredFields() { return false; } - public void testValueFetcherIsNotSupported() { - SparseVectorFieldMapper.Builder builder = new SparseVectorFieldMapper.Builder("field"); - MappedFieldType fieldMapper = builder.build(MapperBuilderContext.root(false)).fieldType(); - UnsupportedOperationException exc = expectThrows(UnsupportedOperationException.class, () -> fieldMapper.valueFetcher(null, null)); - assertEquals(SparseVectorFieldMapper.ERROR_MESSAGE_7X, exc.getMessage()); + @Override + protected boolean supportsIgnoreMalformed() { + return false; } - public void testSparseVectorWith8xIndex() throws Exception { - IndexVersion version = IndexVersionUtils.randomVersionBetween(random(), IndexVersion.V_8_0_0, IndexVersion.current()); - Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); + @Override + protected void registerParameters(ParameterChecker checker) throws IOException {} - IndexService indexService = createIndex("index", settings); - MapperService mapperService = indexService.mapperService(); + @Override + protected boolean supportsMeta() { + return false; + } - BytesReference mapping = BytesReference.bytes( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject("my-vector") - .field("type", "sparse_vector") - .endObject() - .endObject() - .endObject() - .endObject() - ); + private static int getFrequency(TokenStream tk) throws IOException { + TermFrequencyAttribute freqAttribute = tk.addAttribute(TermFrequencyAttribute.class); + tk.reset(); + assertTrue(tk.incrementToken()); + int freq = freqAttribute.getTermFrequency(); + 
assertFalse(tk.incrementToken()); + return freq; + } - MapperParsingException e = expectThrows( - MapperParsingException.class, - () -> mapperService.parseMapping(MapperService.SINGLE_MAPPING_NAME, new CompressedXContent(mapping)) - ); - assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE)); + public void testDefaults() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); + + ParsedDocument doc1 = mapper.parse(source(this::writeField)); + + List<IndexableField> fields = doc1.rootDoc().getFields("field"); + assertEquals(2, fields.size()); + assertThat(fields.get(0), Matchers.instanceOf(FeatureField.class)); + FeatureField featureField1 = null; + FeatureField featureField2 = null; + for (IndexableField field : fields) { + if (field.stringValue().equals("ten")) { + featureField1 = (FeatureField) field; + } else if (field.stringValue().equals("twenty")) { + featureField2 = (FeatureField) field; + } else { + throw new UnsupportedOperationException(); + } + } + + int freq1 = getFrequency(featureField1.tokenStream(null, null)); + int freq2 = getFrequency(featureField2.tokenStream(null, null)); + assertTrue(freq1 < freq2); } - public void testSparseVectorWith7xIndex() throws Exception { - IndexVersion version = IndexVersionUtils.randomPreviousCompatibleVersion(random(), IndexVersion.V_8_0_0); - Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build(); + public void testDotInFieldName() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + DocumentParsingException ex = expectThrows( + DocumentParsingException.class, + () -> mapper.parse(source(b -> b.field("field", Map.of("politi.cs", 10, "sports", 20)))) + ); + assertThat(ex.getCause().getMessage(), containsString("do not support dots in feature names")); + 
assertThat(ex.getCause().getMessage(), containsString("politi.cs")); + } - IndexService indexService = createIndex("index", settings); - MapperService mapperService = indexService.mapperService(); + public void testRejectMultiValuedFields() throws MapperParsingException, IOException { + DocumentMapper mapper = createDocumentMapper(mapping(b -> { + b.startObject("field").field("type", "sparse_vector").endObject(); + b.startObject("foo").startObject("properties"); + { + b.startObject("field").field("type", "sparse_vector").endObject(); + } + b.endObject().endObject(); + })); - BytesReference mapping = BytesReference.bytes( - XContentFactory.jsonBuilder() - .startObject() - .startObject("_doc") - .startObject("properties") - .startObject("my-vector") - .field("type", "sparse_vector") - .endObject() - .endObject() - .endObject() - .endObject() + DocumentParsingException e = expectThrows( + DocumentParsingException.class, + () -> mapper.parse(source(b -> b.startObject("field").field("foo", Arrays.asList(10, 20)).endObject())) + ); + assertEquals( + "[sparse_vector] fields take hashes that map a feature to a strictly positive float, but got unexpected token " + "START_ARRAY", + e.getCause().getMessage() ); - DocumentMapper mapper = mapperService.merge( - MapperService.SINGLE_MAPPING_NAME, - new CompressedXContent(mapping), - MapperService.MergeReason.MAPPING_UPDATE + e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> { + b.startArray("foo"); + { + b.startObject().startObject("field").field("bar", 10).endObject().endObject(); + b.startObject().startObject("field").field("bar", 20).endObject().endObject(); + } + b.endArray(); + }))); + assertEquals( + "[sparse_vector] fields do not support indexing multiple values for the same feature [foo.field.bar] in " + "the same document", + e.getCause().getMessage() ); + } + + public void testCannotBeUsedInMultiFields() { + Exception e = expectThrows(MapperParsingException.class, () -> 
createMapperService(fieldMapping(b -> { + b.field("type", "keyword"); + b.startObject("fields"); + b.startObject("feature"); + b.field("type", "sparse_vector"); + b.endObject(); + b.endObject(); + }))); + assertThat(e.getMessage(), containsString("Field [feature] of type [sparse_vector] can't be used in multifields")); + } + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + assumeFalse("Test implemented in a follow up", true); + return null; + } + + @Override + protected boolean allowsNullValues() { + return false; // TODO should this allow null values? + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport(boolean syntheticSource) { + throw new AssumptionViolatedException("not supported"); + } + + @Override + protected IngestScriptSupport ingestScriptSupport() { + throw new AssumptionViolatedException("not supported"); + } + + @Override + protected String[] getParseMinimalWarnings(IndexVersion indexVersion) { + String[] additionalWarnings = null; + if (indexVersion.before(PREVIOUS_SPARSE_VECTOR_INDEX_VERSION)) { + additionalWarnings = new String[] { SparseVectorFieldMapper.ERROR_MESSAGE_7X }; + } + return Strings.concatStringArrays(super.getParseMinimalWarnings(indexVersion), additionalWarnings); + } + + @Override + protected IndexVersion boostNotAllowedIndexVersion() { + return NEW_SPARSE_VECTOR_INDEX_VERSION; + } + + public void testSparseVectorWith7xIndex() throws Exception { + IndexVersion version = IndexVersionUtils.randomPreviousCompatibleVersion(random(), PREVIOUS_SPARSE_VECTOR_INDEX_VERSION); + + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("my-vector") + .field("type", "sparse_vector") + .endObject() + .endObject() + .endObject() + .endObject(); + + DocumentMapper mapper = createDocumentMapper(version, builder); assertWarnings(SparseVectorFieldMapper.ERROR_MESSAGE_7X); // Check that new vectors cannot be 
indexed. @@ -115,6 +240,18 @@ public void testSparseVectorWith7xIndex() throws Exception { DocumentParsingException.class, () -> mapper.parse(new SourceToParse("id", source, XContentType.JSON)) ); - assertThat(indexException.getCause().getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE)); + assertThat(indexException.getCause().getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE_7X)); + } + + public void testSparseVectorUnsupportedIndex() throws Exception { + IndexVersion version = IndexVersionUtils.randomVersionBetween( + random(), + PREVIOUS_SPARSE_VECTOR_INDEX_VERSION, + IndexVersion.V_8_500_000 + ); + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(version, fieldMapping(b -> { + b.field("type", "sparse_vector"); + }))); + assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE_8X)); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java index 574fb63cd3fb0..1575d71110c42 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java @@ -26,22 +26,4 @@ public void testIsNotAggregatable() { MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", Collections.emptyMap()); assertFalse(fieldType.isAggregatable()); } - - public void testDocValueFormatIsNotSupported() { - MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", Collections.emptyMap()); - UnsupportedOperationException exc = expectThrows(UnsupportedOperationException.class, () -> fieldType.docValueFormat(null, null)); - assertEquals(SparseVectorFieldMapper.ERROR_MESSAGE_7X, exc.getMessage()); - } - - public void testExistsQueryIsNotSupported() { - MappedFieldType fieldType = 
new SparseVectorFieldMapper.SparseVectorFieldType("field", Collections.emptyMap()); - UnsupportedOperationException exc = expectThrows(UnsupportedOperationException.class, () -> fieldType.existsQuery(null)); - assertEquals(SparseVectorFieldMapper.ERROR_MESSAGE_7X, exc.getMessage()); - } - - public void testTermQueryIsNotSupported() { - MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", Collections.emptyMap()); - UnsupportedOperationException exc = expectThrows(UnsupportedOperationException.class, () -> fieldType.termQuery(null, null)); - assertEquals(SparseVectorFieldMapper.ERROR_MESSAGE_7X, exc.getMessage()); - } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index c45e08c857f48..bc58a792cefc6 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -441,6 +441,10 @@ protected String[] getParseMinimalWarnings() { return Strings.EMPTY_ARRAY; } + protected String[] getParseMinimalWarnings(IndexVersion indexVersion) { + return getParseMinimalWarnings(); + } + protected String[] getParseMaximalWarnings() { // Most mappers don't emit any warnings return Strings.EMPTY_ARRAY; @@ -494,24 +498,26 @@ public final void testMeta() throws IOException { ); } - public final void testDeprecatedBoost() throws IOException { + public final void testDeprecatedBoostWarning() throws IOException { try { createMapperService(DEPRECATED_BOOST_INDEX_VERSION, fieldMapping(b -> { minimalMapping(b, DEPRECATED_BOOST_INDEX_VERSION); b.field("boost", 2.0); })); String[] warnings = Strings.concatStringArrays( - getParseMinimalWarnings(), + getParseMinimalWarnings(DEPRECATED_BOOST_INDEX_VERSION), new String[] { "Parameter [boost] on field [field] is deprecated and has no effect" } ); assertWarnings(warnings); } 
catch (MapperParsingException e) { assertThat(e.getMessage(), anyOf(containsString("Unknown parameter [boost]"), containsString("[boost : 2.0]"))); } + } + public void testBoostNotAllowed() throws IOException { MapperParsingException e = expectThrows( MapperParsingException.class, - () -> createMapperService(IndexVersion.V_8_0_0, fieldMapping(b -> { + () -> createMapperService(boostNotAllowedIndexVersion(), fieldMapping(b -> { minimalMapping(b); b.field("boost", 2.0); })) @@ -521,6 +527,10 @@ public final void testDeprecatedBoost() throws IOException { assertParseMinimalWarnings(); } + protected IndexVersion boostNotAllowedIndexVersion() { + return IndexVersion.V_8_0_0; + } + /** * Use a {@linkplain ValueFetcher} to extract values from doc values. */