Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Term Vectors API: adds support for wildcards in selected fields
This could be useful to generate all term vectors or a chosen set of them.

Closes #7061
  • Loading branch information
alexksikes authored and areek committed Sep 8, 2014
1 parent 0d4d5a9 commit 397f74b
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 6 deletions.
3 changes: 2 additions & 1 deletion docs/reference/docs/termvectors.asciidoc
Expand Up @@ -20,7 +20,8 @@ curl -XGET 'http://localhost:9200/twitter/tweet/1/_termvector?fields=text,...'
--------------------------------------------------

or by adding the requested fields in the request body (see
example below).
example below). Fields can also be specified with wildcards
in a similar way to the <<query-dsl-multi-match-query,multi match query>> added[1.4.0].

[float]
=== Return values
Expand Down
Expand Up @@ -75,9 +75,7 @@ public List<? extends IndicesRequest> subRequests() {
return requests;
}

public void add(TermVectorRequest template, BytesReference data)
throws Exception {

public void add(TermVectorRequest template, BytesReference data) throws Exception {
XContentParser.Token token;
String currentFieldName = null;
if (data.length() > 0) {
Expand Down
Expand Up @@ -358,7 +358,6 @@ public static void parseRequest(TermVectorRequest termVectorRequest, XContentPar
currentFieldName = parser.currentName();
} else if (currentFieldName != null) {
if (currentFieldName.equals("fields")) {

if (token == XContentParser.Token.START_ARRAY) {
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
fields.add(parser.text());
Expand Down
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.get.GetField;
Expand Down Expand Up @@ -71,8 +72,12 @@ public TermVectorResponse getTermVector(TermVectorRequest request) {
Fields topLevelFields = MultiFields.getFields(topLevelReader);
Versions.DocIdAndVersion docIdAndVersion = Versions.loadDocIdAndVersion(topLevelReader, uidTerm);
if (docIdAndVersion != null) {
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
/* handle potential wildcards in fields */
if (request.selectedFields() != null) {
handleFieldWildcards(request);
}
/* generate term vectors if not available */
Fields termVectorsByField = docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
if (request.selectedFields() != null) {
termVectorsByField = generateTermVectorsIfNeeded(termVectorsByField, request, uidTerm, false);
}
Expand All @@ -90,6 +95,14 @@ public TermVectorResponse getTermVector(TermVectorRequest request) {
return termVectorResponse;
}

/**
 * Expands the patterns in the request's selected fields and writes the result back.
 * <p>
 * Each entry of {@code request.selectedFields()} is passed to the mapper service's
 * {@code simpleMatchToIndexNames}; the names returned for all entries are merged
 * into one set (so duplicates collapse) and then installed on the request,
 * replacing the previously selected fields. The request is modified in place.
 *
 * @param request the term vector request whose selected fields are rewritten;
 *                callers in this file only invoke this when the selected fields are non-null
 */
private void handleFieldWildcards(TermVectorRequest request) {
    final Set<String> resolvedNames = new HashSet<>();
    for (String fieldPattern : request.selectedFields()) {
        // one pattern may match several mapped fields, or none at all
        resolvedNames.addAll(indexShard.mapperService().simpleMatchToIndexNames(fieldPattern));
    }
    final String[] resolvedArray = resolvedNames.toArray(Strings.EMPTY_ARRAY);
    request.selectedFields(resolvedArray);
}

private Fields generateTermVectorsIfNeeded(Fields termVectorsByField, TermVectorRequest request, Term uidTerm, boolean realTime) throws IOException {
List<String> validFields = new ArrayList<>();
for (String field : request.selectedFields()) {
Expand Down Expand Up @@ -187,4 +200,5 @@ public int size() {
return fields.size();
}
}

}
Expand Up @@ -740,4 +740,31 @@ private void compareTermVectors(String fieldName, Fields fields0, Fields fields1
assertThat(iter0.next(), nullValue());
assertThat(iter1.next(), nullValue());
}

/**
 * Checks that a wildcard in the selected fields of a term vector request is
 * expanded to every matching field.
 * <p>
 * Creates {@code numFields} string fields named {@code field0..fieldN-1}, each with
 * {@code term_vector} randomly set to "yes" or "no", indexes a single document that
 * has a value for every field, and then requests term vectors for {@code field*}.
 * The response is expected to contain one entry per field.
 */
@Test
public void testSimpleWildCards() throws ElasticsearchException, IOException {
    final int numFields = 25;

    // The mapping and the document source are built side by side so that
    // every mapped field also has a value in the indexed document.
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties");
    XContentBuilder source = XContentFactory.jsonBuilder().startObject();
    for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) {
        final String fieldName = "field" + fieldIdx;
        final String termVectorSetting;
        if (randomBoolean()) {
            termVectorSetting = "yes";
        } else {
            termVectorSetting = "no";
        }
        mapping.startObject(fieldName)
                .field("type", "string")
                .field("term_vector", termVectorSetting)
                .endObject();
        source.field(fieldName, "some text here");
    }
    source.endObject();
    mapping.endObject().endObject().endObject();

    // Create the index with the mapping, then index the document and make it visible.
    assertAcked(prepareCreate("test").addMapping("type1", mapping));
    ensureGreen();

    client().prepareIndex("test", "type1", "0").setSource(source).get();
    refresh();

    // Ask for every field through a single wildcard pattern.
    TermVectorResponse response = client().prepareTermVector("test", "type1", "0").setSelectedFields("field*").get();
    final boolean docExists = response.isExists();
    assertThat("Doc doesn't exists but should", docExists, equalTo(true));
    final int returnedFieldCount = response.getFields().size();
    assertThat("All term vectors should have been generated", returnedFieldCount, equalTo(numFields));
}
}

0 comments on commit 397f74b

Please sign in to comment.