Permalink
Browse files

Set up basic scripted topic aggregation

For implementing NWBib topic search based on API 2.0

See hbz/nwbib#163
  • Loading branch information...
fsteeg committed Nov 24, 2017
1 parent a7b575f commit 72054407cd25371f58a28de0068a44ce8ada12bd
Showing with 44 additions and 8 deletions.
  1. +2 −0 web/app/controllers/resources/Application.java
  2. +42 −8 web/app/controllers/resources/Index.java
@@ -91,6 +91,8 @@
public static final String MEDIUM_FIELD = "medium.id";
/** The internal ES aggregation name for the owner facet. */
public static final String OWNER_AGGREGATION = "owner";
/** The internal ES aggregation name for the topics. */
public static final String TOPIC_AGGREGATION = "topic";
/** The internal ES field for subjects. */
public static final String SUBJECT_FIELD = "subject.componentList.id";
/** The internal ES field for contributing agents. */
@@ -5,35 +5,42 @@
import static controllers.resources.Application.MEDIUM_FIELD;
import static controllers.resources.Application.OWNER_AGGREGATION;
import static controllers.resources.Application.SUBJECT_FIELD;
import static controllers.resources.Application.TOPIC_AGGREGATION;
import static controllers.resources.Application.TYPE_FIELD;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptResponse;
import org.elasticsearch.action.admin.indices.validate.query.ValidateQueryResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.GeoDistanceQueryBuilder;
import org.elasticsearch.index.query.GeoPolygonQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.join.aggregations.ChildrenAggregationBuilder;
import org.elasticsearch.join.query.JoinQueryBuilders;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilder;
@@ -44,8 +51,10 @@
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import play.Logger;
import play.Play;
import play.cache.Cache;
import play.libs.Json;
@@ -103,9 +112,10 @@
/**
* The values supported for the `aggregations` query parameter.
*/
public static final List<String> SUPPORTED_AGGREGATIONS = Arrays.asList(
ISSUED_FIELD, SUBJECT_FIELD, TYPE_FIELD, MEDIUM_FIELD, OWNER_AGGREGATION,
AGENT_FIELD, SPATIAL_LABEL_FIELD, SPATIAL_GEO_FIELD, SUBJECT_ID_FIELD);
public static final List<String> SUPPORTED_AGGREGATIONS =
Arrays.asList(ISSUED_FIELD, SUBJECT_FIELD, TYPE_FIELD, MEDIUM_FIELD,
OWNER_AGGREGATION, AGENT_FIELD, SPATIAL_LABEL_FIELD,
SPATIAL_GEO_FIELD, SUBJECT_ID_FIELD, TOPIC_AGGREGATION);
/**
* @param q The current query string
@@ -116,8 +126,9 @@ public String buildQueryString(String q, String... values) {
String fullQuery = q.isEmpty() ? "*" : "(" + q + ")";
for (int i = 0; i < values.length; i++) {
String fieldValue = values[i];
String fieldName = fieldValue.contains("http")
? QUERY_FIELDS[i].replace(".label", ".id") : QUERY_FIELDS[i];
String fieldName =
fieldValue.contains("http") ? QUERY_FIELDS[i].replace(".label", ".id")
: QUERY_FIELDS[i];
if (fieldName.toLowerCase().endsWith("date")
&& fieldValue.matches("(\\d{1,4}|\\*)-(\\d{1,4}|\\*)")) {
String[] fromTo = fieldValue.split("-");
@@ -200,7 +211,7 @@ public Index queryResources(String q, int from, int size, String sort,
}
if (!aggregations.isEmpty()) {
requestBuilder =
withAggregations(requestBuilder, aggregations.split(","));
withAggregations(client, requestBuilder, aggregations.split(","));
}
SearchResponse response = requestBuilder.execute().actionGet();
SearchHits hits = response.getHits();
@@ -359,11 +370,11 @@ public long getTotal() {
return total;
}
private static SearchRequestBuilder withAggregations(
private static SearchRequestBuilder withAggregations(Client client,
final SearchRequestBuilder searchRequest, String... fields) {
int defaultSize = 100;
Arrays.asList(fields).forEach(field -> {
if (field.equals("owner")) {
if (field.equals(OWNER_AGGREGATION)) {
AggregationBuilder ownerAggregation =
new ChildrenAggregationBuilder(Application.OWNER_AGGREGATION,
TYPE_ITEM)
@@ -374,6 +385,16 @@ private static SearchRequestBuilder withAggregations(
searchRequest
.addAggregation(AggregationBuilders.geohashGrid(SPATIAL_GEO_FIELD)
.field(SPATIAL_GEO_FIELD).precision(9));
} else if (field.equals(TOPIC_AGGREGATION) && !Play.isTest()) {
String lang = "painless";
String id = "topic-aggregation";
// TODO: store script only once, on startup?
PutStoredScriptResponse response = storeScript(client, lang, id);
if (response.isAcknowledged()) {
searchRequest.addAggregation(
AggregationBuilders.terms(TOPIC_AGGREGATION).script(new Script(
ScriptType.STORED, lang, id, Collections.emptyMap())));
}
} else {
boolean many = field.equals("publication.startDate");
searchRequest.addAggregation(AggregationBuilders.terms(field)
@@ -383,6 +404,19 @@ private static SearchRequestBuilder withAggregations(
return searchRequest;
}
private static PutStoredScriptResponse storeScript(Client client, String lang,
String id) {
String script = // TODO: use subject.componentList.label.raw
"String chain = \"\"; for(String id : doc['subject.componentList.id']) {"
+ "chain += \" \" + id} return chain.trim();";
ObjectNode scriptObject = Json.newObject();
scriptObject.putObject("script").put("lang", lang).put("source", script);
Logger.debug("Will store script: {}", scriptObject);
return client.admin().cluster().preparePutStoredScript().setId(id)
.setContent(new BytesArray(scriptObject.toString()), XContentType.JSON)
.get();
}
<T> T withClient(Function<Client, T> function) {
if (elasticsearchClient != null) {
return function.apply(elasticsearchClient);

0 comments on commit 7205440

Please sign in to comment.