Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge branch '91-bulk' of https://github.com/hbz/lobid-gnd
  • Loading branch information
fsteeg authored and sol committed Jun 27, 2018
2 parents 2f48785 + 0e2dd40 commit 1ee4a6d
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 64 deletions.
20 changes: 11 additions & 9 deletions app/controllers/Accept.java
Expand Up @@ -19,7 +19,8 @@ private Accept() {
}

enum Format {
JSON_LD("json(.+)?", "application/json", "application/ld+json"), //
JSON_LINES("jsonl", "application/x-jsonlines"), //
JSON_LD("json(:.+)?", "application/json", "application/ld+json"), //
HTML("html", "text/html"), //
RDF_XML("rdf", "application/rdf+xml", "application/xml", "text/xml"), //
N_TRIPLE("nt", "application/n-triples", "text/plain"), //
Expand All @@ -41,16 +42,17 @@ private Format(String format, String... types) {
* The accepted types
* @return The selected format for the given parameter and types
*/
public static String formatFor(String formatParam, Collection<MediaRange> acceptedTypes) {
public static Format formatFor(String formatParam, Collection<MediaRange> acceptedTypes) {
for (Format format : Format.values())
if (formatParam != null && formatParam.matches(format.queryParamString))
return formatParam;
for (MediaRange mediaRange : acceptedTypes)
for (Format format : Format.values())
for (String mimeType : format.types)
if (mediaRange.accepts(mimeType))
return format.queryParamString;
return Format.JSON_LD.queryParamString;
return format;
if (formatParam == null || formatParam.isEmpty())
for (MediaRange mediaRange : acceptedTypes)
for (Format format : Format.values())
for (String mimeType : format.types)
if (mediaRange.accepts(mimeType))
return format;
return (formatParam == null || formatParam.isEmpty()) && acceptedTypes.isEmpty() ? Format.JSON_LD : null;
}

}
127 changes: 94 additions & 33 deletions app/controllers/HomeController.java
Expand Up @@ -14,6 +14,7 @@
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -30,11 +31,18 @@
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.riot.RiotNotFoundException;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortOrder;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
Expand All @@ -46,7 +54,10 @@
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigObject;

import akka.stream.javadsl.Source;
import akka.util.ByteString;
import apps.Convert;
import controllers.Accept.Format;
import models.AuthorityResource;
import models.GndOntology;
import models.RdfConverter;
Expand Down Expand Up @@ -144,7 +155,6 @@ public Result authorityDotFormat(final String id, String format) {
}

public Result authority(String id, String format) {
String responseFormat = Accept.formatFor(format, request().acceptedTypes());
SearchHits hits = index
.query(String.format("deprecatedUri:\"%s%s\"", AuthorityResource.DNB_PREFIX, id), "", 0, 1).getHits();
if (hits.getTotalHits() > 0 && !hits.getAt(0).getId().equals(id)) {
Expand All @@ -154,17 +164,28 @@ public Result authority(String id, String format) {
if (jsonLd == null) {
return notFound("Not found: " + id);
}
Format responseFormat = Accept.formatFor(format, request().acceptedTypes());
if (responseFormat == null || responseFormat == Accept.Format.JSON_LINES
|| format != null && format.contains(":")) {
return unsupportedMediaType(String.format("Unsupported for single resource: format=%s, accept=%s", format,
request().acceptedTypes()));
}
try {
JsonNode json = Json.parse(jsonLd);
if (responseFormat.equals("html")) {
AuthorityResource entity = new AuthorityResource(json);
switch (responseFormat) {
case HTML: {
AuthorityResource entity = new AuthorityResource(Json.parse(jsonLd));
if (entity.getImage().url.contains("File:"))
entity.imageAttribution = attribution(entity.getImage().url
.substring(entity.getImage().url.indexOf("File:") + 5).split("\\?")[0]);
entity.creatorOf = creatorOf(id);
return ok(views.html.details.render(entity));
}
return responseFor(json, responseFormat);
default: {
return rdfResultFor(Json.parse(jsonLd), responseFormat.queryParamString).orElseGet(() -> {
return result(jsonLd, Accept.Format.JSON_LD.types[0]);
});
}
}
} catch (Exception e) {
Logger.error("Could not create response", e);
return internalServerError(e.getMessage());
Expand All @@ -188,30 +209,19 @@ private String getAuthorityResource(String id) {
return response.getSourceAsString();
}

private Result responseFor(JsonNode responseJson, String responseFormat) throws JsonProcessingException {
String content = "";
String contentType = "";
switch (responseFormat) {
case "rdf": {
content = RdfConverter.toRdf(responseJson.toString(), RdfFormat.RDF_XML);
contentType = Accept.Format.RDF_XML.types[0];
break;
}
case "ttl": {
content = RdfConverter.toRdf(responseJson.toString(), RdfFormat.TURTLE);
contentType = Accept.Format.TURTLE.types[0];
break;
}
case "nt": {
content = RdfConverter.toRdf(responseJson.toString(), RdfFormat.N_TRIPLE);
contentType = Accept.Format.N_TRIPLE.types[0];
break;
}
default: {
content = new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(responseJson);
contentType = Accept.Format.JSON_LD.types[0];
}
/**
 * Tries to serialize the given JSON as RDF in the requested format.
 *
 * @param responseJson
 *            The JSON to convert; its string form is passed to the RDF converter
 * @param requestedFormat
 *            The format query parameter value, compared to each format's
 *            queryParamString
 * @return A result with the RDF content and the first content type of the
 *         matching format, or an empty Optional if no format both equals the
 *         requested format and has a corresponding RdfFormat
 */
private Optional<Result> rdfResultFor(JsonNode responseJson, String requestedFormat) {
	for (Format candidate : Format.values()) {
		if (!candidate.queryParamString.equals(requestedFormat)) {
			continue;
		}
		RdfFormat rdfFormat = RdfFormat.of(candidate.queryParamString);
		if (rdfFormat == null) {
			// the format matched, but it is not an RDF serialization: keep looking
			continue;
		}
		String serialized = RdfConverter.toRdf(responseJson.toString(), rdfFormat);
		return Optional.of(result(serialized, candidate.types[0]));
	}
	return Optional.empty();
}

/**
 * Wraps the given content in an HTTP result.
 *
 * @param content
 *            The response body
 * @param contentType
 *            The content type, sent with a UTF-8 charset suffix
 * @return An internal server error if the content is empty, else an OK result
 *         with the content and content type
 */
private Result result(String content, String contentType) {
	if (content.isEmpty()) {
		return internalServerError("No content");
	}
	return ok(content).as(contentType + "; charset=utf-8");
}

Expand Down Expand Up @@ -244,18 +254,69 @@ public Result gnd(String id) {
}

public Result search(String q, String filter, int from, int size, String format) {
String responseFormat = Accept.formatFor(format, request().acceptedTypes());
SearchResponse response = index.query(q.isEmpty() ? "*" : q, filter, from, size);
Format responseFormat = Accept.formatFor(format, request().acceptedTypes());
if (responseFormat == null || Stream.of(RdfFormat.values()).map(RdfFormat::getParam)
.anyMatch(f -> f.equals(responseFormat.queryParamString))) {
return unsupportedMediaType(
String.format("Unsupported for search: format=%s, accept=%s", format, request().acceptedTypes()));
}
String queryString = (q == null || q.isEmpty()) ? "*" : q;
SearchResponse response = index.query(queryString, filter, from, size);
response().setHeader("Access-Control-Allow-Origin", "*");
String[] formatAndConfig = responseFormat.split(":");
String[] formatAndConfig = format == null ? new String[] {} : format.split(":");
boolean returnSuggestions = formatAndConfig.length == 2;
if (returnSuggestions) {
List<Map<String, Object>> hits = Arrays.asList(response.getHits().getHits()).stream()
.map(hit -> hit.getSource()).collect(Collectors.toList());
return withCallback(toSuggestions(Json.toJson(hits), formatAndConfig[1]));
}
return responseFormat.equals("html") ? htmlSearch(q, filter, from, size, responseFormat, response)
: ok(returnAsJson(q, response)).as(config("index.content"));
switch (responseFormat) {
case HTML: {
return htmlSearch(q, filter, from, size, responseFormat.queryParamString, response);
}
case JSON_LINES: {
response().setHeader("Content-Disposition",
String.format("attachment; filename=\"lobid-gnd-bulk-%s.jsonl\"", System.currentTimeMillis()));
return jsonLines(queryString, filter, response);
}
default: {
return ok(returnAsJson(q, response)).as(config("index.content"));
}
}
}

/**
 * Streams all hits for the given query as a chunked JSON lines response, using
 * a scroll search sorted by document order.
 *
 * @param q
 *            The query string, used as a mandatory query string query
 * @param filter
 *            An additional query string used as a filter, ignored if empty
 * @param response
 *            Not used by this method
 * @return A chunked result with the JSON lines content type
 */
private Result jsonLines(String q, String filter, SearchResponse response) {
	BoolQueryBuilder scrollQuery = QueryBuilders.boolQuery().must(QueryBuilders.queryStringQuery(q));
	if (!filter.isEmpty()) {
		scrollQuery = scrollQuery.filter(QueryBuilders.queryStringQuery(filter));
	}
	// how long each scroll request keeps the scroll alive, in milliseconds
	TimeValue scrollKeepAlive = new TimeValue(60000);
	SearchRequestBuilder request = index.client().prepareSearch(config("index.name"))
			.addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC)//
			.setScroll(scrollKeepAlive)//
			.setQuery(scrollQuery)//
			.setSize(100 /* hits per shard for each scroll */);
	Logger.debug("Scrolling with query: q={}, request={}", q, request);
	// the lambda acts as an Iterable: the search is only executed when it is iterated
	Source<ByteString, ?> lines = Source.from(() -> hitIterator(request.get(), scrollKeepAlive));
	return ok().chunked(lines).as(Accept.Format.JSON_LINES.types[0]);
}

/**
 * Creates an iterator over all hits of a scroll search, fetching further pages
 * on demand. Each hit is returned as its JSON source followed by a newline.
 *
 * @param scrollResponse
 *            The response of the initial scroll search request
 * @param keepAlive
 *            How long each follow-up scroll request keeps the scroll alive
 * @return An iterator over the hits of the initial and all following pages
 */
private Iterator<ByteString> hitIterator(SearchResponse scrollResponse, TimeValue keepAlive) {
	return new Iterator<ByteString>() {
		private Iterator<SearchHit> iterator = scrollResponse.getHits().iterator();
		// the scroll id may change between requests: always use the latest one
		private String scrollId = scrollResponse.getScrollId();
		// set once a page came back empty, so we don't scroll past the end again
		private boolean exhausted = false;

		@Override
		public boolean hasNext() {
			if (!iterator.hasNext() && !exhausted) {
				SearchResponse page = index.client().prepareSearchScroll(scrollId)//
						.setScroll(keepAlive).execute().actionGet();
				if (page.getScrollId() != null) {
					scrollId = page.getScrollId();
				}
				iterator = page.getHits().iterator();
				exhausted = !iterator.hasNext();
			}
			return iterator.hasNext();
		}

		@Override
		public ByteString next() {
			// load the next page if required, so next() works without a prior hasNext()
			hasNext();
			// throws NoSuchElementException via the underlying iterator if exhausted
			return ByteString.fromString(iterator.next().getSourceAsString() + "\n");
		}
	};
}

private Result htmlSearch(String q, String type, int from, int size, String format, SearchResponse response) {
Expand Down
23 changes: 19 additions & 4 deletions app/models/RdfConverter.java
Expand Up @@ -27,19 +27,34 @@ public class RdfConverter {
*/
@SuppressWarnings("javadoc")
public static enum RdfFormat {
RDF_XML("RDF/XML"), //
N_TRIPLE("N-TRIPLE"), //
TURTLE("TURTLE");
RDF_XML("rdf", "RDF/XML"), //
N_TRIPLE("nt", "N-TRIPLE"), //
TURTLE("ttl", "TURTLE");

private final String name;
private String queryParamString;

RdfFormat(final String name) {
RdfFormat(final String param, final String name) {
this.queryParamString = param;
this.name = name;
}

public String getName() {
return name;
}

public String getParam() {
return queryParamString;
}

public static RdfFormat of(String format) {
for (RdfFormat f : values()) {
if (f.queryParamString.equals(format)) {
return f;
}
}
return null;
}
}

/**
Expand Down
9 changes: 6 additions & 3 deletions app/views/api.scala.html
Expand Up @@ -51,12 +51,15 @@ <h2 id='content_types'>Inhaltstypen <small><a href='#content_types'><span class=

<p>Standardmäßig liefert dieser Dienst strukturierte API-Antworten (als JSON):</p>
<p><code>curl http://test.lobid.org@routes.HomeController.authority("4074335-4")</code></p>
<p>Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json) oder HTML (text/html):</p>
<p><code>curl --header "Accept: application/json" http://test.lobid.org@routes.HomeController.search("london")</code></p>
<p>Der Query-Parameter "format" (Werte: html,json,bulk) kann verwendet werden, um den Accept-Header aufzuheben, z.B. zur Anzeige von JSON im Browser:</p>
<p>Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), JSON lines (application/x-jsonlines) oder HTML (text/html):</p>
<p><code>curl --header "Accept: application/json" http://lobid.org@routes.HomeController.search("london")</code></p>
<p><code>curl --header "Accept: application/x-jsonlines" "http://lobid.org@java.net.URLDecoder.decode(routes.HomeController.search("type:Country").toString)" > countries.jsonl</code></p>
<p>Der Query-Parameter "format" (Werte: html,json,jsonl) kann verwendet werden, um den Accept-Header aufzuheben, z.B. zur Anzeige von JSON im Browser:</p>
<p><a href='@routes.HomeController.authority("4074335-4", format="json")'>@routes.HomeController.authority("4074335-4", format="json")</a></p>
<p>Der Wert des Format-Parameters kann für Einzeltreffer auch in URLs als Dateiendung verwendet werden:</p>
<p><a href='@routes.HomeController.authorityDotFormat("4074335-4", format="json")'>@routes.HomeController.authorityDotFormat("4074335-4", format="json")</a></p>
<p>Für größere Anfragen kann die Antwort als gzip komprimiert werden:</p>
<p><code>curl --header "Accept-Encoding: gzip" "http://lobid.org@java.net.URLDecoder.decode(routes.HomeController.search("type:Company", format="jsonl").toString)" > companies.gz</code></p>
<p>Einzeltreffer unterstützen zudem verschiedene RDF-Serialisierungen (siehe <a href='#jsonld'>Abschnitt zu JSON-LD</a>).</p>

<h2 id="auto-complete">Autovervollständigung <small><a href='#auto-complete'><span class='glyphicon glyphicon-link'></span></a></small></h2>
Expand Down
12 changes: 9 additions & 3 deletions test/controllers/AcceptIntegrationTest.java
Expand Up @@ -41,8 +41,13 @@ public static Collection<Object[]> data() {
{ fakeRequest(GET, "/gnd/search?q=*"), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/search?q=*&format="), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/search?q=*&format=json"), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/search?q=*&format=whatever"), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/search?q=*").header("Accept", "text/plain"), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/search?q=*&format=whatever"), /*->*/ "text/plain" },
{ fakeRequest(GET, "/gnd/search?q=*").header("Accept", "text/plain"), /*->*/ "text/plain" },
// search, bulk format: JSON lines
{ fakeRequest(GET, "/gnd/search?q=*").header("Accept", "application/x-jsonlines"), /*->*/ "application/x-jsonlines" },
{ fakeRequest(GET, "/gnd/search?format=jsonl"), /*->*/ "application/x-jsonlines" },
{ fakeRequest(GET, "/gnd/search?q=*&format=jsonl"), /*->*/ "application/x-jsonlines" },
{ fakeRequest(GET, "/gnd/search?q=vwxyz&format=jsonl"), /*->*/ "application/x-jsonlines" },
// search, other formats as query param:
{ fakeRequest(GET, "/gnd/search?q=*&format=html"), /*->*/ "text/html" },
// search, other formats via header:
Expand All @@ -52,7 +57,8 @@ public static Collection<Object[]> data() {
{ fakeRequest(GET, "/gnd/118820591"), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/118820591?format="), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/118820591?format=json"), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/118820591?format=whatever"), /*->*/ "application/json" },
{ fakeRequest(GET, "/gnd/118820591?format=whatever"), /*->*/ "text/plain" },
{ fakeRequest(GET, "/gnd/118820591?format=whatever").header("Accept", "text/html"), /*->*/ "text/plain" },
{ fakeRequest(GET, "/gnd/118820591").header("Accept", "text/plain"), /*->*/ "application/n-triples" },
// get, other formats as query param:
{ fakeRequest(GET, "/gnd/118820591?format=html"), /*->*/ "text/html" },
Expand Down

0 comments on commit 1ee4a6d

Please sign in to comment.