Skip to content

Commit

Permalink
Add depiction and sameAs info from EntityFacts to index data
Browse files Browse the repository at this point in the history
See #69
  • Loading branch information
fsteeg committed May 2, 2018
1 parent c34bc1d commit e64b958
Show file tree
Hide file tree
Showing 4 changed files with 259 additions and 7 deletions.
69 changes: 65 additions & 4 deletions app/apps/Convert.java
Expand Up @@ -10,11 +10,14 @@
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
Expand All @@ -35,6 +38,11 @@
import org.culturegraph.mf.framework.ObjectReceiver;
import org.culturegraph.mf.framework.helpers.DefaultObjectPipe;
import org.culturegraph.mf.framework.helpers.DefaultStreamPipe;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

Expand All @@ -49,6 +57,7 @@
import com.typesafe.config.ConfigObject;

import ORG.oclc.oai.harvester2.app.RawWrite;
import controllers.HomeController;
import models.GndOntology;
import play.Logger;
import play.libs.Json;
Expand All @@ -57,6 +66,19 @@ public class Convert {

private static final Config CONFIG = ConfigFactory.parseFile(new File("conf/application.conf"));

/**
 * Elasticsearch transport client shared by this class; package-visible, so the tests can use it
 * too. The cluster name is read from the `index.cluster` setting via
 * {@link HomeController#config(String)}.
 */
static final TransportClient CLIENT = new PreBuiltTransportClient(
		Settings.builder().put("cluster.name", HomeController.config("index.cluster")).build());

static {
	// Register every host listed under `index.hosts` on transport port 9300. CONFIG is declared
	// above and already holds the parsed conf/application.conf, so the file is not parsed again.
	for (String host : CONFIG.getStringList("index.hosts")) {
		try {
			CLIENT.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(host), 9300));
		} catch (UnknownHostException e) {
			// Skip an unresolvable host so that the remaining hosts are still registered
			Logger.error("Could not resolve Elasticsearch host: " + host, e);
		}
	}
}

/**
 * Looks up a string setting in the application configuration held by {@code CONFIG}.
 *
 * @param id The configuration key, e.g. {@code index.entityfacts.index}
 * @return The string value configured for the given key
 */
static String config(String id) {
	final String value = CONFIG.getString(id);
	return value;
}
Expand Down Expand Up @@ -147,7 +169,7 @@ public static String toJsonLd(String id, Model sourceModel, boolean dev) {
Object jsonLd = JsonUtils.fromString(out.toString());
jsonLd = JsonLdProcessor.frame(jsonLd, new HashMap<>(frame), options);
jsonLd = JsonLdProcessor.compact(jsonLd, context, options);
return postprocess(contextUrl, jsonLd);
return postprocess(id, contextUrl, jsonLd);
} catch (JsonLdError | IOException e) {
e.printStackTrace();
return null;
Expand Down Expand Up @@ -249,16 +271,55 @@ private static Map<String, Object> load() {
}
}

private static String postprocess(String contextUrl, Object jsonLd) {
private static String postprocess(String id, String contextUrl, Object jsonLd) {
JsonNode in = Json.toJson(jsonLd);
JsonNode graph = in.get("@graph");
JsonNode first = graph == null ? in : graph.elements().next();
JsonNode result = in;
if (first.isObject()) {
@SuppressWarnings("unchecked") /* first.isObject() */
Map<String, Object> res = Json.fromJson(first, TreeMap.class);
res.put("@context", contextUrl);
return Json.stringify(Json.toJson(res));
result = Json.toJson(res);
}
return Json.stringify(withEntityFacts(id, result));
}

/**
 * Enriches the given JSON with `depiction` and `sameAs` from the EntityFacts record stored under
 * the given ID in the index configured as `index.entityfacts.index` / `index.entityfacts.type`.
 * <p>
 * Enrichment is best effort: if no record exists, or if the lookup or the processing fails, the
 * node is returned unchanged and a warning is logged for the failure.
 *
 * @param id The ID of the EntityFacts record to look up
 * @param node The JSON to enrich
 * @return The enriched JSON, or the given node if no enrichment took place
 */
private static JsonNode withEntityFacts(String id, JsonNode node) {
	try {
		GetResponse response = CLIENT
				.prepareGet(config("index.entityfacts.index"), config("index.entityfacts.type"), id).execute()
				.actionGet();
		if (!response.isExists()) {
			return node;
		}
		JsonNode json = Json.parse(response.getSourceAsString());
		return Json.toJson(addIfExits(node, json.get("depiction"), json.get("sameAs")));
	} catch (Exception e) {
		// Placeholders are required: without them the logger drops the extra arguments
		Logger.warn("Could not enrich {} from EntityFacts: {}", id, e.getMessage());
		return node;
	}
}

/**
 * Copies the given JSON into a map and adds the EntityFacts fields that are present.
 * <p>
 * Each `sameAs` entry gets its `@id` moved to `id` and, if the entry has a `collection` with a
 * `name`, that name as its `label`. In `depiction`, every `@id` key is renamed to `id`, at any
 * nesting level.
 *
 * @param result The JSON to add the fields to
 * @param depiction The EntityFacts `depiction` value, or null if there is none
 * @param sameAs The EntityFacts `sameAs` array, or null if there is none
 * @return A map with the content of `result` plus the added fields
 */
@SuppressWarnings("unchecked")
private static Map<String, Object> addIfExits(JsonNode result, JsonNode depiction, JsonNode sameAs) {
	Map<String, Object> map = Json.fromJson(result, Map.class);
	if (sameAs != null) {
		List<Map<String, Object>> entries = Json.fromJson(sameAs, List.class);
		List<JsonNode> labelled = entries.stream().map((Map<String, Object> sameAsMap) -> {
			// An entry without collection name stays unlabelled instead of failing the enrichment
			Object collection = sameAsMap.get("collection");
			Object name = collection instanceof Map ? ((Map<String, Object>) collection).get("name") : null;
			if (name != null) {
				sameAsMap.put("label", name.toString());
			}
			sameAsMap.put("id", sameAsMap.remove("@id"));
			return Json.toJson(sameAsMap);
		}).collect(Collectors.toList());
		map.put("sameAs", labelled);
	}
	if (depiction != null) {
		// Rename keys on the parsed structure: a textual replace would also change values
		map.put("depiction", renameIdKeys(Json.fromJson(depiction, Object.class)));
	}
	return map;
}

/** Renames every `@id` key to `id` in the given parsed JSON value (maps and lists, recursively). */
@SuppressWarnings("unchecked")
private static Object renameIdKeys(Object value) {
	if (value instanceof Map) {
		Map<String, Object> object = (Map<String, Object>) value;
		if (object.containsKey("@id")) {
			object.put("id", object.remove("@id"));
		}
		object.replaceAll((key, child) -> renameIdKeys(child));
	} else if (value instanceof List) {
		((List<Object>) value).replaceAll(Convert::renameIdKeys);
	}
	return value;
}
}
4 changes: 4 additions & 0 deletions conf/application.conf
Expand Up @@ -29,6 +29,10 @@ index {
type: "authority"
settings: "conf/index-settings.json"
content: "application/json; charset=utf-8"
entityfacts {
index: "entityfacts"
type: "entityfacts"
}
}

context {
Expand Down
34 changes: 31 additions & 3 deletions test/apps/ConvertTest.java
@@ -1,7 +1,9 @@
package apps;

import static apps.Convert.config;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
Expand All @@ -12,13 +14,19 @@
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.junit.Test;

import com.fasterxml.jackson.databind.JsonNode;
Expand Down Expand Up @@ -151,6 +159,26 @@ public void testGndLabelEnrichment() throws FileNotFoundException {
assertEquals("Twain, Mark", author.get("label").textValue());
}

/**
 * Indexes the EntityFacts fixture for a GND ID, then checks that the JSON-LD created for that ID
 * contains `depiction` and more than 5 `sameAs` entries, the first being an object with ID and
 * label.
 */
@Test
public void testEntityFactsEnrichment() throws IOException {
	String id = "118624822";
	indexEntityFacts(id);
	String jsonLd = jsonLdFor(id);
	assertNotNull("JSON-LD should exist", jsonLd);
	JsonNode json = Json.parse(jsonLd);
	assertTrue("Enrichment for depiction should exist", json.has("depiction"));
	JsonNode sameAs = json.get("sameAs");
	// Fail with a descriptive assertion, not a NullPointerException, if sameAs is missing
	assertNotNull("Enrichment for sameAs should exist", sameAs);
	assertTrue("Enrichment for sameAs should exist", sameAs.size() > 5);
	assertIsObjectWithIdAndLabel(sameAs.elements().next());
}

/**
 * Indexes the fixture `test/entityfacts/{id}.json` under the given ID in the configured
 * EntityFacts index, then refreshes the indices so the document is visible to the next read.
 *
 * @param id The ID to index the fixture under, also the base name of the fixture file
 * @throws IOException If the fixture file cannot be read
 */
private void indexEntityFacts(String id) throws IOException {
	// readAllLines reads eagerly and closes the file, unlike an unclosed Files.lines stream
	String json = String.join("",
			Files.readAllLines(Paths.get("test/entityfacts/" + id + ".json"), StandardCharsets.UTF_8));
	TransportClient client = Convert.CLIENT;
	client.prepareIndex(config("index.entityfacts.index"), config("index.entityfacts.type")).setId(id)
			.setSource(json, XContentType.JSON).execute().actionGet();
	client.admin().indices().refresh(new RefreshRequest()).actionGet();
}

@Test
public void testTriplesToFramedJsonLd() throws FileNotFoundException {
Model model = ModelFactory.createDefaultModel();
Expand Down Expand Up @@ -179,9 +207,9 @@ public void testTriplesToFramedJsonLd() throws FileNotFoundException {
}

/**
 * Asserts that the given JSON node is an object that has both an `id` and a `label` field.
 *
 * @param json The JSON node to check
 */
private void assertIsObjectWithIdAndLabel(JsonNode json) {
	// Only the assertions with messages are kept, so a failure always says what was expected
	assertTrue("JSON node should be an object", json.isObject());
	assertTrue("JSON object should have an id", json.has("id"));
	assertTrue("JSON object should have a label", json.has("label"));
}

private InputStream in(String s) {
Expand Down
159 changes: 159 additions & 0 deletions test/entityfacts/118624822.json
@@ -0,0 +1,159 @@
{
"@context" : "http://hub.culturegraph.org/entityfacts/context/v1/entityfacts.jsonld",
"@type" : "person",
"@id" : "http://d-nb.info/gnd/118624822",
"preferredName" : "Mark Twain",
"surname" : "Twain",
"forename" : "Mark",
"describedBy" : {
"valid" : "2018-05-02T16:53:32+0200",
"license" : "https://creativecommons.org/publicdomain/zero/1.0/",
"@id" : "http://hub.culturegraph.org/entityfacts/118624822"
},
"variantName" : [ "Marc Twain", "Mark Twayn", "Make Tuwen", "Māku Towein", "Marḳ Tvein", "Mark Tvėn", "Marks Tvēns", "M. Tvens", "Mark Twen", "Marks Twens", "Marka Ṭvena", "Markas Tvenas", "Morkas Tvénas", "Mārk Twayn", "Mārk Tuwayn", "Ma-k`o T`u-wen", "Māku Touein", "Makū Touen", "Marek Twain", "Mark Tuein", "Mark Tven", "M. Tven", "Marḳ Ṭṿain", "Mark Twėn", "Maku Touein", "Mark Twain", "Mark Twėn", "Ma ke Tu wen", "Make-Tuwen", "Mark Tewin", "מארק טווין" ],
"pseudonym" : [ {
"@id" : "http://d-nb.info/gnd/1106406737",
"preferredName" : "Quintus Curtius Snodgrass"
}, {
"@id" : "http://d-nb.info/gnd/1106407008",
"preferredName" : "Louis de Conte"
} ],
"realIdentity" : [ {
"@id" : "http://d-nb.info/gnd/1106405730",
"preferredName" : "Samuel Langhorne Clemens"
} ],
"dateOfBirth" : "November 30, 1835",
"dateOfDeath" : "April 21, 1910",
"placeOfBirth" : [ {
"@id" : "http://d-nb.info/gnd/110640968X",
"preferredName" : "Florida, Mo."
} ],
"placeOfDeath" : [ {
"@id" : "http://d-nb.info/gnd/1025527097",
"preferredName" : "Redding, Conn."
} ],
"professionOrOccupation" : [ {
"@id" : "http://d-nb.info/gnd/4053309-8",
"preferredName" : "Schriftsteller"
}, {
"@id" : "http://d-nb.info/gnd/4028781-6",
"preferredName" : "Journalist"
}, {
"@id" : "http://d-nb.info/gnd/4013091-5",
"preferredName" : "Drucker"
}, {
"@id" : "http://d-nb.info/gnd/4036380-6",
"preferredName" : "Lotse"
}, {
"@id" : "http://d-nb.info/gnd/4055409-0",
"preferredName" : "Soldat"
} ],
"gender" : {
"@id" : "http://d-nb.info/standards/vocab/gnd/gender#male",
"label" : "Mann"
},
"biographicalOrHistoricalInformation" : "Amerikan. Schriftsteller, auch Übersetzungen und Reisebeschreibungen; zunächst Drucker, Lotse, Soldat, Journalist",
"depiction" : {
"@id" : "https://commons.wikimedia.org/wiki/Special:FilePath/MarkTwain.LOC.jpg",
"thumbnail" : {
"@id" : "https://commons.wikimedia.org/wiki/Special:FilePath/MarkTwain.LOC.jpg?width=270"
},
"url" : "https://commons.wikimedia.org/wiki/File:MarkTwain.LOC.jpg?uselang=en"
},
"sameAs" : [ {
"@id" : "http://d-nb.info/gnd/118624822/about",
"collection" : {
"abbr" : "DNB",
"name" : "Gemeinsame Normdatei (GND) im Katalog der Deutschen Nationalbibliothek",
"publisher" : "Deutsche Nationalbibliothek",
"icon" : "http://www.dnb.de/SiteGlobals/StyleBundles/Bilder/favicon.png?__blob=normal&v=1"
}
}, {
"@id" : "http://www.filmportal.de/person/5DF2180863F84E09B51D8C5DE7832A91",
"collection" : {
"abbr" : "filmportal.de",
"name" : "Filmportal",
"publisher" : "Deutsches Filminstitut",
"icon" : "http://filmportal.de/misc/favicon.ico"
}
}, {
"@id" : "http://catalogue.bnf.fr/ark:/12148/cb11927291n",
"collection" : {
"abbr" : "BNF",
"name" : "Bibliothèque nationale de France",
"publisher" : "Bibliothèque nationale de France",
"icon" : "http://www.bnf.fr/bnf_dev/icono/favicon.ico"
}
}, {
"@id" : "http://id.loc.gov/authorities/n79021164",
"collection" : {
"abbr" : "LC",
"name" : "NACO Authority File",
"publisher" : "Library of Congress",
"icon" : "http://www.loc.gov/favicon.ico"
}
}, {
"@id" : "http://kalliope-verbund.info/gnd/118624822",
"collection" : {
"abbr" : "DE-611",
"name" : "Kalliope Verbundkatalog",
"publisher" : "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"
}
}, {
"@id" : "http://viaf.org/viaf/50566653",
"collection" : {
"abbr" : "VIAF",
"name" : "Virtual International Authority File (VIAF)",
"publisher" : "OCLC",
"icon" : "http://viaf.org/viaf/images/viaf.ico"
}
}, {
"@id" : "http://www.isni.org/0000000121324854",
"collection" : {
"abbr" : "ISNI",
"name" : "International Standard Name Identifier (ISNI)",
"publisher" : "International Standard Name Identifier (ISNI)",
"icon" : "http://www.isni.org/sites/default/files/isni_new_favicon.ico"
}
}, {
"@id" : "http://www.wikidata.org/entity/Q7245",
"collection" : {
"abbr" : "WIKIDATA",
"name" : "Wikidata",
"publisher" : "Wikimedia Foundation Inc.",
"icon" : "https://www.wikidata.org/static/favicon/wikidata.ico"
}
}, {
"@id" : "https://de.wikipedia.org/wiki/Mark_Twain",
"collection" : {
"abbr" : "dewiki",
"name" : "Wikipedia (Deutsch)",
"publisher" : "Wikimedia Foundation Inc.",
"icon" : "https://de.wikipedia.org/static/favicon/wikipedia.ico"
}
}, {
"@id" : "https://de.wikisource.org/wiki/Mark_Twain",
"collection" : {
"abbr" : "WIKISOURCE",
"name" : "Wikisource",
"publisher" : "Wikimedia Foundation Inc.",
"icon" : "https://wikisource.org/static/favicon/wikisource.ico"
}
}, {
"@id" : "https://en.wikipedia.org/wiki/Mark_Twain",
"collection" : {
"abbr" : "enwiki",
"name" : "Wikipedia (English)",
"publisher" : "Wikimedia Foundation Inc.",
"icon" : "https://en.wikipedia.org/static/favicon/wikipedia.ico"
}
}, {
"@id" : "https://www.deutsche-digitale-bibliothek.de/entity/118624822",
"collection" : {
"abbr" : "DDB",
"name" : "Deutsche Digitale Bibliothek",
"publisher" : "Deutsche Digitale Bibliothek",
"icon" : "https://www.deutsche-digitale-bibliothek.de/appStatic/images/favicon.ico"
}
} ]
}

0 comments on commit e64b958

Please sign in to comment.