Skip to content

Commit

Permalink
Add bootstrapped GND labels to ID fields during conversion
Browse files Browse the repository at this point in the history
See #24
  • Loading branch information
fsteeg committed Apr 27, 2018
1 parent 9a197b8 commit ba01880
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
45 changes: 44 additions & 1 deletion app/apps/Convert.java
Expand Up @@ -10,6 +10,8 @@
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
Expand All @@ -35,6 +37,11 @@
import org.culturegraph.mf.framework.ObjectReceiver;
import org.culturegraph.mf.framework.helpers.DefaultObjectPipe;
import org.culturegraph.mf.framework.helpers.DefaultStreamPipe;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

Expand All @@ -49,6 +56,8 @@
import com.typesafe.config.ConfigObject;

import ORG.oclc.oai.harvester2.app.RawWrite;
import controllers.HomeController;
import models.AuthorityResource;
import models.GndOntology;
import play.Logger;
import play.libs.Json;
Expand All @@ -57,12 +66,29 @@ public class Convert {

/** Application configuration, parsed from the file conf/application.conf. */
private static final Config CONFIG = ConfigFactory.parseFile(new File("conf/application.conf"));

/**
 * Elasticsearch client settings; only "cluster.name" is set, taken from the
 * "index.cluster" entry via HomeController.config.
 * NOTE(review): the other lookups in this class go through CONFIG directly; confirm
 * that HomeController.config reads the same configuration.
 */
private static final Settings SETTINGS = Settings.builder()
.put("cluster.name", HomeController.config("index.cluster")).build();

/**
 * Transport client built from SETTINGS. Its transport addresses are added by the
 * static initializer that follows.
 */
private static final TransportClient CLIENT = new PreBuiltTransportClient(SETTINGS);

// Registers every host listed under "index.hosts" with CLIENT, using transport port 9300.
// A host that cannot be resolved is logged (with its name) and skipped, so the remaining
// hosts are still added.
static {
    for (String host : CONFIG.getStringList("index.hosts")) {
        try {
            CLIENT.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(host), 9300));
        } catch (UnknownHostException e) {
            // Use the application logger instead of printStackTrace so the failure ends up
            // in the regular log output together with the offending host name.
            Logger.error("Could not resolve index host '" + host + "', skipping it", e);
        }
    }
}

/**
 * Reads a string setting from the application configuration.
 *
 * @param id the configuration key to look up
 * @return the string value stored under {@code id} in CONFIG
 */
static String config(String id) {
    final String value = CONFIG.getString(id);
    return value;
}

/**
 * Initialized once, at class-load time, with the result of load().
 * NOTE(review): load() is not visible here; presumably this is the JSON-LD context
 * used during conversion. Confirm against its definition.
 */
static final Map<String, Object> context = load();

/**
 * Cache of labels keyed by GND ID with the DNB prefix stripped; consulted by
 * boostrapLabel before the index is queried.
 */
private static final Map<String, String> labelCache = new HashMap<>();

static class OpenOaiPmh extends DefaultObjectPipe<String, ObjectReceiver<Reader>> {

ByteArrayOutputStream stream = new ByteArrayOutputStream();
Expand Down Expand Up @@ -173,7 +199,8 @@ private static Model preprocess(Model model, String id) {
// See https://github.com/hbz/lobid-gnd/issues/85
// See https://github.com/hbz/lobid-gnd/issues/24
String localVocab = "http://d-nb.info/standards/";
String object = o.toString().startsWith(localVocab) ? GndOntology.label(o.toString()) : o.toString();
String object = o.toString().startsWith(localVocab) ? GndOntology.label(o.toString())
: boostrapLabel(o.toString());
Statement labelStatement = model.createLiteralStatement(model.createResource(o.toString()),
model.createProperty(label), object);
toAdd.add(labelStatement);
Expand Down Expand Up @@ -217,6 +244,22 @@ private static Model preprocess(Model model, String id) {
return model;
}

/**
 * Resolves a label for a GND entity URI from the existing index.
 *
 * Values that do not start with AuthorityResource.DNB_PREFIX are returned unchanged.
 * For GND URIs the prefix is stripped and the label is taken from labelCache if
 * present, otherwise from the "preferredName" field of the indexed document.
 * Labels found in the index are stored in labelCache so repeated references to the
 * same entity do not query the index again.
 *
 * @param fullId the object value of a statement, possibly a GND entity URI
 * @return the label for the entity, the bare ID if no label could be found, or
 *         {@code fullId} itself if it is not a GND URI
 */
private static String boostrapLabel(String fullId) {
    if (!fullId.startsWith(AuthorityResource.DNB_PREFIX)) {
        return fullId;
    }
    String id = fullId.substring(AuthorityResource.DNB_PREFIX.length());
    String cached = labelCache.get(id);
    if (cached != null) {
        return cached;
    }
    String label = getLabelFromIndex(id, "preferredName");
    // getLabelFromIndex returns the bare ID when the document is missing; such misses
    // are not cached so that an entity indexed later can still be resolved.
    if (!label.equals(id)) {
        labelCache.put(id, label);
    }
    return label;
}

/**
 * Fetches a single field of an indexed document to use as a label.
 *
 * The document is looked up by ID in the index and type configured under
 * "index.name" and "index.type".
 *
 * @param id the document ID to look up
 * @param field the name of the source field whose value is used as the label
 * @return the string form of the field value, or {@code id} if the document does not
 *         exist, has no source, or does not contain the field
 */
private static String getLabelFromIndex(String id, String field) {
    GetResponse response = CLIENT.prepareGet(config("index.name"), config("index.type"), id).execute().actionGet();
    if (!response.isExists()) {
        return id;
    }
    Map<String, Object> source = response.getSourceAsMap();
    // A document without the requested field (or without a source) must not abort the
    // conversion with a NullPointerException; fall back to the ID like a missing document.
    Object value = source == null ? null : source.get(field);
    return value == null ? id : value.toString();
}

private static String secondLevelTypeFor(String gnd, String type) {
String key = type.substring(gnd.length());
ConfigObject object = CONFIG.getObject("types");
Expand Down
18 changes: 18 additions & 0 deletions test/apps/ConvertTest.java
Expand Up @@ -133,6 +133,24 @@ public void testIriFieldStructure() throws FileNotFoundException {
assertTrue(!label.toString().isEmpty());
}

// The first geographicAreaCode entry of record 118624822 must be an object carrying
// both the ontology IRI as its id and the corresponding label.
@Test
public void testOntologyLabelEnrichment() throws FileNotFoundException {
    JsonNode record = Json.parse(jsonLdFor("118624822"));
    JsonNode firstAreaCode = record.get("geographicAreaCode").elements().next();
    assertIsObjectWithIdAndLabel(firstAreaCode);
    String expectedId = "http://d-nb.info/standards/vocab/gnd/geographic-area-code#XD-US";
    assertEquals(expectedId, firstAreaCode.get("id").textValue());
    String expectedLabel = "USA";
    assertEquals(expectedLabel, firstAreaCode.get("label").textValue());
}

// The first firstAuthor entry of record 1081942517 must be an object carrying both the
// GND URI of the author as its id and the author's label.
@Test
public void testGndLabelEnrichment() throws FileNotFoundException {
    JsonNode record = Json.parse(jsonLdFor("1081942517"));
    JsonNode firstAuthor = record.get("firstAuthor").elements().next();
    assertIsObjectWithIdAndLabel(firstAuthor);
    String expectedId = "http://d-nb.info/gnd/118624822";
    assertEquals(expectedId, firstAuthor.get("id").textValue());
    String expectedLabel = "Twain, Mark";
    assertEquals(expectedLabel, firstAuthor.get("label").textValue());
}

@Test
public void testTriplesToFramedJsonLd() throws FileNotFoundException {
Model model = ModelFactory.createDefaultModel();
Expand Down

0 comments on commit ba01880

Please sign in to comment.