From e4e281f40634b24c94b09caa46b14021bd1288bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Tue, 3 Sep 2019 18:41:14 +0200 Subject: [PATCH] Encode URIs in RDF serialization of commons media Commons Media page titles can include characters that are not valid in URLs, such as double quotes ("). If we don't encode these here, then we will produce invalid RDF N-Triples because rdf4j assumes URLs are valid. --- .../wdtk/rdf/values/StringValueConverter.java | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/values/StringValueConverter.java b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/values/StringValueConverter.java index 9c72f346c..1970845b5 100644 --- a/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/values/StringValueConverter.java +++ b/wdtk-rdf/src/main/java/org/wikidata/wdtk/rdf/values/StringValueConverter.java @@ -28,6 +28,10 @@ import org.wikidata.wdtk.rdf.PropertyRegister; import org.wikidata.wdtk.rdf.RdfWriter; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; + public class StringValueConverter extends AbstractValueConverter { public StringValueConverter(RdfWriter rdfWriter, @@ -97,13 +101,22 @@ public Value getRdfValue(StringValue value, * @return URL of the page */ static String getCommonsFileUrl(String pageName) { - return "http://commons.wikimedia.org/wiki/File:" - + pageName.replace(' ', '_'); + try { + return "http://commons.wikimedia.org/wiki/File:" + + URLEncoder.encode(pageName.replace(' ', '_'), StandardCharsets.UTF_8.toString()); + } catch (UnsupportedEncodingException e) { + // can't happen + throw new IllegalStateException(e); + } } static String getCommonsDataUrl(String pageName) { - return "http://commons.wikimedia.org/data/main/" - + pageName.replace(' ', '_'); + try { + return "http://commons.wikimedia.org/data/main/" + + URLEncoder.encode(pageName.replace(' ', '_'), StandardCharsets.UTF_8.toString()); + } catch (UnsupportedEncodingException e) { + throw new IllegalStateException(e); + } } }