Skip to content
This repository has been archived by the owner on Apr 20, 2022. It is now read-only.

Commit

Permalink
Merge pull request kermitt2#317 from csw/bibdata-quoting
Browse files Browse the repository at this point in the history
Fix BibDataSetContextExtractor to quote replacement text

Former-commit-id: 31936a3
  • Loading branch information
kermitt2 committed May 18, 2018
2 parents 2451a58 + 732af07 commit 3aa29dd
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ private static String extractContextSentence(String cont) {
Matcher m = REF_PATTERN.matcher(cont);
if (m.find()) {
String g = m.group(1);
return m.replaceAll(g);
return m.replaceAll(Matcher.quoteReplacement(g));
} else {
throw new IllegalStateException("Implementation error: no <ref> found in" + cont);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.grobid.core.utilities;

import org.apache.commons.io.IOUtils;

import org.junit.Test;

import java.io.InputStream;
import java.nio.charset.StandardCharsets;

/**
 * Regression test for {@code BibDataSetContextExtractor}: makes sure reference text
 * containing regex-replacement metacharacters does not break citation extraction.
 */
public class BibDataSetContextExtractorTest {

    /** Classpath location of the TEI fixture used by {@link #testRefEscapes()}. */
    private static final String TEI_ESCAPE_RESOURCE = "/test/tei-escape.xml";

    /**
     * Loads the TEI fixture, whose first {@code <ref>} element has the text {@code $9,},
     * and runs citation-reference extraction on it. The test has no assertions: it
     * passes as long as the extraction call completes without throwing.
     *
     * @throws Exception if the fixture cannot be read or the extraction fails
     */
    @Test
    public void testRefEscapes() throws Exception {
        String tei;
        // try-with-resources closes the stream even when reading fails.
        try (InputStream is = this.getClass().getResourceAsStream(TEI_ESCAPE_RESOURCE)) {
            // getResourceAsStream returns null for a missing resource; fail with a
            // clear message instead of an opaque NullPointerException further down.
            if (is == null) {
                throw new IllegalStateException("Test resource not found: " + TEI_ESCAPE_RESOURCE);
            }
            tei = IOUtils.toString(is, StandardCharsets.UTF_8);
        }
        BibDataSetContextExtractor.getCitationReferences(tei);
    }

}
12 changes: 12 additions & 0 deletions grobid-core/src/test/resources/test/tei-escape.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Regression fixture for BibDataSetContextExtractor.
  The text of the first ref element below starts with "$9". In a
  java.util.regex replacement string, "$" followed by a digit is read as a
  capture-group reference, so this input exercises the code path that must
  quote the replacement text. Do not "clean up" the odd-looking ref contents.
-->
<TEI xmlns="http://www.tei-c.org/ns/1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.tei-c.org/ns/1.0 /home/lopez/grobid/grobid-home/schemas/xsd/Grobid.xsd"
xmlns:xlink="http://www.w3.org/1999/xlink">
<text xml:lang="en">
<body>
<div xmlns="http://www.tei-c.org/ns/1.0"><p>Lorem ipsum <ref type="bibr" target="#b0">$9,</ref><ref type="bibr" target="#b1">2]</ref>.
</p></div>
</body>
</text>
</TEI>

0 comments on commit 3aa29dd

Please sign in to comment.