Skip to content

Commit

Permalink
Merge pull request #3962 from inception-project/refactoring/3886-Upgrade-dependencies
Browse files Browse the repository at this point in the history

#3886: Upgrade dependencies
  • Loading branch information
reckart committed Apr 20, 2023
2 parents 1b8e9eb + e9df76e commit 6cba19f
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.security.config.annotation.method.configuration.EnableGlobalMethodSecurity;
import org.springframework.security.config.annotation.method.configuration.EnableMethodSecurity;

import de.tudarmstadt.ukp.clarin.webanno.support.db.EmbeddedDatabaseBackupHandler;
import de.tudarmstadt.ukp.clarin.webanno.support.standalone.LoadingSplashScreen;
Expand All @@ -60,7 +60,7 @@
@EntityScan(basePackages = { INCEPTION_BASE_PACKAGE, WEBANNO_BASE_PACKAGE })
@EnableAsync
@EnableCaching
@EnableGlobalMethodSecurity(prePostEnabled = true)
@EnableMethodSecurity(prePostEnabled = true)
//@formatter:on
public class INCEpTION
extends SpringBootServletInitializer
Expand Down
5 changes: 0 additions & 5 deletions inception/inception-pdf-editor/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
</parent>
<name>INCEpTION - Editor - PDF (pdfbox ${pdfbox.version})</name>
<artifactId>inception-pdf-editor</artifactId>
<properties>
<pdfbox.version>2.0.26</pdfbox.version>
</properties>
<dependencies>
<dependency>
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
Expand Down Expand Up @@ -94,12 +91,10 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>

<dependency>
Expand Down
2 changes: 0 additions & 2 deletions inception/inception-pdf-editor2/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,10 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ public static Shape calculateFontBounds(TextPosition text, AffineTransform flipA
}

// source:
// https://github.com/apache/pdfbox/blob/10d1e91af4eb9a06af7e95460533bf3ebc1b1280/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java#L1911
// https://github.com/apache/pdfbox/blob/2.0.28/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java#L1911
// The support for extracting the glyph order was added for INCEpTION
/**
* Normalize certain Unicode characters. For example, convert the single "fi" ligature to "f"
Expand Down Expand Up @@ -195,8 +195,17 @@ public static String normalizeWord(String word, List<Integer> glyphOrder)
}
else {
// Trim because some decompositions have an extra space, such as U+FC5E
builder.append(Normalizer
.normalize(word.substring(q, q + 1), Normalizer.Form.NFKC).trim());
String normalized = Normalizer
.normalize(word.substring(q, q + 1), Normalizer.Form.NFKC).trim();

// Hebrew in Alphabetic Presentation Forms from FB1D to FB4F and
// Arabic Presentation Forms-A from FB50 to FDFF and
// Arabic Presentation Forms-B from FE70 to FEFF
if (0xFB1D <= c && normalized.length() > 1) {
// Reverse the order of decomposed Hebrew and Arabic letters
normalized = new StringBuilder(normalized).reverse().toString();
}
builder.append(normalized);
}
p = q + 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,21 +170,24 @@ protected void writeString(String aText, List<TextPosition> aTextPositions) thro

int unicodeLength;
assert (unicodeLength = aTextPositions.stream() //
.map(TextPosition::getUnicode)//
.map(TextPosition::getVisuallyOrderedUnicode)//
.map(g -> normalizeWord(g, null))//
.mapToInt(String::length).sum()) == aText.length() : "Line length ["
+ aText.length() + "] should match glyph unicode length [" + unicodeLength
+ "] - [" + aText + "] <-> [" + aTextPositions.stream() //
.map(TextPosition::getUnicode) //
.map(TextPosition::getVisuallyOrderedUnicode) //
.map(g -> normalizeWord(g, null)) //
.collect(joining())
+ "]";

var originalWord = aTextPositions.stream().map(TextPosition::getUnicode).collect(joining());
var originalWord = aTextPositions.stream() //
.map(TextPosition::getVisuallyOrderedUnicode) //
.collect(joining());
var glyphOrder = new ArrayList<Integer>();
var text = normalizeWord(originalWord, glyphOrder);

assert text.equals(aText) : "Text from PDFbox should match text from TextPositions";
assert text.equals(aText) : "Text from PDFbox [" + aText
+ "] should match text from TextPositions [" + text + "]";

if (glyphOrder.isEmpty()) {
var cs = new ProtoVChunk(getBuffer().length(), aText, 0, false);
Expand All @@ -200,7 +203,7 @@ protected void writeString(String aText, List<TextPosition> aTextPositions) thro

// Account for glyphs that were mapped to more than one character by normalization
// e.g. expanded ligatures
String normalizedUnicode = normalizeWord(pos.getUnicode(), null);
String normalizedUnicode = normalizeWord(pos.getVisuallyOrderedUnicode(), null);

normalizedUnicode = reconcileGlyphWithText(aText, false, normalizedUnicode, cs.end);

Expand Down Expand Up @@ -250,7 +253,7 @@ protected void writeString(String aText, List<TextPosition> aTextPositions) thro

// Account for glyphs that were mapped to more than one character by normalization
// e.g. expanded ligatures
String normalizedUnicode = normalizeWord(pos.getUnicode(), null);
String normalizedUnicode = normalizeWord(pos.getVisuallyOrderedUnicode(), null);
var begin = cs.rtl ? gPos - (normalizedUnicode.length() - 1) : gPos;

normalizedUnicode = reconcileGlyphWithText(aText, rtl, normalizedUnicode, begin);
Expand Down Expand Up @@ -396,7 +399,7 @@ boolean isEmpty()
private void assertAlignedTextPositions(String aText, List<TextPosition> aTextPositions)
{
int cumulativePositionLength = aTextPositions.stream()
.mapToInt(t -> normalizeWord(t.getUnicode(), null).length()) //
.mapToInt(t -> normalizeWord(t.getVisuallyOrderedUnicode(), null).length()) //
.sum();

if (aText.length() != cumulativePositionLength) {
Expand All @@ -405,7 +408,7 @@ private void assertAlignedTextPositions(String aText, List<TextPosition> aTextPo
System.out.println(" Text [" + aText + "]");
StringBuilder sb = new StringBuilder();
for (TextPosition p : aTextPositions) {
sb.append(p.getUnicode());
sb.append(p.getVisuallyOrderedUnicode());
}
String posText = sb.toString();
System.out.println(" Pos [" + posText + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception

var expectedText = "\n" //
+ "\n" //
+ " ُآَّتاب\n" //
+ " آَُّتاب\n" //
+ " \n" //
+ "\n" //
+ "\n" //
Expand All @@ -280,7 +280,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception
assertThat(jCas.select(PdfChunk.class).asList()) //
.extracting(PdfChunk::getBegin, PdfChunk::getEnd, PdfChunk::getCoveredText)
.containsExactly( //
tuple(2, 7, " ُآَّ"), //
tuple(2, 7, " آَُّ"), //
tuple(7, 10, "تاب"), //
tuple(11, 12, " "));

Expand All @@ -303,7 +303,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception
assertThat(actual.getPages().get(0).getChunks()) //
.extracting(VChunk::getBegin, VChunk::getText) //
.containsExactly( //
tuple(2, " ُآَّ"), //
tuple(2, " آَُّ"), //
tuple(7, "تاب"), //
tuple(11, " "));

Expand All @@ -318,7 +318,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception
.containsExactly( //
tuple(//
114.486824f, //
new String[] { " ", "ُآ", "َّ" }, //
new String[] { " ", "آُ", "َّ" }, //
new float[] { 131.10103f, 120.79997f, 114.486824f }, //
new float[] { 6.494995f, 10.301056f, 6.3131485f }),
tuple(//
Expand Down Expand Up @@ -347,7 +347,7 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception

var expectedText = "\n" //
+ "\n" //
+ "بُآتَّا \n" //
+ "بآُتَّا \n" //
+ " \n" //
+ "\n" //
+ "\n" //
Expand All @@ -363,8 +363,8 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception
.extracting(PdfChunk::getBegin, PdfChunk::getEnd, PdfChunk::getCoveredText)
.containsExactly( //
tuple(2, 3, "ب"), //
tuple(3, 6, "ُآت"), //
tuple(6, 9, "َّا"), //
tuple(3, 6, "آُت"), //
tuple(6, 9, "َّا"), //
tuple(10, 11, " "), //
tuple(12, 13, " "));

Expand Down Expand Up @@ -394,8 +394,8 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception
.extracting(VChunk::getBegin, VChunk::getText) //
.containsExactly( //
tuple(2, "ب"), //
tuple(3, "ُآت"), //
tuple(6, "َّا"), //
tuple(3, "آُت"), //
tuple(6, "َّا"), //
tuple(10, " "), //
tuple(12, " "));

Expand All @@ -414,12 +414,12 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception
new float[] { 18.523743f }),
tuple( //
114.47643f, //
new String[] { "ُآ", "ت" }, //
new String[] { "آُ", "ت" }, //
new float[] { 120.79997f, 114.47643f }, //
new float[] { 10.2361145f, 6.3235397f }),
tuple( //
108.54f, //
new String[] { "َّ", "ا" }, //
new String[] { "َّ", "ا" }, //
new float[] { 114.486824f, 108.54f }, //
new float[] { 5.481781f, 5.946823f }),
tuple( //
Expand Down

This file was deleted.

13 changes: 12 additions & 1 deletion inception/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
<uimafit.version>3.4.0</uimafit.version>
<uima-json.version>0.5.0</uima-json.version>

<pdfbox.version>2.0.27</pdfbox.version>
<pdfbox.version>2.0.28</pdfbox.version>

<spring.version>5.3.27</spring.version>
<spring.boot.version>2.7.10</spring.boot.version>
Expand Down Expand Up @@ -1369,6 +1369,17 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>

<dependency>
<groupId>com.github.rjeschke</groupId>
<artifactId>txtmark</artifactId>
Expand Down

0 comments on commit 6cba19f

Please sign in to comment.