inception-project · reckart · Apr 20, 2023 · Apr 18, 2023 · Apr 20, 2023 · Apr 20, 2023
diff --git a/inception/inception-app-webapp/src/main/java/de/tudarmstadt/ukp/inception/INCEpTION.java b/inception/inception-app-webapp/src/main/java/de/tudarmstadt/ukp/inception/INCEpTION.java
@@ -40,7 +40,7 @@
 import org.springframework.cache.annotation.EnableCaching;
 import org.springframework.context.ConfigurableApplicationContext;
 import org.springframework.scheduling.annotation.EnableAsync;
-import org.springframework.security.config.annotation.method.configuration.EnableGlobalMethodSecurity;
+import org.springframework.security.config.annotation.method.configuration.EnableMethodSecurity;
 
 import de.tudarmstadt.ukp.clarin.webanno.support.db.EmbeddedDatabaseBackupHandler;
 import de.tudarmstadt.ukp.clarin.webanno.support.standalone.LoadingSplashScreen;
@@ -60,7 +60,7 @@
 @EntityScan(basePackages = { INCEPTION_BASE_PACKAGE, WEBANNO_BASE_PACKAGE })
 @EnableAsync
 @EnableCaching
-@EnableGlobalMethodSecurity(prePostEnabled = true)
+@EnableMethodSecurity(prePostEnabled = true)
 //@formatter:on
 public class INCEpTION
     extends SpringBootServletInitializer

diff --git a/inception/inception-pdf-editor/pom.xml b/inception/inception-pdf-editor/pom.xml
@@ -24,9 +24,6 @@
   </parent>
   <name>INCEpTION - Editor - PDF (pdfbox ${pdfbox.version})</name>
   <artifactId>inception-pdf-editor</artifactId>
-  <properties>
-    <pdfbox.version>2.0.26</pdfbox.version>
-  </properties>
   <dependencies>
     <dependency>
       <groupId>de.tudarmstadt.ukp.inception.app</groupId>
@@ -94,12 +91,10 @@
     <dependency>
       <groupId>org.apache.pdfbox</groupId>
       <artifactId>pdfbox</artifactId>
-      <version>${pdfbox.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.pdfbox</groupId>
       <artifactId>fontbox</artifactId>
-      <version>${pdfbox.version}</version>
     </dependency>
 
     <dependency>

diff --git a/inception/inception-pdf-editor2/pom.xml b/inception/inception-pdf-editor2/pom.xml
@@ -106,12 +106,10 @@
     <dependency>
       <groupId>org.apache.pdfbox</groupId>
       <artifactId>pdfbox</artifactId>
-      <version>${pdfbox.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.pdfbox</groupId>
       <artifactId>fontbox</artifactId>
-      <version>${pdfbox.version}</version>
     </dependency>
 
     <dependency>

diff --git a/...tor2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/pdfbox/GlyphPositionUtils.java b/...tor2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/pdfbox/GlyphPositionUtils.java
@@ -156,7 +156,7 @@ public static Shape calculateFontBounds(TextPosition text, AffineTransform flipA
     }
 
     // source:
-    // https://github.com/apache/pdfbox/blob/10d1e91af4eb9a06af7e95460533bf3ebc1b1280/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java#L1911
+    // https://github.com/apache/pdfbox/blob/2.0.28/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java#L1911
     // The support for extracting the glyph order was added for INCEpTION
     /**
      * Normalize certain Unicode characters. For example, convert the single "fi" ligature to "f"
@@ -195,8 +195,17 @@ public static String normalizeWord(String word, List<Integer> glyphOrder)
                 }
                 else {
                     // Trim because some decompositions have an extra space, such as U+FC5E
-                    builder.append(Normalizer
-                            .normalize(word.substring(q, q + 1), Normalizer.Form.NFKC).trim());
+                    String normalized = Normalizer
+                            .normalize(word.substring(q, q + 1), Normalizer.Form.NFKC).trim();
+
+                    // Code points from U+FB1D upward include the Hebrew part of Alphabetic
+                    // Presentation Forms (U+FB1D-U+FB4F), Arabic Presentation Forms-A
+                    // (U+FB50-U+FDFF) and Arabic Presentation Forms-B (U+FE70-U+FEFF)
+                    if (0xFB1D <= c && normalized.length() > 1) {
+                        // Reverse the order of decomposed Hebrew and Arabic letters
+                        normalized = new StringBuilder(normalized).reverse().toString();
+                    }
+                    builder.append(normalized);
                 }
                 p = q + 1;
             }

diff --git a/...2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/visual/VisualPDFTextStripper.java b/...2/src/main/java/de/tudarmstadt/ukp/inception/pdfeditor2/visual/VisualPDFTextStripper.java
@@ -170,21 +170,24 @@ protected void writeString(String aText, List<TextPosition> aTextPositions) thro
 
         int unicodeLength;
         assert (unicodeLength = aTextPositions.stream() //
-                .map(TextPosition::getUnicode)//
+                .map(TextPosition::getVisuallyOrderedUnicode)//
                 .map(g -> normalizeWord(g, null))//
                 .mapToInt(String::length).sum()) == aText.length() : "Line length ["
                         + aText.length() + "] should match glyph unicode length [" + unicodeLength
                         + "] - [" + aText + "] <-> [" + aTextPositions.stream() //
-                                .map(TextPosition::getUnicode) //
+                                .map(TextPosition::getVisuallyOrderedUnicode) //
                                 .map(g -> normalizeWord(g, null)) //
                                 .collect(joining())
                         + "]";
 
-        var originalWord = aTextPositions.stream().map(TextPosition::getUnicode).collect(joining());
+        var originalWord = aTextPositions.stream() //
+                .map(TextPosition::getVisuallyOrderedUnicode) //
+                .collect(joining());
         var glyphOrder = new ArrayList<Integer>();
         var text = normalizeWord(originalWord, glyphOrder);
 
-        assert text.equals(aText) : "Text from PDFbox should match text from TextPositions";
+        assert text.equals(aText) : "Text from PDFbox [" + aText
+                + "] should match text from TextPositions [" + text + "]";
 
         if (glyphOrder.isEmpty()) {
             var cs = new ProtoVChunk(getBuffer().length(), aText, 0, false);
@@ -200,7 +203,7 @@ protected void writeString(String aText, List<TextPosition> aTextPositions) thro
 
                 // Account for glyphs that were mapped to more than one character by normalization
                 // e.g. expanded ligatures
-                String normalizedUnicode = normalizeWord(pos.getUnicode(), null);
+                String normalizedUnicode = normalizeWord(pos.getVisuallyOrderedUnicode(), null);
 
                 normalizedUnicode = reconcileGlyphWithText(aText, false, normalizedUnicode, cs.end);
 
@@ -250,7 +253,7 @@ protected void writeString(String aText, List<TextPosition> aTextPositions) thro
 
                 // Account for glyphs that were mapped to more than one character by normalization
                 // e.g. expanded ligatures
-                String normalizedUnicode = normalizeWord(pos.getUnicode(), null);
+                String normalizedUnicode = normalizeWord(pos.getVisuallyOrderedUnicode(), null);
                 var begin = cs.rtl ? gPos - (normalizedUnicode.length() - 1) : gPos;
 
                 normalizedUnicode = reconcileGlyphWithText(aText, rtl, normalizedUnicode, begin);
@@ -396,7 +399,7 @@ boolean isEmpty()
     private void assertAlignedTextPositions(String aText, List<TextPosition> aTextPositions)
     {
         int cumulativePositionLength = aTextPositions.stream()
-                .mapToInt(t -> normalizeWord(t.getUnicode(), null).length()) //
+                .mapToInt(t -> normalizeWord(t.getVisuallyOrderedUnicode(), null).length()) //
                 .sum();
 
         if (aText.length() != cumulativePositionLength) {
@@ -405,7 +408,7 @@ private void assertAlignedTextPositions(String aText, List<TextPosition> aTextPo
             System.out.println(" Text [" + aText + "]");
             StringBuilder sb = new StringBuilder();
             for (TextPosition p : aTextPositions) {
-                sb.append(p.getUnicode());
+                sb.append(p.getVisuallyOrderedUnicode());
             }
             String posText = sb.toString();
             System.out.println(" Pos [" + posText + "]");

diff --git a/...or2/src/test/java/de/tudarmstadt/ukp/inception/pdfeditor2/format/VisualPdfReaderTest.java b/...or2/src/test/java/de/tudarmstadt/ukp/inception/pdfeditor2/format/VisualPdfReaderTest.java
@@ -265,7 +265,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception
 
         var expectedText = "\n" //
                 + "\n" //
-                + " ُآَّتاب\n" //
+                + " آَُّتاب\n" //
                 + " \n" //
                 + "\n" //
                 + "\n" //
@@ -280,7 +280,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception
         assertThat(jCas.select(PdfChunk.class).asList()) //
                 .extracting(PdfChunk::getBegin, PdfChunk::getEnd, PdfChunk::getCoveredText)
                 .containsExactly( //
-                        tuple(2, 7, " ُآَّ"), //
+                        tuple(2, 7, " آَُّ"), //
                         tuple(7, 10, "تاب"), //
                         tuple(11, 12, " "));
 
@@ -303,7 +303,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception
         assertThat(actual.getPages().get(0).getChunks()) //
                 .extracting(VChunk::getBegin, VChunk::getText) //
                 .containsExactly( //
-                        tuple(2, " ُآَّ"), //
+                        tuple(2, " آَُّ"), //
                         tuple(7, "تاب"), //
                         tuple(11, " "));
 
@@ -318,7 +318,7 @@ void thatRtlCoordinatesMakeSenseSorting2() throws Exception
                 .containsExactly( //
                         tuple(//
                                 114.486824f, //
-                                new String[] { " ", "ُآ", "َّ" }, //
+                                new String[] { " ", "آُ", "َّ" }, //
                                 new float[] { 131.10103f, 120.79997f, 114.486824f }, //
                                 new float[] { 6.494995f, 10.301056f, 6.3131485f }),
                         tuple(//
@@ -347,7 +347,7 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception
 
         var expectedText = "\n" //
                 + "\n" //
-                + "بُآتَّا  \n" //
+                + "بآُتَّا  \n" //
                 + " \n" //
                 + "\n" //
                 + "\n" //
@@ -363,8 +363,8 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception
                 .extracting(PdfChunk::getBegin, PdfChunk::getEnd, PdfChunk::getCoveredText)
                 .containsExactly( //
                         tuple(2, 3, "ب"), //
-                        tuple(3, 6, "ُآت"), //
-                        tuple(6, 9, "َّا"), //
+                        tuple(3, 6, "آُت"), //
+                        tuple(6, 9, "َّا"), //
                         tuple(10, 11, " "), //
                         tuple(12, 13, " "));
 
@@ -394,8 +394,8 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception
                 .extracting(VChunk::getBegin, VChunk::getText) //
                 .containsExactly( //
                         tuple(2, "ب"), //
-                        tuple(3, "ُآت"), //
-                        tuple(6, "َّا"), //
+                        tuple(3, "آُت"), //
+                        tuple(6, "َّا"), //
                         tuple(10, " "), //
                         tuple(12, " "));
 
@@ -414,12 +414,12 @@ void thatRtlCoordinatesMakeSenseNoSorting2() throws Exception
                                 new float[] { 18.523743f }),
                         tuple( //
                                 114.47643f, //
-                                new String[] { "ُآ", "ت" }, //
+                                new String[] { "آُ", "ت" }, //
                                 new float[] { 120.79997f, 114.47643f }, //
                                 new float[] { 10.2361145f, 6.3235397f }),
                         tuple( //
                                 108.54f, //
-                                new String[] { "َّ", "ا" }, //
+                                new String[] { "َّ", "ا" }, //
                                 new float[] { 114.486824f, 108.54f }, //
                                 new float[] { 5.481781f, 5.946823f }),
                         tuple( //

diff --git a/...c/main/java/de/tudarmstadt/ukp/inception/ui/core/bootstrap/DisabledBootstrapCheckbox.java b/...c/main/java/de/tudarmstadt/ukp/inception/ui/core/bootstrap/DisabledBootstrapCheckbox.java
diff --git a/inception/pom.xml b/inception/pom.xml
@@ -74,7 +74,7 @@
     <uimafit.version>3.4.0</uimafit.version>
     <uima-json.version>0.5.0</uima-json.version>
 
-    <pdfbox.version>2.0.27</pdfbox.version>
+    <pdfbox.version>2.0.28</pdfbox.version>
 
     <spring.version>5.3.27</spring.version>
     <spring.boot.version>2.7.10</spring.boot.version>
@@ -1369,6 +1369,17 @@
         </exclusions>
       </dependency>
 
+      <dependency>
+        <groupId>org.apache.pdfbox</groupId>
+        <artifactId>pdfbox</artifactId>
+        <version>${pdfbox.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.pdfbox</groupId>
+        <artifactId>fontbox</artifactId>
+        <version>${pdfbox.version}</version>
+      </dependency>
+
       <dependency>
         <groupId>com.github.rjeschke</groupId>
         <artifactId>txtmark</artifactId>