diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java index ffbd956e187..4f2185682ad 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java @@ -51,6 +51,8 @@ public GsubWorker getGsubWorker(CmapLookup cmapLookup, GsubData gsubData) return new GsubWorkerForGujarati(cmapLookup, gsubData); case LATIN: return new GsubWorkerForLatin(gsubData); + case DFLT: + return new GsubWorkerForDflt(gsubData); default: return new DefaultGsubWorker(); } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java new file mode 100644 index 00000000000..30f416f5b7b --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDflt.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fontbox.ttf.gsub; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.ttf.model.GsubData; +import org.apache.fontbox.ttf.model.ScriptFeature; + +/** + * DFLT (Default) script-specific implementation of GSUB system. + * + *

<p>According to the OpenType specification, a Script table with the script tag 'DFLT' (default) + * is used in fonts to define features that are not script-specific. Applications should use the + * DFLT script table when no script table exists for the specific script of the text being + * processed, or when text lacks a defined script (containing only symbols or punctuation).</p>

+ * + *

<p>This implementation applies common, script-neutral typographic features that work across + * writing systems. The feature order follows standard OpenType recommendations for universal + * glyph substitutions.</p>

+ * + *

Reference: + * + * OpenType ScriptList Table Specification

+ * + */ +public class GsubWorkerForDflt implements GsubWorker +{ + private static final Log LOG = LogFactory.getLog(GsubWorkerForDflt.class); + + /** + * Script-neutral features in recommended processing order. + * + * + * + * Note: This feature list focuses on common GSUB (substitution) features. + * GPOS features like 'kern', 'mark', 'mkmk' are handled separately. + */ + private static final List FEATURES_IN_ORDER = Arrays.asList("ccmp", "liga", "clig", "calt"); + + private final GsubData gsubData; + + GsubWorkerForDflt(GsubData gsubData) + { + this.gsubData = gsubData; + } + + @Override + public List applyTransforms(List originalGlyphIds) + { + List intermediateGlyphsFromGsub = originalGlyphIds; + + for (String feature : FEATURES_IN_ORDER) + { + if (!gsubData.isFeatureSupported(feature)) + { + LOG.debug("the feature " + feature + " was not found"); + continue; + } + + LOG.debug("applying the feature " + feature); + + ScriptFeature scriptFeature = gsubData.getFeature(feature); + + intermediateGlyphsFromGsub = applyGsubFeature(scriptFeature, + intermediateGlyphsFromGsub); + } + + return Collections.unmodifiableList(intermediateGlyphsFromGsub); + } + + private List applyGsubFeature(ScriptFeature scriptFeature, + List originalGlyphs) + { + if (scriptFeature.getAllGlyphIdsForSubstitution().isEmpty()) + { + LOG.debug("getAllGlyphIdsForSubstitution() for " + scriptFeature.getName() + " is empty"); + return originalGlyphs; + } + + GlyphArraySplitter glyphArraySplitter = new GlyphArraySplitterRegexImpl( + scriptFeature.getAllGlyphIdsForSubstitution()); + + List> tokens = glyphArraySplitter.split(originalGlyphs); + List gsubProcessedGlyphs = new ArrayList<>(); + + for (List chunk : tokens) + { + if (scriptFeature.canReplaceGlyphs(chunk)) + { + // gsub system kicks in, you get the glyphId directly + int glyphId = scriptFeature.getReplacementForGlyphs(chunk); + gsubProcessedGlyphs.add(glyphId); + } + else + { + gsubProcessedGlyphs.addAll(chunk); + } + } + + 
LOG.debug("originalGlyphs: " + originalGlyphs + ", gsubProcessedGlyphs: " + + gsubProcessedGlyphs); + + return gsubProcessedGlyphs; + } +} diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java b/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java index 203b994a6d4..bcae03e1845 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java @@ -36,6 +36,7 @@ public enum Language DEVANAGARI(new String[] { "dev2", "deva" }), GUJARATI(new String[] { "gjr2", "gujr" }), LATIN(new String[] { "latn" }), + DFLT(new String[] { "DFLT" }), /** * An entry explicitly denoting the absence of any concrete language. May be useful when no actual glyph diff --git a/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java b/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java new file mode 100644 index 00000000000..494ccf60605 --- /dev/null +++ b/fontbox/src/test/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDfltTest.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fontbox.ttf.gsub; + +import org.apache.fontbox.ttf.CmapLookup; +import org.apache.fontbox.ttf.TTFParser; +import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.pdfbox.io.RandomAccessReadBufferedFile; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +/** + * Integration test for {@link GsubWorkerForDflt}. Tests DFLT (default) script GSUB worker. + * + *

<p>The DFLT script is used for script-neutral typographic features that work across + * writing systems, particularly when text lacks a specific script (symbols, punctuation) + * or when no script-specific table exists.</p>

+ * + *

<p>JosefinSans-Italic.ttf (SIL Open Font License) uses DFLT script and has standard ligatures + * (fi, fl) which are used for testing GSUB transformations. Words without ligature sequences + * (like "font" or "code") pass through unchanged, while words containing "fi" or "fl" are + * transformed to use ligature glyphs.</p>

+ * + */ +class GsubWorkerForDfltTest +{ + private static final String JOSEFIN_SANS_TTF = + "src/test/resources/ttf/JosefinSans-Italic.ttf"; + + private static CmapLookup cmapLookup; + private static GsubWorker gsubWorkerForDflt; + + @BeforeAll + static void init() throws IOException + { + try (TrueTypeFont ttf = new TTFParser().parse(new RandomAccessReadBufferedFile(JOSEFIN_SANS_TTF))) + { + cmapLookup = ttf.getUnicodeCmapLookup(); + gsubWorkerForDflt = new GsubWorkerFactory().getGsubWorker(cmapLookup, ttf.getGsubData()); + } + } + + @Test + void testCorrectWorkerType() + { + assertInstanceOf(GsubWorkerForDflt.class, gsubWorkerForDflt); + } + + static Stream provideTransformTestCases() + { + return Stream.of( + // No ligature - text passes through unchanged + Arguments.of("code", Arrays.asList(229, 293, 235, 237), "no ligature sequences"), + // Simple ligature + Arguments.of("fi", Collections.singletonList(407), "fi -> ligature"), + // Ligature within word + Arguments.of("office", Arrays.asList(293, 257, 407, 229, 237), "ffi -> f + fi-ligature"), + // Multi-f sequence + Arguments.of("ffl", Arrays.asList(257, 408), "ffl -> f + fl-ligature") + ); + } + + @ParameterizedTest(name = "{0}: {2}") + @MethodSource("provideTransformTestCases") + void testApplyTransforms(String input, List expectedGlyphs, String description) + { + List result = gsubWorkerForDflt.applyTransforms(getGlyphIds(input)); + assertEquals(expectedGlyphs, result); + } + + @Test + void testApplyTransforms_immutableResult() + { + List result = gsubWorkerForDflt.applyTransforms(getGlyphIds("abc")); + + assertThrows(UnsupportedOperationException.class, () -> result.add(999)); + assertThrows(UnsupportedOperationException.class, () -> result.remove(0)); + } + + private static List getGlyphIds(String word) + { + List originalGlyphIds = new ArrayList<>(); + + for (char unicodeChar : word.toCharArray()) + { + int glyphId = cmapLookup.getGlyphId(unicodeChar); + assertTrue(glyphId > 0); + 
originalGlyphIds.add(glyphId); + } + + return originalGlyphIds; + } +} diff --git a/fontbox/src/test/resources/ttf/JosefinSans-Italic.ttf b/fontbox/src/test/resources/ttf/JosefinSans-Italic.ttf new file mode 100644 index 00000000000..e8d2853f3a4 Binary files /dev/null and b/fontbox/src/test/resources/ttf/JosefinSans-Italic.ttf differ