Skip to content

Commit

Permalink
Handle multilingual strings to improve text shaping results (fix py-pdf#1187) (py-pdf#1193)
Browse files Browse the repository at this point in the history

* automatically detect unicode script

* Add files via upload

* Fix fragment width with text shaping

* add test and changelog
  • Loading branch information
andersonhc committed Jun 6, 2024
1 parent f0bd468 commit fbbb3f7
Show file tree
Hide file tree
Showing 9 changed files with 2,586 additions and 6 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
* [`Templates`](https://py-pdf.github.io/fpdf2/fpdf/Templates.html) can now be also defined in JSON files.
* support to optionally set `wrapmode` in templates (default `"WORD"` can optionally be set to `"CHAR"` to support wrapping on characters for scripts like Chinese or Japanese) - _cf._ [#1159](https://github.com/py-pdf/fpdf2/issues/1159)
* support for quadratic and cubic Bézier curves with [`FPDF.bezier()`](https://py-pdf.github.io/fpdf2/fpdf/Shapes.html#fpdf.fpdf.FPDF.bezier)
* automatic detection of the Unicode script of the input text: the text is now split into separate fragments wherever the script changes, improving text shaping results
### Fixed
* [`fpdf.drawing.DeviceCMYK`](https://py-pdf.github.io/fpdf2/fpdf/drawing.html#fpdf.drawing.DeviceCMYK) objects can now be passed to [`FPDF.set_draw_color()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.set_draw_color), [`FPDF.set_fill_color()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.set_fill_color) and [`FPDF.set_text_color()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.set_text_color) without raising a `ValueError`: [documentation](https://py-pdf.github.io/fpdf2/Text.html#text-formatting).
* individual `/Resources` directories are now properly created for each document page. This change ensures better compliance with the PDF specification but results in a slight increase in the size of PDF documents. You can still use the old behavior by setting `FPDF().single_resources_object = True`.
* individual `/Resources` directories are now properly created for each document page. This change ensures better compliance with the PDF specification but results in a slight increase in the size of PDF documents. You can still use the old behavior by setting `FPDF().single_resources_object = True`
* line size calculation for fragments when text shaping is used
### Changed
* [`FPDF.table()`](https://py-pdf.github.io/fpdf2/Tables.html) now raises an error when a single row is too high to be rendered on a single page
### Deprecated
Expand Down
4 changes: 1 addition & 3 deletions fpdf/fonts.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,7 @@ def shaped_text_width(self, text, font_size_pt, text_shaping_parms):
text_width = 0
for pos in glyph_positions:
text_width += (
round(self.scale * (pos.x_advance + pos.x_offset) + 0.001)
* font_size_pt
* 0.001
round(self.scale * pos.x_advance + 0.001) * font_size_pt * 0.001
)
return (len(glyph_positions), text_width)

Expand Down
19 changes: 17 additions & 2 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class Image:
from .syntax import DestinationXYZ, PDFArray, PDFDate
from .table import Table, draw_box_borders
from .text_region import TextRegionMixin, TextColumns
from .unicode_script import UnicodeScript, get_unicode_script
from .util import get_scale_factor, Padding

# Public global variables:
Expand Down Expand Up @@ -3379,7 +3380,7 @@ def get_fallback_font(self, char, style=""):

def _parse_chars(self, text: str, markdown: bool) -> Iterator[Fragment]:
"Split text into fragments"
if not markdown and (not self.is_ttf_font or not self._fallback_font_ids):
if not markdown and not self.is_ttf_font:
yield Fragment(text, self._get_current_graphics_state(), self.k)
return
txt_frag, in_bold, in_italics, in_underline = (
Expand All @@ -3389,9 +3390,10 @@ def _parse_chars(self, text: str, markdown: bool) -> Iterator[Fragment]:
bool(self.underline),
)
current_fallback_font = None
current_text_script = None

def frag():
nonlocal txt_frag, current_fallback_font
nonlocal txt_frag, current_fallback_font, current_text_script
gstate = self._get_current_graphics_state()
gstate["font_style"] = ("B" if in_bold else "") + (
"I" if in_italics else ""
Expand All @@ -3406,6 +3408,7 @@ def frag():
)
gstate["current_font"] = self.fonts[current_fallback_font]
current_fallback_font = None
current_text_script = None
fragment = Fragment(
txt_frag,
gstate,
Expand All @@ -3426,6 +3429,16 @@ def frag():
self.MARKDOWN_UNDERLINE_MARKER,
)
half_marker = text[0]
text_script = get_unicode_script(text[0])
if text_script not in (
UnicodeScript.COMMON,
UnicodeScript.UNKNOWN,
current_text_script,
):
if txt_frag and current_text_script:
yield frag()
current_text_script = text_script

# Check that previous & next characters are not identical to the marker:
if markdown:
if (
Expand Down Expand Up @@ -5132,6 +5145,8 @@ def output(
DeprecationWarning,
stacklevel=get_stack_level(),
)
# Clear the get_unicode_script() cache to free up memory when the document is output
get_unicode_script.cache_clear()
# Finish document if necessary:
if not self.buffer:
if self.page == 0:
Expand Down
Loading

0 comments on commit fbbb3f7

Please sign in to comment.