Skip to content

Commit 62d1e08

Browse files
committed
Prefer decomposed form if font has GPOS mark feature
Fixes #653
1 parent d7f2177 commit 62d1e08

File tree

3 files changed

+72
-63
lines changed

3 files changed

+72
-63
lines changed

src/hb-ot-shape-complex-hebrew.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ compose_hebrew (const hb_ot_shape_normalize_context_t *c,
7070

7171
bool found = (bool) c->unicode->compose (a, b, ab);
7272

73-
if (!found && !c->plan->has_mark)
73+
if (!found)
7474
{
7575
/* Special-case Hebrew presentation forms that are excluded from
7676
* standard normalization, but wanted for old fonts. */

src/hb-ot-shape-normalize.cc

Lines changed: 67 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,14 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
294294
_hb_buffer_assert_unicode_vars (buffer);
295295

296296
hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference;
297+
if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO)
298+
{
299+
if (plan->has_mark)
300+
mode = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
301+
else
302+
mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
303+
}
304+
297305
const hb_ot_shape_normalize_context_t c = {
298306
plan,
299307
buffer,
@@ -358,65 +366,6 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
358366

359367
i = end;
360368
}
361-
362-
363-
if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE ||
364-
mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED)
365-
return;
366-
367-
/* Third round, recompose */
368-
369-
/* As noted in the comment earlier, we don't try to combine
370-
* ccc=0 chars with their previous Starter. */
371-
372-
buffer->clear_output ();
373-
count = buffer->len;
374-
unsigned int starter = 0;
375-
buffer->next_glyph ();
376-
while (buffer->idx < count && buffer->successful)
377-
{
378-
hb_codepoint_t composed, glyph;
379-
if (/* We don't try to compose a non-mark character with it's preceding starter.
380-
* This is both an optimization to avoid trying to compose every two neighboring
381-
* glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
382-
* fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
383-
HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())))
384-
{
385-
if (/* If there's anything between the starter and this char, they should have CCC
386-
* smaller than this character's. */
387-
(starter == buffer->out_len - 1 ||
388-
info_cc (buffer->prev()) < info_cc (buffer->cur())) &&
389-
/* And compose. */
390-
c.compose (&c,
391-
buffer->out_info[starter].codepoint,
392-
buffer->cur().codepoint,
393-
&composed) &&
394-
/* And the font has glyph for the composite. */
395-
font->get_nominal_glyph (composed, &glyph))
396-
{
397-
/* Composes. */
398-
buffer->next_glyph (); /* Copy to out-buffer. */
399-
if (unlikely (!buffer->successful))
400-
return;
401-
buffer->merge_out_clusters (starter, buffer->out_len);
402-
buffer->out_len--; /* Remove the second composable. */
403-
/* Modify starter and carry on. */
404-
buffer->out_info[starter].codepoint = composed;
405-
buffer->out_info[starter].glyph_index() = glyph;
406-
_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
407-
408-
continue;
409-
}
410-
}
411-
412-
/* Blocked, or doesn't compose. */
413-
buffer->next_glyph ();
414-
415-
if (info_cc (buffer->prev()) == 0)
416-
starter = buffer->out_len - 1;
417-
}
418-
buffer->swap_buffers ();
419-
420369
if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ)
421370
{
422371
/* For all CGJ, check if it prevented any reordering at all.
@@ -430,4 +379,63 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
430379
_hb_glyph_info_unhide (&buffer->info[i]);
431380
}
432381
}
382+
383+
384+
/* Third round, recompose */
385+
386+
if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS ||
387+
mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT)
388+
return;
389+
{
390+
/* As noted in the comment earlier, we don't try to combine
391+
* ccc=0 chars with their previous Starter. */
392+
393+
buffer->clear_output ();
394+
count = buffer->len;
395+
unsigned int starter = 0;
396+
buffer->next_glyph ();
397+
while (buffer->idx < count && buffer->successful)
398+
{
399+
hb_codepoint_t composed, glyph;
400+
if (/* We don't try to compose a non-mark character with its preceding starter.
401+
* This is both an optimization to avoid trying to compose every two neighboring
402+
* glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
403+
* fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
404+
HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())))
405+
{
406+
if (/* If there's anything between the starter and this char, they should have CCC
407+
* smaller than this character's. */
408+
(starter == buffer->out_len - 1 ||
409+
info_cc (buffer->prev()) < info_cc (buffer->cur())) &&
410+
/* And compose. */
411+
c.compose (&c,
412+
buffer->out_info[starter].codepoint,
413+
buffer->cur().codepoint,
414+
&composed) &&
415+
/* And the font has glyph for the composite. */
416+
font->get_nominal_glyph (composed, &glyph))
417+
{
418+
/* Composes. */
419+
buffer->next_glyph (); /* Copy to out-buffer. */
420+
if (unlikely (!buffer->successful))
421+
return;
422+
buffer->merge_out_clusters (starter, buffer->out_len);
423+
buffer->out_len--; /* Remove the second composable. */
424+
/* Modify starter and carry on. */
425+
buffer->out_info[starter].codepoint = composed;
426+
buffer->out_info[starter].glyph_index() = glyph;
427+
_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
428+
429+
continue;
430+
}
431+
}
432+
433+
/* Blocked, or doesn't compose. */
434+
buffer->next_glyph ();
435+
436+
if (info_cc (buffer->prev()) == 0)
437+
starter = buffer->out_len - 1;
438+
}
439+
buffer->swap_buffers ();
440+
}
433441
}

src/hb-ot-shape-normalize.hh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,11 @@ struct hb_ot_shape_plan_t;
3838
enum hb_ot_shape_normalization_mode_t {
3939
HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
4040
HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
41-
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */
42-
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */
41+
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* Never composes base-to-base */
42+
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* Always fully decomposes and then recomposes. */
4343

44-
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
44+
HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, /* Choose decomposed if the GPOS mark feature is available, composed otherwise. */
45+
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_AUTO
4546
};
4647

4748
HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper,

0 commit comments

Comments
 (0)