Skip to content

Commit 6e6f82b

Browse files
committed
Implement SYRIAC ABBREVIATION MARK with 'stch' feature
The feature is enabled for every character in the Arabic shaper. We should experiment with using it for Arabic subtending marks, though that has a directionality problem as well, since those marks are used with digits. Fixes #141
1 parent c743ec5 commit 6e6f82b

File tree

4 files changed

+244
-6
lines changed

4 files changed

+244
-6
lines changed

src/hb-ot-shape-complex-arabic.cc

Lines changed: 213 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,16 @@
2828
#include "hb-ot-shape-private.hh"
2929

3030

31+
#ifndef HB_DEBUG_ARABIC
32+
#define HB_DEBUG_ARABIC (HB_DEBUG+0)
33+
#endif
34+
35+
3136
/* buffer var allocations */
3237
#define arabic_shaping_action() complex_var_u8_0() /* arabic shaping action */
3338

39+
#define HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH HB_BUFFER_SCRATCH_FLAG_COMPLEX0
40+
3441

3542
/*
3643
* Joining types:
@@ -84,7 +91,7 @@ static const hb_tag_t arabic_features[] =
8491

8592

8693
/* Same order as the feature array */
87-
enum {
94+
enum arabic_action_t {
8895
ISOL,
8996
FINA,
9097
FIN2,
@@ -95,7 +102,11 @@ enum {
95102

96103
NONE,
97104

98-
ARABIC_NUM_FEATURES = NONE
105+
ARABIC_NUM_FEATURES = NONE,
106+
107+
/* We abuse the same byte for other things... */
108+
STCH_FIXED,
109+
STCH_REPEATING,
99110
};
100111

101112
static const struct arabic_state_table_entry {
@@ -139,6 +150,11 @@ arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
139150
hb_font_t *font,
140151
hb_buffer_t *buffer);
141152

153+
static void
154+
record_stch (const hb_ot_shape_plan_t *plan,
155+
hb_font_t *font,
156+
hb_buffer_t *buffer);
157+
142158
static void
143159
collect_features_arabic (hb_ot_shape_planner_t *plan)
144160
{
@@ -165,6 +181,9 @@ collect_features_arabic (hb_ot_shape_planner_t *plan)
165181

166182
map->add_gsub_pause (nuke_joiners);
167183

184+
map->add_global_bool_feature (HB_TAG('s','t','c','h'));
185+
map->add_gsub_pause (record_stch);
186+
168187
map->add_global_bool_feature (HB_TAG('c','c','m','p'));
169188
map->add_global_bool_feature (HB_TAG('l','o','c','l'));
170189

@@ -208,8 +227,10 @@ struct arabic_shape_plan_t
208227
* mask_array[NONE] == 0. */
209228
hb_mask_t mask_array[ARABIC_NUM_FEATURES + 1];
210229

211-
bool do_fallback;
212230
arabic_fallback_plan_t *fallback_plan;
231+
232+
unsigned int do_fallback : 1;
233+
unsigned int has_stch : 1;
213234
};
214235

215236
void *
@@ -220,6 +241,7 @@ data_create_arabic (const hb_ot_shape_plan_t *plan)
220241
return NULL;
221242

222243
arabic_plan->do_fallback = plan->props.script == HB_SCRIPT_ARABIC;
244+
arabic_plan->has_stch = !!plan->map.get_1_mask (HB_TAG ('s','t','c','h'));
223245
for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) {
224246
arabic_plan->mask_array[i] = plan->map.get_1_mask (arabic_features[i]);
225247
arabic_plan->do_fallback = arabic_plan->do_fallback &&
@@ -320,8 +342,6 @@ setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan,
320342
hb_glyph_info_t *info = buffer->info;
321343
for (unsigned int i = 0; i < count; i++)
322344
info[i].mask |= arabic_plan->mask_array[info[i].arabic_shaping_action()];
323-
324-
HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
325345
}
326346

327347
static void
@@ -371,6 +391,193 @@ arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
371391
arabic_fallback_plan_shape (fallback_plan, font, buffer);
372392
}
373393

394+
/*
395+
* Stretch feature: "stch".
396+
* See example here:
397+
* https://www.microsoft.com/typography/OpenTypeDev/syriac/intro.htm
398+
* We implement this in a generic way, such that the Arabic subtending
399+
* marks can use it as well.
400+
*/
401+
402+
static void
403+
record_stch (const hb_ot_shape_plan_t *plan,
404+
hb_font_t *font,
405+
hb_buffer_t *buffer)
406+
{
407+
const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
408+
if (!arabic_plan->has_stch)
409+
return;
410+
411+
/* 'stch' feature was just applied. Look for anything that multiplied,
412+
* and record it for stch treatment later. Note that rtlm, frac, etc
413+
* are applied before stch, but we assume that they didn't result in
414+
* anything multiplying into 5 pieces, so it's safe-ish... */
415+
416+
unsigned int count = buffer->len;
417+
hb_glyph_info_t *info = buffer->info;
418+
for (unsigned int i = 0; i < count; i++)
419+
if (unlikely (_hb_glyph_info_multiplied (&info[i])))
420+
{
421+
unsigned int comp = _hb_glyph_info_get_lig_comp (&info[i]);
422+
info[i].arabic_shaping_action() = comp % 2 ? STCH_REPEATING : STCH_FIXED;
423+
buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH;
424+
}
425+
}
426+
427+
static void
428+
apply_stch (const hb_ot_shape_plan_t *plan,
429+
hb_buffer_t *buffer,
430+
hb_font_t *font)
431+
{
432+
if (likely (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH)))
433+
return;
434+
435+
/* The Arabic shaper currently always processes in RTL mode, so we should
436+
* stretch / position the stretched pieces to the left / preceding glyphs. */
437+
438+
/* We do a two pass implementation:
439+
* First pass calculates the exact number of extra glyphs we need,
440+
* We then enlarge buffer to have that much room,
441+
* Second pass applies the stretch, copying things to the end of buffer.
442+
*/
443+
444+
/* 30 = 2048 / 70.
445+
* https://www.microsoft.com/typography/cursivescriptguidelines.mspx */
446+
hb_position_t overlap = font->x_scale / 30;
447+
DEBUG_MSG (ARABIC, NULL, "overlap for stretching is %d", overlap);
448+
int sign = font->x_scale < 0 ? -1 : +1;
449+
unsigned int extra_glyphs_needed = 0; // Set during MEASURE, used during CUT
450+
451+
for (enum step_t { MEASURE, CUT } step = MEASURE; step <= CUT; step = (step_t) (step + 1))
452+
{
453+
unsigned int count = buffer->len;
454+
hb_glyph_info_t *info = buffer->info;
455+
hb_glyph_position_t *pos = buffer->pos;
456+
unsigned int new_len = count + extra_glyphs_needed; // write head during CUT
457+
unsigned int j = new_len;
458+
for (unsigned int i = count; i; i--)
459+
{
460+
if (!hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
461+
{
462+
if (step == CUT)
463+
{
464+
--j;
465+
info[j] = info[i - 1];
466+
pos[j] = pos[i - 1];
467+
}
468+
continue;
469+
}
470+
471+
/* Yay, justification! */
472+
473+
hb_position_t w_total = 0; // Total to be filled
474+
hb_position_t w_fixed = 0; // Sum of fixed tiles
475+
hb_position_t w_repeating = 0; // Sum of repeating tiles
476+
int n_fixed = 0;
477+
int n_repeating = 0;
478+
479+
unsigned int end = i;
480+
while (i &&
481+
hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
482+
{
483+
i--;
484+
hb_glyph_extents_t extents;
485+
if (!font->get_glyph_extents (info[i].codepoint, &extents))
486+
extents.width = 0;
487+
extents.width -= overlap;
488+
if (info[i].arabic_shaping_action() == STCH_FIXED)
489+
{
490+
w_fixed += extents.width;
491+
n_fixed++;
492+
}
493+
else
494+
{
495+
w_repeating += extents.width;
496+
n_repeating++;
497+
}
498+
}
499+
unsigned int start = i;
500+
unsigned int context = i;
501+
while (context &&
502+
!hb_in_range<unsigned> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) &&
503+
(_hb_glyph_info_is_default_ignorable (&info[context - 1]) ||
504+
HB_UNICODE_GENERAL_CATEGORY_IS_WORD (_hb_glyph_info_get_general_category (&info[context - 1]))))
505+
{
506+
context--;
507+
w_total += pos[context].x_advance;
508+
}
509+
i++; // Don't touch i again.
510+
511+
DEBUG_MSG (ARABIC, NULL, "%s stretch at (%d,%d,%d)",
512+
step == MEASURE ? "measuring" : "cutting", context, start, end);
513+
DEBUG_MSG (ARABIC, NULL, "rest of word: count=%d width %d", start - context, w_total);
514+
DEBUG_MSG (ARABIC, NULL, "fixed tiles: count=%d width=%d", n_fixed, w_fixed);
515+
DEBUG_MSG (ARABIC, NULL, "repeating tiles: count=%d width=%d", n_repeating, w_repeating);
516+
517+
/* Number of additional times to repeat each repeating tile. */
518+
int n_copies = 0;
519+
520+
hb_position_t w_remaining = w_total - w_fixed - overlap;
521+
if (sign * w_remaining > sign * w_repeating && sign * w_repeating > 0)
522+
n_copies = (sign * w_remaining + sign * w_repeating / 2) / (sign * w_repeating) - 1;
523+
524+
if (step == MEASURE)
525+
{
526+
extra_glyphs_needed += n_copies * n_repeating;
527+
DEBUG_MSG (ARABIC, NULL, "will add extra %d copies of repeating tiles", n_copies);
528+
}
529+
else
530+
{
531+
hb_position_t x_offset = -overlap;
532+
for (unsigned int k = end; k > start; k--)
533+
{
534+
hb_glyph_extents_t extents;
535+
if (!font->get_glyph_extents (info[k - 1].codepoint, &extents))
536+
extents.width = 0;
537+
extents.width -= overlap;
538+
539+
unsigned int repeat = 1;
540+
if (info[k - 1].arabic_shaping_action() == STCH_REPEATING)
541+
repeat += n_copies;
542+
543+
DEBUG_MSG (ARABIC, NULL, "appending %d copies of glyph %d; j=%d",
544+
repeat, info[k - 1].codepoint, j);
545+
for (unsigned int n = 0; n < repeat; n++)
546+
{
547+
x_offset -= extents.width;
548+
pos[k - 1].x_offset = x_offset;
549+
/* Append copy. */
550+
--j;
551+
info[j] = info[k - 1];
552+
pos[j] = pos[k - 1];
553+
}
554+
}
555+
}
556+
}
557+
558+
if (step == MEASURE)
559+
{
560+
if (unlikely (!buffer->ensure (count + extra_glyphs_needed)))
561+
break;
562+
}
563+
else
564+
{
565+
assert (j == 0);
566+
buffer->len = new_len;
567+
}
568+
}
569+
}
570+
571+
572+
static void
573+
postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan,
574+
hb_buffer_t *buffer,
575+
hb_font_t *font)
576+
{
577+
apply_stch (plan, buffer, font);
578+
579+
HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
580+
}
374581

375582
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
376583
{
@@ -380,7 +587,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
380587
data_create_arabic,
381588
data_destroy_arabic,
382589
NULL, /* preprocess_text */
383-
NULL, /* postprocess_glyphs */
590+
postprocess_glyphs_arabic,
384591
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
385592
NULL, /* decompose */
386593
NULL, /* compose */

src/hb-unicode-private.hh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,5 +362,24 @@ extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
362362
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
363363
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
364364

365+
/* Non-zero when gen_cat is one of the general categories counted as part
 * of a "word": unassigned and private-use code points, plus every letter,
 * mark, number and symbol category listed below. */
#define HB_UNICODE_GENERAL_CATEGORY_IS_WORD(gen_cat) \
	(FLAG_SAFE (gen_cat) & \
	 (/* Unassigned and private use */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) | \
	  /* Letters */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | \
	  /* Marks */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
	  /* Numbers */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) | \
	  /* Symbols */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL)))
365384

366385
#endif /* HB_UNICODE_PRIVATE_HH */
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
abbreviation-mark.txt
12
alaph.txt
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
ܐܒ
2+
ܐ܏
3+
ܐ܏ܒ
4+
ܐ܏ܒܓ
5+
ܐ܏ܒܓܕ
6+
ܐ܏ܒܓܕܐ
7+
ܐ܏ܒܓܕܐܐܐܐܐܐܐܐܐ
8+
ܐ܏ܒܓܕܓܓܓܓܓܓ
9+
ܐ܏ܒܓ
10+
11+

0 commit comments

Comments
 (0)