diff --git a/crates/typst/src/layout/inline/mod.rs b/crates/typst/src/layout/inline/mod.rs index 271a8a92b0c8..6897837b0ba6 100644 --- a/crates/typst/src/layout/inline/mod.rs +++ b/crates/typst/src/layout/inline/mod.rs @@ -816,8 +816,10 @@ fn linebreak_simple<'a>( let mut last = None; breakpoints(p, |end, breakpoint| { + let prepend_hyphen = lines.last().map(should_repeat_hyphen).unwrap_or(false); + // Compute the line and its size. - let mut attempt = line(engine, p, start..end, breakpoint); + let mut attempt = line(engine, p, start..end, breakpoint, prepend_hyphen); // If the line doesn't fit anymore, we push the last fitting attempt // into the stack and rebuild the line from the attempt's end. The @@ -826,7 +828,7 @@ fn linebreak_simple<'a>( if let Some((last_attempt, last_end)) = last.take() { lines.push(last_attempt); start = last_end; - attempt = line(engine, p, start..end, breakpoint); + attempt = line(engine, p, start..end, breakpoint, prepend_hyphen); } } @@ -896,7 +898,7 @@ fn linebreak_optimized<'a>( let mut table = vec![Entry { pred: 0, total: 0.0, - line: line(engine, p, 0..0, Breakpoint::Mandatory), + line: line(engine, p, 0..0, Breakpoint::Mandatory, false), }]; let em = p.size; @@ -910,8 +912,9 @@ fn linebreak_optimized<'a>( for (i, pred) in table.iter().enumerate().skip(active) { // Layout the line. let start = pred.line.end; + let prepend_hyphen = should_repeat_hyphen(&pred.line); - let attempt = line(engine, p, start..end, breakpoint); + let attempt = line(engine, p, start..end, breakpoint, prepend_hyphen); // Determine how much the line's spaces would need to be stretched // to make it the desired width. @@ -1024,6 +1027,7 @@ fn line<'a>( p: &'a Preparation, mut range: Range, breakpoint: Breakpoint, + prepend_hyphen: bool, ) -> Line<'a> { let end = range.end; let mut justify = @@ -1091,13 +1095,25 @@ fn line<'a>( // need the shaped empty string to make the line the appropriate // height. That is the case exactly if the string is empty and there // are no other items in the line. - if hyphen || start + shaped.text.len() > range.end || maybe_adjust_last_glyph { - if hyphen || start < range.end || before.is_empty() { + if hyphen + || start + shaped.text.len() > range.end + || maybe_adjust_last_glyph + || (prepend_hyphen && before.is_empty()) + { + if hyphen + || start < range.end + || before.is_empty() + || (prepend_hyphen && before.is_empty()) + { let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end); if hyphen || shy { reshaped.push_hyphen(engine, p.fallback); } + if prepend_hyphen && before.is_empty() { + reshaped.prepend_hyphen(engine, p.fallback); + } + if let Some(last_glyph) = reshaped.glyphs.last() { if last_glyph.is_cjk_left_aligned_punctuation(gb_style) { // If the last glyph is a CJK punctuation, we want to shrink it. @@ -1143,10 +1159,18 @@ fn line<'a>( let end = range.end.min(base + shaped.text.len()); // Reshape if necessary. - if range.start + shaped.text.len() > end || maybe_adjust_first_glyph { + if range.start + shaped.text.len() > end + || maybe_adjust_first_glyph + || prepend_hyphen + { // If the range is empty, we don't want to push an empty text item. if range.start < end { - let reshaped = shaped.reshape(engine, &p.spans, range.start..end); + let mut reshaped = shaped.reshape(engine, &p.spans, range.start..end); + + if prepend_hyphen { + reshaped.prepend_hyphen(engine, p.fallback) + } + width += reshaped.width; first = Some(Item::Text(reshaped)); } @@ -1458,3 +1482,50 @@ fn overhang(c: char) -> f64 { _ => 0.0, } } + +/// Whether the hyphen should repeat at the begin of the next line +fn should_repeat_hyphen(pred_line: &Line) -> bool { + // If the predecessor line does not end with a Dash::HardHyphen, we shall not place a hyphen at + // the beginning of the next line. + if pred_line.dash != Some(Dash::HardHyphen) { + return false; + } + + // If there's a trimmed out space, we needn't repeat the hyphen. That's the case of a text like + // "... kebab é a -melhor- comida que existe", where the hyphens are a kind of emphasis marker. + if pred_line.trimmed.end != pred_line.end { + return false; + } + + // The hyphen should repeat only in the languages that requires that feature. + // For more information see the discussion at https://github.com/typst/typst/issues/3235 + if let Some(Item::Text(shape)) = pred_line.last.as_ref() { + match shape.lang.as_str() { + // Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3 + // + // Czech: see https://prirucka.ujc.cas.cz/?id=164 + // + // Croatian: see http://pravopis.hr/pravilo/spojnica/68/ + // + // Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni + // + // Portuguese: see Base XX of "Acordo Ortográfico da Língua Portuguesa de 1990" + // https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf + // + // Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/ + "dsb" | "cs" | "hr" | "pl" | "pt" | "sk" => true, + // In Spanish the hyphen is required only if the word next to hyphen isn't capitalized. + // + // See § 4.1.1.1.2.e on the "Ortografía de la lengua española" + // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea + "es" => pred_line.bidi.text[pred_line.end..] + .chars() + .next() + .map(|c| !c.is_uppercase()) + .unwrap_or(false), + _ => false, + } + } else { + false + } +} diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs index 82a33f9b09a3..0517d9dc1f4b 100644 --- a/crates/typst/src/layout/inline/shaping.rs +++ b/crates/typst/src/layout/inline/shaping.rs @@ -492,6 +492,56 @@ impl<'a> ShapedText<'a> { }); } + /// Prepend a hyphen to begin of the text. + pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) { + let world = engine.world; + let book = world.book(); + let fallback_func = if fallback { + Some(|| book.select_fallback(None, self.variant, "-")) + } else { + None + }; + let mut chain = families(self.styles) + .map(|family| book.select(family, self.variant)) + .chain(fallback_func.iter().map(|f| f())) + .flatten(); + + chain.find_map(|id| { + let font = world.font(id)?; + let ttf = font.ttf(); + let glyph_id = ttf.glyph_index('-')?; + let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?); + let range = self + .glyphs + .first() + .map(|g| g.range.start..g.range.start) + // In the unlikely chance that we hyphenate after an empty line, + // ensure that the glyph range still falls after self.base so + // that subtracting either of the endpoints by self.base doesn't + // underflow. See . + .unwrap_or_else(|| self.base..self.base); + self.width += x_advance.at(self.size); + self.glyphs.to_mut().insert( + 0, + ShapedGlyph { + font, + glyph_id: glyph_id.0, + x_advance, + x_offset: Em::zero(), + y_offset: Em::zero(), + adjustability: Adjustability::default(), + range, + safe_to_break: true, + c: '-', + span: (Span::detached(), 0), + is_justifiable: false, + script: Script::Common, + }, + ); + Some(()) + }); + } + /// Find the subslice of glyphs that represent the given text range if both /// sides are safe to break. fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> { diff --git a/tests/ref/hyphenate-es-captalized-names.png b/tests/ref/hyphenate-es-captalized-names.png new file mode 100644 index 000000000000..a11ed409b8fa Binary files /dev/null and b/tests/ref/hyphenate-es-captalized-names.png differ diff --git a/tests/ref/hyphenate-es-repeat-hyphen.png b/tests/ref/hyphenate-es-repeat-hyphen.png new file mode 100644 index 000000000000..7220e72a35ee Binary files /dev/null and b/tests/ref/hyphenate-es-repeat-hyphen.png differ diff --git a/tests/ref/hyphenate-pt-dash-emphasis.png b/tests/ref/hyphenate-pt-dash-emphasis.png new file mode 100644 index 000000000000..d538b9147eed Binary files /dev/null and b/tests/ref/hyphenate-pt-dash-emphasis.png differ diff --git a/tests/ref/hyphenate-pt-no-repeat-hyphen.png b/tests/ref/hyphenate-pt-no-repeat-hyphen.png new file mode 100644 index 000000000000..6edad8ac0404 Binary files /dev/null and b/tests/ref/hyphenate-pt-no-repeat-hyphen.png differ diff --git a/tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true.png b/tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true.png new file mode 100644 index 000000000000..0cb9df556272 Binary files /dev/null and b/tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true.png differ diff --git a/tests/ref/hyphenate-pt-repeat-hyphen-natural-word-breaking.png b/tests/ref/hyphenate-pt-repeat-hyphen-natural-word-breaking.png new file mode 100644 index 000000000000..0cb9df556272 Binary files /dev/null and b/tests/ref/hyphenate-pt-repeat-hyphen-natural-word-breaking.png differ diff --git a/tests/suite/layout/inline/hyphenate.typ b/tests/suite/layout/inline/hyphenate.typ index bcad4d93fd6d..3c2f27475eb1 100644 --- a/tests/suite/layout/inline/hyphenate.typ +++ b/tests/suite/layout/inline/hyphenate.typ @@ -50,6 +50,52 @@ It's a #emph[Tree]beard. #set text(hyphenate: true) #h(6pt) networks, the rest. +--- hyphenate-pt-repeat-hyphen-natural-word-breaking --- +// The word breaker naturally breaks arco-da-velha at arco-/-da-velha, +// so we shall repeat the hyphen, even that hyphenate is set to false. +#set page(width: 4cm, height: 2cm, margin: 2mm) +#set text(lang: "pt") + +Alguma coisa no arco-da-velha é algo que está muito longe. + +--- hyphenate-pt-repeat-hyphen-hyphenate-true --- +#set page(width: 4cm, height: 2cm, margin: 2mm) +#set text(lang: "pt", hyphenate: true) + +Alguma coisa no arco-da-velha é algo que está muito longe. + +--- hyphenate-pt-no-repeat-hyphen --- +#set page(width: 4cm, height: 2cm, margin: 2mm) +#set text(lang: "pt", hyphenate: true) + +Um médico otorrinolaringologista cuida da garganta do paciente. + +--- hyphenate-pt-dash-emphasis --- +// If the hyphen is followed by a space we shall not repeat the hyphen +// at the next line +#set page(width: 4cm, height: 2cm, margin: 2mm) +#set text(lang: "pt", hyphenate: true) + +Quebabe é a -melhor- comida que existe. + +--- hyphenate-es-repeat-hyphen --- +#set page(width: 6.25cm, height: 6cm, margin: 2mm) +#set text(lang: "es", hyphenate: true) + +Lo que entendemos por nivel léxico-semántico, en cuanto su sentido más +gramatical: es aquel que estudia el origen y forma de las palabras de +un idioma. + +--- hyphenate-es-captalized-names --- +// If the hyphen is followed by a capitalized word we shall not repeat +// the hyphen at the next line +#set page(width: 6.5cm, height: 3.5cm, margin: 2mm) +#set text(lang: "es", hyphenate: true) + +Tras el estallido de la contienda Ruiz-Giménez fue detenido junto a sus +dos hermanos y puesto bajo custodia por las autoridades republicanas, con +el objetivo de protegerle de las patrullas de milicianos. + --- costs-widow-orphan --- #set page(height: 60pt)