@@ -447,6 +447,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
447447#include "termchar.h"
448448#include "dispextern.h"
449449#include "character.h"
450+ #include "category.h"
450451#include "buffer.h"
451452#include "charset.h"
452453#include "indent.h"
@@ -508,6 +509,80 @@ static Lisp_Object list_of_error;
508509 && (*BYTE_POS_ADDR (IT_BYTEPOS (*it)) == ' ' \
509510 || *BYTE_POS_ADDR (IT_BYTEPOS (*it)) == '\t'))))
510511
512+ /* These are the categories we consult when choosing where word-wrap
513+    may break a line.  They are defined by kinsoku.el and characters.el.  */
514+ #define NOT_AT_EOL '<'  /* Must not appear at the end of a line.  */
515+ #define NOT_AT_BOL '>'  /* Must not appear at the beginning of a line.  */
516+ #define LINE_BREAKABLE '|'  /* A line may be broken after it.  */
517+
518+ /* Return true if the character IT is positioned on has category CAT.
519+    Return false if it does not, or if no character can be fetched.  */
520+ static bool
521+ it_char_has_category (struct it *it, int cat)
522+ {
523+   int c;
524+
525+   if (it->what == IT_CHARACTER)
526+     c = it->c;
527+   else if (STRINGP (it->string))
528+     c = SREF (it->string, IT_STRING_BYTEPOS (*it));
529+   else if (it->s)
530+     c = it->s[IT_BYTEPOS (*it)];
531+   else
532+     c = IT_BYTEPOS (*it) < ZV_BYTE ? *BYTE_POS_ADDR (IT_BYTEPOS (*it)) : 0;
533+
534+   return c != 0 && CHAR_HAS_CATEGORY (c, cat);
535+ }
536+
537+ /* Return true if a line break may be placed before the character IT
538+    is currently on.  Whitespace never qualifies: breaking before a
539+    space or tab would leave it at the start of the next line.  */
540+ static bool
541+ char_can_wrap_before (struct it *it)
542+ {
543+   if (IT_DISPLAYING_WHITESPACE (it))
544+     return false;
545+   if (!Vword_wrap_by_category)
546+     return true;
547+
548+   /* In a reversed (RTL) row each glyph is prepended to the previous
549+      one, so for LTR text such as CJK the two ends of the line swap
550+      roles: the category forbidding end-of-line placement is the one
551+      that forbids the character from starting the line.  */
552+   bool reversed = it->glyph_row && it->glyph_row->reversed_p;
553+   int forbidden_at_bol = reversed ? NOT_AT_EOL : NOT_AT_BOL;
554+
555+   /* The character must be allowed to begin a line.  */
556+   return !it_char_has_category (it, forbidden_at_bol);
557+ }
558+
559+ /* Return true if a line break may be placed after the character IT
560+    is currently on.  Whitespace always qualifies; other characters
561+    qualify only when `word-wrap-by-category' is enabled.  */
562+ static bool
563+ char_can_wrap_after (struct it *it)
564+ {
565+   if (IT_DISPLAYING_WHITESPACE (it))
566+     return true;
567+   if (!Vword_wrap_by_category)
568+     return false;
569+
570+   /* A reversed (RTL) row prepends each glyph to the previous one, so
571+      for LTR text such as CJK the roles of the two line ends swap and
572+      the begin-of-line category guards the end of the line.  */
573+   bool reversed = it->glyph_row && it->glyph_row->reversed_p;
574+   int forbidden_at_eol = reversed ? NOT_AT_BOL : NOT_AT_EOL;
575+
576+   /* The character must be breakable and allowed to end a line.  */
577+   return (it_char_has_category (it, LINE_BREAKABLE)
578+           && !it_char_has_category (it, forbidden_at_eol));
579+ }
580+ /* Undefine the macros used by the wrap helpers above, so they are no longer visible in the rest of the file.  */
581+ #undef IT_DISPLAYING_WHITESPACE
582+ #undef NOT_AT_EOL
583+ #undef NOT_AT_BOL
584+ #undef LINE_BREAKABLE
585+
511586/* If all the conditions needed to print the fill column indicator are
512587 met, return the (nonnegative) column number, else return a negative
513588 value. */
@@ -9193,13 +9268,20 @@ move_it_in_display_line_to (struct it *it,
91939268 {
91949269 if (it->line_wrap == WORD_WRAP && it->area == TEXT_AREA)
91959270 {
9196- if (IT_DISPLAYING_WHITESPACE (it))
9197- may_wrap = true;
9198- else if (may_wrap)
9271+ bool next_may_wrap = may_wrap;
9272+ /* Can we wrap after this character? */
9273+ if (char_can_wrap_after (it))
9274+ next_may_wrap = true;
9275+ else
9276+ next_may_wrap = false;
9277+ /* Can we wrap here? */
9278+ if (may_wrap && char_can_wrap_before (it))
91999279 {
92009280 /* We have reached a glyph that follows one or more
9201- whitespace characters. If the position is
9202- already found, we are done. */
9281+ whitespace characters or a character that allows
9282+ wrapping after it. If this character allows
9283+ wrapping before it, save this position as a
9284+ wrapping point. */
92039285 if (atpos_it.sp >= 0)
92049286 {
92059287 RESTORE_IT (it, &atpos_it, atpos_data);
@@ -9214,8 +9296,10 @@ move_it_in_display_line_to (struct it *it,
92149296 }
92159297 /* Otherwise, we can wrap here. */
92169298 SAVE_IT (wrap_it, *it, wrap_data);
9217- may_wrap = false;
9299+ next_may_wrap = false;
92189300 }
9301+ /* Update may_wrap for the next iteration. */
9302+ may_wrap = next_may_wrap;
92199303 }
92209304 }
92219305
@@ -9343,10 +9427,10 @@ move_it_in_display_line_to (struct it *it,
93439427 {
93449428 bool can_wrap = true;
93459429
9346- /* If we are at a whitespace character
9347- that barely fits on this screen line,
9348- but the next character is also
9349- whitespace, we cannot wrap here. */
9430+ /* If the previous character says we can
9431+ wrap after it, but the current
9432+ character says we can't wrap before
9433+ it, then we can't wrap here. */
93509434 if (it->line_wrap == WORD_WRAP
93519435 && wrap_it.sp >= 0
93529436 && may_wrap
@@ -9358,7 +9442,7 @@ move_it_in_display_line_to (struct it *it,
93589442 SAVE_IT (tem_it, *it, tem_data);
93599443 set_iterator_to_next (it, true);
93609444 if (get_next_display_element (it)
9361- && IT_DISPLAYING_WHITESPACE (it))
9445+ && !char_can_wrap_before (it))
93629446 can_wrap = false;
93639447 RESTORE_IT (it, &tem_it, tem_data);
93649448 }
@@ -9437,19 +9521,18 @@ move_it_in_display_line_to (struct it *it,
94379521 else
94389522 IT_RESET_X_ASCENT_DESCENT (it);
94399523
9440- /* If the screen line ends with whitespace, and we
9441- are under word-wrap, don't use wrap_it: it is no
9442- longer relevant, but we won't have an opportunity
9443- to update it, since we are done with this screen
9444- line. */
9524+ /* If the screen line ends with whitespace (or
9525+ wrap-able character), and we are under word-wrap,
9526+ don't use wrap_it: it is no longer relevant, but
9527+ we won't have an opportunity to update it, since
9528+ we are done with this screen line. */
94459529 if (may_wrap && IT_OVERFLOW_NEWLINE_INTO_FRINGE (it)
94469530 /* If the character after the one which set the
9447- may_wrap flag is also whitespace, we can't
9448- wrap here, since the screen line cannot be
9449- wrapped in the middle of whitespace.
9450- Therefore, wrap_it _is_ relevant in that
9451- case. */
9452- && !(moved_forward && IT_DISPLAYING_WHITESPACE (it)))
9531+ may_wrap flag says we can't wrap before it,
9532+ we can't wrap here. Therefore, wrap_it
9533+ (previously found wrap-point) _is_ relevant
9534+ in that case. */
9535+ && !(moved_forward && char_can_wrap_before (it)))
94539536 {
94549537 /* If we've found TO_X, go back there, as we now
94559538 know the last word fits on this screen line. */
@@ -23322,9 +23405,14 @@ display_line (struct it *it, int cursor_vpos)
2332223405
2332323406 if (it->line_wrap == WORD_WRAP && it->area == TEXT_AREA)
2332423407 {
23325- if (IT_DISPLAYING_WHITESPACE (it))
23326- may_wrap = true;
23327- else if (may_wrap)
23408+ bool next_may_wrap = may_wrap;
23409+ /* Can we wrap after this character? */
23410+ if (char_can_wrap_after (it))
23411+ next_may_wrap = true;
23412+ else
23413+ next_may_wrap = false;
23414+ /* Can we wrap here? */
23415+ if (may_wrap && char_can_wrap_before (it))
2332823416 {
2332923417 SAVE_IT (wrap_it, *it, wrap_data);
2333023418 wrap_x = x;
@@ -23338,8 +23426,9 @@ display_line (struct it *it, int cursor_vpos)
2333823426 wrap_row_min_bpos = min_bpos;
2333923427 wrap_row_max_pos = max_pos;
2334023428 wrap_row_max_bpos = max_bpos;
23341- may_wrap = false;
2334223429 }
23430+ /* Update may_wrap for the next iteration. */
23431+ may_wrap = next_may_wrap;
2334323432 }
2334423433 }
2334523434
@@ -23463,14 +23552,18 @@ display_line (struct it *it, int cursor_vpos)
2346323552 /* If line-wrap is on, check if a previous
2346423553 wrap point was found. */
2346523554 if (!IT_OVERFLOW_NEWLINE_INTO_FRINGE (it)
23466- && wrap_row_used > 0
23555+ && wrap_row_used > 0 /* Found. */
2346723556 /* Even if there is a previous wrap
2346823557 point, continue the line here as
2346923558 usual, if (i) the previous character
23470- was a space or tab AND (ii) the
23471- current character is not. */
23472- && (!may_wrap
23473- || IT_DISPLAYING_WHITESPACE (it)))
23559+ allows wrapping after it, AND (ii)
23560+ the current character allows wrapping
23561+ before it. Because this is a valid
23562+ break point, we can just continue to
23563+ the next line at here, there is no
23564+ need to wrap early at the previous
23565+ wrap point. */
23566+ && (!may_wrap || !char_can_wrap_before (it)))
2347423567 goto back_to_wrap;
2347523568
2347623569 /* Record the maximum and minimum buffer
@@ -23498,13 +23591,16 @@ display_line (struct it *it, int cursor_vpos)
2349823591 /* If line-wrap is on, check if a
2349923592 previous wrap point was found. */
2350023593 else if (wrap_row_used > 0
23501- /* Even if there is a previous wrap
23502- point, continue the line here as
23503- usual, if (i) the previous character
23504- was a space or tab AND (ii) the
23505- current character is not. */
23506- && (!may_wrap
23507- || IT_DISPLAYING_WHITESPACE (it)))
23594+ /* Even if there is a previous
23595+ wrap point, continue the
23596+ line here as usual, if (i)
23597+ the previous character
23598+ allows wrapping after it,
23599+ AND (ii) the current
23600+ character allows wrapping
23601+ before it, since this is
23602+ itself a valid break point. */
23603+ && (!may_wrap || !char_can_wrap_before (it)))
2350823604 goto back_to_wrap;
2350923605
2351023606 }
@@ -34662,6 +34758,23 @@ A value of nil means to respect the value of `truncate-lines'.
3466234758If `word-wrap' is enabled, you might want to reduce this. */);
3466334759 Vtruncate_partial_width_windows = make_fixnum (50);
3466434760
34761+ DEFVAR_BOOL("word-wrap-by-category", Vword_wrap_by_category, doc: /*
34762+ Non-nil means also wrap after characters of a certain category.
34763+ Normally when `word-wrap' is on, Emacs only breaks lines after
34764+ whitespace characters. When this option is turned on, Emacs also
34765+ breaks lines after characters that have the "|" category (defined in
34766+ characters.el). This is useful for allowing breaking after CJK
34767+ characters and improves the word-wrapping for CJK text mixed with
34768+ Latin text.
34769+
34770+ If this variable is set using Customize, Emacs automatically loads
34771+ kinsoku.el. When kinsoku.el is loaded, Emacs respects kinsoku rules
34772+ when breaking lines. That means characters with the ">" category
34773+ don't appear at the beginning of a line (e.g., FULLWIDTH COMMA), and
34774+ characters with the "<" category don't appear at the end of a line
34775+ (e.g., LEFT DOUBLE ANGLE BRACKET). */);
34776+ Vword_wrap_by_category = false;
34777+
3466534778 DEFVAR_LISP ("line-number-display-limit", Vline_number_display_limit,
3466634779 doc: /* Maximum buffer size for which line number should be displayed.
3466734780If the buffer is bigger than this, the line number does not appear
0 commit comments