Skip to content

Commit

Permalink
Merge pull request #973 from ecomfe/fix-charset
Browse files Browse the repository at this point in the history
fix: Word breaking for charsets like Cyrillic should work the same as for Latin
  • Loading branch information
pissang committed Dec 7, 2022
2 parents 75f0e88 + e37e7d2 commit 04e2693
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions src/graphic/helper/parseText.ts
Original file line number Diff line number Diff line change
Expand Up @@ -591,9 +591,17 @@ function pushTokens(
}


// NOTE(review): this span is a rendered diff without +/- markers. The
// `isLatin` header and the `0x21`..`0x17F` return below appear to be the
// removed (pre-change) lines; `isAlphabeticLetter` and the multi-range return
// are their replacements.
function isLatin(ch: string) {
function isAlphabeticLetter(ch: string) {
// Unicode Character Ranges
// https://jrgraphix.net/research/unicode_blocks.php
// The following ranges may not cover all letter ranges but only the more
// popular ones. Developers could make pull requests when they find those
// not covered.
// Only the first UTF-16 code unit of `ch` is inspected.
let code = ch.charCodeAt(0);
return code >= 0x21 && code <= 0x17F;
return code >= 0x20 && code <= 0x24F // Latin (range begins at 0x20, the space character)
|| code >= 0x370 && code <= 0x10FF // Greek, Coptic, Cyrillic, etc.
|| code >= 0x1200 && code <= 0x13FF // Ethiopic and Cherokee
|| code >= 0x1E00 && code <= 0x206F; // Latin and Greek extended
}

const breakCharMap = reduce(',&?/;] '.split(''), function (obj, ch) {
Expand All @@ -604,7 +612,7 @@ const breakCharMap = reduce(',&?/;] '.split(''), function (obj, ch) {
* Whether to break by word. For Latin and other alphabetic languages.
*/
function isWordBreakChar(ch: string) {
if (isLatin(ch)) {
if (isAlphabeticLetter(ch)) {
if (breakCharMap[ch]) {
return true;
}
Expand Down

0 comments on commit 04e2693

Please sign in to comment.