Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: rewrite to meet w3c typography (#68)
BREAKING CHANGE: rename `characters` to `letters`
BREAKING CHANGE: functions now return a Charset (https://github.com/ikatyang/regexp-util#charset)
- Loading branch information
Showing 6 changed files with 90 additions and 73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,37 @@ | ||
import { charset, Charset } from 'regexp-util'; | ||
import unicode = require('unicode-regex'); | ||
|
||
const punctuation_charset = unicode({ | ||
const cjk_letters = unicode({ | ||
Script: ['Han', 'Katakana', 'Hiragana', 'Hangul', 'Bopomofo'], | ||
General_Category: [ | ||
'Other_Letter', | ||
'Letter_Number', | ||
'Other_Symbol', | ||
'Modifier_Letter', | ||
], | ||
}); | ||
|
||
const cjk_punctuations = unicode({ | ||
Block: [ | ||
'CJK_Symbols_And_Punctuation', | ||
'Hangul_Syllables', | ||
'Vertical_Forms', | ||
'CJK_Compatibility_Forms', | ||
'Small_Form_Variants', | ||
'Halfwidth_And_Fullwidth_Forms', | ||
], | ||
}); | ||
}).subtract(cjk_letters); | ||
|
||
const character_charset = unicode({ | ||
Block: [ | ||
'Hangul_Jamo', | ||
'CJK_Radicals_Supplement', | ||
'Kangxi_Radicals', | ||
'Hiragana', | ||
'Katakana', | ||
'Bopomofo', | ||
'Hangul_Compatibility_Jamo', | ||
'Enclosed_CJK_Letters_And_Months', | ||
'CJK_Compatibility', | ||
'CJK_Unified_Ideographs_Extension_A', | ||
'CJK_Unified_Ideographs', | ||
'Hangul_Jamo_Extended_A', | ||
'CJK_Compatibility_Ideographs', | ||
], | ||
}); | ||
|
||
const mixed_charset = character_charset.union(punctuation_charset); | ||
const cjk_all = charset(cjk_letters, cjk_punctuations); | ||
|
||
function get_regex() { | ||
return create_regex(mixed_charset); | ||
function cjk_regex() { | ||
return charset(cjk_all); | ||
} | ||
|
||
declare namespace get_regex { | ||
function characters(): RegExp; | ||
function punctuations(): RegExp; | ||
} | ||
|
||
get_regex.characters = () => create_regex(character_charset); | ||
get_regex.punctuations = () => create_regex(punctuation_charset); | ||
|
||
function create_regex(charset: typeof mixed_charset) { | ||
return charset.toRegExp('g'); | ||
declare namespace cjk_regex { | ||
function letters(): Charset; | ||
function punctuations(): Charset; | ||
} | ||
cjk_regex.letters = () => charset(cjk_letters); | ||
cjk_regex.punctuations = () => charset(cjk_punctuations); | ||
|
||
export = get_regex; | ||
export = cjk_regex; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,48 @@ | ||
import cjk_regex = require('../src/index'); | ||
|
||
test('characters', () => { | ||
expect('a').not.toMatch(cjk_regex.characters()); | ||
expect('。').not.toMatch(cjk_regex.characters()); | ||
expect('中').toMatch(cjk_regex.characters()); | ||
expect('あ').toMatch(cjk_regex.characters()); | ||
expect('ㅂ').toMatch(cjk_regex.characters()); | ||
}); | ||
|
||
test('punctuations', () => { | ||
expect('a').not.toMatch(cjk_regex.punctuations()); | ||
expect('。').toMatch(cjk_regex.punctuations()); | ||
expect('中').not.toMatch(cjk_regex.punctuations()); | ||
expect('あ').not.toMatch(cjk_regex.punctuations()); | ||
expect('ㅂ').not.toMatch(cjk_regex.punctuations()); | ||
}); | ||
const test_cases: { | ||
[char: string]: 'non-cjk' | 'cjk-letter' | 'cjk-punctuation'; | ||
} = /* prettier-ignore */ { | ||
'.': 'non-cjk', | ||
'a': 'non-cjk', | ||
'。': 'cjk-punctuation', | ||
'中': 'cjk-letter', | ||
'ㄅ': 'cjk-letter', | ||
'𬉼': 'cjk-letter', | ||
'あ': 'cjk-letter', | ||
'ㅂ': 'cjk-letter', | ||
'가': 'cjk-letter', | ||
'ퟔ': 'cjk-letter', | ||
'〤': 'cjk-letter', | ||
'𛀂': 'cjk-letter', | ||
'ヲ': 'cjk-letter', | ||
'々': 'cjk-letter', | ||
}; | ||
|
||
test('mixed', () => { | ||
expect('a').not.toMatch(cjk_regex()); | ||
expect('。').toMatch(cjk_regex()); | ||
expect('中').toMatch(cjk_regex()); | ||
expect('あ').toMatch(cjk_regex()); | ||
expect('ㅂ').toMatch(cjk_regex()); | ||
Object.keys(test_cases).forEach(character => { | ||
const category = test_cases[character]; | ||
const title = `"${character}" (0x${character | ||
.charCodeAt(0) | ||
.toString(16)}) is ${category}`; | ||
test(title, () => { | ||
switch (category) { | ||
case 'non-cjk': | ||
expect(character).not.toMatch(cjk_regex().toRegExp()); | ||
expect(character).not.toMatch(cjk_regex.letters().toRegExp()); | ||
expect(character).not.toMatch(cjk_regex.punctuations().toRegExp()); | ||
break; | ||
case 'cjk-letter': | ||
expect(character).toMatch(cjk_regex().toRegExp()); | ||
expect(character).toMatch(cjk_regex.letters().toRegExp()); | ||
expect(character).not.toMatch(cjk_regex.punctuations().toRegExp()); | ||
break; | ||
case 'cjk-punctuation': | ||
expect(character).toMatch(cjk_regex().toRegExp()); | ||
expect(character).not.toMatch(cjk_regex.letters().toRegExp()); | ||
expect(character).toMatch(cjk_regex.punctuations().toRegExp()); | ||
break; | ||
default: | ||
throw new Error(`Unexpected category "${category}"`); | ||
} | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters