Skip to content

Commit

Permalink
feat: rewrite to meet w3c typography (#68)
Browse files Browse the repository at this point in the history
BREAKING CHANGE:

rename `characters` to `letters`

BREAKING CHANGE:

functions now return a Charset
(https://github.com/ikatyang/regexp-util#charset)
  • Loading branch information
ikatyang committed Mar 22, 2018
1 parent 04cb2a3 commit d1ca3f6
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 73 deletions.
31 changes: 18 additions & 13 deletions README.md
Expand Up @@ -21,21 +21,26 @@ yarn add cjk-regex
## Usage

```js
const get_cjk_regex = require("cjk-regex");

get_cjk_regex().test("a"); //=> false
get_cjk_regex().test("。"); //=> true
get_cjk_regex().test("中"); //=> true

get_cjk_regex.characters().test("a"); //=> false
get_cjk_regex.characters().test("。"); //=> false
get_cjk_regex.characters().test("中"); //=> true

get_cjk_regex.punctuations().test("a"); //=> false
get_cjk_regex.punctuations().test("。"); //=> true
get_cjk_regex.punctuations().test("中"); //=> false
const cjk = require("cjk-regex");

const cjk_charset = cjk();
cjk_charset.toRegExp().test("a"); //=> false
cjk_charset.toRegExp().test("。"); //=> true
cjk_charset.toRegExp().test("中"); //=> true

const cjk_letter_charset = cjk.letters();
cjk_letter_charset.toRegExp().test("a"); //=> false
cjk_letter_charset.toRegExp().test("。"); //=> false
cjk_letter_charset.toRegExp().test("中"); //=> true

const cjk_punctuation_charset = cjk.punctuations();
cjk_punctuation_charset.toRegExp().test("a"); //=> false
cjk_punctuation_charset.toRegExp().test("。"); //=> true
cjk_punctuation_charset.toRegExp().test("中"); //=> false
```

Returns a [Charset](https://github.com/ikatyang/regexp-util#charset).

## Development

```sh
Expand Down
1 change: 1 addition & 0 deletions package.json
Expand Up @@ -25,6 +25,7 @@
"release": "standard-version"
},
"dependencies": {
"regexp-util": "^1.2.1",
"unicode-regex": "^2.0.0"
},
"devDependencies": {
Expand Down
57 changes: 22 additions & 35 deletions src/index.ts
@@ -1,50 +1,37 @@
import { charset, Charset } from 'regexp-util';
import unicode = require('unicode-regex');

const punctuation_charset = unicode({
// Charset of CJK "letters", built by unicode-regex from the Script and
// General_Category values listed below.
// NOTE(review): presumably a code point must satisfy both properties (one of
// the listed scripts AND one of the listed categories) — confirm against the
// unicode-regex documentation.
const cjk_letters = unicode({
Script: ['Han', 'Katakana', 'Hiragana', 'Hangul', 'Bopomofo'],
General_Category: [
'Other_Letter',
'Letter_Number',
'Other_Symbol',
'Modifier_Letter',
],
});

const cjk_punctuations = unicode({
Block: [
'CJK_Symbols_And_Punctuation',
'Hangul_Syllables',
'Vertical_Forms',
'CJK_Compatibility_Forms',
'Small_Form_Variants',
'Halfwidth_And_Fullwidth_Forms',
],
});
}).subtract(cjk_letters);

// Charset of CJK characters, defined as an explicit list of Unicode blocks
// passed to unicode-regex. It backs `get_regex.characters()` and is unioned
// with `punctuation_charset` to form `mixed_charset`.
const character_charset = unicode({
Block: [
'Hangul_Jamo',
'CJK_Radicals_Supplement',
'Kangxi_Radicals',
'Hiragana',
'Katakana',
'Bopomofo',
'Hangul_Compatibility_Jamo',
'Enclosed_CJK_Letters_And_Months',
'CJK_Compatibility',
'CJK_Unified_Ideographs_Extension_A',
'CJK_Unified_Ideographs',
'Hangul_Jamo_Extended_A',
'CJK_Compatibility_Ideographs',
],
});

const mixed_charset = character_charset.union(punctuation_charset);
const cjk_all = charset(cjk_letters, cjk_punctuations);

function get_regex() {
return create_regex(mixed_charset);
function cjk_regex() {
return charset(cjk_all);
}

declare namespace get_regex {
function characters(): RegExp;
function punctuations(): RegExp;
}

get_regex.characters = () => create_regex(character_charset);
get_regex.punctuations = () => create_regex(punctuation_charset);

function create_regex(charset: typeof mixed_charset) {
return charset.toRegExp('g');
declare namespace cjk_regex {
function letters(): Charset;
function punctuations(): Charset;
}
cjk_regex.letters = () => charset(cjk_letters);
cjk_regex.punctuations = () => charset(cjk_punctuations);

export = get_regex;
export = cjk_regex;
65 changes: 44 additions & 21 deletions tests/test.ts
@@ -1,25 +1,48 @@
import cjk_regex = require('../src/index');

// `characters()` must reject ASCII and CJK punctuation and accept CJK letters.
test('characters', () => {
  const rejected = ['a', '。'];
  const accepted = ['中', 'あ', 'ㅂ'];

  for (const sample of rejected) {
    expect(sample).not.toMatch(cjk_regex.characters());
  }
  for (const sample of accepted) {
    expect(sample).toMatch(cjk_regex.characters());
  }
});

// `punctuations()` must accept CJK punctuation only; ASCII and CJK letters are
// rejected. Each row is [sample, whether the regex is expected to match].
test('punctuations', () => {
  const expectations: Array<[string, boolean]> = [
    ['a', false],
    ['。', true],
    ['中', false],
    ['あ', false],
    ['ㅂ', false],
  ];

  expectations.forEach(([sample, should_match]) => {
    const regex = cjk_regex.punctuations();
    if (should_match) {
      expect(sample).toMatch(regex);
    } else {
      expect(sample).not.toMatch(regex);
    }
  });
});
// Table of sample characters and the category each one is expected to fall
// into. Keys are the sample strings; values select which assertions the
// generated tests run ('non-cjk', 'cjk-letter' or 'cjk-punctuation').
// The literal is marked prettier-ignore so the keys stay quoted and aligned.
const test_cases: {
[char: string]: 'non-cjk' | 'cjk-letter' | 'cjk-punctuation';
} = /* prettier-ignore */ {
'.': 'non-cjk',
'a': 'non-cjk',
'。': 'cjk-punctuation',
'中': 'cjk-letter',
'ㄅ': 'cjk-letter',
'𬉼': 'cjk-letter',
'あ': 'cjk-letter',
'ㅂ': 'cjk-letter',
'가': 'cjk-letter',
'ퟔ': 'cjk-letter',
'〤': 'cjk-letter',
'𛀂': 'cjk-letter',
'ヲ': 'cjk-letter',
'々': 'cjk-letter',
};

test('mixed', () => {
expect('a').not.toMatch(cjk_regex());
expect('。').toMatch(cjk_regex());
expect('中').toMatch(cjk_regex());
expect('あ').toMatch(cjk_regex());
expect('ㅂ').toMatch(cjk_regex());
// Generates one test per entry in `test_cases`. Each test checks the sample
// character against all three charsets (combined, letters, punctuations)
// according to its expected category.
Object.keys(test_cases).forEach(character => {
  const category = test_cases[character];
  // Label the test with the full Unicode code point. `charCodeAt(0)` would
  // report only the high surrogate for samples outside the BMP (e.g. '𬉼'),
  // producing a misleading hex value in the title. `codePointAt` yields
  // undefined only for an empty string, hence the `|| 0` fallback.
  const code_point = character.codePointAt(0) || 0;
  const title = `"${character}" (0x${code_point.toString(16)}) is ${category}`;
  test(title, () => {
    switch (category) {
      case 'non-cjk':
        expect(character).not.toMatch(cjk_regex().toRegExp());
        expect(character).not.toMatch(cjk_regex.letters().toRegExp());
        expect(character).not.toMatch(cjk_regex.punctuations().toRegExp());
        break;
      case 'cjk-letter':
        expect(character).toMatch(cjk_regex().toRegExp());
        expect(character).toMatch(cjk_regex.letters().toRegExp());
        expect(character).not.toMatch(cjk_regex.punctuations().toRegExp());
        break;
      case 'cjk-punctuation':
        expect(character).toMatch(cjk_regex().toRegExp());
        expect(character).not.toMatch(cjk_regex.letters().toRegExp());
        expect(character).toMatch(cjk_regex.punctuations().toRegExp());
        break;
      default:
        // Guards against a category value that is not one of the three above.
        throw new Error(`Unexpected category "${category}"`);
    }
  });
});
3 changes: 2 additions & 1 deletion tslint.json
Expand Up @@ -2,6 +2,7 @@
"rulesDirectory": ["tslint-plugin-prettier"],
"extends": ["tslint-config-ikatyang", "tslint-config-prettier"],
"rules": {
"prettier": true
"prettier": true,
"no-namespace": false
}
}
6 changes: 3 additions & 3 deletions yarn.lock
Expand Up @@ -2998,9 +2998,9 @@ regex-not@^1.0.0, regex-not@^1.0.2:
extend-shallow "^3.0.2"
safe-regex "^1.1.0"

regexp-util@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/regexp-util/-/regexp-util-1.2.0.tgz#427e8573ac8874ff539ecb4696c79725ae9bebfd"
regexp-util@^1.2.0, regexp-util@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/regexp-util/-/regexp-util-1.2.1.tgz#fe354ac4e6e7694abac4dea16c0bc55c096ad84e"
dependencies:
tslib "^1.9.0"

Expand Down

0 comments on commit d1ca3f6

Please sign in to comment.