feat: rewrite to meet w3c typography (#68)

BREAKING CHANGE: rename `characters` to `letters`
BREAKING CHANGE: functions now return a Charset (https://github.com/ikatyang/regexp-util#charset)
ikatyang · Mar 22, 2018 · d1ca3f6 · d1ca3f6
1 parent 04cb2a3
commit d1ca3f6
Show file tree

Hide file tree

Showing 6 changed files with 90 additions and 73 deletions.
diff --git a/README.md b/README.md
@@ -21,21 +21,26 @@ yarn add cjk-regex
 ## Usage
 
 ```js
-const get_cjk_regex = require("cjk-regex");
-
-get_cjk_regex().test("a"); //=> false
-get_cjk_regex().test("。"); //=> true
-get_cjk_regex().test("中"); //=> true
-
-get_cjk_regex.characters().test("a"); //=> false
-get_cjk_regex.characters().test("。"); //=> false
-get_cjk_regex.characters().test("中"); //=> true
-
-get_cjk_regex.punctuations().test("a"); //=> false
-get_cjk_regex.punctuations().test("。"); //=> true
-get_cjk_regex.punctuations().test("中"); //=> false
+const cjk = require("cjk-regex");
+
+const cjk_charset = cjk();
+cjk_charset.toRegExp().test("a"); //=> false
+cjk_charset.toRegExp().test("。"); //=> true
+cjk_charset.toRegExp().test("中"); //=> true
+
+const cjk_letter_charset = cjk.letters();
+cjk_letter_charset.toRegExp().test("a"); //=> false
+cjk_letter_charset.toRegExp().test("。"); //=> false
+cjk_letter_charset.toRegExp().test("中"); //=> true
+
+const cjk_punctuation_charset = cjk.punctuations();
+cjk_punctuation_charset.toRegExp().test("a"); //=> false
+cjk_punctuation_charset.toRegExp().test("。"); //=> true
+cjk_punctuation_charset.toRegExp().test("中"); //=> false
 ```
 
+`cjk()`, `cjk.letters()` and `cjk.punctuations()` each return a [Charset](https://github.com/ikatyang/regexp-util#charset); call `.toRegExp()` on it to get a `RegExp`.
+
 ## Development
 
 ```sh

diff --git a/package.json b/package.json
@@ -25,6 +25,7 @@
     "release": "standard-version"
   },
   "dependencies": {
+    "regexp-util": "^1.2.1",
     "unicode-regex": "^2.0.0"
   },
   "devDependencies": {

diff --git a/src/index.ts b/src/index.ts
@@ -1,50 +1,37 @@
+import { charset, Charset } from 'regexp-util';
 import unicode = require('unicode-regex');
 
-const punctuation_charset = unicode({
+const cjk_letters = unicode({
+  Script: ['Han', 'Katakana', 'Hiragana', 'Hangul', 'Bopomofo'],
+  General_Category: [
+    'Other_Letter',
+    'Letter_Number',
+    'Other_Symbol',
+    'Modifier_Letter',
+  ],
+});
+
+const cjk_punctuations = unicode({
   Block: [
     'CJK_Symbols_And_Punctuation',
-    'Hangul_Syllables',
     'Vertical_Forms',
     'CJK_Compatibility_Forms',
     'Small_Form_Variants',
     'Halfwidth_And_Fullwidth_Forms',
   ],
-});
+}).subtract(cjk_letters);
 
-const character_charset = unicode({
-  Block: [
-    'Hangul_Jamo',
-    'CJK_Radicals_Supplement',
-    'Kangxi_Radicals',
-    'Hiragana',
-    'Katakana',
-    'Bopomofo',
-    'Hangul_Compatibility_Jamo',
-    'Enclosed_CJK_Letters_And_Months',
-    'CJK_Compatibility',
-    'CJK_Unified_Ideographs_Extension_A',
-    'CJK_Unified_Ideographs',
-    'Hangul_Jamo_Extended_A',
-    'CJK_Compatibility_Ideographs',
-  ],
-});
-
-const mixed_charset = character_charset.union(punctuation_charset);
+const cjk_all = charset(cjk_letters, cjk_punctuations);
 
-function get_regex() {
-  return create_regex(mixed_charset);
+function cjk_regex() {
+  return charset(cjk_all);
 }
 
-declare namespace get_regex {
-  function characters(): RegExp;
-  function punctuations(): RegExp;
-}
-
-get_regex.characters = () => create_regex(character_charset);
-get_regex.punctuations = () => create_regex(punctuation_charset);
-
-function create_regex(charset: typeof mixed_charset) {
-  return charset.toRegExp('g');
+declare namespace cjk_regex {
+  function letters(): Charset;
+  function punctuations(): Charset;
 }
+cjk_regex.letters = () => charset(cjk_letters);
+cjk_regex.punctuations = () => charset(cjk_punctuations);
 
-export = get_regex;
+export = cjk_regex;
diff --git a/tests/test.ts b/tests/test.ts
@@ -1,25 +1,48 @@
 import cjk_regex = require('../src/index');
 
-test('characters', () => {
-  expect('a').not.toMatch(cjk_regex.characters());
-  expect('。').not.toMatch(cjk_regex.characters());
-  expect('中').toMatch(cjk_regex.characters());
-  expect('あ').toMatch(cjk_regex.characters());
-  expect('ㅂ').toMatch(cjk_regex.characters());
-});
-
-test('punctuations', () => {
-  expect('a').not.toMatch(cjk_regex.punctuations());
-  expect('。').toMatch(cjk_regex.punctuations());
-  expect('中').not.toMatch(cjk_regex.punctuations());
-  expect('あ').not.toMatch(cjk_regex.punctuations());
-  expect('ㅂ').not.toMatch(cjk_regex.punctuations());
-});
+const test_cases: {
+  [char: string]: 'non-cjk' | 'cjk-letter' | 'cjk-punctuation';
+} = /* prettier-ignore */ {
+  '.': 'non-cjk',
+  'a': 'non-cjk',
+  '。': 'cjk-punctuation',
+  '中': 'cjk-letter',
+  'ㄅ': 'cjk-letter',
+  '𬉼': 'cjk-letter',
+  'あ': 'cjk-letter',
+  'ㅂ': 'cjk-letter',
+  '가': 'cjk-letter',
+  'ퟔ': 'cjk-letter',
+  '〤': 'cjk-letter',
+  '𛀂': 'cjk-letter',
+  'ｦ': 'cjk-letter',
+  '々': 'cjk-letter',
+};
 
-test('mixed', () => {
-  expect('a').not.toMatch(cjk_regex());
-  expect('。').toMatch(cjk_regex());
-  expect('中').toMatch(cjk_regex());
-  expect('あ').toMatch(cjk_regex());
-  expect('ㅂ').toMatch(cjk_regex());
+Object.keys(test_cases).forEach(character => {
+  const category = test_cases[character];
+  const title = `"${character}" (0x${character
+    .charCodeAt(0)
+    .toString(16)}) is ${category}`;
+  test(title, () => {
+    switch (category) {
+      case 'non-cjk':
+        expect(character).not.toMatch(cjk_regex().toRegExp());
+        expect(character).not.toMatch(cjk_regex.letters().toRegExp());
+        expect(character).not.toMatch(cjk_regex.punctuations().toRegExp());
+        break;
+      case 'cjk-letter':
+        expect(character).toMatch(cjk_regex().toRegExp());
+        expect(character).toMatch(cjk_regex.letters().toRegExp());
+        expect(character).not.toMatch(cjk_regex.punctuations().toRegExp());
+        break;
+      case 'cjk-punctuation':
+        expect(character).toMatch(cjk_regex().toRegExp());
+        expect(character).not.toMatch(cjk_regex.letters().toRegExp());
+        expect(character).toMatch(cjk_regex.punctuations().toRegExp());
+        break;
+      default:
+        throw new Error(`Unexpected category "${category}"`);
+    }
+  });
 });
diff --git a/tslint.json b/tslint.json
@@ -2,6 +2,7 @@
   "rulesDirectory": ["tslint-plugin-prettier"],
   "extends": ["tslint-config-ikatyang", "tslint-config-prettier"],
   "rules": {
-    "prettier": true
+    "prettier": true,
+    "no-namespace": false
   }
 }
diff --git a/yarn.lock b/yarn.lock
@@ -2998,9 +2998,9 @@ regex-not@^1.0.0, regex-not@^1.0.2:
     extend-shallow "^3.0.2"
     safe-regex "^1.1.0"
 
-regexp-util@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/regexp-util/-/regexp-util-1.2.0.tgz#427e8573ac8874ff539ecb4696c79725ae9bebfd"
+regexp-util@^1.2.0, regexp-util@^1.2.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/regexp-util/-/regexp-util-1.2.1.tgz#fe354ac4e6e7694abac4dea16c0bc55c096ad84e"
   dependencies:
     tslib "^1.9.0"