Skip to content

Commit

Permalink
1.2.1: Fix incorrect diacritics normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
breezewish committed Aug 30, 2017
1 parent ae37d4e commit d68067a
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 27 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -35,3 +35,6 @@ node_modules
# V8 generated
hydrogen-*
*.asm

# Cache
.cache_*
1 change: 1 addition & 0 deletions .npmignore
Expand Up @@ -5,6 +5,7 @@ benchmark/
.gitignore/
.DS_Store
.travis.yml
.cache_*

*.log
npm-debug.log*
Expand Down
33 changes: 25 additions & 8 deletions dev/parseDictionary.js
Expand Up @@ -25,6 +25,26 @@ const chars = {
};
const dictTypes = Object.keys(ranges);

/**
 * Fetches `url`, reusing an on-disk copy when a fresh one is available.
 *
 * The cache file is `.cache_<basename of url>`, resolved relative to the
 * current working directory. It is consulted only when `argv.cache` is not
 * `'off'` and its modification time is at most one day old. Whenever a
 * download happens, the cache file is (re)written with the downloaded content.
 *
 * @param {string} url - Resource to fetch.
 * @returns {Promise<string>} Resolves with the cached text, or with whatever
 *   `request(url)` resolves with after it has been written to the cache.
 *   Rejects if the download or the cache write fails.
 */
function cachedRequest(url) {
  const CACHE_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 1 day
  const fileName = path.basename(url);
  const cachePath = `.cache_${fileName}`;
  if (argv.cache !== 'off') {
    try {
      const stat = fs.statSync(cachePath);
      if (Date.now() - stat.mtime.getTime() <= CACHE_MAX_AGE_MS) {
        // Read first, log second: only claim a cache hit once the read succeeded.
        const cached = fs.readFileSync(cachePath, 'utf-8');
        console.log('Using cached content for %s', url);
        return Promise.resolve(cached);
      }
    } catch (err) {
      // A missing cache file is the normal first-run case. Any other failure
      // is reported, but the cache stays best-effort: fall through and download.
      if (err.code !== 'ENOENT') {
        console.warn('Ignoring unusable cache file %s: %s', cachePath, err.message);
      }
    }
  }
  return request(url).then(str => {
    fs.writeFileSync(cachePath, str);
    return str;
  });
}

function parseSource(sourceStr) {
const ret = [];
sourceStr.split('\n').forEach(line => {
Expand Down Expand Up @@ -53,10 +73,12 @@ function addDict(codePoint, pinyinList) {
} else if (c.match(/[a-z]/)) {
return c;
} else {
console.warn('[Code Point = %d] Invalid character `%s` (\\u%s) after applying mapping for `%s`', codePoint, c, ('0000' + c.charCodeAt(0).toString(16)).slice(-4), pinyinRaw);
valid = false;
return '';
}
}).join('');

if (!valid) {
return;
}
Expand All @@ -80,21 +102,16 @@ function addDict(codePoint, pinyinList) {
Promise.resolve()
.then(() => {
console.log('Downloading common characters from %s', CHAR_RANGE_URL);
return request(CHAR_RANGE_URL);
return cachedRequest(CHAR_RANGE_URL);
})
.then(rangeStr => {
const source = parseSource(rangeStr);
ranges.common = source.map(d => d.codePoint);
ranges.common.sort();
})
.then(() => {
if (argv.dict) {
console.log('Using local dictionary %s', argv.dict);
return fs.readFileSync(argv.dict, 'utf8');
} else {
console.log('Downloading latest dictionary from %s', DICTIONARY_URL);
return request(DICTIONARY_URL);
}
console.log('Downloading latest dictionary from %s', DICTIONARY_URL);
return cachedRequest(DICTIONARY_URL);
})
.then(dictStr => {
console.log('Parsing dictionary...');
Expand Down
10 changes: 10 additions & 0 deletions dev/symbolMap.js
Expand Up @@ -7,6 +7,9 @@ module.exports = {
'é': 'e',
'ě': 'e',
'è': 'e',
'ế': 'e',
'ê': 'e',
'ề': 'e',
'ō': 'o',
'ó': 'o',
'ǒ': 'o',
Expand All @@ -24,4 +27,11 @@ module.exports = {
'ǘ': 'v',
'ǚ': 'v',
'ǜ': 'v',
'ň': 'n',
'ń': 'n',
'ǹ': 'n',
'ḿ': 'm',
'\u0300': '',
'\u0304': '',
'\u030c': '',
};
14 changes: 7 additions & 7 deletions dist/pinyinlite_common.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/pinyinlite_common.min.js

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions dist/pinyinlite_full.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/pinyinlite_full.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "pinyinlite",
"version": "1.2.0",
"version": "1.2.1",
"description": "Lightweight and Lightning-Fast Pinyin library",
"main": "index_full.js",
"browser": "dist/pinyinlite_common.min.js",
Expand Down
2 changes: 1 addition & 1 deletion src/dict_common.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/dict_full.js

Large diffs are not rendered by default.

0 comments on commit d68067a

Please sign in to comment.