Skip to content

Commit

Permalink
js解析支持unicode fix #44
Browse files Browse the repository at this point in the history
  • Loading branch information
army8735 committed Feb 4, 2015
1 parent 334da49 commit 5ee86cb
Show file tree
Hide file tree
Showing 7 changed files with 50,718 additions and 14 deletions.
60 changes: 49 additions & 11 deletions lib/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,29 @@ var Lt = require('unicode-7.0.0/categories/Lt/code-points');
var Lm = require('unicode-7.0.0/categories/Lm/code-points');
var Lo = require('unicode-7.0.0/categories/Lo/code-points');
var Nl = require('unicode-7.0.0/categories/Nl/code-points');
var Mn = require('unicode-7.0.0/categories/Mn/code-points');
var Mc = require('unicode-7.0.0/categories/Mc/code-points');
var Nd = require('unicode-7.0.0/categories/Nd/code-points');
var Pc = require('unicode-7.0.0/categories/Pc/code-points');

var sort = require('../src/util/sort');

// Build the base code-point set by concatenating the Lu, Ll, Lt, Lm, Lo and Nl
// category arrays (loaded from the unicode-7.0.0 package).
// NOTE(review): the chained-concat line and the variadic-concat line below produce
// the same array; they look like the before/after pair of a rendered diff —
// confirm and keep only one.
var 编码集 = Lu.concat(Ll).concat(Lt).concat(Lm).concat(Lo).concat(Nl);
var 编码集 = Lu.concat(Ll, Lt, Lm, Lo, Nl);
// Sort with the project helper from ../src/util/sort. Its return value is ignored,
// so it presumably sorts in place — TODO confirm.
sort(编码集);
// Drop code points in 0x0-0x7F and 0x010000-0x10FFFF (see 过滤).
编码集 = 过滤(编码集);

// NOTE(review): this inline filter repeats the exact two range checks done by 过滤,
// so after the call above it removes nothing further. It looks like the pre-change
// version of the same step — confirm before deleting.
编码集 = 编码集.filter(function(编码) {
if(编码 >= 0x010000 && 编码 <= 0x10FFFF) {
return false;
}
if(编码 >= 0x0 && 编码 <= 0x7F) {
return false;
}
return true;
});
/**
 * Returns a new array without the code points that fall in either excluded
 * range: 0x0-0x7F or 0x010000-0x10FFFF. The input array is not modified.
 *
 * @param {number[]} 编码集 - code points to filter
 * @returns {number[]} the code points lying outside both excluded ranges
 */
function 过滤(编码集) {
  return 编码集.filter(function(码点) {
    var 在高位区间 = 码点 >= 0x010000 && 码点 <= 0x10FFFF;
    var 在低位区间 = 码点 >= 0x0 && 码点 <= 0x7F;
    // Keep the value only when it sits in neither excluded range.
    return !(在高位区间 || 在低位区间);
  });
}

// Dump the filtered code points to ./编码集.txt, one number per line (UTF-8).
fs.writeFileSync('./编码集.txt', 编码集.join('\n'), { encoding: 'utf-8' });

Expand Down Expand Up @@ -67,4 +75,34 @@ function 补前缀(编码) {
}
}

// Write 结果集 (built earlier in the file, outside this view) to ./结果集.txt,
// one entry per line (UTF-8).
// NOTE(review): the statement appears twice with identical arguments; the second
// call only rewrites the same content. It looks like a before/after pair from a
// rendered diff — confirm and keep one.
fs.writeFileSync('./结果集.txt', 结果集.join('\n'), { encoding: 'utf-8' });
fs.writeFileSync('./结果集.txt', 结果集.join('\n'), { encoding: 'utf-8' });




// Suffix code-point set: U+200C and U+200D plus every code point from the
// Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd and Pc category arrays.
// NOTE(review): this matches the ES5 IdentifierPart character classes — confirm
// that is the intent.
var 后缀编码集 = [0x200C, 0x200D].concat(Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd, Pc);
// The run detection below relies on ascending order; the project sort helper is
// assumed to sort numerically in place — TODO confirm.
sort(后缀编码集);
// Drop code points in 0x0-0x7F and 0x010000-0x10FFFF.
后缀编码集 = 过滤(后缀编码集);

fs.writeFileSync('./后缀编码集.txt', 后缀编码集.join('\n'), { encoding: 'utf-8' });

// Collapse consecutive code points into entries: a lone code point becomes one
// formatted value, a run becomes "first-last". Formatting is delegated to
// 转为Unicode, which is defined elsewhere in this file.
var 后缀结果集 = [];
// Index of the first element of the run currently being scanned.
var 上个索引 = 0;
// The loop deliberately runs one step past the last element (i === len) so that
// the final run is flushed. Stopping at len - 1 would silently drop the last
// single value or range, and would emit nothing for a one-element set.
for(var i = 1, len = 后缀编码集.length; i <= len; i++) {
  var 当前 = 后缀编码集[i];
  var 前个 = 后缀编码集[i - 1];
  if(i < len && 当前 === 前个 + 1) {
    // Still inside a consecutive run; keep scanning.
    continue;
  }
  if(上个索引 === i - 1) {
    // The run holds a single code point.
    后缀结果集.push(转为Unicode(前个));
  }
  else {
    后缀结果集.push(转为Unicode(后缀编码集[上个索引]) + '-' + 转为Unicode(前个));
  }
  // The next run starts at the current element.
  上个索引 = i;
}

fs.writeFileSync('./后缀结果集.txt', 后缀结果集.join('\n'), { encoding: 'utf-8' });
Loading

0 comments on commit 5ee86cb

Please sign in to comment.