Skip to content

Commit

Permalink
Merge 437ef04 into 87b48fc
Browse files Browse the repository at this point in the history
  • Loading branch information
hotoo committed Aug 26, 2015
2 parents 87b48fc + 437ef04 commit 9bc04aa
Show file tree
Hide file tree
Showing 22 changed files with 371 additions and 21,204 deletions.
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ os:
language: node_js

node_js:
- "0.12"
- "iojs-1"
- "iojs-2"
- "iojs-3"

Expand Down
5 changes: 2 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ test-npm:
./node_modules/.bin/_mocha \
-- \
--harmony \
--require should \
--reporter spec \
--timeout 2000 \
--inline-diffs \
Expand All @@ -52,7 +51,7 @@ test-spm:
@spm test

lint:
@./node_modules/eslint/bin/eslint.js ./src/pinyin.js ./tests/
@./node_modules/eslint/bin/eslint.js ./lib/ ./bin/ ./tests/

test: lint test-spm test-npm test-cli benchmark

Expand Down Expand Up @@ -86,4 +85,4 @@ dict-node:
infrequent:
@node ./tools/infrequent.js > ./tools/zi/infrequent.js

.PHONY: build-doc publish-doc server clean test coverage test-spm test-npm test-cli lint
.PHONY: build-doc publish-doc server clean test coverage test-spm test-npm test-cli lint benchmark
9 changes: 9 additions & 0 deletions benchmark/long.js

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions benchmark/short.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Resident set size sampled before the library is required, so the final
// delta includes the cost of loading it.
const baselineRss = process.memoryUsage().rss;

// Short input used for this benchmark run.
const sample = "你好拼音";

const convert = require("../web-pinyin");

// Time a single conversion of the sample text.
console.time("pinyin");
convert(sample);
console.timeEnd("pinyin");

// Growth in resident set size (bytes) since the baseline sample.
console.log(process.memoryUsage().rss - baselineRss);
2 changes: 1 addition & 1 deletion bin/pinyin
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if (commander.args.length === 0) {
commander.help();
}

var pinyin = require("../src/pinyin");
var pinyin = require("../");
var options = {
style: pinyin["STYLE_" + (commander.style || "TONE").toUpperCase()],
heteronym: commander.heteronym || false,
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
7 changes: 4 additions & 3 deletions examples/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
textarea{width:90%; height:100px;}
</style>

### 输入[汉字](?han=简体中文汉字)
### 输入[汉字](?han= 简体中文汉字)

<div>
<textarea id="input"></textarea>
Expand All @@ -33,11 +33,12 @@ textarea{width:90%; height:100px;}

<script type="text/spm">
var pinyin = require('pinyin');
var Url = require('url');
//var Url = require('url');

var $ = function(id){return document.getElementById(id);}
var styles = document.getElementsByName("style");
var han = new Url(location.href).getParam("han");
//var han = new Url(location.href).getParam("han");
var han = "中文"

function build(){
var han = $("input").value;
Expand Down
2 changes: 1 addition & 1 deletion index.js
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

module.exports = require("./src/pinyin");
module.exports = require("./lib/");
106 changes: 106 additions & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"use strict";

const assign = require("object-assign");
const PINYIN_DICT = require("../data/dict-zi");
const Pinyin = require("./pinyin");
let jieba;
let PHRASES_DICT;

// Node-side converter: extends the base per-character converter with optional
// word segmentation and phrase-level pinyin lookup.
class NodePinyin extends Pinyin {

  // Convert a string to pinyin.
  // @param {String} hans Target string (Han characters) to convert.
  // @param {Object} options Optional; pinyin style, heteronym and segment flags.
  // @return {Array} List of pinyin entries; runs of characters that are not in
  //   the dictionary are emitted as a single one-element entry.
  convert (hans, options) {
    if (typeof hans !== "string") {
      return [];
    }

    const opts = assign({}, Pinyin.DEFAULT_OPTIONS, options);
    // With segmentation the input becomes a list of words; otherwise the
    // string itself is walked one UTF-16 unit at a time.
    const tokens = opts.segment ? segment(hans) : hans;
    const total = tokens.length;
    let result = [];
    let pending = ""; // accumulated text that has no dictionary entry

    let index = 0;
    while (index < total) {
      const token = tokens[index];
      index += 1;

      if (!PINYIN_DICT[token.charCodeAt(0)]) {
        pending += token;
        continue;
      }

      // A dictionary hit ends the current run of unconverted text.
      if (pending.length > 0) {
        result.push([pending]);
        pending = "";
      }

      const converted = token.length === 1
        ? super.convert(token, opts)
        : this.phrases_pinyin(token, opts);
      result = result.concat(converted);
    }

    // Flush any trailing unconverted text.
    if (pending.length > 0) {
      result.push([pending]);
      pending = "";
    }

    return result;
  }

  // Pinyin for a multi-character word.
  // @param {String} phrases The word to convert.
  // @param {Object} options Conversion options (style, heteronym).
  // @return {Array} One entry per character of the word.
  phrases_pinyin (phrases, options) {
    // Unknown word: fall back to character-by-character conversion.
    if (!PHRASES_DICT.hasOwnProperty(phrases)) {
      let fallback = [];
      for (let pos = 0, len = phrases.length; pos < len; pos++) {
        fallback = fallback.concat(super.convert(phrases[pos], options));
      }
      return fallback;
    }

    // Known word: build a fresh copy so the dictionary entry is never handed
    // out or mutated.
    return PHRASES_DICT[phrases].map(function (readings) {
      if (!options.heteronym) {
        return [Pinyin.toFixed(readings[0], options.style)];
      }
      return readings.map(function (reading) {
        return Pinyin.toFixed(reading, options.style);
      });
    });
  }
}

// Split text into words with nodejieba, loading the segmenter and the phrase
// dictionary on first use.
// @param {String} hans Text to segment.
// @return the result of jieba.cut(hans).
// Rethrows the original error, after printing an install hint, when
// nodejieba cannot be required.
function segment(hans) {
  if (!jieba) {
    try {
      jieba = require("nodejieba");
    } catch (ex) {
      console.error();
      console.error("    Segment need nodejieba, please run '$ npm install nodejieba'.");
      console.error("    分词需要使用 nodejieba 模块,请运行 '$ npm install nodejieba' 并确保安装完成。");
      console.error();
      throw ex;
    }
  }

  // Phrase pinyin dictionary, loaded only once segmentation is actually used.
  if (!PHRASES_DICT) {
    PHRASES_DICT = require("../data/phrases-dict");
  }

  return jieba.cut(hans);
}

// Single shared converter built on the character dictionary.
const pinyin = new NodePinyin(PINYIN_DICT);

// The module's public value is the bound convert function, with the style
// constants attached as properties.
const api = pinyin.convert.bind(pinyin);
api.STYLE_NORMAL = Pinyin.STYLE_NORMAL;
api.STYLE_TONE = Pinyin.STYLE_TONE;
api.STYLE_TONE2 = Pinyin.STYLE_TONE2;
api.STYLE_INITIALS = Pinyin.STYLE_INITIALS;
api.STYLE_FIRST_LETTER = Pinyin.STYLE_FIRST_LETTER;

module.exports = api;
File renamed without changes.
181 changes: 181 additions & 0 deletions lib/pinyin.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"use strict";

const assign = require("object-assign");
// XXX: switch these values to Symbol once `spm test` supports it.
// Output styles understood by the converter.
const PINYIN_STYLE = {
  // Plain style, no tone marks.
  NORMAL: 0,
  // Standard style, tone mark on the first letter of the final.
  TONE: 1,
  // Tone written after the pinyin as a digit 1~4.
  TONE2: 2,
  // Only the initial (leading consonant) part.
  INITIALS: 3,
  // Only the first letter.
  FIRST_LETTER: 4,
};

// Options applied when the caller does not override them.
const DEFAULT_OPTIONS = {
  // Output style.
  style: PINYIN_STYLE.TONE,
  // Word segmentation.
  segment: false,
  // Heteronyms (characters with more than one reading).
  heteronym: false,
};

// Table of initials. The digraphs zh/ch/sh come before z/c/s, so a
// first-match prefix scan picks the longer initial.
const INITIALS = [
  "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q", "x", "r",
  "zh", "ch", "sh", "z", "c", "s",
];
// Table of finals (currently unused).
//const FINALS = "ang,eng,ing,ong,an,en,in,un,er,ai,ei,ui,ao,ou,iu,ie,ve,a,o,e,i,u,v".split(",");
// Tone-marked characters, keyed by the marked character.
// NOTE(review): values look like base letter + tone digit (see RE_TONE2) - confirm in ./phonetic-symbol.
const PHONETIC_SYMBOL = require("./phonetic-symbol");
// Matches any single tone-marked character that has an entry in PHONETIC_SYMBOL.
const PHONETIC_CHARS = Object.keys(PHONETIC_SYMBOL).join("");
const RE_PHONETIC_SYMBOL = new RegExp("([" + PHONETIC_CHARS + "])", "g");
// Splits a trailing tone digit (0-4) from the letter before it.
const RE_TONE2 = /([aeoiuvnm])([0-4])$/;

/*
 * Reduce a pinyin syllable to its initial (leading consonant) form.
 * @param {String} pinyin The syllable to inspect.
 * @return {String} The first INITIALS entry that `pinyin` starts with,
 *   or "" when none matches.
 */
function initials(pinyin) {
  let matched = "";
  // `some` stops at the first entry that prefixes the syllable.
  INITIALS.some(function (candidate) {
    if (pinyin.indexOf(candidate) !== 0) {
      return false;
    }
    matched = candidate;
    return true;
  });
  return matched;
}

// Dictionary-driven converter from Han characters to pinyin.
class Pinyin {
  // @param {Object} dict Lookup table keyed by UTF-16 char code; each value
  //   is a comma-separated list of pinyin readings for that character.
  constructor (dict) {
    this._dict = dict;
  }

  // Convert a string to pinyin, one character at a time.
  // @param {String} hans Target string (Han characters) to convert.
  // @param {Object} options Optional; pinyin style and heteronym flag.
  // @return {Array} List of pinyin entries. Each dictionary character yields
  //   one array of readings; each run of other characters is kept verbatim as
  //   a single one-element array. A non-string input yields [].
  convert (hans, options) {

    if (typeof hans !== "string") {
      return [];
    }

    options = assign({}, DEFAULT_OPTIONS, options);

    let pys = [];
    let nohans = "";

    for (let i = 0, firstCharCode, words, l = hans.length; i < l; i++) {

      words = hans[i];
      firstCharCode = words.charCodeAt(0);

      if (this._dict[firstCharCode]) {

        // A dictionary hit ends the current run of non-Chinese text.
        if (nohans.length > 0) {
          pys.push([nohans]);
          nohans = ""; // reset non-chinese words.
        }

        pys.push(this.single_pinyin(words, options));

      } else {
        nohans += words;
      }
    }

    // Flush the trailing run of non-Chinese text.
    if (nohans.length > 0) {
      pys.push([nohans]);
      nohans = ""; // reset non-chinese words.
    }
    return pys;
  }

  // Pinyin for a single Han character.
  // @param {String} han A single character; for longer strings only the
  //   first character is converted.
  // @param {Object} options Optional; style and heteronym flag. Falls back to
  //   DEFAULT_OPTIONS when omitted.
  // @return {Array} List of readings (several for a heteronym when
  //   options.heteronym is set); [han] when the character is not in the
  //   dictionary; [] for a non-string or empty input.
  single_pinyin (han, options) {

    // The empty string must stop here: "".charAt(0) is "" again, so the
    // first-character recursion below would never terminate.
    if (typeof han !== "string" || han.length === 0) {
      return [];
    }
    if (han.length !== 1) {
      return this.single_pinyin(han.charAt(0), options);
    }

    // Allow direct calls that pass only the character.
    const opts = options || DEFAULT_OPTIONS;
    const hanCode = han.charCodeAt(0);

    if (!this._dict[hanCode]) {
      return [han];
    }

    const pys = this._dict[hanCode].split(",");
    if (!opts.heteronym) {
      return [Pinyin.toFixed(pys[0], opts.style)];
    }

    // Track readings already emitted: distinct toned readings can collapse
    // to the same text in styles that drop the tone.
    const seen = Object.create(null);
    const pinyins = [];
    for (let i = 0, py, l = pys.length; i < l; i++) {
      py = Pinyin.toFixed(pys[i], opts.style);
      if (seen[py]) {
        continue;
      }
      seen[py] = true;

      pinyins.push(py);
    }
    return pinyins;
  }

  // Format one tone-marked pinyin syllable in the requested style.
  // @param {String} pinyin Syllable as stored in the dictionary.
  // @param {Number} style One of the PINYIN_STYLE values; anything else is
  //   treated like TONE and returns the input unchanged.
  // @return {String}
  static toFixed (pinyin, style) {
    let tone = ""; // tone digit captured for the TONE2 style
    let first_letter;
    let py;
    switch (style) {
    case PINYIN_STYLE.INITIALS:
      return initials(pinyin);

    case PINYIN_STYLE.FIRST_LETTER:
      first_letter = pinyin.charAt(0);
      // A tone-marked first letter is mapped back to its plain letter.
      if (PHONETIC_SYMBOL.hasOwnProperty(first_letter)) {
        first_letter = PHONETIC_SYMBOL[first_letter].charAt(0);
      }
      return first_letter;

    case PINYIN_STYLE.NORMAL:
      return pinyin.replace(RE_PHONETIC_SYMBOL, function ($0, $1_phonetic) {
        return PHONETIC_SYMBOL[$1_phonetic].replace(RE_TONE2, "$1");
      });

    case PINYIN_STYLE.TONE2:
      py = pinyin.replace(RE_PHONETIC_SYMBOL, function ($0, $1) {
        // Remember the tone digit; the last marked character wins.
        tone = PHONETIC_SYMBOL[$1].replace(RE_TONE2, "$2");

        return PHONETIC_SYMBOL[$1].replace(RE_TONE2, "$1");
      });
      return py + tone;

    case PINYIN_STYLE.TONE:
    default:
      return pinyin;
    }
  }

  static get STYLE_NORMAL () {
    return PINYIN_STYLE.NORMAL;
  }
  static get STYLE_TONE () {
    return PINYIN_STYLE.TONE;
  }
  static get STYLE_TONE2 () {
    return PINYIN_STYLE.TONE2;
  }
  static get STYLE_INITIALS () {
    return PINYIN_STYLE.INITIALS;
  }
  static get STYLE_FIRST_LETTER () {
    return PINYIN_STYLE.FIRST_LETTER;
  }
  static get DEFAULT_OPTIONS () {
    return DEFAULT_OPTIONS;
  }
}

module.exports = Pinyin;
Loading

0 comments on commit 9bc04aa

Please sign in to comment.