Skip to content

Commit

Permalink
Merge pull request #282 from hotoo/alecgibson-group
Browse files Browse the repository at this point in the history
Alecgibson group
  • Loading branch information
hotoo committed Nov 8, 2020
2 parents 2c914c7 + 4fa0da4 commit 3884d23
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 4 deletions.
6 changes: 5 additions & 1 deletion README-us_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,12 @@ console.log(pinyin("中心", {
})); // [ [ 'zhōng', 'zhòng' ], [ 'xīn' ] ]
console.log(pinyin("中心", {
heteronym: true, // Enable heteronym mode.
segment: true // Enable Chinese words eegmentation, fix most heteronym problem.
segment: true // Enable Chinese words segmentation, fix most heteronym problem.
})); // [ [ 'zhōng' ], [ 'xīn' ] ]
console.log(pinyin("我喜欢你", {
segment: true, // Enable segmentation. Needed for grouping.
group: true // Group pinyin segments
})); // [ [ 'wǒ' ], [ 'xǐhuān' ], [ 'nǐ' ] ]
console.log(pinyin("中心", {
style: pinyin.STYLE_INITIALS, // Setting pinyin style.
heteronym: true
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ console.log(pinyin("中心", {
heteronym: true, // 启用多音字模式
segment: true // 启用分词,以解决多音字问题。
})); // [ [ 'zhōng' ], [ 'xīn' ] ]
console.log(pinyin("我喜欢你", {
segment: true, // 启用分词
group: true // 启用词组
})); // [ [ 'wǒ' ], [ 'xǐhuān' ], [ 'nǐ' ] ]
console.log(pinyin("中心", {
style: pinyin.STYLE_INITIALS, // 设置拼音风格
heteronym: true
Expand Down
21 changes: 18 additions & 3 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

const assign = require("object-assign");
const PINYIN_DICT = require("../data/dict-zi");
const util = require("./util");
const Pinyin = require("./pinyin");
let jieba;
let PHRASES_DICT;
Expand Down Expand Up @@ -34,10 +35,14 @@ class NodePinyin extends Pinyin {
nohans = ""; // reset non-chinese words.
}

if (words.length === 1) {
pys = pys.concat(super.convert(words, options));
const newPys = words.length === 1
? super.convert(words, options)
: this.phrases_pinyin(words, options);

if (options.group) {
pys.push(groupPhrases(newPys));
} else {
pys = pys.concat(this.phrases_pinyin(words, options));
pys = pys.concat(newPys);
}

} else {
Expand Down Expand Up @@ -96,6 +101,16 @@ function segment(hans) {
return jieba.cutSmall(hans, 4);
}

/**
 * Turn the pinyin candidates of one segmented word into grouped candidates.
 *
 * @param {Array<Array<String>>} phrases List of pinyin candidate arrays
 *   (presumably one entry per converted piece of the word; confirm against
 *   the caller).
 * @return {Array<String>} The only entry, returned as-is, when `phrases` holds
 *   exactly one; otherwise the result of `util.combo(phrases)`.
 */
function groupPhrases(phrases) {
  // A lone entry has nothing to be combined with, so it is handed back directly.
  return phrases.length === 1 ? phrases[0] : util.combo(phrases);
}

const pinyin = new NodePinyin(PINYIN_DICT);

module.exports = pinyin.convert.bind(pinyin);
Expand Down
56 changes: 56 additions & 0 deletions lib/util.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"use strict";

/**
 * Combine two arrays of pinyin strings into every pairwise concatenation.
 *
 * @param {Array<String>} a1 Leading candidates, e.g. ["zhāo", "cháo"].
 * @param {Array<String>} a2 Trailing candidates, e.g. ["yáng"].
 * @return {Array<String>} A new flat array holding each `a1` item followed by
 *   each `a2` item, iterating `a1` in the outer position; the example above
 *   gives ["zhāoyáng", "cháoyáng"]. When either input is empty the result is
 *   a shallow copy of the other input.
 */
function combo2array(a1, a2) {
  // Always hand back a fresh array: returning the caller's own array would let
  // a later mutation of the result silently alter the input.
  if (!a1.length) {
    return a2.slice();
  }
  if (!a2.length) {
    return a1.slice();
  }
  const result = [];
  for (const head of a1) {
    for (const tail of a2) {
      result.push(head + tail);
    }
  }
  return result;
}

/**
 * Merge a list of pinyin candidate groups into whole-phrase candidates.
 *
 * Groups are folded left to right through `combo2array`.
 *
 * @param {Array<Array<String>>} arr Candidate groups, e.g.
 *   [["zhāo", "cháo"], ["yáng"]].
 * @return {Array<String>} A flat array of strings (not a nested array); the
 *   example above gives ["zhāoyáng", "cháoyáng"]. An empty `arr` yields [],
 *   and a single group yields a copy of that group.
 */
function combo(arr) {
  if (arr.length === 0) {
    return [];
  }
  // Seed the fold with a copy of the first group so that a single-group input
  // does not hand the caller's own inner array back to them.
  return arr.slice(1).reduce(
    (merged, group) => combo2array(merged, group),
    arr[0].slice()
  );
}

exports.combo2array = combo2array;
exports.combo = combo;
14 changes: 14 additions & 0 deletions tests/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,17 @@ describe("pinyin.compare", function() {
expect(sortedData).to.eql("排我序要".split(""));
});
});

// Exercises the `group` option together with `segment` and `heteronym`.
describe("pinyin group", function() {
  // Build a fresh options object per call so no test shares state with another.
  function groupOptions() {
    return {segment: true, group: true, heteronym: true};
  }

  it("groups segments", function () {
    expect(pinyin("我喜欢你", groupOptions())).to.eql([["wǒ"], ["xǐhuān"], ["nǐ"]]);
  });

  it("groups segments with heteronyms", function() {
    const expected = [["wǒ"], ["dū", "dōu"], ["xǐhuān"], ["zhāoyáng", "cháoyáng"]];
    expect(pinyin("我都喜欢朝阳", groupOptions())).to.eql(expected);
  });
});
70 changes: 70 additions & 0 deletions tests/util.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"use strict";

const expect = require("expect.js");
const util = require("../lib/util");

describe("test/util.test.js", function() {
describe("combo2array", function() {
  // Each row: [test title, first argument, second argument, expected result].
  const cases = [
    ["combo2array([], [])", [], [], []],
    ["combo2array([a], [])", ["a"], [], ["a"]],
    ["combo2array([], [1])", [], ["1"], ["1"]],
    ["combo2array([a], [1])", ["a"], ["1"], ["a1"]],
    ["combo2array([a,b], [1])", ["a", "b"], ["1"], ["a1", "b1"]],
    ["combo2array([a], [1,2])", ["a"], ["1", "2"], ["a1", "a2"]],
    ["combo2array([a,b], [1,2])", ["a", "b"], ["1", "2"], ["a1", "a2", "b1", "b2"]],
    ["combo2array([a,b,c], [1,2,3])", ["a", "b", "c"], ["1", "2", "3"], ["a1", "a2", "a3", "b1", "b2", "b3", "c1", "c2", "c3"]],
  ];

  cases.forEach(function(testCase) {
    const title = testCase[0];
    const first = testCase[1];
    const second = testCase[2];
    const expected = testCase[3];

    it(title, function() {
      expect(util.combo2array(first, second)).to.eql(expected);
    });
  });
});

// `util.combo` returns a flat array of strings, so every expectation below is
// a flat array rather than one wrapped in an extra array level.
describe("combo", function() {
  it("combo([])", function() {
    expect(util.combo([])).to.eql([]);
  });

  it("combo([[a]])", function() {
    expect(util.combo([["a"]])).to.eql(["a"]);
  });

  it("combo([[a,b]])", function() {
    expect(util.combo([["a", "b"]])).to.eql(["a", "b"]);
  });

  it("combo([[a,b],[1]])", function() {
    expect(util.combo([["a", "b"], ["1"]])).to.eql(["a1", "b1"]);
  });

  it("combo([[a,b],[1,2]])", function() {
    expect(util.combo([["a", "b"], ["1", "2"]])).to.eql(["a1", "a2", "b1", "b2"]);
  });

  it("combo([[a,b],[1,2],[A]])", function() {
    expect(util.combo([["a", "b"], ["1", "2"], ["A"]])).to.eql(["a1A", "a2A", "b1A", "b2A"]);
  });

  it("combo([[a,b],[1,2],[A,B]])", function() {
    expect(util.combo([["a", "b"], ["1", "2"], ["A", "B"]])).to.eql(["a1A", "a1B", "a2A", "a2B", "b1A", "b1B", "b2A", "b2B"]);
  });
});
});

0 comments on commit 3884d23

Please sign in to comment.