Skip to content

Commit

Permalink
fix: 词组分组算法
Browse files Browse the repository at this point in the history
  • Loading branch information
hotoo committed Nov 8, 2020
1 parent 92c63f2 commit 3b96e88
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 6 deletions.
7 changes: 3 additions & 4 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

const assign = require("object-assign");
const PINYIN_DICT = require("../data/dict-zi");
const util = require("./util");
const Pinyin = require("./pinyin");
let jieba;
let PHRASES_DICT;
Expand Down Expand Up @@ -105,11 +106,9 @@ function groupPhrases(phrases) {
return phrases[0];
}

const grouped = phrases.reduce(function(phrase, pys) {
return phrase + pys[0];
}, "");
const grouped = util.combo(phrases);

return [grouped];
return grouped;
}

const pinyin = new NodePinyin(PINYIN_DICT);
Expand Down
56 changes: 56 additions & 0 deletions lib/util.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@


/**
 * Combine two pinyin arrays into their cross product.
 *
 * Every element of `a1` is concatenated with every element of `a2`; the
 * result is ordered with `a1` varying slowest. When one of the arrays is
 * empty, a copy of the other array is returned.
 *
 * @param {Array<String>} a1 First array, e.g. ["zhāo", "cháo"].
 * @param {Array<String>} a2 Second array, e.g. ["yáng"].
 * @return {Array<String>} A new one-dimensional array; for the examples
 *   above: ["zhāoyáng", "cháoyáng"].
 */
function combo2array(a1, a2) {
  // Return copies rather than the inputs themselves, so that a caller who
  // mutates the result can never corrupt the array it passed in.
  if (!a1.length) {
    return a2.slice();
  }
  if (!a2.length) {
    return a1.slice();
  }

  const result = [];
  for (const head of a1) {
    for (const tail of a2) {
      result.push(head + tail);
    }
  }
  return result;
}

/**
 * Merge a list of pinyin arrays into a single flat list of combinations.
 *
 * The arrays are folded left to right with `combo2array`, so every returned
 * string is built from one entry of each inner array, in order.
 *
 * @param {Array<Array<String>>} arr List of pinyin arrays, e.g.
 *   [["zhāo", "cháo"], ["yáng"]].
 * @return {Array<String>} One-dimensional array of combined strings; for the
 *   example above: ["zhāoyáng", "cháoyáng"]. An empty `arr` yields a new
 *   empty array; an `arr` with a single entry yields that entry itself.
 */
function combo(arr) {
  if (arr.length === 0) {
    return [];
  }

  // Start from the first entry and fold each following entry into it. With
  // only one entry the loop body never runs and that entry is returned as is.
  let merged = arr[0];
  for (let idx = 1; idx < arr.length; idx++) {
    merged = combo2array(merged, arr[idx]);
  }
  return merged;
}

// Public API of this module: both helpers are exported by name.
exports.combo2array = combo2array;
exports.combo = combo;
4 changes: 2 additions & 2 deletions tests/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ describe("pinyin group", function() {
});

it("groups segments with heteronyms", function() {
const han = "我都喜欢";
const han = "我都喜欢朝阳";
const py = pinyin(han, {segment: true, group: true, heteronym: true});
expect(py).to.eql([["wǒ"], ["dū", "dōu"], ["xǐhuān"]]);
expect(py).to.eql([["wǒ"], ["dū", "dōu"], ["xǐhuān"], ["zhāoyáng", "cháoyáng"]]);
});
});
70 changes: 70 additions & 0 deletions tests/util.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"use strict";

const expect = require("expect.js");
const util = require("../lib/util");

describe("test/util.test.js", function() {
  // combo2array builds the cross product of two string arrays.
  describe("combo2array", function() {
    it("combo2array([], [])", function() {
      expect(util.combo2array([], [])).to.eql([]);
    });

    it("combo2array([a], [])", function() {
      expect(util.combo2array(["a"], [])).to.eql(["a"]);
    });

    it("combo2array([], [1])", function() {
      expect(util.combo2array([], ["1"])).to.eql(["1"]);
    });

    it("combo2array([a], [1])", function() {
      expect(util.combo2array(["a"], ["1"])).to.eql(["a1"]);
    });

    it("combo2array([a,b], [1])", function() {
      expect(util.combo2array(["a", "b"], ["1"])).to.eql(["a1", "b1"]);
    });

    it("combo2array([a], [1,2])", function() {
      expect(util.combo2array(["a"], ["1", "2"])).to.eql(["a1", "a2"]);
    });

    it("combo2array([a,b], [1,2])", function() {
      expect(util.combo2array(["a", "b"], ["1", "2"])).to.eql(["a1", "a2", "b1", "b2"]);
    });

    it("combo2array([a,b,c], [1,2,3])", function() {
      expect(util.combo2array(["a", "b", "c"], ["1", "2", "3"])).to.eql(["a1", "a2", "a3", "b1", "b2", "b3", "c1", "c2", "c3"]);
    });
  });

  // combo folds a list of arrays with combo2array and returns a flat
  // (one-dimensional) array of strings, so every expectation below is a
  // plain array of strings rather than an array of arrays.
  describe("combo", function() {
    it("combo([])", function() {
      expect(util.combo([])).to.eql([]);
    });

    it("combo([[a]])", function() {
      expect(util.combo([["a"]])).to.eql(["a"]);
    });

    it("combo([[a,b]])", function() {
      expect(util.combo([["a", "b"]])).to.eql(["a", "b"]);
    });

    it("combo([[a,b],[1]])", function() {
      expect(util.combo([["a", "b"], ["1"]])).to.eql(["a1", "b1"]);
    });

    it("combo([[a,b],[1,2]])", function() {
      expect(util.combo([["a", "b"], ["1", "2"]])).to.eql(["a1", "a2", "b1", "b2"]);
    });

    it("combo([[a,b],[1,2],[A]])", function() {
      expect(util.combo([["a", "b"], ["1", "2"], ["A"]])).to.eql(["a1A", "a2A", "b1A", "b2A"]);
    });

    it("combo([[a,b],[1,2],[A,B]])", function() {
      expect(util.combo([["a", "b"], ["1", "2"], ["A", "B"]])).to.eql(["a1A", "a1B", "a2A", "a2B", "b1A", "b1B", "b2A", "b2B"]);
    });
  });
});

0 comments on commit 3b96e88

Please sign in to comment.