Skip to content

Commit

Permalink
fix(nlcst-parse-japanese): remove word_position
Browse files Browse the repository at this point in the history
  • Loading branch information
azu committed Oct 8, 2017
1 parent ea16e43 commit 398a89f
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 76 deletions.
2 changes: 0 additions & 2 deletions packages/nlcst-parse-japanese/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ This library uses [kuromoji.js](https://github.com/takuyaa/kuromoji.js#api "kurom

| Property | Example | Description |
| --------------- | :---------- | ------------------------- |
| word_id | 509800 | 辞書内での単語ID |
| word_type | 'KNOWN' | 単語タイプ(辞書に登録されている単語ならKNOWN、未知語ならUNKNOWN) |
| word_position | 1 | 単語の開始位置 |
| surface_form | '黒文字' | 表層形 |
| pos | '名詞' | 品詞 |
| pos_detail_1 | '一般' | 品詞細分類1 |
Expand Down
5 changes: 3 additions & 2 deletions packages/nlcst-parse-japanese/src/nlcst-parse-japanese.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ const { getTokenizer } = require("kuromojin");
export interface IpadicFeatures {
word_id?: number;
word_type: "KNOWN" | "UNKNOWN";
word_position?: number;
surface_form: string;
pos: string;
pos_detail_1: string;
Expand Down Expand Up @@ -233,7 +232,9 @@ function tokenize(
// 分解された文字列単位にNLCST Treeを生成する
for (let tindex = 0; tindex < data.length; tindex++) {
const item = data[tindex];

// REMOVE word_position
// positionがword_positionの代わりとなるため
delete item.word_position;
// 行頭の場合
if (tindex === 0) {
// SentenceNodeをParagraphNodeに追加
Expand Down
45 changes: 0 additions & 45 deletions packages/nlcst-parse-japanese/test/test-patterns/basic/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
"data": {
"word_id": 90,
"word_type": "UNKNOWN",
"word_position": 1,
"surface_form": "1",
"pos": "名詞",
"pos_detail_1": "",
Expand All @@ -55,7 +54,6 @@
"data": {
"word_id": 90,
"word_type": "UNKNOWN",
"word_position": 1,
"surface_form": "1",
"pos": "名詞",
"pos_detail_1": "",
Expand Down Expand Up @@ -84,7 +82,6 @@
"data": {
"word_id": 10,
"word_type": "UNKNOWN",
"word_position": 2,
"surface_form": " ",
"pos": "記号",
"pos_detail_1": "空白",
Expand Down Expand Up @@ -116,7 +113,6 @@
"data": {
"word_id": 43350,
"word_type": "KNOWN",
"word_position": 3,
"surface_form": "これ",
"pos": "名詞",
"pos_detail_1": "代名詞",
Expand Down Expand Up @@ -145,7 +141,6 @@
"data": {
"word_id": 43350,
"word_type": "KNOWN",
"word_position": 3,
"surface_form": "これ",
"pos": "名詞",
"pos_detail_1": "代名詞",
Expand Down Expand Up @@ -179,7 +174,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 5,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -208,7 +202,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 5,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -242,7 +235,6 @@
"data": {
"word_id": 2492910,
"word_type": "KNOWN",
"word_position": 6,
"surface_form": "前段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -271,7 +263,6 @@
"data": {
"word_id": 2492910,
"word_type": "KNOWN",
"word_position": 6,
"surface_form": "前段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -305,7 +296,6 @@
"data": {
"word_id": 23760,
"word_type": "KNOWN",
"word_position": 8,
"surface_form": "です",
"pos": "助動詞",
"pos_detail_1": "*",
Expand Down Expand Up @@ -334,7 +324,6 @@
"data": {
"word_id": 23760,
"word_type": "KNOWN",
"word_position": 8,
"surface_form": "です",
"pos": "助動詞",
"pos_detail_1": "*",
Expand Down Expand Up @@ -365,7 +354,6 @@
"data": {
"word_id": 90940,
"word_type": "KNOWN",
"word_position": 10,
"surface_form": "",
"pos": "記号",
"pos_detail_1": "句点",
Expand Down Expand Up @@ -416,7 +404,6 @@
"data": {
"word_id": 43350,
"word_type": "KNOWN",
"word_position": 11,
"surface_form": "これ",
"pos": "名詞",
"pos_detail_1": "代名詞",
Expand Down Expand Up @@ -445,7 +432,6 @@
"data": {
"word_id": 43350,
"word_type": "KNOWN",
"word_position": 11,
"surface_form": "これ",
"pos": "名詞",
"pos_detail_1": "代名詞",
Expand Down Expand Up @@ -479,7 +465,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 13,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -508,7 +493,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 13,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -542,7 +526,6 @@
"data": {
"word_id": 1857190,
"word_type": "KNOWN",
"word_position": 14,
"surface_form": "中段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -571,7 +554,6 @@
"data": {
"word_id": 1857190,
"word_type": "KNOWN",
"word_position": 14,
"surface_form": "中段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -602,7 +584,6 @@
"data": {
"word_id": 90130,
"word_type": "KNOWN",
"word_position": 16,
"surface_form": "",
"pos": "記号",
"pos_detail_1": "括弧開",
Expand Down Expand Up @@ -636,7 +617,6 @@
"data": {
"word_id": 51530,
"word_type": "KNOWN",
"word_position": 17,
"surface_form": "",
"pos": "名詞",
"pos_detail_1": "",
Expand Down Expand Up @@ -665,7 +645,6 @@
"data": {
"word_id": 51530,
"word_type": "KNOWN",
"word_position": 17,
"surface_form": "",
"pos": "名詞",
"pos_detail_1": "",
Expand Down Expand Up @@ -699,7 +678,6 @@
"data": {
"word_id": 87330,
"word_type": "KNOWN",
"word_position": 18,
"surface_form": "",
"pos": "名詞",
"pos_detail_1": "接尾",
Expand Down Expand Up @@ -728,7 +706,6 @@
"data": {
"word_id": 87330,
"word_type": "KNOWN",
"word_position": 18,
"surface_form": "",
"pos": "名詞",
"pos_detail_1": "接尾",
Expand Down Expand Up @@ -762,7 +739,6 @@
"data": {
"word_id": 93100,
"word_type": "KNOWN",
"word_position": 19,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "連体化",
Expand Down Expand Up @@ -791,7 +767,6 @@
"data": {
"word_id": 93100,
"word_type": "KNOWN",
"word_position": 19,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "連体化",
Expand Down Expand Up @@ -825,7 +800,6 @@
"data": {
"word_id": 35540,
"word_type": "KNOWN",
"word_position": 20,
"surface_form": "場合",
"pos": "名詞",
"pos_detail_1": "副詞可能",
Expand Down Expand Up @@ -854,7 +828,6 @@
"data": {
"word_id": 35540,
"word_type": "KNOWN",
"word_position": 20,
"surface_form": "場合",
"pos": "名詞",
"pos_detail_1": "副詞可能",
Expand Down Expand Up @@ -888,7 +861,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 22,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -917,7 +889,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 22,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -951,7 +922,6 @@
"data": {
"word_id": 1657870,
"word_type": "KNOWN",
"word_position": 23,
"surface_form": "後段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -980,7 +950,6 @@
"data": {
"word_id": 1657870,
"word_type": "KNOWN",
"word_position": 23,
"surface_form": "後段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -1011,7 +980,6 @@
"data": {
"word_id": 90940,
"word_type": "KNOWN",
"word_position": 25,
"surface_form": "",
"pos": "記号",
"pos_detail_1": "句点",
Expand Down Expand Up @@ -1042,7 +1010,6 @@
"data": {
"word_id": 90270,
"word_type": "KNOWN",
"word_position": 26,
"surface_form": "",
"pos": "記号",
"pos_detail_1": "括弧閉",
Expand Down Expand Up @@ -1076,7 +1043,6 @@
"data": {
"word_id": 23760,
"word_type": "KNOWN",
"word_position": 27,
"surface_form": "です",
"pos": "助動詞",
"pos_detail_1": "*",
Expand Down Expand Up @@ -1105,7 +1071,6 @@
"data": {
"word_id": 23760,
"word_type": "KNOWN",
"word_position": 27,
"surface_form": "です",
"pos": "助動詞",
"pos_detail_1": "*",
Expand Down Expand Up @@ -1136,7 +1101,6 @@
"data": {
"word_id": 90940,
"word_type": "KNOWN",
"word_position": 29,
"surface_form": "",
"pos": "記号",
"pos_detail_1": "句点",
Expand Down Expand Up @@ -1187,7 +1151,6 @@
"data": {
"word_id": 43350,
"word_type": "KNOWN",
"word_position": 30,
"surface_form": "これ",
"pos": "名詞",
"pos_detail_1": "代名詞",
Expand Down Expand Up @@ -1216,7 +1179,6 @@
"data": {
"word_id": 43350,
"word_type": "KNOWN",
"word_position": 30,
"surface_form": "これ",
"pos": "名詞",
"pos_detail_1": "代名詞",
Expand Down Expand Up @@ -1250,7 +1212,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 32,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -1279,7 +1240,6 @@
"data": {
"word_id": 93010,
"word_type": "KNOWN",
"word_position": 32,
"surface_form": "",
"pos": "助詞",
"pos_detail_1": "係助詞",
Expand Down Expand Up @@ -1313,7 +1273,6 @@
"data": {
"word_id": 1657870,
"word_type": "KNOWN",
"word_position": 33,
"surface_form": "後段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -1342,7 +1301,6 @@
"data": {
"word_id": 1657870,
"word_type": "KNOWN",
"word_position": 33,
"surface_form": "後段",
"pos": "名詞",
"pos_detail_1": "一般",
Expand Down Expand Up @@ -1376,7 +1334,6 @@
"data": {
"word_id": 23760,
"word_type": "KNOWN",
"word_position": 35,
"surface_form": "です",
"pos": "助動詞",
"pos_detail_1": "*",
Expand Down Expand Up @@ -1405,7 +1362,6 @@
"data": {
"word_id": 23760,
"word_type": "KNOWN",
"word_position": 35,
"surface_form": "です",
"pos": "助動詞",
"pos_detail_1": "*",
Expand Down Expand Up @@ -1436,7 +1392,6 @@
"data": {
"word_id": 90940,
"word_type": "KNOWN",
"word_position": 37,
"surface_form": "",
"pos": "記号",
"pos_detail_1": "句点",
Expand Down
Loading

0 comments on commit 398a89f

Please sign in to comment.