|
2 | 2 | "use strict"; |
3 | 3 | import {RuleHelper} from "textlint-rule-helper" |
4 | 4 | import ObjectAssign from "object-assign" |
| 5 | +import {getTokenizer} from "kuromojin"; |
| 6 | +import splitSentences from "sentence-splitter"; |
| 7 | +import Source from "structured-source"; |
5 | 8 | const defaultOptions = {max: 3}; |
6 | | -function countTen(text) { |
7 | | - return text.split("、").length - 1; |
8 | | -} |
9 | 9 | /** |
10 | 10 | * @param {RuleContext} context |
11 | 11 | * @param {object} options |
12 | 12 | */ |
13 | 13 | export default function (context, options = {}) { |
14 | 14 | options = ObjectAssign({}, defaultOptions, options); |
15 | 15 | const maxLen = options.max; |
16 | | - const punctuation = /[。]/; |
17 | 16 | let helper = new RuleHelper(context); |
18 | 17 | let {Syntax, RuleError, report, getSource} = context; |
19 | | - let currentParagraphTexts = []; |
20 | 18 | return { |
21 | | - [Syntax.Paragraph](){ |
22 | | - currentParagraphTexts = [] |
23 | | - }, |
24 | | - [Syntax.Str](node){ |
25 | | - // ignore text from external factor |
26 | | - if (helper.isChildNode(node, [Syntax.Link, Syntax.Image, Syntax.BlockQuote])) { |
| 19 | + [Syntax.Paragraph](node){ |
| 20 | + if (helper.isChildNode(node, [Syntax.BlockQuote])) { |
27 | 21 | return; |
28 | 22 | } |
29 | | - currentParagraphTexts.push(node); |
30 | | - }, |
31 | | - [Syntax.Paragraph + ":exit"](){ |
32 | | - let currentTenCount = 0; |
| 23 | + let sentences = splitSentences(getSource(node), { |
| 24 | + charRegExp: /[。\?\!?!]/, |
| 25 | + newLineCharacters: "\n\n" |
| 26 | + }); |
33 | 27 | /* |
34 | 28 | <p> |
35 | 29 | <str><code><img><str> |
36 | 30 | <str> |
37 | 31 | </p> |
38 | 32 | */ |
39 | | - currentParagraphTexts.forEach(strNode => { |
40 | | - let paddingLine = 0; |
41 | | - let paddingColumn = 0; |
42 | | - let text = getSource(strNode); |
43 | | - let characters = text.split(""); |
44 | | - characters.forEach(char => { |
45 | | - if (char === "、") { |
46 | | - currentTenCount++; |
47 | | - } |
48 | | - if (char === "。") { |
49 | | - // reset |
50 | | - currentTenCount = 0; |
51 | | - } |
52 | | - // report |
53 | | - if (currentTenCount >= maxLen) { |
54 | | - var ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, { |
55 | | - line: paddingLine, |
56 | | - column: paddingColumn |
57 | | - }); |
58 | | - report(strNode, ruleError); |
59 | | - currentTenCount = 0; |
60 | | - } |
61 | | - // calc padding{line,column} |
62 | | - if (char === "\n") { |
63 | | - paddingLine++; |
64 | | - paddingColumn = 0; |
65 | | - } else { |
66 | | - paddingColumn++; |
67 | | - } |
| 33 | + return getTokenizer().then(tokenizer => { |
| 34 | + sentences.forEach(sentence => { |
| 35 | + let text = sentence.value; |
| 36 | + let source = new Source(text); |
| 37 | + let currentTenCount = 0; |
| 38 | + let tokens = tokenizer.tokenizeForSentence(text); |
| 39 | + let lastToken = null; |
| 40 | + tokens.forEach(token => { |
| 41 | + let surface = token.surface_form; |
| 42 | + if (surface === "、") { |
| 43 | + currentTenCount++; |
| 44 | + lastToken = token; |
| 45 | + } |
| 46 | + if (surface === "。") { |
| 47 | + // reset |
| 48 | + currentTenCount = 0; |
| 49 | + } |
| 50 | + // report |
| 51 | + if (currentTenCount >= maxLen) { |
| 52 | + let position = source.indexToPosition(lastToken.word_position - 1); |
| 53 | + let ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, { |
| 54 | + line: position.line-1, |
| 55 | + column:position.column |
| 56 | + }); |
| 57 | + report(node, ruleError); |
| 58 | + currentTenCount = 0; |
| 59 | + } |
| 60 | + }); |
68 | 61 | }); |
69 | 62 | }); |
70 | 63 | } |
|
0 commit comments