Skip to content

Commit

Permalink
perf(toolbox): 词素分析跳过空句
Browse files: browse the repository at this point in the history
  • Loading branch information
mark9804 committed Jun 23, 2024
1 parent 7b6d590 commit b978a30
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ async function getSemantics() {
message: res.message,
type: "error",
});
return;
}
// @ts-ignore
useTranslationCache.setSemantic(props.text, res.tokens);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export type SemanticUnit = {
export enum AnthropicStatusCode {
NORMAL,
TOO_SHORT,
NO_CJK,
NO_API_KEY,
API_ERROR,
JSON_PARSE_ERROR,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ async function parseSemantics(
tokens: [] as SemanticUnit[],
};

const CJKChars = new RegExp(
"[\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]"
);

if (!CJKChars.test(input)) {
result.status = AnthropicStatusCode.NO_CJK;
result.message =
"输入不包含有效日文字符";
}

if (input && input.length < 10) {
result.status = AnthropicStatusCode.TOO_SHORT;
result.message = "输入过短,请在长度超过10个字符的文本中使用。";
Expand Down

0 comments on commit b978a30

Please sign in to comment.