Merge ee4a01c into d04f702

VOICEVOX · May 26, 2024 · 1edb97b
2 parents d04f702 + ee4a01c
commit 1edb97b
Show file tree

Hide file tree

Showing 7 changed files with 242 additions and 234 deletions.
diff --git a/test/user_dict/test_user_dict.py b/test/user_dict/test_user_dict.py
@@ -5,16 +5,15 @@
 import pytest
 from pyopenjtalk import g2p, unset_user_dict
 
-from voicevox_engine.model import UserDictWord, WordTypes
 from voicevox_engine.user_dict.part_of_speech_data import (
     MAX_PRIORITY,
-    part_of_speech_data,
-)
-from voicevox_engine.user_dict.user_dict import (
     UserDictInputError,
-    UserDictionary,
-    _create_word,
+    UserDictWord,
+    WordTypes,
+    create_word,
+    part_of_speech_data,
 )
+from voicevox_engine.user_dict.user_dict import UserDictionary
 
 # jsonとして保存される正しい形式の辞書データ
 valid_dict_dict_json = {
@@ -76,7 +75,7 @@ def test_read_not_exist_json(tmp_path: Path) -> None:
 
 def test_create_word() -> None:
     # 将来的に品詞などが追加された時にテストを増やす
-    assert _create_word(
+    assert create_word(
         surface="test",
         pronunciation="テスト",
         accent_type=1,
@@ -212,7 +211,7 @@ def test_priority() -> None:
     for pos in part_of_speech_data:
         for i in range(MAX_PRIORITY + 1):
             assert (
-                _create_word(
+                create_word(
                     surface="test",
                     pronunciation="テスト",
                     accent_type=1,

diff --git a/test/user_dict/test_user_dict_model.py b/test/user_dict/test_user_dict_model.py
@@ -3,8 +3,8 @@
 import pytest
 from pydantic import ValidationError
 
-from voicevox_engine.model import UserDictWord
 from voicevox_engine.tts_pipeline.kana_converter import parse_kana
+from voicevox_engine.user_dict.part_of_speech_data import UserDictWord
 
 
 class TestModel(TypedDict):

diff --git a/test/user_dict/test_word_types.py b/test/user_dict/test_word_types.py
@@ -1,5 +1,4 @@
-from voicevox_engine.model import WordTypes
-from voicevox_engine.user_dict.part_of_speech_data import part_of_speech_data
+from voicevox_engine.user_dict.part_of_speech_data import WordTypes, part_of_speech_data
 
 
 def test_word_types() -> None:

diff --git a/voicevox_engine/app/routers/user_dict.py b/voicevox_engine/app/routers/user_dict.py
@@ -6,9 +6,14 @@
 from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
 from pydantic import ValidationError
 
-from voicevox_engine.model import UserDictWord, WordTypes
-from voicevox_engine.user_dict.part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY
-from voicevox_engine.user_dict.user_dict import UserDictInputError, UserDictionary
+from voicevox_engine.user_dict.part_of_speech_data import (
+    MAX_PRIORITY,
+    MIN_PRIORITY,
+    UserDictInputError,
+    UserDictWord,
+    WordTypes,
+)
+from voicevox_engine.user_dict.user_dict import UserDictionary
 
 from ..dependencies import check_disabled_mutable_api
 

diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py
@@ -1,8 +1,7 @@
 from enum import Enum
-from re import findall, fullmatch
 from typing import Any
 
-from pydantic import BaseModel, Field, StrictStr, validator
+from pydantic import BaseModel, Field, StrictStr
 
 from .metas.Metas import Speaker, SpeakerInfo
 
@@ -204,128 +203,6 @@ class InstalledLibraryInfo(BaseLibraryInfo):
     uninstallable: bool = Field(title="アンインストール可能かどうか")
 
 
-USER_DICT_MIN_PRIORITY = 0
-USER_DICT_MAX_PRIORITY = 10
-
-
-class UserDictWord(BaseModel):
-    """
-    辞書のコンパイルに使われる情報
-    """
-
-    surface: str = Field(title="表層形")
-    priority: int = Field(
-        title="優先度", ge=USER_DICT_MIN_PRIORITY, le=USER_DICT_MAX_PRIORITY
-    )
-    context_id: int = Field(title="文脈ID", default=1348)
-    part_of_speech: str = Field(title="品詞")
-    part_of_speech_detail_1: str = Field(title="品詞細分類1")
-    part_of_speech_detail_2: str = Field(title="品詞細分類2")
-    part_of_speech_detail_3: str = Field(title="品詞細分類3")
-    inflectional_type: str = Field(title="活用型")
-    inflectional_form: str = Field(title="活用形")
-    stem: str = Field(title="原形")
-    yomi: str = Field(title="読み")
-    pronunciation: str = Field(title="発音")
-    accent_type: int = Field(title="アクセント型")
-    mora_count: int | None = Field(title="モーラ数")
-    accent_associative_rule: str = Field(title="アクセント結合規則")
-
-    class Config:
-        validate_assignment = True
-
-    @validator("surface")
-    def convert_to_zenkaku(cls, surface: str) -> str:
-        return surface.translate(
-            str.maketrans(
-                "".join(chr(0x21 + i) for i in range(94)),
-                "".join(chr(0xFF01 + i) for i in range(94)),
-            )
-        )
-
-    @validator("pronunciation", pre=True)
-    def check_is_katakana(cls, pronunciation: str) -> str:
-        if not fullmatch(r"[ァ-ヴー]+", pronunciation):
-            raise ValueError("発音は有効なカタカナでなくてはいけません。")
-        sutegana = ["ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ヮ", "ッ"]
-        for i in range(len(pronunciation)):
-            if pronunciation[i] in sutegana:
-                # 「キャット」のように、捨て仮名が連続する可能性が考えられるので、
-                # 「ッ」に関しては「ッ」そのものが連続している場合と、「ッ」の後にほかの捨て仮名が連続する場合のみ無効とする
-                if i < len(pronunciation) - 1 and (
-                    pronunciation[i + 1] in sutegana[:-1]
-                    or (
-                        pronunciation[i] == sutegana[-1]
-                        and pronunciation[i + 1] == sutegana[-1]
-                    )
-                ):
-                    raise ValueError("無効な発音です。(捨て仮名の連続)")
-            if pronunciation[i] == "ヮ":
-                if i != 0 and pronunciation[i - 1] not in ["ク", "グ"]:
-                    raise ValueError(
-                        "無効な発音です。(「くゎ」「ぐゎ」以外の「ゎ」の使用)"
-                    )
-        return pronunciation
-
-    @validator("mora_count", pre=True, always=True)
-    def check_mora_count_and_accent_type(
-        cls, mora_count: int | None, values: Any
-    ) -> int | None:
-        if "pronunciation" not in values or "accent_type" not in values:
-            # 適切な場所でエラーを出すようにする
-            return mora_count
-
-        if mora_count is None:
-            rule_others = (
-                "[イ][ェ]|[ヴ][ャュョ]|[トド][ゥ]|[テデ][ィャュョ]|[デ][ェ]|[クグ][ヮ]"
-            )
-            rule_line_i = "[キシチニヒミリギジビピ][ェャュョ]"
-            rule_line_u = "[ツフヴ][ァ]|[ウスツフヴズ][ィ]|[ウツフヴ][ェォ]"
-            rule_one_mora = "[ァ-ヴー]"
-            mora_count = len(
-                findall(
-                    f"(?:{rule_others}|{rule_line_i}|{rule_line_u}|{rule_one_mora})",
-                    values["pronunciation"],
-                )
-            )
-
-        if not 0 <= values["accent_type"] <= mora_count:
-            raise ValueError(
-                "誤ったアクセント型です({})。 expect: 0 <= accent_type <= {}".format(
-                    values["accent_type"], mora_count
-                )
-            )
-        return mora_count
-
-
-class PartOfSpeechDetail(BaseModel):
-    """
-    品詞ごとの情報
-    """
-
-    part_of_speech: str = Field(title="品詞")
-    part_of_speech_detail_1: str = Field(title="品詞細分類1")
-    part_of_speech_detail_2: str = Field(title="品詞細分類2")
-    part_of_speech_detail_3: str = Field(title="品詞細分類3")
-    # context_idは辞書の左・右文脈IDのこと
-    # https://github.com/VOICEVOX/open_jtalk/blob/427cfd761b78efb6094bea3c5bb8c968f0d711ab/src/mecab-naist-jdic/_left-id.def # noqa
-    context_id: int = Field(title="文脈ID")
-    cost_candidates: list[int] = Field(title="コストのパーセンタイル")
-    accent_associative_rules: list[str] = Field(title="アクセント結合規則の一覧")
-
-
-class WordTypes(str, Enum):
-    """
-    fastapiでword_type引数を検証する時に使用するクラス
-    """
-
-    PROPER_NOUN = "PROPER_NOUN"
-    COMMON_NOUN = "COMMON_NOUN"
-    VERB = "VERB"
-    ADJECTIVE = "ADJECTIVE"
-    SUFFIX = "SUFFIX"
-
-
 class SupportedFeaturesInfo(BaseModel):
     """
     エンジンの機能の情報