Skip to content

Commit

Permalink
Merge ee4a01c into d04f702
Browse files Browse the repository at this point in the history
  • Loading branch information
tarepan authored May 26, 2024
2 parents d04f702 + ee4a01c commit 1edb97b
Show file tree
Hide file tree
Showing 7 changed files with 242 additions and 234 deletions.
15 changes: 7 additions & 8 deletions test/user_dict/test_user_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@
import pytest
from pyopenjtalk import g2p, unset_user_dict

from voicevox_engine.model import UserDictWord, WordTypes
from voicevox_engine.user_dict.part_of_speech_data import (
MAX_PRIORITY,
part_of_speech_data,
)
from voicevox_engine.user_dict.user_dict import (
UserDictInputError,
UserDictionary,
_create_word,
UserDictWord,
WordTypes,
create_word,
part_of_speech_data,
)
from voicevox_engine.user_dict.user_dict import UserDictionary

# jsonとして保存される正しい形式の辞書データ
valid_dict_dict_json = {
Expand Down Expand Up @@ -76,7 +75,7 @@ def test_read_not_exist_json(tmp_path: Path) -> None:

def test_create_word() -> None:
# 将来的に品詞などが追加された時にテストを増やす
assert _create_word(
assert create_word(
surface="test",
pronunciation="テスト",
accent_type=1,
Expand Down Expand Up @@ -212,7 +211,7 @@ def test_priority() -> None:
for pos in part_of_speech_data:
for i in range(MAX_PRIORITY + 1):
assert (
_create_word(
create_word(
surface="test",
pronunciation="テスト",
accent_type=1,
Expand Down
2 changes: 1 addition & 1 deletion test/user_dict/test_user_dict_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import pytest
from pydantic import ValidationError

from voicevox_engine.model import UserDictWord
from voicevox_engine.tts_pipeline.kana_converter import parse_kana
from voicevox_engine.user_dict.part_of_speech_data import UserDictWord


class TestModel(TypedDict):
Expand Down
3 changes: 1 addition & 2 deletions test/user_dict/test_word_types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from voicevox_engine.model import WordTypes
from voicevox_engine.user_dict.part_of_speech_data import part_of_speech_data
from voicevox_engine.user_dict.part_of_speech_data import WordTypes, part_of_speech_data


def test_word_types() -> None:
Expand Down
11 changes: 8 additions & 3 deletions voicevox_engine/app/routers/user_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,14 @@
from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
from pydantic import ValidationError

from voicevox_engine.model import UserDictWord, WordTypes
from voicevox_engine.user_dict.part_of_speech_data import MAX_PRIORITY, MIN_PRIORITY
from voicevox_engine.user_dict.user_dict import UserDictInputError, UserDictionary
from voicevox_engine.user_dict.part_of_speech_data import (
MAX_PRIORITY,
MIN_PRIORITY,
UserDictInputError,
UserDictWord,
WordTypes,
)
from voicevox_engine.user_dict.user_dict import UserDictionary

from ..dependencies import check_disabled_mutable_api

Expand Down
125 changes: 1 addition & 124 deletions voicevox_engine/model.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from enum import Enum
from re import findall, fullmatch
from typing import Any

from pydantic import BaseModel, Field, StrictStr, validator
from pydantic import BaseModel, Field, StrictStr

from .metas.Metas import Speaker, SpeakerInfo

Expand Down Expand Up @@ -204,128 +203,6 @@ class InstalledLibraryInfo(BaseLibraryInfo):
uninstallable: bool = Field(title="アンインストール可能かどうか")


# Inclusive lower and upper bounds for a user-dictionary word's priority;
# used as the `ge` / `le` constraints of the UserDictWord.priority field.
USER_DICT_MIN_PRIORITY = 0
USER_DICT_MAX_PRIORITY = 10


# Translation table mapping the 94 printable ASCII characters (U+0021..U+007E)
# onto the code points U+FF01..U+FF5E. Built once at import time rather than
# on every validation of `surface`.
_HANKAKU_TO_ZENKAKU = str.maketrans(
    "".join(chr(0x21 + i) for i in range(94)),
    "".join(chr(0xFF01 + i) for i in range(94)),
)

# Small kana ("sutegana"). The sokuon 「ッ」 is kept apart because the
# consecutive-sutegana rule treats it differently from the others.
_SOKUON = "ッ"
_SUTEGANA_WITHOUT_SOKUON = ("ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ヮ")
_SUTEGANA = (*_SUTEGANA_WITHOUT_SOKUON, _SOKUON)

# Regex alternation used to count morae: the multi-character alternatives are
# listed before the single-kana fallback so that they are tried first.
_MORA_PATTERN = (
    "(?:"
    + "|".join(
        (
            # other two-kana combinations
            "[イ][ェ]|[ヴ][ャュョ]|[トド][ゥ]|[テデ][ィャュョ]|[デ][ェ]|[クグ][ヮ]",
            # i-row kana followed by a small kana
            "[キシチニヒミリギジビピ][ェャュョ]",
            # u-row kana followed by a small kana
            "[ツフヴ][ァ]|[ウスツフヴズ][ィ]|[ウツフヴ][ェォ]",
            # any single katakana (or the long-vowel mark) counts as one mora
            "[ァ-ヴー]",
        )
    )
    + ")"
)


# NOTE(review): the class docstring is left in its original language on
# purpose; pydantic presumably surfaces it as the schema description, so
# translating it would change emitted API documentation - confirm before
# editing. In English: "Information used to compile the dictionary".
class UserDictWord(BaseModel):
    """
    辞書のコンパイルに使われる情報
    """

    # Field titles are runtime strings and are therefore kept verbatim.
    surface: str = Field(title="表層形")  # surface form
    priority: int = Field(
        title="優先度", ge=USER_DICT_MIN_PRIORITY, le=USER_DICT_MAX_PRIORITY
    )  # priority, bounded by the module-level constants
    context_id: int = Field(title="文脈ID", default=1348)  # context ID
    part_of_speech: str = Field(title="品詞")  # part of speech
    part_of_speech_detail_1: str = Field(title="品詞細分類1")  # POS subcategory 1
    part_of_speech_detail_2: str = Field(title="品詞細分類2")  # POS subcategory 2
    part_of_speech_detail_3: str = Field(title="品詞細分類3")  # POS subcategory 3
    inflectional_type: str = Field(title="活用型")  # inflectional type
    inflectional_form: str = Field(title="活用形")  # inflectional form
    stem: str = Field(title="原形")  # base form
    yomi: str = Field(title="読み")  # reading
    pronunciation: str = Field(title="発音")  # pronunciation (katakana)
    accent_type: int = Field(title="アクセント型")  # accent type
    mora_count: int | None = Field(title="モーラ数")  # mora count
    accent_associative_rule: str = Field(title="アクセント結合規則")  # accent association rule

    class Config:
        # Re-run the validators when an attribute is assigned after creation.
        validate_assignment = True

    @validator("surface")
    def convert_to_zenkaku(cls, surface: str) -> str:
        """Return `surface` with printable ASCII replaced by full-width forms."""
        return surface.translate(_HANKAKU_TO_ZENKAKU)

    @validator("pronunciation", pre=True)
    def check_is_katakana(cls, pronunciation: str) -> str:
        """Validate that `pronunciation` is a well-formed katakana string.

        Returns the value unchanged when it is valid.

        Raises:
            ValueError: if the value contains characters outside
                ``[ァ-ヴー]``, contains an invalid run of small kana, or uses
                「ヮ」 after a character other than 「ク」 / 「グ」.
        """
        if not fullmatch(r"[ァ-ヴー]+", pronunciation):
            raise ValueError("発音は有効なカタカナでなくてはいけません。")

        last_index = len(pronunciation) - 1
        for i, kana in enumerate(pronunciation):
            # Small kana may legitimately sit next to each other, as in
            # 「キャット」 (「ャ」 followed by 「ッ」). Invalid cases are: any
            # small kana followed by a non-sokuon small kana, or a doubled 「ッ」.
            if kana in _SUTEGANA and i < last_index:
                following = pronunciation[i + 1]
                if following in _SUTEGANA_WITHOUT_SOKUON or (
                    kana == _SOKUON and following == _SOKUON
                ):
                    raise ValueError("無効な発音です。(捨て仮名の連続)")
            if kana == "ヮ":
                # NOTE(review): a leading 「ヮ」 (i == 0) passes this check even
                # though it is not preceded by 「ク」 / 「グ」 - confirm whether
                # that is intended before tightening the condition.
                if i != 0 and pronunciation[i - 1] not in ("ク", "グ"):
                    raise ValueError(
                        "無効な発音です。(「くゎ」「ぐゎ」以外の「ゎ」の使用)"
                    )
        return pronunciation

    @validator("mora_count", pre=True, always=True)
    def check_mora_count_and_accent_type(
        cls, mora_count: int | None, values: Any
    ) -> int | None:
        """Fill in a missing mora count and check `accent_type` against it.

        When `mora_count` is None it is derived from the already-validated
        pronunciation by counting regex matches of the mora pattern.

        Raises:
            ValueError: if `accent_type` is not within ``0..mora_count``.
        """
        if "pronunciation" not in values or "accent_type" not in values:
            # One of the fields this check depends on is absent from `values`;
            # return untouched so the error is reported in the proper place.
            return mora_count

        if mora_count is None:
            mora_count = len(findall(_MORA_PATTERN, values["pronunciation"]))

        if not 0 <= values["accent_type"] <= mora_count:
            raise ValueError(
                "誤ったアクセント型です({})。 expect: 0 <= accent_type <= {}".format(
                    values["accent_type"], mora_count
                )
            )
        return mora_count


# NOTE(review): the class docstring is left in its original language;
# pydantic presumably surfaces it as the schema description - confirm before
# translating. In English: "Information for each part of speech".
class PartOfSpeechDetail(BaseModel):
    """
    品詞ごとの情報
    """

    # Field titles are runtime strings and are kept verbatim; the trailing
    # comments give their English meaning.
    part_of_speech: str = Field(title="品詞")  # part of speech
    part_of_speech_detail_1: str = Field(title="品詞細分類1")  # POS subcategory 1
    part_of_speech_detail_2: str = Field(title="品詞細分類2")  # POS subcategory 2
    part_of_speech_detail_3: str = Field(title="品詞細分類3")  # POS subcategory 3
    # context_id is the dictionary's left/right context ID
    # https://github.com/VOICEVOX/open_jtalk/blob/427cfd761b78efb6094bea3c5bb8c968f0d711ab/src/mecab-naist-jdic/_left-id.def # noqa
    context_id: int = Field(title="文脈ID")  # context ID
    cost_candidates: list[int] = Field(title="コストのパーセンタイル")  # cost percentiles
    accent_associative_rules: list[str] = Field(title="アクセント結合規則の一覧")  # list of accent association rules


# NOTE(review): the class docstring is left in its original language; it may
# be surfaced in generated API schemas - confirm before translating.
# In English: "Class used when validating the word_type argument in FastAPI".
class WordTypes(str, Enum):
    """
    fastapiでword_type引数を検証する時に使用するクラス
    """

    # Each member's value is identical to its name, and members are also
    # `str` instances because of the `str` mixin.
    PROPER_NOUN = "PROPER_NOUN"
    COMMON_NOUN = "COMMON_NOUN"
    VERB = "VERB"
    ADJECTIVE = "ADJECTIVE"
    SUFFIX = "SUFFIX"


class SupportedFeaturesInfo(BaseModel):
"""
エンジンの機能の情報
Expand Down
Loading

0 comments on commit 1edb97b

Please sign in to comment.