整理: 合成系の HTTPException をドメインから移動 (#1310)

* refactor: 合成系の `HTTPException` をドメインから移動
* refactor: エラーをリネーム
VOICEVOX · May 29, 2024 · 8aa4ba8 · 8aa4ba8
1 parent fcb964c
commit 8aa4ba8
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 43 deletions.
diff --git a/voicevox_engine/app/routers/tts_pipeline.py b/voicevox_engine/app/routers/tts_pipeline.py
@@ -9,7 +9,10 @@
 from starlette.background import BackgroundTask
 from starlette.responses import FileResponse
 
-from voicevox_engine.cancellable_engine import CancellableEngine
+from voicevox_engine.cancellable_engine import (
+    CancellableEngine,
+    CancellableEngineInternalError,
+)
 from voicevox_engine.core.core_initializer import CoreManager
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import (
@@ -30,7 +33,10 @@
     connect_base64_waves,
 )
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
-from voicevox_engine.tts_pipeline.tts_engine import TTSEngineManager
+from voicevox_engine.tts_pipeline.tts_engine import (
+    TalkSingInvalidInputError,
+    TTSEngineManager,
+)
 from voicevox_engine.utility.path_utility import delete_file
 
 
@@ -256,9 +262,13 @@ def cancellable_synthesis(
                 status_code=404,
                 detail="実験的機能はデフォルトで無効になっています。使用するには引数を指定してください。",
             )
-        f_name = cancellable_engine._synthesis_impl(
-            query, style_id, request, core_version=core_version
-        )
+        try:
+            f_name = cancellable_engine._synthesis_impl(
+                query, style_id, request, core_version=core_version
+            )
+        except CancellableEngineInternalError as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
         if f_name == "":
             raise HTTPException(status_code=422, detail="不明なバージョンです")
 
@@ -332,9 +342,12 @@ def sing_frame_audio_query(
         """
         engine = tts_engines.get_engine(core_version)
         core = core_manager.get_core(core_version)
-        phonemes, f0, volume = engine.create_sing_phoneme_and_f0_and_volume(
-            score, style_id
-        )
+        try:
+            phonemes, f0, volume = engine.create_sing_phoneme_and_f0_and_volume(
+                score, style_id
+            )
+        except TalkSingInvalidInputError as e:
+            raise HTTPException(status_code=400, detail=str(e))
 
         return FrameAudioQuery(
             f0=f0,
@@ -357,9 +370,12 @@ def sing_frame_volume(
         core_version: str | None = None,
     ) -> list[float]:
         engine = tts_engines.get_engine(core_version)
-        return engine.create_sing_volume_from_phoneme_and_f0(
-            score, frame_audio_query.phonemes, frame_audio_query.f0, style_id
-        )
+        try:
+            return engine.create_sing_volume_from_phoneme_and_f0(
+                score, frame_audio_query.phonemes, frame_audio_query.f0, style_id
+            )
+        except TalkSingInvalidInputError as e:
+            raise HTTPException(status_code=400, detail=str(e))
 
     @router.post(
         "/frame_synthesis",
@@ -382,7 +398,10 @@ def frame_synthesis(
         歌唱音声合成を行います。
         """
         engine = tts_engines.get_engine(core_version)
-        wave = engine.frame_synthsize_wave(query, style_id)
+        try:
+            wave = engine.frame_synthsize_wave(query, style_id)
+        except TalkSingInvalidInputError as e:
+            raise HTTPException(status_code=400, detail=str(e))
 
         with NamedTemporaryFile(delete=False) as f:
             soundfile.write(

diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
@@ -12,16 +12,20 @@
 from tempfile import NamedTemporaryFile
 
 import soundfile
-
-# FIXME: remove FastAPI dependency
-from fastapi import HTTPException, Request
+from fastapi import Request
 
 from .core.core_initializer import initialize_cores
 from .metas.Metas import StyleId
 from .model import AudioQuery
 from .tts_pipeline.tts_engine import make_tts_engines_from_cores
 
 
+class CancellableEngineInternalError(Exception):
+    """キャンセル可能エンジンの内部エラー"""
+
+    pass
+
+
 class CancellableEngine:
     """
     音声合成のキャンセル機能に関するクラス
@@ -173,11 +177,9 @@ def _synthesis_impl(
                 audio_file_name = f_name
             else:
                 # ここには来ないはず
-                raise HTTPException(status_code=500, detail="不正な値が生成されました")
+                raise CancellableEngineInternalError("不正な値が生成されました")
         except EOFError:
-            raise HTTPException(
-                status_code=500, detail="既にサブプロセスは終了されています"
-            )
+            raise CancellableEngineInternalError("既にサブプロセスは終了されています")
         except Exception:
             self.finalize_con(request, proc, sub_proc_con1)
             raise

diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -31,6 +31,12 @@
 UPSPEAK_PITCH_MAX = 6.5
 
 
+class TalkSingInvalidInputError(Exception):
+    """Talk と Sing の不正な入力エラー"""
+
+    pass
+
+
 # TODO: move mora utility to mora module
 def to_flatten_moras(accent_phrases: list[AccentPhrase]) -> list[Mora]:
     """
@@ -265,10 +271,8 @@ def calc_phoneme_lengths(
         if i < len(consonant_lengths) - 1:
             # 最初のノートは子音長が0の、pauである必要がある
             if i == 0 and consonant_lengths[i] != 0:
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"consonant_lengths[0] must be 0, but {consonant_lengths[0]}",
-                )
+                msg = f"consonant_lengths[0] must be 0, but {consonant_lengths[0]}"
+                raise TalkSingInvalidInputError(msg)
 
             next_consonant_length = consonant_lengths[i + 1]
             note_duration = note_durations[i]
@@ -334,21 +338,17 @@ def notes_to_keys_and_phonemes(
     for note in notes:
         if note.lyric == "":
             if note.key is not None:
-                raise HTTPException(
-                    status_code=400,
-                    detail="lyricが空文字列の場合、keyはnullである必要があります。",
-                )
+                msg = "lyricが空文字列の場合、keyはnullである必要があります。"
+                raise TalkSingInvalidInputError(msg)
             note_lengths.append(note.frame_length)
             note_consonants.append(-1)
             note_vowels.append(0)  # pau
             phonemes.append(0)  # pau
             phoneme_keys.append(-1)
         else:
             if note.key is None:
-                raise HTTPException(
-                    status_code=400,
-                    detail="keyがnullの場合、lyricは空文字列である必要があります。",
-                )
+                msg = "keyがnullの場合、lyricは空文字列である必要があります。"
+                raise TalkSingInvalidInputError(msg)
 
             # TODO: 1ノートに複数のモーラがある場合の処理
             mora_phonemes = mora_kana_to_mora_phonemes.get(
@@ -357,10 +357,8 @@ def notes_to_keys_and_phonemes(
                 _hira_to_kana(note.lyric)  # type: ignore
             )
             if mora_phonemes is None:
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"lyricが不正です: {note.lyric}",
-                )
+                msg = f"lyricが不正です: {note.lyric}"
+                raise TalkSingInvalidInputError(msg)
 
             consonant, vowel = mora_phonemes
             if consonant is None:
@@ -405,10 +403,8 @@ def frame_query_to_sf_decoder_feature(
 
     for phoneme in query.phonemes:
         if phoneme.phoneme not in Phoneme._PHONEME_LIST:
-            raise HTTPException(
-                status_code=400,
-                detail=f"phoneme {phoneme.phoneme} is not valid",
-            )
+            msg = f"phoneme {phoneme.phoneme} is not valid"
+            raise TalkSingInvalidInputError(msg)
 
         phonemes.append(Phoneme(phoneme.phoneme).id)
         phoneme_lengths.append(phoneme.frame_length)
@@ -650,10 +646,8 @@ def create_sing_volume_from_phoneme_and_f0(
             all_equals = np.bool_(False)
 
         if not all_equals:
-            raise HTTPException(
-                status_code=400,
-                detail="Scoreから抽出した音素列とFrameAudioQueryから抽出した音素列が一致しません。",
-            )
+            msg = "Scoreから抽出した音素列とFrameAudioQueryから抽出した音素列が一致しません。"
+            raise TalkSingInvalidInputError(msg)
 
         # 時間スケールを変更する（音素 → フレーム）
         frame_phonemes = np.repeat(phonemes_array, phoneme_lengths)