Patch summary (text before the first "diff --git" header is ignored by patch tools):
  - cancellable_engine.py and tts_engine.py stop raising fastapi.HTTPException and raise
    the new CancellableEngineInternalError / TalkSingInvalidInputError instead.
  - app/routers/tts_pipeline.py catches those exceptions and converts them to
    HTTP 500 / HTTP 400 responses, chaining the original exception with "from e".
  - Line breaks and indentation were restored from the hunk headers; the "index" hashes
    are those of the original patch.

diff --git a/voicevox_engine/app/routers/tts_pipeline.py b/voicevox_engine/app/routers/tts_pipeline.py
index 6e618b6eb..5f50d094a 100644
--- a/voicevox_engine/app/routers/tts_pipeline.py
+++ b/voicevox_engine/app/routers/tts_pipeline.py
@@ -9,7 +9,10 @@
 from starlette.background import BackgroundTask
 from starlette.responses import FileResponse
 
-from voicevox_engine.cancellable_engine import CancellableEngine
+from voicevox_engine.cancellable_engine import (
+    CancellableEngine,
+    CancellableEngineInternalError,
+)
 from voicevox_engine.core.core_initializer import CoreManager
 from voicevox_engine.metas.Metas import StyleId
 from voicevox_engine.model import (
@@ -30,7 +33,10 @@
     connect_base64_waves,
 )
 from voicevox_engine.tts_pipeline.kana_converter import create_kana, parse_kana
-from voicevox_engine.tts_pipeline.tts_engine import TTSEngineManager
+from voicevox_engine.tts_pipeline.tts_engine import (
+    TalkSingInvalidInputError,
+    TTSEngineManager,
+)
 from voicevox_engine.utility.path_utility import delete_file
 
 
@@ -256,9 +262,13 @@ def cancellable_synthesis(
                 status_code=404,
                 detail="実験的機能はデフォルトで無効になっています。使用するには引数を指定してください。",
             )
-        f_name = cancellable_engine._synthesis_impl(
-            query, style_id, request, core_version=core_version
-        )
+        try:
+            f_name = cancellable_engine._synthesis_impl(
+                query, style_id, request, core_version=core_version
+            )
+        except CancellableEngineInternalError as e:
+            raise HTTPException(status_code=500, detail=str(e)) from e
+
         if f_name == "":
             raise HTTPException(status_code=422, detail="不明なバージョンです")
 
@@ -332,9 +342,12 @@ def sing_frame_audio_query(
         """
         engine = tts_engines.get_engine(core_version)
         core = core_manager.get_core(core_version)
-        phonemes, f0, volume = engine.create_sing_phoneme_and_f0_and_volume(
-            score, style_id
-        )
+        try:
+            phonemes, f0, volume = engine.create_sing_phoneme_and_f0_and_volume(
+                score, style_id
+            )
+        except TalkSingInvalidInputError as e:
+            raise HTTPException(status_code=400, detail=str(e)) from e
 
         return FrameAudioQuery(
             f0=f0,
@@ -357,9 +370,12 @@ def sing_frame_volume(
         core_version: str | None = None,
     ) -> list[float]:
         engine = tts_engines.get_engine(core_version)
-        return engine.create_sing_volume_from_phoneme_and_f0(
-            score, frame_audio_query.phonemes, frame_audio_query.f0, style_id
-        )
+        try:
+            return engine.create_sing_volume_from_phoneme_and_f0(
+                score, frame_audio_query.phonemes, frame_audio_query.f0, style_id
+            )
+        except TalkSingInvalidInputError as e:
+            raise HTTPException(status_code=400, detail=str(e)) from e
 
     @router.post(
         "/frame_synthesis",
@@ -382,7 +398,10 @@ def frame_synthesis(
         歌唱音声合成を行います。
         """
         engine = tts_engines.get_engine(core_version)
-        wave = engine.frame_synthsize_wave(query, style_id)
+        try:
+            wave = engine.frame_synthsize_wave(query, style_id)
+        except TalkSingInvalidInputError as e:
+            raise HTTPException(status_code=400, detail=str(e)) from e
 
         with NamedTemporaryFile(delete=False) as f:
             soundfile.write(
diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py
index cef3ec1a3..985d042ac 100644
--- a/voicevox_engine/cancellable_engine.py
+++ b/voicevox_engine/cancellable_engine.py
@@ -12,9 +12,7 @@
 from tempfile import NamedTemporaryFile
 
 import soundfile
-
-# FIXME: remove FastAPI dependency
-from fastapi import HTTPException, Request
+from fastapi import Request
 
 from .core.core_initializer import initialize_cores
 from .metas.Metas import StyleId
@@ -22,6 +20,12 @@
 from .tts_pipeline.tts_engine import make_tts_engines_from_cores
 
 
+class CancellableEngineInternalError(Exception):
+    """キャンセル可能エンジンの内部エラー"""
+
+    pass
+
+
 class CancellableEngine:
     """
     音声合成のキャンセル機能に関するクラス
@@ -173,11 +177,9 @@ def _synthesis_impl(
                 audio_file_name = f_name
             else:
                 # ここには来ないはず
-                raise HTTPException(status_code=500, detail="不正な値が生成されました")
+                raise CancellableEngineInternalError("不正な値が生成されました")
         except EOFError:
-            raise HTTPException(
-                status_code=500, detail="既にサブプロセスは終了されています"
-            )
+            raise CancellableEngineInternalError("既にサブプロセスは終了されています")
         except Exception:
             self.finalize_con(request, proc, sub_proc_con1)
             raise
diff --git a/voicevox_engine/tts_pipeline/tts_engine.py b/voicevox_engine/tts_pipeline/tts_engine.py
index cc35cf5ae..7e80ca38a 100644
--- a/voicevox_engine/tts_pipeline/tts_engine.py
+++ b/voicevox_engine/tts_pipeline/tts_engine.py
@@ -31,6 +31,12 @@
 UPSPEAK_PITCH_MAX = 6.5
 
 
+class TalkSingInvalidInputError(Exception):
+    """Talk と Sing の不正な入力エラー"""
+
+    pass
+
+
 # TODO: move mora utility to mora module
 def to_flatten_moras(accent_phrases: list[AccentPhrase]) -> list[Mora]:
     """
@@ -265,10 +271,8 @@ def calc_phoneme_lengths(
         if i < len(consonant_lengths) - 1:
             # 最初のノートは子音長が0の、pauである必要がある
             if i == 0 and consonant_lengths[i] != 0:
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"consonant_lengths[0] must be 0, but {consonant_lengths[0]}",
-                )
+                msg = f"consonant_lengths[0] must be 0, but {consonant_lengths[0]}"
+                raise TalkSingInvalidInputError(msg)
 
             next_consonant_length = consonant_lengths[i + 1]
             note_duration = note_durations[i]
@@ -334,10 +338,8 @@ def notes_to_keys_and_phonemes(
     for note in notes:
         if note.lyric == "":
             if note.key is not None:
-                raise HTTPException(
-                    status_code=400,
-                    detail="lyricが空文字列の場合、keyはnullである必要があります。",
-                )
+                msg = "lyricが空文字列の場合、keyはnullである必要があります。"
+                raise TalkSingInvalidInputError(msg)
             note_lengths.append(note.frame_length)
             note_consonants.append(-1)
             note_vowels.append(0)  # pau
@@ -345,10 +347,8 @@ def notes_to_keys_and_phonemes(
             phoneme_keys.append(-1)
         else:
             if note.key is None:
-                raise HTTPException(
-                    status_code=400,
-                    detail="keyがnullの場合、lyricは空文字列である必要があります。",
-                )
+                msg = "keyがnullの場合、lyricは空文字列である必要があります。"
+                raise TalkSingInvalidInputError(msg)
 
             # TODO: 1ノートに複数のモーラがある場合の処理
             mora_phonemes = mora_kana_to_mora_phonemes.get(
@@ -357,10 +357,8 @@ def notes_to_keys_and_phonemes(
                 _hira_to_kana(note.lyric)  # type: ignore
             )
             if mora_phonemes is None:
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"lyricが不正です: {note.lyric}",
-                )
+                msg = f"lyricが不正です: {note.lyric}"
+                raise TalkSingInvalidInputError(msg)
 
             consonant, vowel = mora_phonemes
             if consonant is None:
@@ -405,10 +403,8 @@ def frame_query_to_sf_decoder_feature(
 
     for phoneme in query.phonemes:
         if phoneme.phoneme not in Phoneme._PHONEME_LIST:
-            raise HTTPException(
-                status_code=400,
-                detail=f"phoneme {phoneme.phoneme} is not valid",
-            )
+            msg = f"phoneme {phoneme.phoneme} is not valid"
+            raise TalkSingInvalidInputError(msg)
         phonemes.append(Phoneme(phoneme.phoneme).id)
         phoneme_lengths.append(phoneme.frame_length)
 
@@ -650,10 +646,8 @@ def create_sing_volume_from_phoneme_and_f0(
             all_equals = np.bool_(False)
 
         if not all_equals:
-            raise HTTPException(
-                status_code=400,
-                detail="Scoreから抽出した音素列とFrameAudioQueryから抽出した音素列が一致しません。",
-            )
+            msg = "Scoreから抽出した音素列とFrameAudioQueryから抽出した音素列が一致しません。"
+            raise TalkSingInvalidInputError(msg)
 
         # 時間スケールを変更する(音素 → フレーム)
         frame_phonemes = np.repeat(phonemes_array, phoneme_lengths)