Skip to content

Commit

Permalink
Merge branch 'master' into refactor/get_engine_latest
Browse files Browse the repository at this point in the history
  • Loading branch information
tarepan committed Jun 23, 2024
2 parents 45dc377 + 066dcf0 commit 68a4181
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 117 deletions.
6 changes: 2 additions & 4 deletions voicevox_engine/app/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ def generate_app(
app = configure_middlewares(app, cors_policy_mode, allow_origin)
app = configure_global_exception_handlers(app)

metas_store = MetasStore(speaker_info_dir)
resource_manager = ResourceManager(is_development())
resource_manager.register_dir(speaker_info_dir)
metas_store = MetasStore(speaker_info_dir, resource_manager)

app.include_router(
generate_tts_pipeline_router(
Expand All @@ -78,9 +78,7 @@ def generate_app(
generate_preset_router(preset_manager, verify_mutability_allowed)
)
app.include_router(
generate_speaker_router(
core_manager, resource_manager, metas_store, speaker_info_dir
)
generate_speaker_router(core_manager, resource_manager, metas_store)
)
if engine_manifest.supported_features.manage_library:
app.include_router(
Expand Down
121 changes: 10 additions & 111 deletions voicevox_engine/app/routers/character.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
"""話者情報機能を提供する API Router"""

from pathlib import Path
from typing import Annotated, Literal, TypeAlias
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import FileResponse
from pydantic.json_schema import SkipJsonSchema

from voicevox_engine.core.core_initializer import CoreManager
from voicevox_engine.metas.Metas import Speaker, SpeakerInfo
from voicevox_engine.metas.MetasStore import MetasStore, filter_characters_and_styles
from voicevox_engine.metas.MetasStore import MetasStore, ResourceFormat
from voicevox_engine.resource_manager import ResourceManager, ResourceManagerError

RESOURCE_ENDPOINT = "_resources"
ResourceFormat: TypeAlias = Literal["base64", "url"]


async def _get_resource_baseurl(request: Request) -> str:
Expand All @@ -24,7 +22,6 @@ def generate_speaker_router(
core_manager: CoreManager,
resource_manager: ResourceManager,
metas_store: MetasStore,
speaker_info_dir: Path,
) -> APIRouter:
"""話者情報 API Router を生成する"""
router = APIRouter(tags=["その他"])
Expand All @@ -47,116 +44,16 @@ def speaker_info(
指定されたspeaker_uuidの話者に関する情報をjson形式で返します。
画像や音声はresource_formatで指定した形式で返されます。
"""
return _speaker_info(
version = core_version or core_manager.latest_version()
core = core_manager.get_core(version)
return metas_store.speaker_info(
speaker_uuid=speaker_uuid,
speaker_or_singer="speaker",
core_version=core_version,
core_characters=core.characters,
resource_baseurl=resource_baseurl,
resource_format=resource_format,
)

# FIXME: この関数をどこかに切り出す
def _speaker_info(
    speaker_uuid: str,
    speaker_or_singer: Literal["speaker", "singer"],
    core_version: str | None,
    resource_baseurl: str,
    resource_format: ResourceFormat,
) -> SpeakerInfo:
    """
    Build the additional information (policy, portrait, per-style assets)
    for one character.

    Parameters
    ----------
    speaker_uuid : str
        UUID of the character to look up.
    speaker_or_singer : Literal["speaker", "singer"]
        Passed to `filter_characters_and_styles` to select which kind of
        character is searched.
    core_version : str | None
        Core version to use. A falsy value falls back to
        `core_manager.latest_version()`.
    resource_baseurl : str
        Prefix joined with "/" in front of the resource string when
        `resource_format` is not "base64".
    resource_format : ResourceFormat
        "base64" returns the resource manager's base64 string as is;
        otherwise the resource manager's "hash" string is appended to
        `resource_baseurl`.

    Returns
    -------
    SpeakerInfo
        Policy text, portrait and one entry per style of the character.

    Raises
    ------
    HTTPException
        404 when no character matches `speaker_uuid`; 500 when a required
        file is missing (FileNotFoundError) or the resource manager raises
        ResourceManagerError.
    """
    # Character metadata shipped with the engine must follow this layout:
    # {root_dir}/
    #   character_info/
    #     {speaker_uuid_0}/
    #       policy.md
    #       portrait.png
    #       icons/
    #         {id_0}.png
    #         {id_1}.png
    #         ...
    #       portraits/
    #         {id_0}.png
    #         {id_1}.png
    #         ...
    #       voice_samples/
    #         {id_0}_001.wav
    #         {id_0}_002.wav
    #         {id_0}_003.wav
    #         {id_1}_001.wav
    #         ...
    #     {speaker_uuid_1}/
    #       ...

    version = core_version or core_manager.latest_version()

    # Look up the requested character among the combined metas.
    core_characters = core_manager.get_core(version).characters
    characters = metas_store.load_combined_metas(core_characters)
    speakers = filter_characters_and_styles(characters, speaker_or_singer)
    speaker = next(
        (spk for spk in speakers if spk.speaker_uuid == speaker_uuid), None
    )
    if speaker is None:
        raise HTTPException(status_code=404, detail="該当する話者が見つかりません")

    speaker_path = speaker_info_dir / speaker_uuid
    # The resource manager mode depends only on the requested format.
    resource_mode: Literal["hash", "base64"] = (
        "hash" if resource_format == "url" else "base64"
    )

    def _resource_str(path: Path) -> str:
        """Return the resource at `path` in the requested format."""
        resource_str = resource_manager.resource_str(path, resource_mode)
        if resource_format == "base64":
            return resource_str
        return f"{resource_baseurl}/{resource_str}"

    # Collect the character's assets; any missing file is reported as 500.
    try:
        policy = (speaker_path / "policy.md").read_text("utf-8")
        portrait = _resource_str(speaker_path / "portrait.png")

        style_infos = []
        for style in speaker.styles:
            style_id = style.id

            icon = _resource_str(speaker_path / "icons" / f"{style_id}.png")

            # The per-style portrait is optional: absent file -> None.
            style_portrait_path = speaker_path / "portraits" / f"{style_id}.png"
            style_portrait = None
            if style_portrait_path.exists():
                style_portrait = _resource_str(style_portrait_path)

            # Exactly three samples are expected: {id}_001 .. {id}_003.
            voice_samples: list[str] = [
                _resource_str(
                    speaker_path / "voice_samples" / f"{style_id}_{num:03d}.wav"
                )
                for num in range(1, 4)
            ]

            style_infos.append(
                {
                    "id": style_id,
                    "icon": icon,
                    "portrait": style_portrait,
                    "voice_samples": voice_samples,
                }
            )
    except (FileNotFoundError, ResourceManagerError) as err:
        msg = "追加情報が見つかりませんでした"
        # Keep the root cause attached for logs and debugging.
        raise HTTPException(status_code=500, detail=msg) from err

    return SpeakerInfo(policy=policy, portrait=portrait, style_infos=style_infos)

@router.get("/singers")
def singers(core_version: str | SkipJsonSchema[None] = None) -> list[Speaker]:
"""歌手情報の一覧を取得します"""
Expand All @@ -175,10 +72,12 @@ def singer_info(
指定されたspeaker_uuidの歌手に関する情報をjson形式で返します。
画像や音声はresource_formatで指定した形式で返されます。
"""
return _speaker_info(
version = core_version or core_manager.latest_version()
core = core_manager.get_core(version)
return metas_store.speaker_info(
speaker_uuid=speaker_uuid,
speaker_or_singer="singer",
core_version=core_version,
core_characters=core.characters,
resource_baseurl=resource_baseurl,
resource_format=resource_format,
)
Expand Down
113 changes: 111 additions & 2 deletions voicevox_engine/metas/MetasStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@

from dataclasses import dataclass
from pathlib import Path
from typing import Final, Literal
from typing import Final, Literal, TypeAlias

from fastapi import HTTPException
from pydantic import BaseModel, Field

from voicevox_engine.core.core_adapter import CoreCharacter, CoreCharacterStyle
from voicevox_engine.metas.Metas import (
Speaker,
SpeakerInfo,
SpeakerStyle,
SpeakerSupportedFeatures,
StyleId,
)
from voicevox_engine.resource_manager import ResourceManager, ResourceManagerError

ResourceFormat: TypeAlias = Literal["base64", "url"]


def cast_styles(cores: list[CoreCharacterStyle]) -> list[SpeakerStyle]:
Expand Down Expand Up @@ -68,13 +73,17 @@ class MetasStore:
話者やスタイルのメタ情報を管理する
"""

def __init__(self, engine_speakers_path: Path) -> None:
def __init__(
self, engine_speakers_path: Path, resource_manager: ResourceManager
) -> None:
"""
Parameters
----------
engine_speakers_path : Path
エンジンに含まれる話者メタ情報ディレクトリのパス。
"""
self._speakers_path = engine_speakers_path
self._resource_manager = resource_manager
# エンジンに含まれる各話者のメタ情報
self._loaded_metas: dict[str, _EngineSpeaker] = {
folder.name: _EngineSpeaker.model_validate_json(
Expand Down Expand Up @@ -111,6 +120,106 @@ def load_combined_metas(
)
return characters

def speaker_info(
    self,
    speaker_uuid: str,
    speaker_or_singer: Literal["speaker", "singer"],
    core_characters: list[CoreCharacter],
    resource_baseurl: str,
    resource_format: ResourceFormat,
) -> SpeakerInfo:
    """
    Build the additional information (policy, portrait, per-style assets)
    for one character.

    Parameters
    ----------
    speaker_uuid : str
        UUID of the character to look up.
    speaker_or_singer : Literal["speaker", "singer"]
        Passed to `filter_characters_and_styles` to select which kind of
        character is searched.
    core_characters : list[CoreCharacter]
        Characters provided by the core; combined with the engine metas
        through `load_combined_metas`.
    resource_baseurl : str
        Prefix joined with "/" in front of the resource string when
        `resource_format` is not "base64".
    resource_format : ResourceFormat
        "base64" returns the resource manager's base64 string as is;
        otherwise the resource manager's "hash" string is appended to
        `resource_baseurl`.

    Returns
    -------
    SpeakerInfo
        Policy text, portrait and one entry per style of the character.

    Raises
    ------
    HTTPException
        404 when no character matches `speaker_uuid`; 500 when a required
        file is missing (FileNotFoundError) or the resource manager raises
        ResourceManagerError.
    """
    # Character information must follow this directory layout:
    # {engine_speakers_path}/
    #     {speaker_uuid_0}/
    #         policy.md
    #         portrait.png
    #         icons/
    #             {id_0}.png
    #             {id_1}.png
    #             ...
    #         portraits/
    #             {id_0}.png
    #             {id_1}.png
    #             ...
    #         voice_samples/
    #             {id_0}_001.wav
    #             {id_0}_002.wav
    #             {id_0}_003.wav
    #             {id_1}_001.wav
    #             ...
    #     {speaker_uuid_1}/
    #         ...

    # Look up the requested character among the combined metas.
    characters = self.load_combined_metas(core_characters)
    speakers = filter_characters_and_styles(characters, speaker_or_singer)
    speaker = next(
        (spk for spk in speakers if spk.speaker_uuid == speaker_uuid), None
    )
    if speaker is None:
        # FIXME: HTTPException does not belong to this module's domain; replace it.
        raise HTTPException(status_code=404, detail="該当する話者が見つかりません")

    speaker_path = self._speakers_path / speaker_uuid
    # The resource manager mode depends only on the requested format.
    resource_mode: Literal["hash", "base64"] = (
        "hash" if resource_format == "url" else "base64"
    )

    def _resource_str(path: Path) -> str:
        """Return the resource at `path` in the requested format."""
        resource_str = self._resource_manager.resource_str(path, resource_mode)
        if resource_format == "base64":
            return resource_str
        return f"{resource_baseurl}/{resource_str}"

    # Collect the character's assets; any missing file is reported as 500.
    try:
        policy = (speaker_path / "policy.md").read_text("utf-8")
        portrait = _resource_str(speaker_path / "portrait.png")

        style_infos = []
        for style in speaker.styles:
            style_id = style.id

            icon = _resource_str(speaker_path / "icons" / f"{style_id}.png")

            # The per-style portrait is optional: absent file -> None.
            style_portrait_path = speaker_path / "portraits" / f"{style_id}.png"
            style_portrait = None
            if style_portrait_path.exists():
                style_portrait = _resource_str(style_portrait_path)

            # Exactly three samples are expected: {id}_001 .. {id}_003.
            voice_samples: list[str] = [
                _resource_str(
                    speaker_path / "voice_samples" / f"{style_id}_{num:03d}.wav"
                )
                for num in range(1, 4)
            ]

            style_infos.append(
                {
                    "id": style_id,
                    "icon": icon,
                    "portrait": style_portrait,
                    "voice_samples": voice_samples,
                }
            )
    except (FileNotFoundError, ResourceManagerError) as err:
        # FIXME: HTTPException does not belong to this module's domain; replace it.
        msg = "追加情報が見つかりませんでした"
        # Keep the root cause attached for logs and debugging.
        raise HTTPException(status_code=500, detail=msg) from err

    return SpeakerInfo(policy=policy, portrait=portrait, style_infos=style_infos)

def talk_characters(self, core_characters: list[CoreCharacter]) -> list[Speaker]:
"""話せるキャラクターの情報の一覧を取得する。"""
characters = self.load_combined_metas(core_characters)
Expand Down

0 comments on commit 68a4181

Please sign in to comment.