Skip to content

Commit

Permalink
Merge branch 'master' into refactor/core_in_metas
Browse files Browse the repository at this point in the history
  • Loading branch information
tarepan committed Jun 25, 2024
2 parents a3a67e7 + f2bba11 commit 8446656
Show file tree
Hide file tree
Showing 26 changed files with 415 additions and 437 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ ADD ./run.py ./presets.yaml ./engine_manifest.json /opt/voicevox_engine/
ADD ./resources /opt/voicevox_engine/resources
ADD ./tools/generate_licenses.py /opt/voicevox_engine/tools/
ADD ./tools/licenses /opt/voicevox_engine/tools/licenses
ADD ./tools/generate_filemap.py /opt/voicevox_engine/tools/

# Replace version
ARG VOICEVOX_ENGINE_VERSION=latest
Expand All @@ -259,8 +260,7 @@ RUN <<EOF
EOF

# Generate filemap.json
ADD ./tools/generate_filemap.py /tmp/
RUN gosu user /opt/python/bin/python3 /tmp/generate_filemap.py --target_dir resources/character_info
RUN /opt/python/bin/python3 /opt/voicevox_engine/tools/generate_filemap.py --target_dir /opt/voicevox_engine/resources/character_info

# Keep this layer separated to use layer cache on download failed in local build
RUN <<EOF
Expand Down
1 change: 1 addition & 0 deletions docs/VOICEVOX音声合成エンジンとの連携.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

- バージョンが上がっても、`/audio_query`で返ってくる値をそのまま`/synthesis`に POST すれば音声合成できるようにする予定です
- `AudioQuery`のパラメータは増えますが、なるべくデフォルト値で以前と変わらない音声が生成されるようにします
- 以前のバージョンの`AudioQuery`を新しいバージョンの`/synthesis`にそのまま POST できるようにします(後方互換)
- バージョン 0.7 から音声スタイルが実装されました。スタイルの情報は`/speakers`から取得できます
- スタイルの情報にある`style_id`を`speaker`に指定することで、今まで通り音声合成ができます
- style_id の指定先が speaker なのは互換性のためです
5 changes: 5 additions & 0 deletions engine_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
"value": true,
"name": "全体の音量の調整"
},
"adjust_pause_length": {
"type": "bool",
"value": true,
"name": "句読点などの無音時間の調整"
},
"interrogative_upspeak": {
"type": "bool",
"value": true,
Expand Down
8 changes: 4 additions & 4 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,10 +385,10 @@ def main() -> None:
disable_mutable_api = envs.disable_mutable_api

root_dir = select_first_not_none([args.voicevox_dir, engine_root()])
speaker_info_dir = root_dir / "resources" / "character_info"
character_info_dir = root_dir / "resources" / "character_info"
# NOTE: ENGINE v0.19 以前向けに後方互換性を確保する
if not speaker_info_dir.exists():
speaker_info_dir = root_dir / "speaker_info"
if not character_info_dir.exists():
character_info_dir = root_dir / "speaker_info"

# ASGI に準拠した VOICEVOX ENGINE アプリケーションを生成する
app = generate_app(
Expand All @@ -400,7 +400,7 @@ def main() -> None:
engine_manifest,
library_manager,
cancellable_engine,
speaker_info_dir,
character_info_dir,
cors_policy_mode,
allow_origin,
disable_mutable_api=disable_mutable_api,
Expand Down
2 changes: 1 addition & 1 deletion test/benchmark/engine_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _generate_engine_fake_server(root_dir: Path) -> TestClient:
core_manager=core_manager,
setting_loader=setting_loader,
preset_manager=preset_manager,
speaker_info_dir=root_dir / "resources" / "character_info",
character_info_dir=root_dir / "resources" / "character_info",
user_dict=user_dict,
engine_manifest=engine_manifest,
library_manager=library_manager,
Expand Down
22 changes: 12 additions & 10 deletions test/benchmark/speed/speaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ def benchmark_get_speaker_info_all(
# speaker_uuid 一覧を準備
response = client.get("/speakers", params={})
assert response.status_code == 200
speakers = response.json()
speaker_uuids = list(map(lambda speaker: speaker["speaker_uuid"], speakers))
talk_characters = response.json()
uuids = list(map(lambda c: c["speaker_uuid"], talk_characters))

def execute() -> None:
"""計測対象となる処理を実行する"""
for speaker_uuid in speaker_uuids:
client.get("/speaker_info", params={"speaker_uuid": speaker_uuid})
for uuid in uuids:
client.get("/speaker_info", params={"speaker_uuid": uuid})

average_time = benchmark_time(execute, n_repeat=10)
return average_time


def benchmark_request_time_for_all_speakers(
def benchmark_request_time_for_all_talk_characters(
server: ServerType, root_dir: Path | None = None
) -> float:
"""
Expand All @@ -54,12 +54,12 @@ def benchmark_request_time_for_all_speakers(
# speaker_uuid 一覧を準備
response = client.get("/speakers", params={})
assert response.status_code == 200
speakers = response.json()
speaker_uuids = list(map(lambda speaker: speaker["speaker_uuid"], speakers))
talk_characters = response.json()
uuids = list(map(lambda c: c["speaker_uuid"], talk_characters))

def execute() -> None:
"""計測対象となる処理を実行する"""
for _ in speaker_uuids:
for _ in uuids:
client.get("/", params={})

average_time = benchmark_time(execute, n_repeat=10)
Expand Down Expand Up @@ -89,7 +89,9 @@ def execute() -> None:
print(f"全話者 `GET /speaker_info` fakeserve: {result_spk_infos_fakeserve} sec")
print(f"全話者 `GET /speaker_info` localhost: {result_spk_infos_localhost} sec")

req_time_all_fake = benchmark_request_time_for_all_speakers("fake", root_dir)
req_time_all_local = benchmark_request_time_for_all_speakers("localhost", root_dir)
req_time_all_fake = benchmark_request_time_for_all_talk_characters("fake", root_dir)
req_time_all_local = benchmark_request_time_for_all_talk_characters(
"localhost", root_dir
)
print("全話者 `GET /` fakeserve: {:.3f} sec".format(req_time_all_fake))
print("全話者 `GET /` localhost: {:.3f} sec".format(req_time_all_local))

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 5 additions & 8 deletions test/e2e/test_characters.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
"""
話者・歌手のテスト。
TODO: 話者と歌手の両ドメイン共通のドメイン用語を定め、このテストファイル名を変更する。
"""
"""話者・歌手のテスト"""

import hashlib
from test.utility import hash_long_string
Expand Down Expand Up @@ -42,13 +39,13 @@ def test_話者一覧が取得できる(
def test_話者の情報を取得できる(
client: TestClient, snapshot_json: SnapshotAssertion
) -> None:
speakers = _speaker_list_adapter.validate_python(client.get("/speakers").json())
for speaker in speakers:
talkers = _speaker_list_adapter.validate_python(client.get("/speakers").json())
for talker in talkers:
response = client.get(
"/speaker_info", params={"speaker_uuid": speaker.speaker_uuid}
"/speaker_info", params={"speaker_uuid": talker.speaker_uuid}
)
assert snapshot_json(
name=speaker.speaker_uuid,
name=talker.speaker_uuid,
) == hash_long_string(response.json())


Expand Down
2 changes: 1 addition & 1 deletion test/e2e/test_missing_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@

def test_missing_core_422(client: TestClient, snapshot_json: SnapshotAssertion) -> None:
"""存在しないコアを指定するとエラーを返す。"""
response = client.get("/supported_devices", params={"core_version": "4.0.4"})
response = client.get("/speakers", params={"core_version": "4.0.4"})
assert response.status_code == 422
assert snapshot_json == response.json()
4 changes: 2 additions & 2 deletions test/unit/library/test_library_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def setUp(self) -> None:
self.vvlib_manifest = json.loads(f.read())
self.library_uuid = self.vvlib_manifest["uuid"]
with ZipFile(self.library_filename, "w") as zf:
speaker_infos = glob.glob("resources/character_info/**", recursive=True)
for info in speaker_infos:
character_infos = glob.glob("resources/character_info/**", recursive=True)
for info in character_infos:
zf.write(info)
zf.writestr(VVLIB_MANIFEST_NAME, json.dumps(self.vvlib_manifest))
self.library_file = open(self.library_filename, "br")
Expand Down
126 changes: 46 additions & 80 deletions test/unit/test_metas_store.py
Original file line number Diff line number Diff line change
@@ -1,130 +1,96 @@
import uuid

from voicevox_engine.metas.Metas import Speaker, SpeakerStyle, StyleId, StyleType
from voicevox_engine.metas.Metas import (
SpeakerStyle,
SpeakerSupportedFeatures,
StyleId,
StyleType,
)
from voicevox_engine.metas.MetasStore import (
SING_STYLE_TYPES,
TALK_STYLE_TYPES,
_SING_STYLE_TYPES,
_TALK_STYLE_TYPES,
Character,
filter_characters_and_styles,
)


def _speakers_to_characters(speakers: list[Speaker]) -> list[Character]:
"""Speaker 配列をキャラクター配列へキャストする。"""
characters: list[Character] = []
for speaker in speakers:
styles = speaker.styles
talk_styles = filter(lambda style: style.type in TALK_STYLE_TYPES, styles)
sing_styles = filter(lambda style: style.type in SING_STYLE_TYPES, styles)
characters.append(
Character(
name=speaker.name,
uuid=speaker.speaker_uuid,
talk_styles=list(talk_styles),
sing_styles=list(sing_styles),
version=speaker.version,
supported_features=speaker.supported_features,
)
)
return characters


def _gen_speaker(style_types: list[StyleType]) -> Speaker:
return Speaker(
speaker_uuid=str(uuid.uuid4()),
def _gen_character(style_types: list[StyleType]) -> Character:
talk_styles = list(filter(lambda s: s in _TALK_STYLE_TYPES, style_types))
sing_styles = list(filter(lambda s: s in _SING_STYLE_TYPES, style_types))
return Character(
name="",
styles=[
SpeakerStyle(
name="",
id=StyleId(0),
type=style_type,
)
for style_type in style_types
uuid=str(uuid.uuid4()),
talk_styles=[
SpeakerStyle(name="", id=StyleId(0), type=style_type)
for style_type in talk_styles
],
sing_styles=[
SpeakerStyle(name="", id=StyleId(0), type=style_type)
for style_type in sing_styles
],
version="",
supported_features=SpeakerSupportedFeatures(),
)


def _equal_speakers(a: list[Speaker], b: list[Speaker]) -> bool:
def _equal_characters(a: list[Character], b: list[Character]) -> bool:
if len(a) != len(b):
return False
for i in range(len(a)):
if a[i].speaker_uuid != b[i].speaker_uuid:
if a[i].uuid != b[i].uuid:
return False
return True


def test_filter_speakers_and_styles_with_speaker() -> None:
def test_filter_characters_and_styles_with_talk() -> None:
# Inputs
speaker_talk_only = _gen_speaker(["talk"])
speaker_singing_teacher_only = _gen_speaker(["singing_teacher"])
speaker_frame_decode_only = _gen_speaker(["frame_decode"])
speaker_sing_only = _gen_speaker(["sing"])
speaker_allstyle = _gen_speaker(["talk", "singing_teacher", "frame_decode", "sing"])
talk_only = _gen_character(["talk"])
singing_teacher_only = _gen_character(["singing_teacher"])
frame_decode_only = _gen_character(["frame_decode"])
sing_only = _gen_character(["sing"])
allstyle = _gen_character(["talk", "singing_teacher", "frame_decode", "sing"])

# Outputs
result = filter_characters_and_styles(
_speakers_to_characters(
[
speaker_talk_only,
speaker_singing_teacher_only,
speaker_frame_decode_only,
speaker_sing_only,
speaker_allstyle,
]
),
"speaker",
[talk_only, singing_teacher_only, frame_decode_only, sing_only, allstyle],
"talk",
)

# Tests
assert len(result) == 2

# 話者だけになっている
assert _equal_speakers(result, [speaker_talk_only, speaker_allstyle])
assert _equal_characters(result, [talk_only, allstyle])

# スタイルがフィルタリングされている
for speaker in result:
for style in speaker.styles:
for characters in result:
for style in characters.talk_styles + characters.sing_styles:
assert style.type == "talk"


def test_filter_speakers_and_styles_with_singer() -> None:
def test_filter_characters_and_styles_with_sing() -> None:
# Inputs
speaker_talk_only = _gen_speaker(["talk"])
speaker_singing_teacher_only = _gen_speaker(["singing_teacher"])
speaker_frame_decode_only = _gen_speaker(["frame_decode"])
speaker_sing_only = _gen_speaker(["sing"])
speaker_allstyle = _gen_speaker(["talk", "singing_teacher", "frame_decode", "sing"])
talk_only = _gen_character(["talk"])
singing_teacher_only = _gen_character(["singing_teacher"])
frame_decode_only = _gen_character(["frame_decode"])
sing_only = _gen_character(["sing"])
allstyle = _gen_character(["talk", "singing_teacher", "frame_decode", "sing"])

# Outputs
result = filter_characters_and_styles(
_speakers_to_characters(
[
speaker_talk_only,
speaker_singing_teacher_only,
speaker_frame_decode_only,
speaker_sing_only,
speaker_allstyle,
]
),
"singer",
[talk_only, singing_teacher_only, frame_decode_only, sing_only, allstyle],
"sing",
)

# Tests
assert len(result) == 4

# 歌手だけになっている
assert _equal_speakers(
result,
[
speaker_singing_teacher_only,
speaker_frame_decode_only,
speaker_sing_only,
speaker_allstyle,
],
assert _equal_characters(
result, [singing_teacher_only, frame_decode_only, sing_only, allstyle]
)

# スタイルがフィルタリングされている
for speaker in result:
for style in speaker.styles:
for character in result:
for style in character.talk_styles + character.sing_styles:
assert style.type in ["singing_teacher", "frame_decode", "sing"]
Loading

0 comments on commit 8446656

Please sign in to comment.