Skip to content

Commit

Permalink
[Rust] voicevox_tts と voicevox_wav_free の実装 (#186)
Browse files (browse the repository at this point in the history)
* implements create_accent_phrases

* implements synthesis

* implements synthesis_wave_format

* implements voicevox_tts and voicevox_wav_free

* resolve clippy warning

* 音声合成できるように修正

* wavのためのバッファのvecをwith_capacityでメモリ確保

* Internalのメソッドの引数としてCStrの代わりにstrを使う

* Dissolveを使わない

* UTF-8文字列としてデコードできない場合のエラーをハンドリングする
  • Loading branch information
PickledChair committed Jul 16, 2022
1 parent 0357912 commit ce9d36b
Show file tree
Hide file tree
Showing 8 changed files with 312 additions and 52 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ anyhow = "1.0.57"
cfg-if = "1.0.0"
derive-getters = "0.2.0"
derive-new = "0.5.9"
libc = "0.2.126"
once_cell = "1.10.0"
onnxruntime = { git = "https://github.com/qwerty2501/onnxruntime-rs.git", version = "0.0.24" }
serde = "1.0.137"
Expand Down
45 changes: 33 additions & 12 deletions crates/voicevox_core/src/c_export.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::*;
use internal::Internal;
use libc::c_void;
use once_cell::sync::Lazy;
use std::ffi::CStr;
use std::os::raw::{c_char, c_int};
Expand Down Expand Up @@ -33,6 +34,8 @@ pub enum VoicevoxResultCode {
VOICEVOX_RESULT_INVALID_SPEAKER_ID = 7,
VOICEVOX_RESULT_INVALID_MODEL_INDEX = 8,
VOICEVOX_RESULT_INFERENCE_FAILED = 9,
VOICEVOX_RESULT_FAILED_EXTRACT_FULL_CONTEXT_LABEL = 10,
VOICEVOX_RESULT_INVALID_UTF8_INPUT = 11,
}

fn convert_result<T>(result: Result<T>) -> (Option<T>, VoicevoxResultCode) {
Expand Down Expand Up @@ -73,6 +76,10 @@ fn convert_result<T>(result: Result<T>) -> (Option<T>, VoicevoxResultCode) {
Error::InferenceFailed => {
(None, VoicevoxResultCode::VOICEVOX_RESULT_INFERENCE_FAILED)
}
Error::FailedExtractFullContextLabel(_) => (
None,
VoicevoxResultCode::VOICEVOX_RESULT_FAILED_EXTRACT_FULL_CONTEXT_LABEL,
),
}
}
}
Expand Down Expand Up @@ -233,9 +240,13 @@ pub extern "C" fn decode_forward(

#[no_mangle]
pub extern "C" fn voicevox_load_openjtalk_dict(dict_path: *const c_char) -> VoicevoxResultCode {
let (_, result_code) = convert_result(
lock_internal().voicevox_load_openjtalk_dict(unsafe { CStr::from_ptr(dict_path) }),
);
let (_, result_code) = {
if let Ok(dict_path) = unsafe { CStr::from_ptr(dict_path) }.to_str() {
convert_result(lock_internal().voicevox_load_openjtalk_dict(dict_path))
} else {
(None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT)
}
};
result_code
}

Expand All @@ -246,12 +257,21 @@ pub extern "C" fn voicevox_tts(
output_binary_size: *mut c_int,
output_wav: *mut *mut u8,
) -> VoicevoxResultCode {
let (_, result_code) = convert_result(lock_internal().voicevox_tts(
unsafe { CStr::from_ptr(text) },
speaker_id,
output_binary_size,
output_wav,
));
let (output_opt, result_code) = {
if let Ok(text) = unsafe { CStr::from_ptr(text) }.to_str() {
convert_result(lock_internal().voicevox_tts(text, speaker_id as usize))
} else {
(None, VoicevoxResultCode::VOICEVOX_RESULT_INVALID_UTF8_INPUT)
}
};
if let Some(output) = output_opt {
unsafe {
output_binary_size.write(output.len() as c_int);
let wav_heap = libc::malloc(output.len());
libc::memcpy(wav_heap, output.as_ptr() as *const c_void, output.len());
output_wav.write(wav_heap as *mut u8);
}
}
result_code
}

Expand All @@ -272,9 +292,10 @@ pub extern "C" fn voicevox_tts_from_kana(
}

#[no_mangle]
pub extern "C" fn voicevox_wav_free(wav: *mut u8) -> VoicevoxResultCode {
let (_, result_code) = convert_result(lock_internal().voicevox_wav_free(wav));
result_code
pub extern "C" fn voicevox_wav_free(wav: *mut u8) {
unsafe {
libc::free(wav as *mut c_void);
}
}

#[no_mangle]
Expand Down
8 changes: 5 additions & 3 deletions crates/voicevox_core/src/engine/full_context_label.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ static I3_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(@(\d+|xx)\+)").unwrap(
static J1_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(/J:(\d+|xx)_)").unwrap());

fn string_feature_by_regex(re: &Regex, label: &str) -> Result<String> {
re.find(label)
.map(|m| m.as_str().to_string())
.ok_or_else(|| FullContextLabelError::LabelParse {
if let Some(caps) = re.captures(label) {
Ok(caps.get(2).unwrap().as_str().to_string())
} else {
Err(FullContextLabelError::LabelParse {
label: label.into(),
})
}
}

#[allow(dead_code)]
Expand Down
1 change: 1 addition & 0 deletions crates/voicevox_core/src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ mod synthesis_engine;
use super::*;

pub use acoustic_feature_extractor::*;
pub use full_context_label::*;
pub use model::*;
pub use synthesis_engine::*;
Loading

0 comments on commit ce9d36b

Please sign in to comment.