Skip to content

Commit

Permalink
ずんだもんのID修正漏れ対応 (#154)
Browse files Browse the repository at this point in the history
* fix requirements.txt

* READMEのモデル配置URLの変更等

モデル配置URLの変更、dataset内のファイル構成を実際に作成される内容に則した物に変更

* PIXIV FANBOX問い合わせ先追加

READMEにPIXIV FANBOXの問い合わせ先を追加

* ずんだもんのID修正漏れ対応

・ID100の場合にずんだもん指定だった状態を修正(現在は101がずんだもん)
・IDの値をプログラムの最初に変数定義する表記に変更
・multi_speakerの内容を現在の上限等に合わせた物に変更
  • Loading branch information
pipolll committed Jan 18, 2024
1 parent 3940993 commit 8f8232a
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 18 deletions.
29 changes: 19 additions & 10 deletions create_dataset_jtalk.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
import pyopenjtalk
import json

# Speaker-ID constants used by the dataset-creation functions in this file.
# ID given to the user's own voice (the "myvoice" entry) when file lists are built.
MY_ID = 0
# ID given to the zundamon speaker; main() also compares --target against this
# value to decide whether to call create_dataset_zundamon.
ZUNDAMON_ID = 101
# IDs of the other pre-trained target speakers.
SORA_ID = 102
METHANE_ID = 103
TSUMUGI_ID = 104
# NOTE(review): 105 is skipped here — presumably intentional; confirm.
KIRITAN_ID = 106

# Upper bound on speaker IDs: create_dataset stops assigning IDs once
# speaker_id exceeds this, and the other create_dataset_* functions return it
# (main() stores that return value as n_spk).
MAX_ID = 255

def mozi2phone(mozi):
text = pyopenjtalk.g2p(mozi)
text = "sil " + text + " sil"
Expand Down Expand Up @@ -58,7 +67,7 @@ def create_dataset(filename):
counter = counter +1
Correspondence_list.append(str(speaker_id)+"|"+os.path.basename(d) + "\n")
speaker_id = speaker_id + 1
if speaker_id > 255:
if speaker_id > MAX_ID:
break

for d in textless_dir_list:
Expand Down Expand Up @@ -103,7 +112,7 @@ def create_dataset_zundamon(filename):

#set list wav and text
#myvoice
speaker_id = 0
speaker_id = MY_ID
d = my_path
wav_file_list = glob.glob(d + "/wav/*.wav")
lab_file_list = glob.glob(d + "/text/*.txt")
Expand All @@ -127,7 +136,7 @@ def create_dataset_zundamon(filename):
counter = counter +1
Correspondence_list.append(str(speaker_id)+"|"+os.path.basename(d) + "\n")

speaker_id = 101
speaker_id = ZUNDAMON_ID
d = zundamon_path
wav_file_list = glob.glob(d + "/wav/*.wav")
lab_file_list = glob.glob(d + "/text/*.txt")
Expand Down Expand Up @@ -175,7 +184,7 @@ def create_dataset_zundamon(filename):
f.writelines(output_file_list_val_textless)
with open('filelists/' + filename + '_Correspondence.txt', 'w', encoding='utf-8', newline='\n') as f:
f.writelines(Correspondence_list)
return 255
return MAX_ID

def create_dataset_character(filename, tid):
textful_dir_list = glob.glob("dataset/textful/*")
Expand All @@ -193,7 +202,7 @@ def create_dataset_character(filename, tid):

#set list wav and text
#myvoice
speaker_id = 0
speaker_id = MY_ID
d = my_path
wav_file_list = glob.glob(d + "/wav/*.wav")
lab_file_list = glob.glob(d + "/text/*.txt")
Expand Down Expand Up @@ -265,7 +274,7 @@ def create_dataset_character(filename, tid):
f.writelines(output_file_list_val_textless)
with open('filelists/' + filename + '_Correspondence.txt', 'w', encoding='utf-8', newline='\n') as f:
f.writelines(Correspondence_list)
return 255
return MAX_ID

def create_dataset_multi_character(filename, file_path):
Correspondence_list = list()
Expand Down Expand Up @@ -311,7 +320,7 @@ def create_dataset_multi_character(filename, file_path):
f.writelines(output_file_list_val_textless)
with open('filelists/' + filename + '_Correspondence.txt', 'w', encoding='utf-8', newline='\n') as f:
f.writelines(Correspondence_list)
return 255
return MAX_ID

def main():
parser = argparse.ArgumentParser()
Expand All @@ -320,17 +329,17 @@ def main():
parser.add_argument('-s', '--sr', type=int, default=24000,
help='sampling rate (default = 24000)')
parser.add_argument('-t', '--target', type=int, default=9999,
help='pre_traind targetid (zundamon = 100, sora = 101, methane = 102, tsumugi = 103)')
help='pre_traind targetid (zundamon = {ZUNDAMON_ID}, sora = {SORA_ID}, methane = {METHANE_ID}, tsumugi = {TSUMUGI_ID}, kiritan = {KIRITAN_ID})')
parser.add_argument('-m', '--multi_target', type=str, default=None,
help='pre_traind targetid (zundamon = 100, sora = 101, methane = 102, tsumugi = 103)')
help='pre_traind targetid (zundamon = {ZUNDAMON_ID}, sora = {SORA_ID}, methane = {METHANE_ID}, tsumugi = {TSUMUGI_ID}, kiritan = {KIRITAN_ID})')
parser.add_argument('-c', '--config', type=str, default="./configs/baseconfig.json",
help='JSON file for configuration')
args = parser.parse_args()
filename = args.filename
print(filename)
if args.multi_target != None:
n_spk = create_dataset_multi_character(filename, args.multi_target)
elif args.target != 9999 and args.target == 100:
elif args.target != 9999 and args.target == ZUNDAMON_ID:
n_spk = create_dataset_zundamon(filename)
elif args.target != 9999:
n_spk = create_dataset_character(filename, args.target)
Expand Down
18 changes: 11 additions & 7 deletions dataset/multi_speaker_correspondence.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
00_myvoice|107
01_target|108
02_target|109
03_target|0
04_target|1
05_target|2
1205_zundamon|100
00_myvoice|0
01_target|1
02_target|2
03_target|252
04_target|253
05_target|254
1205_zundamon|101
912_sora|102
459_metahne|103
344_tsumugi|104
106_kiritan|106
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v1.3.2.10
v1.3.2.11

0 comments on commit 8f8232a

Please sign in to comment.