In [1]:
import os, json, uuid, pathlib, torch, IPython.display as ipd
import ChatTTS

# === Configuration ===
MODEL_PATH = '/root/OpenMic/models/ChatTTS'  # change to your local ChatTTS model directory
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without a usable CUDA device. Override manually with 'cuda' or 'cpu'.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
SAVE_DIR = '/root/OpenMic/voices'  # directory where selected voices are saved
os.makedirs(SAVE_DIR, exist_ok=True)

N_SAMPLES = 6  # number of random voices sampled in one run
TEXT_SNIPPET = '大家好，这里是音色测试，请仔细听听音色特点。'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Install the ChatTTS runtime patch (guards against the negative cache-length error)
import sys

# The project root is taken to be the parent of the current working directory;
# it has to be importable before the `src` package can be resolved.
project_root = os.path.dirname(os.getcwd())
if project_root not in sys.path:
    sys.path.append(project_root)

from src.speech.chattts_patch import apply_chattts_patch

apply_chattts_patch()
print("ChatTTS cache-length patch installed")

ChatTTS cache-length patch installed


In [3]:
# Load the ChatTTS model from the local directory given by MODEL_PATH
chat = ChatTTS.Chat()
# Chat.load returns a boolean success flag; check it instead of announcing
# success unconditionally, so a failed load stops the notebook here rather
# than surfacing later as a confusing inference error.
if not chat.load(source='custom', custom_path=MODEL_PATH, device=DEVICE):
    raise RuntimeError(
        f'Failed to load ChatTTS model from {MODEL_PATH!r} on device {DEVICE!r}'
    )
print('Model loaded')

Model loaded


In [4]:
def sample_speakers(n=6):
    """Draw ``n`` random speakers from the notebook-level ``chat`` object.

    Args:
        n: How many speakers to sample.

    Returns:
        A list with ``n`` entries, each the value returned by one call to
        ``chat.sample_random_speaker()``.
    """
    return [chat.sample_random_speaker() for _ in range(n)]

# Sample N_SAMPLES candidate voices; later cells index into `speakers`.
speakers = sample_speakers(N_SAMPLES)
print(f'Sampled {len(speakers)} speakers')

Sampled 6 speakers


In [5]:
# Synthesize a short demo clip for every sampled voice so it can be auditioned
SAMPLE_RATE = 24000  # playback rate in Hz handed to the audio widget
audios = []
for i, spk in enumerate(speakers):
    params_infer_code = ChatTTS.Chat.InferCodeParams(spk_emb=spk)
    wavs = chat.infer([TEXT_SNIPPET], params_infer_code=params_infer_code)
    # Use an explicit length check rather than truthiness so an array-like
    # return value cannot trigger an ambiguous-truth-value error.
    audio = wavs[0] if wavs is not None and len(wavs) > 0 else None
    # Always append (even None) so positions in `audios` match `speakers`.
    audios.append(audio)
    print(f'#{i} len={len(audio) if audio is not None else 0}')
    if audio is None:
        # ipd.Audio raises when given no data; skip the player for this voice
        # instead of aborting the whole audition loop.
        continue
    ipd.display(ipd.Audio(audio, rate=SAMPLE_RATE))

text:   7%|▋         | 27/384(max) [00:00, 36.37it/s]
code:   9%|▉         | 191/2048(max) [00:02, 68.15it/s]


#0 len=95096


text:   7%|▋         | 27/384(max) [00:00, 66.22it/s]
code:   9%|▉         | 188/2048(max) [00:02, 67.26it/s]

#1 len=95397





text:   7%|▋         | 27/384(max) [00:00, 67.50it/s]
code:   9%|▉         | 185/2048(max) [00:02, 67.49it/s]

#2 len=92385





text:   7%|▋         | 26/384(max) [00:00, 67.19it/s]
code:   9%|▉         | 184/2048(max) [00:02, 69.06it/s]

#3 len=93245





text:   7%|▋         | 26/384(max) [00:00, 67.73it/s]
code:   9%|▊         | 179/2048(max) [00:02, 67.90it/s]

#4 len=90761





text:   7%|▋         | 27/384(max) [00:00, 66.97it/s]
code:   9%|▉         | 191/2048(max) [00:02, 67.66it/s]

#5 len=96243





In [22]:
# Choose which sampled voices to keep (indices into `speakers`) and describe them
SELECTED = [4]  # edit: list of indices to save
COMMENT = '男声：主持'  # edit: note written next to each saved voice

def save_voice(idx_list, comment, save_dir=None, spk_list=None):
    """Save selected speakers to disk, each with a text note beside it.

    For every index a ``spk_<idx>_<8 hex chars>.pt`` file is written with
    ``torch.save`` and a same-named ``.txt`` file receives ``comment``.

    Args:
        idx_list: Indices of the voices to save.
        comment: Note written (UTF-8) next to each saved voice.
        save_dir: Target directory. Defaults to the notebook-level ``SAVE_DIR``.
        spk_list: Sequence the indices refer to. Defaults to the
            notebook-level ``speakers``.

    Returns:
        The ``.pt`` file paths as strings, in the order of ``idx_list``.

    Raises:
        IndexError: If an index is out of range for a list-like speaker
            sequence.
    """
    # Resolve the defaults at call time so later edits to the notebook
    # globals are honoured and callers can bypass them entirely.
    out_dir = pathlib.Path(SAVE_DIR if save_dir is None else save_dir)
    pool = speakers if spk_list is None else spk_list
    # Do not rely on an earlier cell having created the directory.
    out_dir.mkdir(parents=True, exist_ok=True)

    paths = []
    for idx in idx_list:
        spk = pool[idx]
        # A random suffix keeps repeated saves of the same index from colliding.
        name = f'spk_{idx}_{uuid.uuid4().hex[:8]}'
        pt_path = out_dir / f'{name}.pt'
        txt_path = out_dir / f'{name}.txt'
        torch.save(spk, pt_path)
        txt_path.write_text(comment, encoding='utf-8')
        paths.append(str(pt_path))
        print(f'Saved {pt_path} with comment: {comment}')
    return paths

# Persist the selected voices; the bare `saved` expression displays the returned .pt paths.
saved = save_voice(SELECTED, COMMENT)
saved

Saved /root/OpenMic/voices/spk_4_45102fed.pt with comment: 男声：主持


['/root/OpenMic/voices/spk_4_45102fed.pt']

In [None]:
# How to load a saved custom voice for inference
def load_voice(path):
    """Read back an object previously written with ``torch.save``.

    Args:
        path: Location of the saved ``.pt`` file.

    Returns:
        The deserialized object, with any tensor storages mapped to the CPU.
    """
    # NOTE(review): torch.load deserializes via pickle; only open files you trust.
    spk_emb = torch.load(path, map_location='cpu')
    return spk_emb

# Usage example (the path is illustrative; point it at a .pt file written to SAVE_DIR):
# custom_spk = load_voice('/root/OpenMic/voices/spk_xxxxx.pt')
# params = ChatTTS.Chat.InferCodeParams(spk_emb=custom_spk)
# wavs = chat.infer(['测试文本'], params_infer_code=params)

print('To use a saved voice: load it and pass as spk_emb in InferCodeParams.')