In [2]:
import torch

# 检查 GPU 是否可用
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# 创建一个随机张量并移动到 GPU（如果可用）
x = torch.rand(5, 5).to(device)
print("Random tensor on device:", x)

Using device: cuda:0
Random tensor on device: tensor([[0.0358, 0.6989, 0.2847, 0.6577, 0.4707],
        [0.5551, 0.4569, 0.6650, 0.7052, 0.6153],
        [0.9382, 0.4860, 0.5263, 0.8036, 0.2150],
        [0.1697, 0.7612, 0.6752, 0.4719, 0.1756],
        [0.4485, 0.2796, 0.3450, 0.4040, 0.8787]], device='cuda:0')


In [None]:
import ChatTTS
import torch
import torchaudio
import soundfile
import time
import torch
import random

module_name = "chatutil"


MODELPATH = "models/pzc163/chatTTS/asset"
 
class ChatTTSUtil:
    def __init__(self ,
                 modelPath = MODELPATH,
                 saveFilePath = "output/" ,
                 fixSpkStyle = True):
        # 初始化ChatTTSUtil类，设置模型路径、保存文件路径和是否固定说话风格
        self.modelPath = modelPath
        self.wavfilePath = saveFilePath
        self.fixSpkStyle = fixSpkStyle
        self.chat = ChatTTS.Chat()
        self.chat.load_models(local_path = modelPath)
        # 设置文本精炼参数
        self.params_refine_text = {"prompt": "[oral_0][laugh_0][break_0]"}
        # Config the speech style with random generation
        std , mean = torch.load(f"{MODELPATH}/spk_stat.pt").chunk(2)
        rand_spk = torch.randn(768) * std + mean
        self.params_infer_code = {
            "spk_emb": rand_spk,
            "temperature": .3,
            "top_P": 0.7,
            "top_K": 20,
            "prompt": "[speed_5]"
        }

    def setRefineTextConf(self , oralConf = "[oral_0]" , laughConf = "[laugh_0]" , breakConf = "[break_0]"):
        # 定义一个方法setRefineTextConf，用于设置文本精炼的配置
        # 参数oralConf默认值为"[oral_0]"，表示口语化配置
        # 参数laughConf默认值为"[laugh_0]"，表示笑声配置
        # 参数breakConf默认值为"[break_0]"，表示中断配置
        self.params_refine_text = {"prompt": f"{oralConf}{laughConf}{breakConf}"}

    def setInferCode(self , temperature = 0.3 , top_P = 0.7 , top_K = 20 , speed = "[speed_5]"):
        # 设置推理代码的参数
        # temperature: 控制生成文本的随机性，值越大，生成的文本越随机
        self.params_infer_code["temperature"] = temperature
        # top_P: 控制生成文本的多样性，值越大，生成的文本越多样
        self.params_infer_code["top_P"] = top_P
        # top_K: 控制生成文本的词汇量，值越大，生成的文本使用的词汇越多
        self.params_infer_code["top_K"] = top_K
        # speed: 控制生成文本的速度，这里使用了一个字符串表示速度等级
        self.params_infer_code["prompt"] = speed

    def generateSound(self , texts , savePath = "output/" , filePrefix = "output"):
        # 调用chat对象的infer方法，将文本转换为音频波形
        # texts: 要转换为音频的文本列表
        # use_decoder: 是否使用解码器
        # params_refine_text: 文本精炼参数
        # params_infer_code: 音频生成参数
        wavs = self.chat.infer(texts , use_decoder = True , params_refine_text = self.params_refine_text , params_infer_code = self.params_infer_code)
        # 初始化一个空列表，用于存储生成的音频文件路径
        wavFilePath = []
        # 遍历生成的音频波形列表
        for (index, wave) in enumerate(wavs):
            # 使用soundfile库将音频波形写入文件
            # 文件路径由savePath、filePrefix和索引组成
            # wave[0]表示音频数据，24000是采样率
            soundfile.write(f"{savePath}{filePrefix}{index}.wav" , wave[0] , 24000)
            # 将生成的音频文件路径添加到列表中
            wavFilePath.append(f"{savePath}{filePrefix}{index}.wav")
        # 返回生成的音频文件路径列表
        return wavFilePath

if __name__ == "__main__":
    chUtil = ChatTTSUtil()
    texts = [
        "大家好，我是Chat T T S，欢迎来到畅的科技工坊。",
        "太棒了，我竟然是第一位嘉宾。",
        "我是Chat T T S， 是专门为对话场景设计的文本转语音模型，例如大语言助手对话任务。我支持英文和中文两种语言。最大的模型使用了10万小时以上的中英文数据进行训练。目前在huggingface中的开源版本为4万小时训练且未S F T 的版本。",
    "耶，我们开始吧"
    ]
    chUtil.setInferCode(0.8 , 0.7 , 20 , speed = "[speed_5]")
    chUtil.generateSound(texts)


INFO:ChatTTS.core:Load from cache: C:\Users\23668/.cache/huggingface\hub/models--2Noise--ChatTTS/snapshots\1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84
INFO:ChatTTS.core:use cuda:0
  vocos.load_state_dict(torch.load(vocos_ckpt_path, map_location=device))
INFO:ChatTTS.core:vocos loaded.
  dvae.load_state_dict(torch.load(dvae_ckpt_path, map_location=device))
INFO:ChatTTS.core:dvae loaded.
  gpt.load_state_dict(torch.load(gpt_ckpt_path, map_location="cpu"))
  self.pretrain_models["spk_stat"] = torch.load(spk_stat_path).to(device)
INFO:ChatTTS.core:gpt loaded.
  decoder.load_state_dict(torch.load(decoder_ckpt_path, map_location=device))
INFO:ChatTTS.core:decoder loaded.
  tokenizer = torch.load(tokenizer_path, map_location=device)
INFO:ChatTTS.core:tokenizer loaded.
INFO:ChatTTS.core:All initialized.
  std , mean = torch.load(f"{MODELPATH}/spk_stat.pt").chunk(2)
INFO:ChatTTS.core:All initialized.
 28%|██▊       | 106/384 [00:04<00:11, 24.19it/s]
 51%|█████     | 1035/2048 [00:41<00:40, 24.72it

In [4]:
from flask import Flask
import ChatTTS
import torch
import torchaudio
import soundfile
import time
import torch
import random


module_name = "chatutil"

MODELPATH = "models/pzc163/chatTTS/asset"

app = Flask(__name__)
 
class ChatTTSUtil:
    def __init__(self ,
                 modelPath = MODELPATH,
                 saveFilePath = "output/" ,
                 fixSpkStyle = True):
        # 初始化ChatTTSUtil类，设置模型路径、保存文件路径和是否固定说话风格
        self.modelPath = modelPath
        self.wavfilePath = saveFilePath
        self.fixSpkStyle = fixSpkStyle
        self.chat = ChatTTS.Chat()
        self.chat.load_models(local_path = modelPath)
        # 设置文本精炼参数
        self.params_refine_text = {"prompt": "[oral_0][laugh_0][break_0]"}
        # Config the speech style with random generation
        std , mean = torch.load(f"{MODELPATH}/spk_stat.pt").chunk(2)
        rand_spk = torch.randn(768) * std + mean
        self.params_infer_code = {
            "spk_emb": rand_spk,
            "temperature": .3,
            "top_P": 0.7,
            "top_K": 20,
            "prompt": "[speed_5]"
        }

    def setRefineTextConf(self , oralConf = "[oral_0]" , laughConf = "[laugh_0]" , breakConf = "[break_0]"):
        # 定义一个方法setRefineTextConf，用于设置文本精炼的配置
        # 参数oralConf默认值为"[oral_0]"，表示口语化配置
        # 参数laughConf默认值为"[laugh_0]"，表示笑声配置
        # 参数breakConf默认值为"[break_0]"，表示中断配置
        self.params_refine_text = {"prompt": f"{oralConf}{laughConf}{breakConf}"}

    def setInferCode(self , temperature = 0.3 , top_P = 0.7 , top_K = 20 , speed = "[speed_5]"):
        # 设置推理代码的参数
        # temperature: 控制生成文本的随机性，值越大，生成的文本越随机
        self.params_infer_code["temperature"] = temperature
        # top_P: 控制生成文本的多样性，值越大，生成的文本越多样
        self.params_infer_code["top_P"] = top_P
        # top_K: 控制生成文本的词汇量，值越大，生成的文本使用的词汇越多
        self.params_infer_code["top_K"] = top_K
        # speed: 控制生成文本的速度，这里使用了一个字符串表示速度等级
        self.params_infer_code["prompt"] = speed

    def generateSound(self , texts , savePath = "output/" , filePrefix = "output"):
        # 调用chat对象的infer方法，将文本转换为音频波形
        # texts: 要转换为音频的文本列表
        # use_decoder: 是否使用解码器
        # params_refine_text: 文本精炼参数
        # params_infer_code: 音频生成参数
        wavs = self.chat.infer(texts , use_decoder = True , params_refine_text = self.params_refine_text , params_infer_code = self.params_infer_code)
        # 初始化一个空列表，用于存储生成的音频文件路径
        wavFilePath = []
        # 遍历生成的音频波形列表
        for (index, wave) in enumerate(wavs):
            # 使用soundfile库将音频波形写入文件
            # 文件路径由savePath、filePrefix和索引组成
            # wave[0]表示音频数据，24000是采样率
            soundfile.write(f"{savePath}{filePrefix}{index}.wav" , wave[0] , 24000)
            # 将生成的音频文件路径添加到列表中
            wavFilePath.append(f"{savePath}{filePrefix}{index}.wav")
        # 返回生成的音频文件路径列表
        return wavFilePath
    
    
    
    
@app.route('/chat_out', methods=['POST', 'GET'])
def chat_out():
    chUtil = ChatTTSUtil()
    texts = [
        "大家好，我是Chat T T S，欢迎来到畅的科技工坊。",
        "太棒了，我竟然是第一位嘉宾。",
        "我是Chat T T S， 是专门为对话场景设计的文本转语音模型，例如大语言助手对话任务。我支持英文和中文两种语言。最大的模型使用了10万小时以上的中英文数据进行训练。目前在huggingface中的开源版本为4万小时训练且未S F T 的版本。",
    "耶，我们开始吧"
    ]
    out_chat = []
    chUtil.setInferCode(0.8 , 0.7 , 20 , speed = "[speed_3]")
    out_chat = chUtil.generateSound(texts)
    return out_chat

if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.20.3.13:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [9]:
from minio import Minio
from minio.error import S3Error

def download_file(bucket_name, object_name, download_path):
    try:
        # 下载文件
        client.fget_object(bucket_name, object_name, download_path)
        print(f"File {object_name} downloaded successfully to {download_path}")
    except S3Error as err:
        print("Error occurred:", err)

# 配置 MinIO 客户端
client = Minio(
    endpoint="http://47.108.214.25:9000/",
    access_key="minioadmin",
    secret_key="minioadmin",
    secure=False
)

# 设置参数
bucket_name = "audios"        # 存储桶名称
object_name = "2025/02/04/be7af6d7cc2a402397be24984a65f39a.flac"      # 要下载的文件名
download_path = "output.mp3"  # 本地保存路径

# 调用下载函数
download_file(bucket_name, object_name, download_path)

ValueError: path in endpoint is not allowed

In [None]:
import requests

url = "http://47.108.214.25:9000/audios/2025/02/04/be7af6d7cc2a402397be24984a65f39a.flac"
output_path = "output.flac"  # 本地保存路径

try:
    response = requests.get(url)
    response.raise_for_status()  # 检查请求是否成功
    with open(output_path, "wb") as file:
        file.write(response.content)
    print(f"文件已成功下载到 {output_path}")
except requests.exceptions.RequestException as e:
    print(f"下载失败：{e}")
    print("请检查 URL 的合法性，确保网络连接正常，并确认服务器是否可用。")

文件已成功下载到 output.flac
