## 语音合成

In [None]:
# coding=utf-8
# Installation instructions for pyaudio:
# APPLE Mac OS X
#   brew install portaudio
#   pip install pyaudio
# Debian/Ubuntu
#   sudo apt-get install python-pyaudio python3-pyaudio
#   or
#   pip install pyaudio
# CentOS
#   sudo yum install -y portaudio portaudio-devel && pip install pyaudio
# Microsoft Windows
#   python -m pip install pyaudio

import pyaudio
import dashscope
from dashscope.audio.tts_v2 import *


from http import HTTPStatus
from dashscope import Generation
import os

# Optional: set the API key explicitly here, e.g.
#   dashscope.api_key = os.environ['<NAME_OF_YOUR_API_KEY_VARIABLE>']
# NOTE(review): presumably the SDK otherwise falls back to its own default
# key lookup — confirm against the DashScope documentation.
# Speech-synthesis model handed to SpeechSynthesizer in synthesizer_with_llm().
model = "cosyvoice-v1"
# Voice handed to SpeechSynthesizer in synthesizer_with_llm().
voice = "longxiaochun"


class Callback(ResultCallback):
    """Synthesis callback that plays received audio on the default output device.

    An instance is handed to ``SpeechSynthesizer`` through its ``callback``
    argument. NOTE(review): the method names and log messages suggest the SDK
    invokes them on websocket lifecycle events and for every audio chunk —
    confirm against the DashScope ``ResultCallback`` documentation.
    """

    # PyAudio instance and its output stream; both stay None until on_open()
    # has created them and are reset to None by on_close().
    _player = None
    _stream = None

    def on_open(self):
        """Create the PyAudio output stream used for playback."""
        print("websocket is open.")
        self._player = pyaudio.PyAudio()
        # 16-bit mono at 22050 Hz, matching the PCM_22050HZ_MONO_16BIT format
        # requested from the synthesizer. PyAudio streams carry raw PCM and
        # take no ``encoding`` argument; passing one raises TypeError.
        self._stream = self._player.open(
            format=pyaudio.paInt16, channels=1, rate=22050, output=True
        )

    def on_complete(self):
        """Log that the synthesis task finished."""
        print("speech synthesis task complete successfully.")

    def on_error(self, message: str):
        """Log the failure message of the synthesis task."""
        print(f"speech synthesis task failed, {message}")

    def on_close(self):
        """Stop playback and release the audio device, if it was ever opened."""
        print("websocket is closed.")
        # Guard each step: if on_open() failed (or never ran) there is nothing
        # to release, and closing must not fail with an AttributeError.
        if self._stream is not None:
            self._stream.stop_stream()
            self._stream.close()
            self._stream = None
        if self._player is not None:
            self._player.terminate()
            self._player = None

    def on_event(self, message):
        """Log an event message received for the synthesis task."""
        print(f"recv speech synthsis message {message}")

    def on_data(self, data: bytes) -> None:
        """Write one chunk of audio bytes to the output stream."""
        print("audio result length:", len(data))
        self._stream.write(data)


def synthesizer_with_llm():
    """Ask qwen-turbo a question and speak the streamed answer as it arrives.

    Every successfully received text increment is echoed to stdout and passed
    to the speech synthesizer; failed responses are reported and skipped.
    After the response stream ends the synthesis is completed and the last
    request id is printed.
    """
    playback = Callback()
    tts = SpeechSynthesizer(
        model=model,
        voice=voice,
        format=AudioFormat.PCM_22050HZ_MONO_16BIT,
        callback=playback,
    )

    prompt = [{"role": "user", "content": "请介绍一下你自己"}]
    # Message-style results, streamed, each chunk carrying only the new text.
    chunks = Generation.call(
        model="qwen-turbo",
        messages=prompt,
        result_format="message",
        stream=True,
        incremental_output=True,
    )
    for chunk in chunks:
        if chunk.status_code != HTTPStatus.OK:
            details = (
                chunk.request_id,
                chunk.status_code,
                chunk.code,
                chunk.message,
            )
            print(
                "Request id: %s, Status code: %s, error code: %s, error message: %s"
                % details
            )
            continue
        text = chunk.output.choices[0]["message"]["content"]
        print(text, end="")
        tts.streaming_call(text)

    tts.streaming_complete()
    print('requestId: ', tts.get_last_request_id())


# Run the text-to-speech demo when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    synthesizer_with_llm()

In [2]:
%pip install pyaudio

Collecting pyaudio
  Using cached PyAudio-0.2.14-cp310-cp310-macosx_13_0_x86_64.whl
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14

[notice] A new release of pip is available: 24.1 -> 24.2
[notice] To update, run: python -m pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.


## 语音识别

In [2]:
# For prerequisites running the following sample, visit https://help.aliyun.com/document_detail/611472.html

import pyaudio
import dashscope
from dashscope.audio.asr import (Recognition, RecognitionCallback,
                                 RecognitionResult)

# Module-level handles shared between the Callback methods and the capture
# loop: the PyAudio instance and its input stream. Both are None while no
# stream is open.
mic = None
stream = None

class Callback(RecognitionCallback):
    """Recognition callback that owns the microphone stream.

    The stream is published through the module-level ``mic`` / ``stream``
    globals so that the capture loop can read from it. NOTE(review): the
    method names suggest the SDK calls them when the recognition session
    opens, closes and produces results — confirm against the DashScope
    ``RecognitionCallback`` documentation.
    """

    def on_open(self) -> None:
        """Open a 16 kHz, mono, 16-bit microphone input stream."""
        global mic
        global stream
        print('RecognitionCallback open.')
        mic = pyaudio.PyAudio()
        # PyAudio streams deliver raw PCM and take no ``encoding`` argument;
        # passing one raises TypeError before the stream is created.
        stream = mic.open(format=pyaudio.paInt16,
                          channels=1,
                          rate=16000,
                          input=True)

    def on_close(self) -> None:
        """Release the microphone and reset the shared handles to None."""
        global mic
        global stream
        print('RecognitionCallback close.')
        # Guard each handle: if on_open() failed (or never ran) there is
        # nothing to release and closing must not raise AttributeError.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if mic is not None:
            mic.terminate()
        stream = None
        mic = None

    def on_event(self, result: RecognitionResult) -> None:
        """Print the sentence carried by a recognition result."""
        print('RecognitionCallback sentence: ', result.get_sentence())

# Create the recognition session: 16 kHz PCM audio, results delivered to Callback.
callback = Callback()
recognition = Recognition(model='paraformer-realtime-v2',
                          format='pcm',
                          sample_rate=16000,
                          callback=callback)
recognition.start()

try:
    # `stream` is assigned by Callback.on_open() and reset to None by
    # Callback.on_close(), so the loop ends once the session has closed.
    # NOTE(review): this relies on on_open() having run before start()
    # returns — confirm against the SDK documentation.
    while stream:
        # Read 3200 frames per iteration (0.2 s at 16 kHz); overflowed input
        # is dropped instead of raising.
        data = stream.read(3200, exception_on_overflow=False)
        recognition.send_audio_frame(data)
finally:
    # Always end the session, including when the loop is interrupted with
    # Ctrl+C, so the recognition task and microphone are not left open.
    recognition.stop()

RecognitionCallback open.
RecognitionCallback sentence:  {'begin_time': 1950, 'end_time': None, 'text': '你', 'words': [{'begin_time': 1950, 'end_time': 2520, 'text': '你', 'punctuation': ''}]}
RecognitionCallback sentence:  {'begin_time': 1950, 'end_time': None, 'text': '你好', 'words': [{'begin_time': 1950, 'end_time': 3320, 'text': '你好', 'punctuation': ''}]}
RecognitionCallback sentence:  {'begin_time': 1950, 'end_time': None, 'text': '你好，你能听', 'words': [{'begin_time': 1950, 'end_time': 2658, 'text': '你好', 'punctuation': '，'}, {'begin_time': 2658, 'end_time': 3189, 'text': '你能', 'punctuation': ''}, {'begin_time': 3189, 'end_time': 3720, 'text': '听', 'punctuation': ''}]}
RecognitionCallback sentence:  {'begin_time': 1950, 'end_time': None, 'text': '你好，你能听到我', 'words': [{'begin_time': 1950, 'end_time': 2570, 'text': '你好', 'punctuation': '，'}, {'begin_time': 2570, 'end_time': 3086, 'text': '你能', 'punctuation': ''}, {'begin_time': 3086, 'end_time': 3602, 'text': '听到', 'punctuation': ''}, {'


KeyboardInterrupt



RecognitionCallback close.


In [None]:
import os
import queue
import sys
import threading
import time

import nls
import numpy as np
import sounddevice as sd

# Alibaba Cloud NLS configuration
URL = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1"
# Credentials come from the environment so that no secret is stored in the
# source; both values are None when the variable is not set.
# How to obtain a token: https://help.aliyun.com/document_detail/450255.html
TOKEN = os.getenv("ALIYUN_NLS_TOKEN")
# Get the Appkey from the console: https://nls-portal.console.aliyun.com/applist
APPKEY = os.getenv("ALIYUN_NLS_APPKEY")


# FIFO that carries captured audio chunks from the capture callback to the
# loop that forwards them for recognition.
audio_queue = queue.Queue()


# Capture callback: reports stream status and enqueues the recorded chunk.
def audio_callback(indata, frames, time, status):
    """Enqueue a copy of one captured audio chunk.

    Args:
        indata: Captured samples; must provide a ``copy()`` method.
        frames: Unused here.
        time: Unused here.
        status: Printed to stderr when truthy.
    """
    if status:
        print(status, file=sys.stderr)
    # A copy is stored rather than ``indata`` itself — presumably because the
    # caller may reuse that buffer after this function returns.
    snapshot = indata.copy()
    audio_queue.put(snapshot)

class RealTimeSpeechRecognizer:
    """Thin wrapper around ``nls.NlsSpeechTranscriber`` that logs every event.

    Constructing an instance creates the transcriber and starts it
    immediately; audio is then pushed with ``send_audio`` and the session is
    ended with ``stop_transcription``.
    """

    def __init__(self, url, token, appkey):
        """Store the connection settings and start the transcriber."""
        self.url = url
        self.token = token
        self.appkey = appkey
        self.transcriber = None
        self.__initialize_transcriber()

    def __initialize_transcriber(self):
        """Create the transcriber with this object's handlers and start it."""
        handlers = {
            "on_start": self.on_start,
            "on_sentence_begin": self.on_sentence_begin,
            "on_sentence_end": self.on_sentence_end,
            "on_result_changed": self.on_result_changed,
            "on_completed": self.on_completed,
            "on_error": self.on_error,
            "on_close": self.on_close,
        }
        self.transcriber = nls.NlsSpeechTranscriber(
            url=self.url,
            token=self.token,
            appkey=self.appkey,
            callback_args=[self],
            **handlers,
        )
        # PCM input with intermediate results, punctuation prediction and
        # inverse text normalization all enabled.
        self.transcriber.start(
            aformat="pcm",
            enable_intermediate_result=True,
            enable_punctuation_prediction=True,
            enable_inverse_text_normalization=True,
        )

    def send_audio(self, audio_data):
        """Forward *audio_data* to the transcriber, if there is one."""
        if not self.transcriber:
            return
        self.transcriber.send_audio(audio_data)

    def stop_transcription(self):
        """Stop the transcriber, if there is one."""
        if not self.transcriber:
            return
        self.transcriber.stop()

    def on_sentence_begin(self, message, *args):
        """Log a sentence-begin message."""
        line = "Sentence begin: {}".format(message)
        print(line)

    def on_sentence_end(self, message, *args):
        """Log a sentence-end message."""
        line = "Sentence end: {}".format(message)
        print(line)

    def on_start(self, message, *args):
        """Log the start message."""
        line = "Start: {}".format(message)
        print(line)

    def on_result_changed(self, message, *args):
        """Log a changed (intermediate) result message."""
        line = "Result changed: {}".format(message)
        print(line)

    def on_completed(self, message, *args):
        """Log the completion message."""
        line = "Completed: {}".format(message)
        print(line)

    def on_error(self, message, *args):
        """Log an error message."""
        line = "Error: {}".format(message)
        print(line)

    def on_close(self, *args):
        """Log the close event together with all arguments it received."""
        line = "Closed: {}".format(args)
        print(line)

# Send audio to the Alibaba Cloud speech-to-text interface
def recognize_speech(audio_data, recognizer):
    """Send audio samples to *recognizer* as one raw byte string.

    Args:
        audio_data: Either a single NumPy array of samples (any number of
            dimensions) or an iterable of NumPy arrays, which are joined
            along their first axis.
        recognizer: Object providing ``send_audio(bytes)``.

    Nothing is sent when there are no samples.
    """
    if isinstance(audio_data, np.ndarray):
        # Already one array: use it as is. Concatenating it again would walk
        # it row by row (and fail for a 1-D array) only to produce the same
        # bytes.
        samples = audio_data
    else:
        chunks = list(audio_data)
        if not chunks:
            # np.concatenate() raises on an empty sequence.
            return
        samples = np.concatenate(chunks)
    if samples.size == 0:
        return
    recognizer.send_audio(samples.tobytes())

# Start the audio stream and process audio data
def start_audio_stream(recognizer):
    """Record from the microphone and forward the audio until Ctrl+C.

    Opens a 16 kHz, mono, int16 input stream whose chunks arrive on
    ``audio_queue``. Every 0.1 s the queue is drained; once at least ten
    chunks have accumulated they are sent to *recognizer* in one batch.

    Args:
        recognizer: Object providing ``send_audio(bytes)`` and
            ``stop_transcription()``.
    """
    with sd.InputStream(callback=audio_callback, channels=1, samplerate=16000, dtype='int16'):
        print("Recording audio... Press Ctrl+C to stop.")
        audio_buffer = []
        try:
            while True:
                while not audio_queue.empty():
                    audio_buffer.append(audio_queue.get())
                if len(audio_buffer) >= 10:  # batch size, in captured chunks
                    # recognize_speech() joins the chunks itself, so the list
                    # is passed directly instead of being concatenated twice.
                    recognize_speech(audio_buffer, recognizer)
                    audio_buffer = []  # Clear buffer after sending
                time.sleep(0.1)
        except KeyboardInterrupt:
            print("Stopping audio recording.")
        finally:
            # Stop the transcription on every exit path, not only on Ctrl+C,
            # so an unexpected error does not leave the session open.
            recognizer.stop_transcription()

# Run the real-time recognition demo when this code is executed directly:
# build the recognizer from the module-level settings, then record until
# interrupted.
if __name__ == "__main__":
    recognizer = RealTimeSpeechRecognizer(URL, TOKEN, APPKEY)
    start_audio_stream(recognizer)



In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import base64
import hashlib
import hmac
import requests
import time
import uuid
from urllib import parse
class CosyClone:
    """Helpers for calling the CosyVoice cloning HTTP API.

    Both public calls build a parameter set, sign it with HMAC-SHA1 using the
    access key secret and POST it to the ``cn-shanghai`` endpoint. The
    intermediate values and the response body are printed, and the response
    body is also returned.
    """

    # Seconds to wait for the service; without a timeout a stalled connection
    # would block the caller indefinitely.
    _REQUEST_TIMEOUT = 30

    @staticmethod
    def _encode_text(text):
        """Percent-encode *text*, using ``%20`` for spaces and keeping ``~``."""
        encoded_text = parse.quote_plus(text)
        return encoded_text.replace('+', '%20').replace('*', '%2A').replace('%7E', '~')

    @staticmethod
    def _encode_dict(dic):
        """Encode *dic* as a query string with its keys in sorted order."""
        dic_sorted = sorted(dic.items())
        encoded_text = parse.urlencode(dic_sorted)
        return encoded_text.replace('+', '%20').replace('*', '%2A').replace('%7E', '~')

    @staticmethod
    def _signed_post(access_key_id, access_key_secret, action, action_parameters):
        """Sign and POST one API call; shared by all public methods.

        Args:
            access_key_id: Access key id sent as ``AccessKeyId``.
            access_key_secret: Secret used as the HMAC-SHA1 key.
            action: Value of the ``Action`` parameter.
            action_parameters: Parameters specific to *action*.

        Returns:
            The response body as text.

        Raises:
            ValueError: If either credential is missing or empty.
        """
        # Fail early with a clear message, e.g. when the credentials come from
        # os.getenv() and the variable is not set.
        if not access_key_id or not access_key_secret:
            raise ValueError('access_key_id and access_key_secret must not be empty')

        parameters = {'AccessKeyId': access_key_id,
                      'Action': action,
                      'Format': 'JSON',
                      'RegionId': 'cn-shanghai',
                      'SignatureMethod': 'HMAC-SHA1',
                      'SignatureNonce': str(uuid.uuid1()),
                      'SignatureVersion': '1.0',
                      'Timestamp': time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
                      'Version': '2019-08-19',
                      }
        parameters.update(action_parameters)
        # Build the canonicalized query string
        query_string = CosyClone._encode_dict(parameters)
        print('规范化的请求字符串: %s' % query_string)
        # Build the string to sign
        string_to_sign = 'POST' + '&' + CosyClone._encode_text('/') + '&' + CosyClone._encode_text(query_string)
        print('待签名的字符串: %s' % string_to_sign)
        # Compute the signature
        secreted_string = hmac.new(bytes(access_key_secret + '&', encoding='utf-8'),
                                   bytes(string_to_sign, encoding='utf-8'),
                                   hashlib.sha1).digest()
        # Decode to text so the log shows the signature itself, not a bytes repr.
        signature = base64.b64encode(secreted_string).decode('ascii')
        print('签名: %s' % signature)
        # URL-encode the signature
        signature = CosyClone._encode_text(signature)
        print('URL编码后的签名: %s' % signature)
        # Assemble the request URL
        full_url = 'https://nls-slp.cn-shanghai.aliyuncs.com/?Signature=%s&%s' % (signature, query_string)
        print('url: %s' % full_url)
        # Submit the HTTP POST request
        response = requests.post(full_url, timeout=CosyClone._REQUEST_TIMEOUT)
        print(response.text)
        return response.text

    @staticmethod
    def cosy_clone(access_key_id, access_key_secret, voicePrefix, audio_url):
        """Call the ``CosyVoiceClone`` action.

        Args:
            access_key_id: Access key id.
            access_key_secret: Access key secret used for signing.
            voicePrefix: Sent as the ``VoicePrefix`` parameter.
            audio_url: Sent as the ``Url`` parameter.

        Returns:
            The response body as text.

        Raises:
            ValueError: If either credential is missing or empty.
        """
        return CosyClone._signed_post(
            access_key_id, access_key_secret, 'CosyVoiceClone',
            {'VoicePrefix': voicePrefix, 'Url': audio_url},
        )

    @staticmethod
    def cosy_list(access_key_id, access_key_secret, voice_prefix, page_index=1, page_size=10):
        """Call the ``ListCosyVoice`` action.

        Args:
            access_key_id: Access key id.
            access_key_secret: Access key secret used for signing.
            voice_prefix: Sent as the ``VoicePrefix`` parameter.
            page_index: Sent as the ``PageIndex`` parameter.
            page_size: Sent as the ``PageSize`` parameter.

        Returns:
            The response body as text.

        Raises:
            ValueError: If either credential is missing or empty.
        """
        return CosyClone._signed_post(
            access_key_id, access_key_secret, 'ListCosyVoice',
            {'VoicePrefix': voice_prefix, 'PageIndex': page_index, 'PageSize': page_size},
        )


if __name__ == "__main__":
    # User credentials, read from the environment (os.getenv returns None
    # when a variable is not set)
    access_key_id = os.getenv('ALIYUN_AK_ID')
    access_key_secret = os.getenv('ALIYUN_AK_SECRET')
    # Placeholder values — replace them with your own before running
    voice_prefix = 'your-voice-prefix'
    audio_url = 'your-audio-file-url'

    # Submit the cloning request, then list the voices for the same prefix
    CosyClone.cosy_clone(access_key_id, access_key_secret, voice_prefix, audio_url)
    CosyClone.cosy_list(access_key_id, access_key_secret, voice_prefix)
   