In [3]:
# For prerequisites running the following sample, visit https://help.aliyun.com/document_detail/xxxxx.html

import pyaudio
import dashscope
from dashscope.audio.asr import *
dashscope.api_key = "sk-615616fb539749dda57c80cc0928669d"

# 若没有将API Key配置到环境变量中，需将your-api-key替换为自己的API Key

mic = None
stream = None

class Callback(TranslationRecognizerCallback):
    def on_open(self) -> None:
        global mic
        global stream
        print("TranslationRecognizerCallback open.")
        mic = pyaudio.PyAudio()
        stream = mic.open(
            format=pyaudio.paInt16, channels=1, rate=16000, input=True
        )

    def on_close(self) -> None:
        global mic
        global stream
        print("TranslationRecognizerCallback close.")
        stream.stop_stream()
        stream.close()
        mic.terminate()
        stream = None
        mic = None

    def on_event(
        self,
        request_id,
        transcription_result: TranscriptionResult,
        translation_result: TranslationResult,
        usage,
    ) -> None:
        print("request id: ", request_id)
        print("usage: ", usage)
        if translation_result is not None:
            print(
                "translation_languages: ",
                translation_result.get_language_list(),
            )
            english_translation = translation_result.get_translation("en")
            print("sentence id: ", english_translation.sentence_id)
            print("translate to english: ", english_translation.text)
            if english_translation.stash is not None:
                print(
                    "translate to english stash: ",
                    translation_result.get_translation("en").stash.text,
                )
        if transcription_result is not None:
            print("sentence id: ", transcription_result.sentence_id)
            print("transcription: ", transcription_result.text)
            if transcription_result.stash is not None:
                print("transcription stash: ", transcription_result.stash.text)


callback = Callback()


translator = TranslationRecognizerRealtime(
    model="gummy-realtime-v1",
    format="pcm",
    sample_rate=16000,
    transcription_enabled=True,
    translation_enabled=True,
    translation_target_languages=["en"],
    callback=callback,
)
translator.start()
print("请您通过麦克风讲话体验实时语音识别和翻译功能")
while True:
    if stream:
        data = stream.read(3200, exception_on_overflow=False)
        translator.send_audio_frame(data)
    else:
        break

translator.stop()

TranslationRecognizerCallback open.
请您通过麦克风讲话体验实时语音识别和翻译功能
request id:  2314248b41284b24b64b8b1a56bf5332
usage:  {}
translation_languages:  ['en']
sentence id:  0
translate to english:  You want
sentence id:  0
transcription:  你要
request id:  2314248b41284b24b64b8b1a56bf5332
usage:  {}
translation_languages:  ['en']
sentence id:  0
translate to english:  What you have to do
sentence id:  0
transcription:  你要做的事情。
request id:  2314248b41284b24b64b8b1a56bf5332
usage:  {'duration': 28}
translation_languages:  ['en']
sentence id:  0
translate to english:  What you have to do.
sentence id:  0
transcription:  你要做的事情。
request id:  2314248b41284b24b64b8b1a56bf5332
usage:  {}
translation_languages:  ['en']
sentence id:  1
translate to english:  You said
sentence id:  1
transcription:  你要说的
request id:  2314248b41284b24b64b8b1a56bf5332
usage:  {}
translation_languages:  ['en']
sentence id:  1
translate to english:  What you have to do is
sentence id:  1
transcription:  你要做的事情
request id:  231424

KeyboardInterrupt: 

In [6]:
# coding=utf-8

import dashscope
from dashscope.audio.tts_v2 import *

from datetime import datetime

def get_timestamp():
    now = datetime.now()
    formatted_timestamp = now.strftime("[%Y-%m-%d %H:%M:%S.%f]")
    return formatted_timestamp

# 若没有将API Key配置到环境变量中，需将your-api-key替换为自己的API Key
# dashscope.api_key = "your-api-key"

model = "cosyvoice-v1"
voice = "longxiaochun"


class Callback(ResultCallback):
    _player = None
    _stream = None

    def on_open(self):
        self.file = open("output.mp3", "wb")
        print(get_timestamp() + " websocket is open.")

    def on_complete(self):
        print(get_timestamp() + " speech synthesis task complete successfully.")

    def on_error(self, message: str):
        print(f"speech synthesis task failed, {message}")

    def on_close(self):
        print(get_timestamp() + " websocket is closed.")
        self.file.close()

    def on_event(self, message):
        pass

    def on_data(self, data: bytes) -> None:
        print(get_timestamp() + " audio result length: " + str(len(data)))
        self.file.write(data)


callback = Callback()

synthesizer = SpeechSynthesizer(
    model=model,
    voice=voice,
    callback=callback,
)

synthesizer.call("今天天气怎么样？")
print('[Metric] requestId: {}, first package delay ms: {}'.format(
    synthesizer.get_last_request_id(),
    synthesizer.get_first_package_delay()))

[2025-03-10 23:30:26.297564] websocket is open.
[Metric] requestId: d427464109fd43d4a8879fa1aad50ddd, first package delay ms: -1741620626056.8052


[2025-03-10 23:30:26.717828] audio result length: 1716
[2025-03-10 23:30:26.717828] audio result length: 2926
[2025-03-10 23:30:26.717828] audio result length: 2926
[2025-03-10 23:30:26.741314] audio result length: 2926
[2025-03-10 23:30:26.743954] audio result length: 2925
[2025-03-10 23:30:26.743954] audio result length: 2926
[2025-03-10 23:30:26.743954] audio result length: 2926
[2025-03-10 23:30:26.743954] audio result length: 1671
[2025-03-10 23:30:26.743954] audio result length: 2090
[2025-03-10 23:30:26.751592] speech synthesis task complete successfully.
[2025-03-10 23:30:26.751592] websocket is closed.
