/
deepgram_text_to_speech.py
91 lines (72 loc) · 2.05 KB
/
deepgram_text_to_speech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#
# This demo will join a Daily meeting and, given a text file with sentences (one
# per line), will translate text into audio using Deepgram's Text-To-Speech API
# and will send it into the meeting.
#
# The demo requires a Deepgram API key set in the DG_API_KEY environment variable.
#
# See https://developers.deepgram.com/docs/text-to-speech
#
# Usage: python3 deepgram_text_to_speech.py -m MEETING_URL -i FILE
#
import argparse
import os
import sys
import time

from daily import *
from deepgram import (
    DeepgramClient,
    SpeakOptions,
)

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--meeting", required=True, help="Meeting URL")
parser.add_argument(
    "-i",
    "--input",
    required=True,
    help="File with sentences (one per line)")
args = parser.parse_args()

# Fail fast with a clear message rather than letting DeepgramClient fail later
# with a confusing authentication error when the key is missing.
api_key = os.getenv("DG_API_KEY")
if not api_key:
    sys.exit("Error: the DG_API_KEY environment variable is not set.")

Daily.init()

# We create a virtual microphone device so we can send audio samples into the
# meeting. 16 kHz mono matches the TTS output configured below.
microphone = Daily.create_microphone_device(
    "my-mic", sample_rate=16000, channels=1)

client = CallClient()

print()
print(f"Joining {args.meeting} ...")

# Join and tell our call client that we will be using our new virtual
# microphone.
client.join(args.meeting, client_settings={
    "inputs": {
        "microphone": {
            "isEnabled": True,
            "settings": {
                "deviceId": "my-mic"
            }
        }
    }
})

# Make sure we are joined. It would be better to use join() completion
# callback.
time.sleep(3)

deepgram = DeepgramClient(api_key=api_key)

# Raw 16-bit linear PCM at 16 kHz with no container, so the bytes can be fed
# straight into the virtual microphone created above.
speak_options = SpeakOptions(
    model="aura-asteria-en",
    encoding="linear16",
    sample_rate="16000",
    container="none"
)

print()

# Use a context manager so the file is closed even if a request fails, and
# iterate the file lazily instead of loading it all with readlines().
with open(args.input, "r") as sentences_file:
    for line in sentences_file:
        sentence = line.strip()
        if not sentence:
            # Skip blank lines: sending empty text to the TTS API is an error.
            continue

        print(f"Processing: {sentence}")
        print()

        # Synthesize the sentence with Deepgram's Text-To-Speech API.
        response = deepgram.speak.v("1").stream({"text": sentence}, speak_options)

        # Send all the audio frames to the microphone.
        microphone.write_frames(response.stream.read())

# Let everything finish
time.sleep(2)

client.leave()
client.release()