-
Notifications
You must be signed in to change notification settings - Fork 0
/
Vocab-Builder-TTS.py
171 lines (149 loc) · 7.68 KB
/
Vocab-Builder-TTS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import requests
import numpy as np
from pydub import AudioSegment
import simpleaudio as sa
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QPushButton, QComboBox, QLabel, QTextEdit, QLineEdit, QMessageBox
from PyQt5.QtCore import Qt
from speechbrain.pretrained import Tacotron2, HIFIGAN
from elevenlabslib import *
import re
import sys
if sys.version_info < (3, 11):
sys.exit("This script requires Python 3.11 or later")
def merge_audio_files(file_paths, output_path):
combined = AudioSegment.empty()
for file_path in file_paths:
sound = AudioSegment.from_wav(file_path)
combined += sound
combined.export(output_path, format="mp3")
class WordDefinitionAndTTS:
def __init__(self, api_key, base_url, tts='Tacotron2', dictionary='merriam', tts2_api_key=None, gpu_opt="No GPU", openai_api_key=None):
self.api_key = api_key
self.openai_api_key = openai_api_key
self.base_url = base_url
self.tts = tts
self.dictionary = dictionary
self.tts2_api_key = tts2_api_key
self.gpu_opt = gpu_opt
def get_definition(self, word):
if self.dictionary == "gpt":
import openai
openai.api_key = self.openai_api_key
completion = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are an expert GRE vocab tutor. The user will input a word and you will return the word , it's definition, an easily rememberable way to memorize the word, an easily understandable example sentence"},
{"role": "user", "content": word}
]
)
content = completion.choices[0].message["content"]
lines = content.split("\n")
definition = None
memorization_tip = None
example_sentence = None
for line in lines:
if line.startswith("Definition:"):
definition = line.split("Definition:")[1].strip()
elif line.startswith("Memorization tip:"):
memorization_tip = line.split("Memorization tip:")[1].strip()
elif line.startswith("Example sentence:"):
example_sentence = line.split("Example sentence:")[1].strip()
return definition + ". " + memorization_tip if definition and memorization_tip else None, example_sentence
else: # For 'merriam' dictionary
response = requests.get(f'{self.base_url}{word}?key={self.api_key}')
data = response.json()
if not isinstance(data, list) or len(data) == 0 or not isinstance(data[0], dict):
QMessageBox.warning(None, "Word not found or no definition available", f"Word not found or no definition available for: {word}")
return None, None
definitions = data[0].get('shortdef', [])
# We need to ensure that the 'def', 'sseq', 'dt' fields exist in the dictionary to avoid KeyError
if 'def' in data[0] and len(data[0]['def']) > 0 and 'sseq' in data[0]['def'][0] and len(data[0]['def'][0]['sseq']) > 0 and 'dt' in data[0]['def'][0]['sseq'][0][0][1] and len(data[0]['def'][0]['sseq'][0][0][1]['dt']) > 1 and len(data[0]['def'][0]['sseq'][0][0][1]['dt'][1][1]) > 0:
example_sentence = data[0]['def'][0]['sseq'][0][0][1]['dt'][1][1][0]['t']
example_sentence = example_sentence.replace('{it}', '').replace('{/it}', '')
return definitions[0], example_sentence
return None, None
def text_to_speech(self, text, word):
run_opts = {"device": "cuda"} if self.gpu_opt == "GPU" else {"device": "cpu"}
if self.tts == "Tacotron2":
#run_opts = {"device": "cuda"} if self.gpu_opt.currentText() == "GPU" else {"device": "cpu"}
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts", run_opts=run_opts)
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder", run_opts=run_opts)
mel_output, mel_length, alignment = tacotron2.encode_text(text)
waveforms = hifi_gan.decode_batch(mel_output)
audio = waveforms.squeeze().numpy()
audio = (audio * 2**15).astype(np.int16)
sound = AudioSegment(audio.tobytes(), frame_rate=22050, sample_width=2, channels=1)
filename = f"{word.replace(' ', '_')}_TTS.wav"
sound.export(filename, format="wav")
wave_obj = sa.WaveObject.from_wave_file(filename)
play_obj = wave_obj.play()
play_obj.wait_done()
return filename # return the filename
elif self.tts == "ElevenLabs":
user = ElevenLabsUser(self.tts2_api_key)
voice = user.get_voices_by_name("Rachel")[0]
voice.generate_play_audio_v2(text, playbackOptions=PlaybackOptions(runInBackground=False))
def main(self, words):
file_paths = []
for word in words:
definition, example_sentence = self.get_definition(word)
if definition is None or example_sentence is None:
# Skip this word if it was not found
continue
text = f'Word: {word}. Definition: {definition}. Example: {example_sentence}'
file_path = self.text_to_speech(text, word)
file_paths.append(file_path)
merge_audio_files(file_paths, "output.mp3")
def run(self, text):
words = re.split(',|\n', text)
words = [word.strip() for word in words if word.strip() != '']
self.main(words)
class App(QWidget):
def __init__(self):
super().__init__()
self.dict_opt = QComboBox()
self.dict_opt.addItems(["merriam", "gpt"])
self.tts_opt = QComboBox()
self.tts_opt.addItems(["Tacotron2", "ElevenLabs"])
self.gpu_opt = QComboBox()
self.gpu_opt.addItems(["GPU", "No GPU"])
self.tts_api_key = QLineEdit()
self.merriam_api_key = QLineEdit()
self.openai_api_key = QLineEdit()
self.words_input = QTextEdit()
self.start_button = QPushButton('Start')
self.start_button.clicked.connect(self.start_processing)
layout = QVBoxLayout()
layout.addWidget(QLabel("Dictionary:"))
layout.addWidget(self.dict_opt)
layout.addWidget(QLabel("TTS Engine:"))
layout.addWidget(self.tts_opt)
layout.addWidget(QLabel("Run on:"))
layout.addWidget(self.gpu_opt)
layout.addWidget(QLabel("API Keys (TTS):"))
layout.addWidget(self.tts_api_key)
layout.addWidget(QLabel("API Keys (merriam):"))
layout.addWidget(self.merriam_api_key)
layout.addWidget(QLabel("API Keys (OpenAI):"))
layout.addWidget(self.openai_api_key)
layout.addWidget(QLabel("Enter words:"))
layout.addWidget(self.words_input)
layout.addWidget(self.start_button)
self.setLayout(layout)
def start_processing(self):
words = self.words_input.toPlainText()
tts_engine = self.tts_opt.currentText()
dictionary = self.dict_opt.currentText()
if tts_engine == "ElevenLabs":
tts_api_key = self.tts_api_key.text()
else:
tts_api_key = None
merriam_api_key = self.merriam_api_key.text()
gpu_opt = self.gpu_opt.currentText()
openai_api_key = self.openai_api_key.text()
word_definition_and_tts = WordDefinitionAndTTS(merriam_api_key, 'https://www.dictionaryapi.com/api/v3/references/thesaurus/json/', tts_engine, dictionary, tts_api_key, gpu_opt, openai_api_key)
word_definition_and_tts.run(words)
app = QApplication([])
window = App()
window.show()
app.exec_()