In [1]:
import os
import bs4
import json
import time
import lxml
import base64
import requests
from google.cloud import texttospeech_v1beta1
from IPython.display import clear_output as clear

In [2]:
# Language to process: selects the conjugation/audio JSON file names and
# the entries looked up in `voices` and `language_codes`.
language = "portuguese"

In [3]:
# Resolve the project root from the notebook's working directory.
# Only a trailing 'corrections' component is stripped: a plain
# str.replace() would also mangle any other part of the absolute path
# that happens to contain that substring (e.g. a parent folder name).
notebook_dir = os.path.abspath('')
if os.path.basename(notebook_dir) == 'corrections':
    top_dir = os.path.dirname(notebook_dir)
else:
    top_dir = notebook_dir

# Input conjugation tables, TTS credentials/data and generated audio.
conjugations_data_dir = os.path.join(top_dir, 'conjugations', 'data', 'language-specific')
audio_data_dir = os.path.join(top_dir, 'audio', 'data')
audio_out_dir = os.path.join(top_dir, 'audio', 'out')

In [4]:
# Point GOOGLE_APPLICATION_CREDENTIALS at the key file stored in the audio
# data directory; Google client libraries read this variable to locate
# service-account credentials. Must run before any TTS client is created.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(audio_data_dir, 'gckey.json')

In [5]:
class nestedDict(dict):
    """Dictionary that creates nested levels on first access.

    Looking up a missing key stores and returns a new, empty instance of
    the same class, so chained assignments such as ``d[a][b][c] = value``
    work without creating the intermediate levels by hand.
    """

    def __missing__(self, key):
        # Called by dict.__getitem__ for absent keys only.
        child = type(self)()
        self[key] = child
        return child

In [6]:
def ttsSynthesize(phrase, language_code, voice_name, client = None):
    """Synthesize ``phrase`` with Google Cloud Text-to-Speech.

    Args:
        phrase: Text to be spoken.
        language_code: Language code passed to the voice selection.
        voice_name: Name of the voice to use (e.g. 'pt-PT-Wavenet-B').
        client: Optional ``TextToSpeechClient`` to reuse across calls.
            When omitted, a new client is created for this call, which is
            the original behaviour.

    Returns:
        The synthesized audio ("MP3_64_KBPS" encoding, 44100 Hz sample
        rate) as a base64-encoded ``str``.

    Raises:
        Whatever the client raises when the request fails; errors are not
        handled here.
    """
    if client is None:
        client = texttospeech_v1beta1.TextToSpeechClient()

    # Named `synthesis_input` so the builtin `input` is not shadowed.
    synthesis_input = texttospeech_v1beta1.SynthesisInput()
    synthesis_input.text = phrase

    voice = texttospeech_v1beta1.VoiceSelectionParams()
    voice.language_code = language_code
    voice.name = voice_name

    audio_config = texttospeech_v1beta1.AudioConfig()
    audio_config.audio_encoding = "MP3_64_KBPS"
    audio_config.sample_rate_hertz = 44100

    request = texttospeech_v1beta1.SynthesizeSpeechRequest(
        input = synthesis_input,
        voice = voice,
        audio_config = audio_config,
    )

    response = client.synthesize_speech(request = request)

    # Decode the base64 bytes directly instead of slicing the "b'...'"
    # repr of the bytes object; the resulting string is the same.
    return base64.b64encode(response.audio_content).decode('ascii')

In [7]:
# Voice name used for each supported language.
voices = dict(
    spanish = 'es-ES-Wavenet-B',
    french = 'fr-FR-Wavenet-D',
    german = 'de-DE-Wavenet-B',
    italian = 'it-IT-Wavenet-C',
    portuguese = 'pt-PT-Wavenet-B',
)

# Language code sent with each request. Note these use an underscore
# separator, whereas the voice names above use hyphens.
language_codes = dict(
    spanish = 'es_ES',
    french = 'fr_FR',
    german = 'de_DE',
    italian = 'it_IT',
    portuguese = 'pt_PT',
)

In [8]:
# Load the conjugation table for the selected language.
with open(os.path.join(conjugations_data_dir, f'conjugations_{language}.json'), encoding = 'utf8') as file:
    conjugations = json.load(file)

In [9]:
# Load the audio generated so far for the selected language.
with open(os.path.join(audio_out_dir, f'audio_{language}.json'), encoding = 'utf8') as file:
    audio = json.load(file)

In [10]:
# Split the verbs with rank below 2001 into those that already have audio
# (copied into `new_audio`) and those still missing it (`no_audio`).
new_audio = nestedDict()
no_audio = []

for verb, entry in conjugations.items():
    if entry['rank'] < 2001:
        if verb in audio:
            new_audio[verb] = audio[verb]
        else:
            no_audio.append(verb)

# Lowest rank still missing audio, or None when nothing is missing.
# Using min() with a default avoids the IndexError that indexing
# no_audio[0] raises on an empty list, and does not rely on the JSON
# listing verbs in rank order.
current = min((conjugations[verb]['rank'] for verb in no_audio), default = None)

print(len(no_audio))

105


In [11]:
def synthesizeVerb(verb):
    """Synthesize audio for every conjugated form of ``verb``.

    Walks the 'simple', 'compound' and 'progressive' sections present in
    ``conjugations[verb]`` and, for each mood and tense, maps every
    distinct conjugated form to its synthesized audio.

    Returns:
        A ``nestedDict`` shaped
        ``{complexity: {mood: {tense: {conjugation: audio}}}}``.

    Raises:
        Whatever ``ttsSynthesize`` raises; the caller decides how to retry.
    """
    verb_audio = nestedDict()

    for complexity in ['simple', 'compound', 'progressive']:
        if complexity not in conjugations[verb]:
            continue

        for mood, tenses in conjugations[verb][complexity].items():
            for tense, subjects in tenses.items():

                collated = {}

                # Several subjects can share one form; synthesize it once.
                for conjugation in subjects.values():
                    if conjugation not in collated:
                        collated[conjugation] = ttsSynthesize(conjugation, language_codes[language], voices[language])

                verb_audio[complexity][mood][tense] = collated

    return verb_audio


# Iterate over the missing verbs directly, in rank order. Stepping a rank
# counter and looping until 2000 entries exist never terminates when the
# missing ranks are not consecutive or fewer than 2000 verbs qualify.
pending = sorted(
    (verb for verb in no_audio if verb not in new_audio),
    key = lambda verb: conjugations[verb]['rank'],
)

for verb in pending:

    # A verb is stored only once fully synthesized, so a failure part-way
    # through never leaves a partial entry behind; the verb is retried
    # from scratch after the pause.
    while verb not in new_audio:
        try:
            new_audio[verb] = synthesizeVerb(verb)
        except Exception as ex:
            clear()
            print(f'ERROR {ex} – SLEEEPING FOR 2 MINUTES')
            time.sleep(120)

    rank = conjugations[verb]['rank']

    # rank / 20 expresses the rank as a percentage of 2000 verbs.
    clear()
    print(f'{verb} ({rank}) – {round(rank / 20, 3)}%')

afazer (2000) – 100.0%


In [12]:
# Persist the result only when all 2000 verbs have audio.
if len(new_audio) == 2000:
    out_path = os.path.join(audio_out_dir, f'audio_{language}.json')
    tmp_path = out_path + '.tmp'

    # Write to a temporary file and swap it in afterwards: opening the
    # existing file with 'w' truncates it immediately, so a failure during
    # json.dump would otherwise destroy the previously generated audio.
    try:
        with open(tmp_path, 'w', encoding = 'utf8') as file:
            json.dump(new_audio, file, indent = 4, ensure_ascii = False)
        os.replace(tmp_path, out_path)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only cleans up after a failed write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
else:
    print(f'Not saving: expected 2000 verbs with audio, found {len(new_audio)}')