In [1]:
import os
import time
import json
import base64
import requests
from google.cloud import texttospeech_v1beta1
from IPython.display import clear_output as clear

In [2]:
# Resolve the project layout relative to the notebook's working directory.
# When the notebook runs from a folder named `src`, the project root is that
# folder's parent; otherwise the working directory itself is used as the root.
# Only a trailing `src` component is stripped: a blanket str.replace('src', '')
# would also rewrite unrelated parts of the path (e.g. '/home/srcuser/...').
working_dir = os.path.abspath('')
top_dir = os.path.dirname(working_dir) if os.path.basename(working_dir) == 'src' else working_dir
data_dir = os.path.join(top_dir, 'data')  # input files (credentials, conjugations)
out_dir = os.path.join(top_dir, 'out')    # generated audio JSON files

In [3]:
# Expose the service-account key stored in the data directory through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS = os.path.join(data_dir, 'gckey.json'))

In [4]:
class nestedDict(dict):
    """Dictionary that creates missing keys on access, to any depth.

    Reading a key that does not exist stores a new, empty `nestedDict` under
    that key and returns it, so `d['a']['b']['c'] = 1` works on an empty `d`.
    """

    def __missing__(self, key):
        # Called by dict.__getitem__ when `key` is absent: create the child,
        # remember it under `key`, and hand it back to the caller.
        child = type(self)()
        self[key] = child
        return child

In [5]:
# Load the conjugation tables from the data directory; later cells index the
# result as conjugations[language][verb][...].
conjugations_path = os.path.join(data_dir, 'conjugations.json')
with open(conjugations_path, encoding = 'utf8') as file:
    conjugations = json.load(file)

In [6]:
# Start with an empty auto-nesting mapping for the synthesized audio.
audio = nestedDict()

In [7]:
def ttsSynthesize(phrase, language_code, voice_name):
    """Synthesize `phrase` with Google Cloud Text-to-Speech and return it as base64 text.

    Args:
        phrase: Text to be spoken.
        language_code: Language code set on the voice selection parameters.
        voice_name: Name of the voice to use (e.g. 'es-ES-Wavenet-B').

    Returns:
        str: The audio bytes of the response, base64-encoded as an ASCII string.

    Raises:
        Any exception raised by the client while sending the request; nothing
        is caught here, so callers decide how to retry.
    """
    # Build the client once and reuse it on later calls: this function is
    # called once per phrase, and a fresh client per call is pure overhead.
    client = getattr(ttsSynthesize, '_client', None)
    if client is None:
        client = ttsSynthesize._client = texttospeech_v1beta1.TextToSpeechClient()

    # Named `synthesis_input` so the builtin `input` is not shadowed.
    synthesis_input = texttospeech_v1beta1.SynthesisInput()
    synthesis_input.text = phrase

    voice = texttospeech_v1beta1.VoiceSelectionParams()
    voice.language_code = language_code
    voice.name = voice_name

    audio_config = texttospeech_v1beta1.AudioConfig()
    audio_config.audio_encoding = "MP3_64_KBPS"
    audio_config.sample_rate_hertz = 44100

    request = texttospeech_v1beta1.SynthesizeSpeechRequest(
        input = synthesis_input,
        voice = voice,
        audio_config = audio_config,
    )

    response = client.synthesize_speech(request = request)

    # base64 output is plain ASCII, so decoding yields the same text as
    # slicing the "b'...'" repr, without relying on the repr format.
    return base64.b64encode(response.audio_content).decode('ascii')

In [8]:
# Run parameters: the language to process, and the rank of the next verb to
# synthesize (the main loop compares each verb's 'rank' against `current`
# and increments it after every processed verb).
language, current = 'spanish', 501

In [9]:
# Voice name passed to the text-to-speech request for each supported language.
voices = dict(
    spanish = 'es-ES-Wavenet-B',
    french = 'fr-FR-Wavenet-D',
    german = 'de-DE-Wavenet-B',
    italian = 'it-IT-Wavenet-C',
    portuguese = 'pt-PT-Wavenet-B',
)

In [10]:
# Language code passed to the text-to-speech request for each supported
# language. Keys must match the keys of `voices`, because both mappings are
# indexed with the same `language` value.
language_codes = {
    'spanish': 'es_ES',
    'french': 'fr_FR',
    'german': 'de_DE',
    'italian': 'it_IT',
    'portuguese': 'pt_PT',  # was misspelled 'portuguse', causing a KeyError
}

In [11]:
# Resume from a previous run when an output file for this language already
# exists; otherwise start from an empty mapping instead of failing with
# FileNotFoundError on the very first run.
# Only the top level becomes a nestedDict: the levels read from JSON stay
# plain dicts.
audio_path = os.path.join(out_dir, f'audio_{language}.json')

if os.path.exists(audio_path):
    with open(audio_path, 'r', encoding = 'utf8') as file:
        audio = nestedDict(json.load(file))
else:
    audio = nestedDict()

In [12]:
# Synthesize audio for the verbs of `language`, walking the ranks upward from
# `current` until `audio` holds TARGET_VERB_COUNT verbs. Progress is written
# to disk every SAVE_INTERVAL ranks and once more when the loop ends.
TARGET_VERB_COUNT = 2000
SAVE_INTERVAL = 100
RETRY_DELAY_SECONDS = 120

audio_path = os.path.join(out_dir, f'audio_{language}.json')


def _saveAudio():
    """Write the whole `audio` mapping to this language's JSON file."""
    with open(audio_path, 'w', encoding = 'utf8') as file:
        json.dump(audio, file, indent = 4, ensure_ascii = False)


def _synthesizeVerb(verb, entry):
    """Store base64 audio for every distinct conjugation of one verb in `audio`."""
    for complexity in ['simple', 'compound', 'progressive']:
        if complexity not in entry:
            continue

        for mood, tenses in entry[complexity].items():
            for tense, subjects in tenses.items():

                # Several subjects can share one conjugated form; synthesize
                # each distinct form only once per tense.
                collated = {}

                for conjugation in subjects.values():
                    if conjugation not in collated:
                        collated[conjugation] = ttsSynthesize(conjugation, language_codes[language], voices[language])

                audio[verb][complexity][mood][tense] = collated


# Index the verbs by rank once instead of rescanning every verb on each pass.
# If two verbs share a rank, the first one in file order wins.
verbs_by_rank = {}
for verb, entry in conjugations[language].items():
    verbs_by_rank.setdefault(entry.get('rank'), verb)

unsaved = False

while len(audio) < TARGET_VERB_COUNT:

    verb = verbs_by_rank.get(current)

    if verb is None:
        # No verb carries this rank, so waiting or retrying cannot make
        # progress; stop instead of spinning forever.
        print(f'No verb with rank {current} – stopping at {len(audio)} verbs')
        break

    entry = conjugations[language][verb]

    try:
        _synthesizeVerb(verb, entry)
        unsaved = True

        if entry['rank'] % SAVE_INTERVAL == 0:
            _saveAudio()
            unsaved = False

    except (LookupError, TypeError):
        # Missing keys or malformed data will not fix themselves by waiting;
        # surface them instead of retrying indefinitely.
        raise

    except Exception as ex:
        # Anything else is treated as a temporary failure: wait, then retry
        # the same verb (`current` has not been advanced).
        clear()
        print(f'ERROR {ex} – SLEEEPING FOR 2 MINUTES')
        time.sleep(RETRY_DELAY_SECONDS)
        continue

    clear()
    print(f'{verb} ({entry["rank"]}) – {round(current / 20, 3)}%')

    current += 1

# Persist whatever was synthesized after the last periodic save.
if unsaved:
    _saveAudio()

aventar (2000) – 100.0%


In [13]:
# Flatten the per-verb audio mapping saved for `language` into a list of
# records (one per verb) and write it to audio_array_<language>.json.

# The file was written as UTF-8 with ensure_ascii=False, so it must be read
# back as UTF-8 explicitly rather than with the platform default encoding.
with open(os.path.join(out_dir, f'audio_{language}.json'), 'r', encoding = 'utf8') as file:
    audio_loaded = json.load(file)

# NOTE: this rebinds `audio` from the nested mapping to a list of records.
# `rank` is the 1-based position of the verb in the loaded file.
audio = [
    {
        # Prefix with the active language instead of a hard-coded 'spanish'
        # so the ids stay correct when `language` is changed.
        '_id': f'{language}_{verb}',
        'language': language,
        'verb': verb,
        'rank': rank,
        'conjugations': verb_audio
    }
    for rank, (verb, verb_audio) in enumerate(audio_loaded.items(), start = 1)
]

with open(os.path.join(out_dir, f'audio_array_{language}.json'), 'w', encoding = 'utf8') as file:
    json.dump(audio, file, indent = 4, ensure_ascii = False)