In [None]:
import os
import time
import json
import base64
import requests
from google.cloud import texttospeech_v1beta1
from IPython.display import clear_output as clear

In [None]:
# Resolve every working directory relative to the notebook's location.
# NOTE(review): .replace('src', '') strips *every* 'src' substring from the
# absolute path, not just a trailing component - confirm this is intended.
top_dir = os.path.abspath('').replace('src', '')

# Ancestors of top_dir, computed one os.path.dirname step at a time.
one_up = os.path.dirname(top_dir)
two_up = os.path.dirname(one_up)
three_up = os.path.dirname(two_up)

key_dir = os.path.join(top_dir, 'data')    # holds the credentials file
out_dir = os.path.join(top_dir, 'out')     # holds the audio_<language>.json index
data_dir = os.path.join(two_up, 'conjugations', 'data', 'language-specific')
mp3_dir = os.path.join(three_up, 'server', 'src', 'static', 'audio', 'conjugations')

In [None]:
# Language processed by this run; it selects the input/output file names,
# the audio sub-folder and the voice passed to ttsSynthesize.
language = "portuguese"

In [None]:
# Expose the credentials file stored in key_dir through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable
# (presumably read by the Google Cloud client library - confirm).
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(key_dir, 'gckey.json')

In [None]:
def ttsSynthesize(phrase, language):
    """Synthesize `phrase` with Google Cloud Text-to-Speech.

    Args:
        phrase: Text to be spoken.
        language: One of 'spanish', 'french', 'german', 'italian' or
            'portuguese'; selects the voice and language code.

    Returns:
        The `audio_content` field of the synthesis response (the encoded
        audio, requested as MP3 at 64 kbps / 44.1 kHz).

    Raises:
        KeyError: if `language` is not one of the supported languages.
    """

    voices = {
        'spanish': 'es-ES-Wavenet-B',
        'french': 'fr-FR-Wavenet-D',
        'german': 'de-DE-Wavenet-B',
        'italian': 'it-IT-Wavenet-C',
        'portuguese': 'pt-PT-Wavenet-B'
    }

    language_codes = {
        'spanish': 'es_ES',
        'french': 'fr_FR',
        'german': 'de_DE',
        'italian': 'it_IT',
        'portuguese': 'pt_PT'
    }

    # Look the language up first so an unsupported value fails before a
    # client is created.
    language_code = language_codes[language]
    voice_name = voices[language]

    client = texttospeech_v1beta1.TextToSpeechClient()

    # Named synthesis_input (not `input`) to avoid shadowing the builtin.
    synthesis_input = texttospeech_v1beta1.SynthesisInput()
    synthesis_input.text = phrase

    voice = texttospeech_v1beta1.VoiceSelectionParams()
    voice.language_code = language_code
    voice.name = voice_name

    audio_config = texttospeech_v1beta1.AudioConfig()
    audio_config.audio_encoding = "MP3_64_KBPS"
    audio_config.sample_rate_hertz = 44100

    request = texttospeech_v1beta1.SynthesizeSpeechRequest(
        input = synthesis_input,
        voice = voice,
        audio_config = audio_config,
    )

    response = client.synthesize_speech(request = request)

    return response.audio_content

In [None]:
# Load the index of audio that has already been generated for this language.
existing_path = os.path.join(out_dir, f'audio_{language}.json')

with open(existing_path, 'r', encoding = 'utf8') as file:
    existing = json.load(file)

In [None]:
# Load the conjugation tables for this language.
conjugations_path = os.path.join(data_dir, f'conjugations_{language}.json')

with open(conjugations_path, 'r', encoding = 'utf8') as file:
    conjugations = json.load(file)

# Keep only the first 2100 verbs, in file order.
conjugations = dict(list(conjugations.items())[:2100])

In [None]:
# For every verb in the existing audio index, gather the verb itself plus
# every entry found under complexity -> mood -> tense into one set.
collated_audio = {}

for verb, by_complexity in existing.items():
    recorded = {verb}

    for by_mood in by_complexity.values():
        for by_tense in by_mood.values():
            for entries in by_tense.values():
                recorded.update(entries)

    collated_audio[verb] = recorded

# The raw index is no longer needed once it has been flattened.
del existing

In [None]:
# For every verb, gather the verb itself plus every conjugated form found
# under the 'simple', 'compound' and 'progressive' groups into one set.
collated_conjugations = {}

for verb, entry in conjugations.items():
    forms = {verb}

    for complexity in ('simple', 'compound', 'progressive'):
        # Other keys of the entry (e.g. 'rank') are not conjugation tables.
        if complexity not in entry:
            continue

        for by_tense in entry[complexity].values():
            for by_subject in by_tense.values():
                forms.update(by_subject.values())

    collated_conjugations[verb] = forms

In [None]:
# List the "<rank>_<verb>" audio folders, ignoring macOS's .DS_Store.
folders_raw = [
    name
    for name in os.listdir(os.path.join(mp3_dir, language))
    if name != '.DS_Store'
]

# Order by the numeric rank prefix, then keep only the verb part.
folders_sorted = sorted(folders_raw, key = lambda name: int(name.split('_')[0]))
folders = [name.split('_')[1] for name in folders_sorted]

In [None]:
# Report every verb that has no audio folder yet, together with its rank.
known_folders = set(folders)

for verb_name, entry in conjugations.items():
    if verb_name not in known_folders:
        print(entry['rank'], verb_name)

In [None]:
# Synthesize audio for conjugations that exist in the conjugation tables but
# are absent from the existing audio index. Only verbs ranked at or after
# START_VERB are processed.
START_VERB = 'exportar'

language_dir = os.path.join(mp3_dir, language)

# Index the "<rank>_<verb>" folders once instead of re-listing the directory
# for every verb. The first folder seen for a verb wins; entries without an
# underscore cannot be verb folders and are skipped.
folder_by_verb = {}
for folder in os.listdir(language_dir):
    parts = folder.split('_')
    if len(parts) > 1:
        folder_by_verb.setdefault(parts[1], folder)

for verb in collated_conjugations:

    if verb not in collated_audio:
        continue

    if conjugations[verb]['rank'] < conjugations[START_VERB]['rank']:
        continue

    missing = collated_conjugations[verb].difference( collated_audio[verb] )

    if not missing:
        continue

    print('missing', verb, len(missing))

    # Without this guard a verb lacking a folder would reuse the folder of
    # the previously processed verb (or fail on an unbound name).
    folder = folder_by_verb.get(verb)
    if folder is None:
        print('no audio folder for', verb, '- skipped')
        continue

    file_path = os.path.join(language_dir, folder)

    for conjugation in missing:

        bin_audio = ttsSynthesize(conjugation, language)

        with open( os.path.join(file_path, f'{ conjugation.replace(" ", "_") }.mp3'), 'wb') as file:
            file.write(bin_audio)


In [None]:
# Create the "<rank>_<verb>" folder and synthesize every conjugation for
# verbs that are not present in the existing audio index.
for verb in collated_conjugations:

    if verb in collated_audio:
        continue

    rank = conjugations[verb]["rank"]
    file_path = os.path.join(mp3_dir, language, f'{rank}_{verb}')

    # The folder may already exist (e.g. after an interrupted run); only
    # create it when absent so the cell can be re-run.
    if not os.path.isdir(file_path):
        os.mkdir(file_path)

    for conjugation in collated_conjugations[verb]:

        bin_audio = ttsSynthesize(conjugation, language)

        with open( os.path.join(file_path, f'{ conjugation.replace(" ", "_") }.mp3'), 'wb') as file:
            file.write(bin_audio)

    print('new', rank, verb)

----------------------------------------------------------------------------------------------

In [None]:
# One-off: (re)generate every conjugation of a single verb.
verb = 'aportar'

file_path = os.path.join(mp3_dir, language, f'{conjugations[verb]["rank"]}_{verb}')

# The folder may already exist (another cell or an earlier run may have
# created it); only create it when absent.
if not os.path.isdir(file_path):
    os.mkdir(file_path)

for conjugation in collated_conjugations[verb]:

    bin_audio = ttsSynthesize(conjugation, language)

    with open( os.path.join(file_path, f'{ conjugation.replace(" ", "_") }.mp3'), 'wb') as file:
        file.write(bin_audio)

In [None]:
# Rename each verb's folder so its numeric prefix matches the verb's
# current rank ("<rank>_<verb>").
language_dir = os.path.join(mp3_dir, language)

# One listing is enough: a rename only affects the folder of the verb being
# processed, and that folder is never matched by any other verb.
existing_folders = os.listdir(language_dir)

for verb in conjugations:
    rank = conjugations[verb]['rank']
    target_name = f'{rank}_{verb}'
    new_path = os.path.join(language_dir, target_name)

    for folder in existing_folders:
        parts = folder.split('_')

        # Skip macOS metadata and anything that is not "<rank>_<verb>".
        if folder == '.DS_Store' or len(parts) < 2:
            continue

        if parts[1] != verb:
            continue

        # Already correctly named: nothing to rename or report.
        if folder == target_name:
            continue

        old_path = os.path.join(language_dir, folder)
        os.rename(old_path, new_path)
        print(old_path, new_path)

In [None]:
# Re-read the audio folders after the renames above.
folders_raw = os.listdir( os.path.join( mp3_dir, language ) )

# Drop macOS's metadata file when it is present.
try:
    folders_raw.remove('.DS_Store')
except ValueError:
    pass

# Sort a copy by the numeric rank prefix, then extract the verb names.
folders_sorted = list(folders_raw)
folders_sorted.sort(key = lambda entry: int(entry.split('_')[0]))

folders = [entry.split('_')[1] for entry in folders_sorted]

In [None]:
# Print every verb whose rank prefix differs from its 1-based position in
# the rank-sorted folder list (i.e. where the numbering has a gap or shift).
for position, folder in enumerate(folders_sorted, start = 1):
    rank, verb = folder.split('_')
    if int(rank) != position:
        print(verb)