In [1]:
import os
import json
import shutil

In [None]:
def formatConjugations( conjugations ):
    """Keep only the dictionary-valued entries of a verb's conjugation record.

    Entries whose value is not a dict are dropped; everything else is passed
    through untouched.

    Args:
        conjugations: dict describing a single verb. Its values may be dicts
            or any other type.

    Returns:
        A new dict with the key/value pairs of ``conjugations`` whose value is
        a dict. The nested dicts are the same objects as in the input (they
        are not copied).
    """
    # isinstance, unlike an exact type comparison, also accepts dict
    # subclasses such as collections.OrderedDict.
    return { key: value for key, value in conjugations.items() if isinstance( value, dict ) }

In [None]:
def formatWeightedTranslations( translations, principal ):
    """Build the translation list with the principal translation first.

    The input mapping is left untouched, so the function can be called
    repeatedly on the same data (e.g. when a notebook cell is re-run).

    Args:
        translations: dict mapping each translation to its value
            (presumably a weight - confirm against the translations file).
        principal: key of the principal translation. It does not have to be
            present in ``translations``.

    Returns:
        A list of ``(translation, value)`` tuples: ``( principal, 1.0 )``
        followed by at most nine of the remaining entries, in the mapping's
        own order.
    """
    # Filter instead of `del`: deleting would mutate the caller's data and
    # raise KeyError on a second call or when the principal is absent.
    others = [ item for item in translations.items() if item[0] != principal ]
    return [( principal, 1.0 )] + others[:9]

In [None]:
# All locations are derived from the current working directory.
notebook_dir = os.path.abspath('')

translations_path = os.path.join( notebook_dir, 'translations', 'out', 'normalized' )
conjugations_path = os.path.join( notebook_dir, 'conjugations', 'data', 'language-specific' )

# The audio tree is addressed through the same path with every '.development'
# occurrence stripped out.
audio_root = notebook_dir.replace('.development', '')
audio_path = os.path.join( audio_root, 'server', 'src', 'static', 'audio', 'conjugations' )

In [None]:
# Language to process: this value is interpolated into the translation and
# conjugation input file names, the audio folder path and the output file name.
language = 'portuguese'

In [None]:
# Read the translations file of the selected language.
translations_file = os.path.join( translations_path, f'translations_{language}.json' )
with open( translations_file, 'r', encoding = 'utf-8' ) as file:
    translations = json.load( file )

# Read the conjugations file of the selected language.
conjugations_file = os.path.join( conjugations_path, f'conjugations_{language}.json' )
with open( conjugations_file, 'r', encoding = 'utf-8' ) as file:
    conjugations = json.load( file )

# Keep only the verbs whose rank is below 2101; key order is preserved.
conjugations = { verb: entry for verb, entry in conjugations.items() if entry['rank'] < 2101 }

In [None]:
output = {}
rank = 1
no_translations = []

# Ranks are re-issued consecutively while walking the verbs, so a verb that
# has no translation is skipped without leaving a gap in the numbering.
for verb in conjugations:
    if verb not in translations:
        no_translations.append( verb )
        continue

    verb_conjugations = conjugations[verb]
    verb_translations = translations[verb]

    output[verb] = {
        'infinitive': verb,
        'rank': rank,
        'regularity': verb_conjugations['regularity'],
        'translations': {
            'principal': verb_translations['principal'],
            # The weighted list is produced by formatWeightedTranslations,
            # with the principal translation passed in explicitly.
            'weighted': formatWeightedTranslations( verb_translations['weighted'], verb_translations['principal'] )
        },
        'conjugations': {
            **formatConjugations( verb_conjugations ),
            # Set explicitly so the participle is present even when
            # formatConjugations filtered it out.
            'participle': verb_conjugations['participle']
        }
    }

    rank += 1

print( f'{no_translations} are invalid' )

In [None]:
# Reconcile the audio folders (named '<rank>_<infinitive>') with the freshly
# assigned ranks: rename folders whose rank prefix is stale, collect verbs
# that have no folder, and leave in `folders` only the unmatched folders.
language_audio_path = os.path.join( audio_path, language )
folders = os.listdir( language_audio_path )
complete = []
no_audio = []

for verb in output:
    for folder in folders:
        folder_rank, infinitive = folder.split('_')

        if infinitive != verb:
            continue

        if int(folder_rank) != output[verb]['rank']:
            source = os.path.join( language_audio_path, folder )
            target = os.path.join( language_audio_path, f'{output[verb]["rank"]}_{verb}' )
            os.rename( source, target )
            print( source, target )

        complete.append( folder )
        break
    else:
        # Reached only when no folder matched. Unlike an index check inside
        # the loop body, this also fires when `folders` is empty.
        no_audio.append( verb )

# Whatever was not matched to a verb is a leftover. Matched folders are
# recorded under their pre-rename name, which is the one present in `folders`.
for folder in complete:
    folders.remove( folder )

print( f'{folders} needs to be deleted' )
print( f'{no_audio} have no audio' )

In [None]:
# Delete every folder still listed in `folders`, including its contents.
for leftover in folders:
    leftover_path = os.path.join( audio_path, language, leftover )
    # Open up the permissions first - presumably so the removal is not refused.
    os.chmod( leftover_path, 0o777 )
    shutil.rmtree( leftover_path )

In [None]:
# Flatten the per-verb mapping into a list of documents, each carrying an
# id of the form '<language>_<verb>' ahead of the verb's own fields.
out_array = [
    { 'id': f'{ language }_{ verb }', **document }
    for verb, document in output.items()
]

In [None]:
# Order the documents in place by ascending rank.
out_array.sort( key = lambda document: document['rank'] )

In [None]:
# Write the documents to '<language>.json' in the current working directory,
# keeping non-ASCII characters readable.
language_file = os.path.join( os.getcwd(), f'{language}.json' )
with open( language_file, 'w', encoding = 'utf-8' ) as file:
    json.dump( out_array, file, ensure_ascii = False, indent = 4 )

In [2]:
# Merge the per-language files into one list, keeping the documents whose
# rank is at most 2000.
collated = []

# A dedicated loop variable is used so that the notebook-level `language`
# setting is not overwritten as a side effect of running this cell.
for collated_language in [ 'spanish', 'french', 'german', 'italian', 'portuguese']:
    with open( os.path.join( os.getcwd(), f'{collated_language}.json' ), 'r', encoding = 'utf-8' ) as file:
        data = json.load( file )

    collated.extend( document for document in data if document['rank'] <= 2000 )

with open( os.path.join( os.getcwd(), 'all-languages.json' ), 'w', encoding = 'utf-8' ) as file:
    json.dump( collated, file, ensure_ascii = False, indent = 4 )

In [12]:
# Per-language lookups built from the collated documents:
#   infinitives_array  -> language: list of ( infinitive, rank, regularity )
#   infinitives_object -> language: { infinitive: rank }
infinitives_array = {}
infinitives_object = {}

for verb in collated:
    # Split on the first underscore only, so an infinitive that itself
    # contains '_' does not break the unpacking. A separate name is used so
    # the notebook-level `language` setting is not overwritten.
    verb_language, infinitive = verb['id'].split( '_', 1 )

    # A language gets its (possibly empty) containers as soon as it is seen.
    language_array = infinitives_array.setdefault( verb_language, [] )
    language_object = infinitives_object.setdefault( verb_language, {} )

    if verb['rank'] <= 2000:
        language_array.append( ( infinitive, verb['rank'], verb['regularity'] ) )
        language_object[infinitive] = verb['rank']

with open( os.path.join( os.getcwd(), 'infinitives-array.json' ), 'w', encoding = 'utf-8' ) as file:
    json.dump( infinitives_array, file, ensure_ascii = False, indent = 4 )

with open( os.path.join( os.getcwd(), 'infinitives-object.json' ), 'w', encoding = 'utf-8' ) as file:
    json.dump( infinitives_object, file, ensure_ascii = False, indent = 4 )

In [2]:
import json
# Load the data array from the current directory for inspection.
data_array_file = './data_array.json'
with open( data_array_file, 'r', encoding = 'utf-8' ) as file:
    data = json.load( file )

In [5]:
# Display the first loaded entry to inspect the structure of a document.
data[0]

{'_id': 'spanish_ser',
 'language': 'spanish',
 'verb': 'ser',
 'rank': 1,
 'conjugations': {'infinitive': 'ser',
  'rank': 1,
  'regularity': 'i',
  'participle': {'present': 'siendo', 'past': 'sido'},
  'simple': {'indicative': {'present': {'yo': 'soy',
     'tu': 'eres',
     'el': 'es',
     'ella': 'es',
     'usted': 'es',
     'nosotros': 'somos',
     'vosotros': 'sois',
     'ellos': 'son',
     'ellas': 'son',
     'ustedes': 'son'},
    'preterite': {'yo': 'fui',
     'tu': 'fuiste',
     'el': 'fue',
     'ella': 'fue',
     'usted': 'fue',
     'nosotros': 'fuimos',
     'vosotros': 'fuisteis',
     'ellos': 'fueron',
     'ellas': 'fueron',
     'ustedes': 'fueron'},
    'imperfect': {'yo': 'era',
     'tu': 'eras',
     'el': 'era',
     'ella': 'era',
     'usted': 'era',
     'nosotros': 'éramos',
     'vosotros': 'erais',
     'ellos': 'eran',
     'ellas': 'eran',
     'ustedes': 'eran'},
    'conditional': {'yo': 'sería',
     'tu': 'serías',
     'el': 'sería',
   