In [1]:
from pinyin import get as get_pinyin
import translators
import json 
import time
import jieba

# Language settings.
# NOTE(review): neither name is read by the cells visible in this notebook --
# confirm they are still needed before relying on them.
language = 'vi'
target_lang = 'en'

# Distinct characters collected from the food list while it is parsed.
chars = set()

# JSON file that stores the dictionary entries and their translations.
food_pleco_data_file = 'food_pleco_data.json'


### Prepare dictionary data and translations

In [2]:
from deep_translator import GoogleTranslator

# Stage switches: rebuild entries from Foodlist.txt and/or fetch missing translations.
READ_FOOD_LIST = False
TRANSLATE_FOOD_LIST = True

# Upper bound on the number of non-empty food-list lines that are processed.
MAX_COUNT = 10000
count = 0

# Start from the previously saved data so finished translations are reused.
# On a first run the file does not exist yet, so fall back to an empty dict.
try:
    with open(food_pleco_data_file, 'r', encoding='utf-8') as fread:
        word_dict = json.load(fread)
except FileNotFoundError:
    word_dict = {}

if READ_FOOD_LIST:
    with open('Foodlist.txt', 'r', encoding='utf-8') as file:
        # Skip the first line; the default keeps an empty file from raising StopIteration.
        next(file, None)

        for line in file:
            line = line.strip()

            if not line:
                continue

            count += 1

            if count > MAX_COUNT:
                break

            # Each entry is expected to be: Chinese <TAB> pinyin <TAB> translation.
            fields = line.split('\t')
            if len(fields) != 3:
                print(f'Skipping malformed line: {line!r}')
                continue

            # The pinyin column is not used; pinyin is regenerated with get_pinyin().
            chinese, _pinyin, translation = fields

            chars.update(chinese)

            entry = word_dict.get(chinese)
            if entry is None:
                word_dict[chinese] = {'pinyin': get_pinyin(chinese), 'chinese': [], 'english': translation}
            else:
                # Keep the related-word list and any other translations already
                # stored for this word; only refresh the English text from the list.
                entry['english'] = translation

            seg_list = list(jieba.cut(chinese, cut_all=False))

            # A single segment is the whole word itself, so it adds nothing.
            if len(seg_list) == 1:
                seg_list = []

            # Link every character and every segment back to the word containing it.
            for part in set(list(chinese) + seg_list):
                if part == chinese:
                    # A one-character word must not list itself as related.
                    continue

                if part not in word_dict:
                    word_dict[part] = {'pinyin': get_pinyin(part), 'chinese': [chinese]}
                elif chinese not in word_dict[part]['chinese']:
                    # Avoid duplicates when the food list is read again.
                    word_dict[part]['chinese'].append(chinese)

try:
    if TRANSLATE_FOOD_LIST:
        # Entry key -> target language code passed to the translator.
        language_codes = {'english': 'en', 'vietnamese': 'vi'}
        languages = list(language_codes)

        # One translator per language, reused for every word.
        google_translators = {
            lang: GoogleTranslator(source='auto', target=code)
            for lang, code in language_codes.items()
        }

        total = len(word_dict)
        for i, word in enumerate(word_dict):
            entry = word_dict[word]

            for lang in languages:
                if lang in entry:
                    print(f'Already has translation: {entry[lang]}')
                    continue

                translation = google_translators[lang].translate(word)

                print(f'Translating to {lang} {i+1}/{total} {word} - {translation}')

                entry[lang] = translation

                # Pause between requests to the translation service.
                time.sleep(1)
finally:
    # Save even when translation is interrupted (network error, kernel interrupt)
    # so translations fetched so far are not lost.
    with open(food_pleco_data_file, 'w', encoding='utf-8') as fwrite:
        json.dump(word_dict, fwrite, indent=4, ensure_ascii=False)

    print(f'Data file written {food_pleco_data_file}')

Already has translation: Chinese noodles
Already has translation: Bún
Already has translation: noodle
Already has translation: bún
Already has translation: strip
Already has translation: dải
Already has translation: Chow mein
Already has translation: Mi xào
Already has translation: saute
Already has translation: áp chảo
Already has translation: Noodle soup
Already has translation: hủ tiếu
Already has translation: Soup
Already has translation: Canh
Already has translation: Zhajiangmian
Already has translation: Mi xào
Already has translation: fry
Already has translation: chiên rán
Already has translation: Sauce
Already has translation: Nước xốt
Already has translation: Lamian
Already has translation: Mì kéo tay
Already has translation: pull
Already has translation: sự lôi kéo
Already has translation: White Rice
Already has translation: cơm
Already has translation: rice
Already has translation: cơm
Already has translation: meal
Already has translation: bữa ăn
Already has translation: Frie

In [3]:
# Write the current in-memory dictionary back to the JSON data file.
food_pleco_data_file = 'food_pleco_data.json'

with open(food_pleco_data_file, mode='w', encoding='utf-8') as fwrite:
    # ensure_ascii=False keeps non-ASCII text readable in the file.
    fwrite.write(json.dumps(word_dict, indent=4, ensure_ascii=False))

print(f'Data file written {food_pleco_data_file}')

Data file written food_pleco_data.json


### Build Pleco dict

In [7]:
import json

# Reload the dictionary entries from the JSON data file on disk.
with open(food_pleco_data_file, mode='r', encoding='utf-8') as fread:
    word_dict = json.loads(fread.read())

def pleco_make_bold(text):
    """Return ``text`` wrapped between the code points U+EAB2 and U+EAB3.

    The function name indicates these are used as bold markers in the
    generated Pleco import file.
    """
    bold_on, bold_off = chr(0xEAB2), chr(0xEAB3)
    return '{}{}{}'.format(bold_on, text, bold_off)

def pleco_make_italic(text):
    """Return ``text`` wrapped between the code points U+EAB4 and U+EAB5.

    The function name indicates these are used as italic markers in the
    generated Pleco import file.
    """
    italic_on, italic_off = chr(0xEAB4), chr(0xEAB5)
    return '{}{}{}'.format(italic_on, text, italic_off)

def pleco_make_link(text):
    """Return ``text`` wrapped between the code points U+EAB8 and U+EABB.

    The function name indicates these are used as link markers in the
    generated Pleco import file.
    """
    link_on, link_off = chr(0xEAB8), chr(0xEABB)
    return '{}{}{}'.format(link_on, text, link_off)

# Code point U+EAB1, inserted wherever an entry's definition needs a line break.
PC_NEW_LINE = chr(0xEAB1)

# Shortest headwords first; ties are broken by plain string comparison.
word_list = sorted(word_dict.keys(), key=lambda x: (len(x), x))

languages = ['vietnamese', 'english']
for lang in languages:
    pleco_import_file = f'FoodVocabularyPleco-{lang}.txt'

    char_list = []
    dish_list = []

    # Build every entry before the output file is opened, so a missing
    # translation (KeyError) cannot leave a truncated, half-written file behind.
    for word in word_list:
        data = word_dict[word]

        # Entry line: headword <TAB> pinyin <TAB> definition in the current language.
        string = f"{word}\t{data['pinyin']}\t{data[lang]}"

        if data['chinese']:
            # List the words that contain this entry, de-duplicated and sorted.
            related_lines = [
                f" - {pleco_make_link(related)} {word_dict[related]['pinyin']} "
                f"{pleco_make_italic(word_dict[related][lang])}{PC_NEW_LINE}"
                for related in sorted(set(data['chinese']))
            ]
            string += f'{PC_NEW_LINE}RELATED{PC_NEW_LINE}' + ''.join(related_lines)

        # Single characters and longer words go into separate sections.
        if len(word) < 2:
            char_list.append(string)
        else:
            dish_list.append(string)

    # The context manager closes the file even if a write fails.
    with open(pleco_import_file, 'w', encoding='utf-8') as fpleco:
        # Lines starting with "//" head each section
        # (presumably Pleco category headers -- TODO confirm against the Pleco import format).
        fpleco.write(f'//Food Vocabulary-{lang}/Characters\n')
        fpleco.writelines(f'{line}\n' for line in char_list)

        fpleco.write(f'//Food Vocabulary-{lang}/Dishes\n')
        fpleco.writelines(f'{line}\n' for line in dish_list)

    print(f'{pleco_import_file} written')


FoodVocabularyPleco-vietnamese.txt written
FoodVocabularyPleco-english.txt written
