# In [34]:  (Jupyter notebook cell prompt left over from the export)
from pymongo import MongoClient
import json
from os import path
import pandas as pd

# MongoDB connection settings used to build the module-level client below.
CONNECTION_STRING = 'mongodb://localhost:27017'
DATABASE = 'quiz-book'
# Name of the collection that the lookup functions query by its ``en`` field.
COLLECTION_ENGLISH_WORDS = 'english_words_v1'


# Module-level client and database handle, created when this code is executed.
client = MongoClient(CONNECTION_STRING)
db = client[DATABASE]


def load_filepaths(source_index='../public/assets/english/words/index.json'):
    """Return the paths of the files listed in a JSON index.

    Args:
        source_index: Path of a JSON file whose content is a list of file
            names. Defaults to the word-list index used by this script.

    Returns:
        A list with each listed file name joined onto the directory that
        contains ``source_index``.

    Raises:
        OSError: If ``source_index`` cannot be opened.
        json.JSONDecodeError: If the index is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding, which would misread non-ASCII file names on some systems.
    with open(source_index, 'r', encoding='utf-8') as f:
        filenames = json.load(f)

    pathroot = path.dirname(source_index)
    return [path.join(pathroot, fname) for fname in filenames]


def load_words(filepath):
    """Update one word-list CSV in place from the dictionary collection.

    Reads ``filepath``, queries ``db[COLLECTION_ENGLISH_WORDS]`` for records
    whose ``en`` field is one of the values in the CSV's ``english`` column,
    and for every row with a matching record:

    * sets ``symbol`` to the record's ``phonetic_us``;
    * sets ``chinese`` to the record's ``zh``, but only when ``zh`` is neither
      ``None`` nor the empty string.

    Rows without a matching record are left untouched. The frame is then
    written back to ``filepath`` without the index column.

    Args:
        filepath: Path of the CSV file to read and overwrite.

    Raises:
        KeyError: If the CSV has no ``english`` column, or a matching record
            lacks ``en``, ``phonetic_us`` or ``zh``.
    """
    df = pd.read_csv(filepath)
    words = df['english'].tolist()

    words_cursor = db[COLLECTION_ENGLISH_WORDS].find({"en": {"$in": words}}, {'_id': 0})

    # Index the records by word once instead of scanning the whole result for
    # every row. setdefault keeps the first record seen for a word, which is
    # the record a first-match scan would have picked.
    records_by_word = {}
    for record in words_cursor:
        records_by_word.setdefault(record['en'], record)

    # A column that is entirely empty in the CSV is read as float64; make the
    # target columns object-typed so string values can be stored in them
    # without relying on implicit upcasting.
    for column in ('symbol', 'chinese'):
        if column in df.columns:
            df[column] = df[column].astype(object)

    for i, en in df['english'].items():
        record = records_by_word.get(en)
        if record is None:
            continue
        df.at[i, 'symbol'] = record['phonetic_us']
        zh = record['zh']
        if zh is not None and zh != '':
            df.at[i, 'chinese'] = zh

    df.to_csv(filepath, index=False)



def load_to_df(filepath):
    """Read a CSV file and tag every row with the file's unit name.

    The unit name is the part of the file's base name before its first dot;
    it is stored in a ``unit`` column of the returned DataFrame.
    """
    frame = pd.read_csv(filepath)
    unit_name, _, _ = path.basename(filepath).partition('.')
    return frame.assign(unit=unit_name)


def sync_words():
    """Run load_words() on every file returned by load_filepaths()."""
    filepaths = load_filepaths()
    for filepath in filepaths:
        load_words(filepath)


def merge_files():
    """Concatenate all indexed word lists into a single index.csv.

    Each file returned by load_filepaths() is loaded through load_to_df(),
    and the concatenated frame is written without the index column.
    """
    frames = []
    for filepath in load_filepaths():
        frames.append(load_to_df(filepath))

    merged = pd.concat(frames)
    merged.to_csv('../public/assets/english/words/index.csv', index=False)


def sync_more_info(source_path='../public/assets/english/words/index.csv'):
    """Fill the ``symbol_br`` column of the merged CSV from the dictionary.

    Reads ``source_path``, queries ``db[COLLECTION_ENGLISH_WORDS]`` for
    records whose ``en`` field is one of the values in the ``english`` column,
    and sets ``symbol_br`` to the record's ``phonetic_br`` for every row with
    a matching record. The ``symbol_br`` column is then moved to position 3
    (or to the end when the frame has fewer columns) and the frame is written
    back to ``source_path`` without the index column.

    Args:
        source_path: Path of the CSV file to read and overwrite. Defaults to
            the merged index file used by this script.

    Raises:
        KeyError: If the CSV has no ``english`` column, or a matching record
            lacks ``en`` or ``phonetic_br``.
    """
    df = pd.read_csv(source_path)
    words = df['english'].tolist()
    words_cursor = db[COLLECTION_ENGLISH_WORDS].find({"en": {"$in": words}}, {'_id': 0})

    # Index the records by word once instead of scanning the whole result for
    # every row. setdefault keeps the first record seen for a word, which is
    # the record a first-match scan would have picked.
    records_by_word = {}
    for record in words_cursor:
        records_by_word.setdefault(record['en'], record)

    # Make sure the column exists (so the pop() below cannot fail when no word
    # matched) and is object-typed (an all-empty CSV column is read as
    # float64, which cannot hold strings without upcasting).
    if 'symbol_br' in df.columns:
        df['symbol_br'] = df['symbol_br'].astype(object)
    else:
        df['symbol_br'] = pd.Series(None, index=df.index, dtype=object)

    for i, en in df['english'].items():
        record = records_by_word.get(en)
        if record is not None:
            df.at[i, 'symbol_br'] = record['phonetic_br']

    column_phonetic_br = df.pop('symbol_br')
    # insert() rejects positions past the end, so clamp for narrow frames.
    position = min(3, len(df.columns))
    df.insert(position, column_phonetic_br.name, column_phonetic_br)

    df.to_csv(source_path, index=False)


def run() -> None:
    """Run the currently enabled sync step.

    Only sync_more_info() is active; the other two steps are commented out
    and have to be re-enabled by hand.
    """
    # sync_words()
    # merge_files()
    sync_more_info()


# Top-level call: the sync runs whenever this code is executed.
run()