# Utils

## Settings

In [1]:
from datetime import datetime
import re
from joblib import Parallel, delayed
from tqdm import tqdm

from config import DATE_FORMAT, DECK_NAME, N_JOBS_EXTRACT, N_JOBS_UPDATE
from utils.app import extract_word_ipa__single, fetch_words_to_update, update_card_ipa__single
from utils.file import save
from utils.scraper import extract_ipa_for_language, get_content
from utils.utils import load_anki_json, load_most_recent_anki_json

# Stage switches: each flag guards one `if` block further down in this notebook.
TEST_PARSING = True  # run the parser test on a hard-coded sample phrase
TEST_CREATING = False  # fetch the words to update, extract their IPAs, save anki@<time>.json
TEST_UPDATING = False  # call update_card_ipa__single for every word in the most recent saved JSON
TEST_RETRY_UPDATING = False  # sequentially retry the words saved as skipped in after_anki@<time>.json

# Main

## Parser Test

In [2]:
# possible words: "책", "저", "libro", "놀다", "오다", "돈", "돌"
import unicodedata
from utils.utils import preprocess_word, is_word



if TEST_PARSING:
    phrase = "No -tEngo comida? (2)"
    language = "spanish"

    # Map each accepted word to its extracted IPA (or None when nothing was found).
    ipas = {}
    for raw_token in phrase.split():
        word = preprocess_word(raw_token)
        # Tokens rejected by is_word are left out of the report entirely.
        if not is_word(word):
            continue

        content = get_content(word)
        ipa = extract_ipa_for_language(content, language, word, verbose=True)
        # Any falsy extraction result is normalised to None.
        ipas[word] = ipa if ipa else None

    # One "word: ipa" line per looked-up word.
    for word, ipa in ipas.items():
        print(f"{word}: {ipa}")

no: ('/ˈno/', True)
tengo: ('/ˈtenɡo/', False)
comida: ('/koˈmida/', False)


## Anki

### Fetch And Generate

In [3]:
if TEST_CREATING:
    # Fetch the words to process; the mapping is iterated as word -> (note_id, ipa).
    words_ids = fetch_words_to_update()
    print(f"Words to update: {len(words_ids)}")

    # Extract the IPA of every word in parallel, with a progress bar over the inputs.
    results = Parallel(n_jobs=N_JOBS_EXTRACT)(
        delayed(extract_word_ipa__single)(word, note_id, ipa)
        for word, (note_id, ipa) in tqdm(words_ids.items())
    )

    # Split the results into words that can be updated and words that were skipped.
    skipped_dict = {}
    updated_words = {}

    for word, (note_id, result), success in results:
        if not success:
            skipped_dict[word] = (note_id, result)
        else:
            try:
                # A successful result is expected to unpack into (ipa, extra_ipa).
                ipa, extra_ipa = result
                updated_words[word] = {"note_id": note_id, "ipa": ipa, "extra_ipa": extra_ipa}
            except Exception as e:
                # A malformed result is reported and kept with the skipped words
                # instead of aborting the whole run.
                print(f"Error updating word {word}: {e}")
                skipped_dict[word] = (note_id, result)

    # Save the output under a timestamped name.
    output = {
        'skipped_words': skipped_dict,
        'updated_words': updated_words
    }

    current_time = datetime.now().strftime(DATE_FORMAT)
    save(output, f"anki@{current_time}.json")

    # A bare expression nested in an `if` body is not echoed by the notebook,
    # so the counts have to be printed explicitly.
    print(f"Updated words: {len(updated_words)}, skipped words: {len(skipped_dict)}")

### Update and Retry Update

In [4]:
if TEST_UPDATING:
    # Load the most recent saved extraction together with its timestamp.
    anki_json, original_time = load_most_recent_anki_json()
    updated_words = anki_json.get('updated_words', {})

    # Prepare the arguments for parallel processing.
    args = [(word, info['note_id'], info['ipa'], info['extra_ipa'])
            for word, info in updated_words.items()]

    # Process in parallel with a progress bar.
    results = Parallel(n_jobs=N_JOBS_UPDATE)(
        delayed(update_card_ipa__single)(word, note_id, ipa, extra_ipa)
        for word, note_id, ipa, extra_ipa in tqdm(args, desc="Updating IPAs")
    )

    # Each result is (word, status, error); a falsy status marks a failed update.
    success = []
    errors = []
    for word, status, error in results:
        if status:
            success.append(word)
        else:
            errors.append((word, error))

    # Report the outcome; the individual error values stay available in `errors`.
    print(f"Updated: {len(success)}, failed: {len(errors)}")

    # A set keeps the membership tests below constant-time instead of scanning a list.
    error_words = {word for word, _ in errors}
    after_skipped_words = {word: info for word, info in updated_words.items() if word in error_words}
    after_updated_words = {word: info for word, info in updated_words.items() if word not in error_words}

    # Save the output, reusing the timestamp of the file the words were loaded from.
    after_output = {
        "skipped_words": after_skipped_words,
        "updated_words": after_updated_words,
    }

    save(after_output, f"after_anki@{original_time}.json")

if TEST_RETRY_UPDATING:
    # `original_time` is only bound when the update stage ran in this kernel.
    # When only the retry is enabled, recover the timestamp from the most recent
    # saved JSON instead of failing with a NameError.
    # NOTE(review): assumes load_most_recent_anki_json() reports the same timestamp
    # that the update stage used for its after_anki@ file - confirm.
    try:
        retry_time = original_time
    except NameError:
        _recent_json, retry_time = load_most_recent_anki_json()

    # No parallelism here: the only errors seen so far came from too many handles
    # being open at the same time.
    after_anki_json, _ = load_anki_json(f"after_anki@{retry_time}.json")

    skipped_words = after_anki_json.get("skipped_words", {})

    args = [(word, info['note_id'], info['ipa'], info['extra_ipa'])
            for word, info in skipped_words.items()]

    final_skipped_words = {}
    final_updated_words = {}

    for word, note_id, ipa, extra_ipa in tqdm(args, desc="Updating IPAs"):
        info = {"note_id": note_id, "ipa": ipa, "extra_ipa": extra_ipa}
        try:
            # The updater returns (word, status, error), as consumed by the parallel
            # update stage; a falsy status means the card was not updated.
            _, status, error = update_card_ipa__single(word, note_id, ipa, extra_ipa)
        except Exception as e:
            print(f"Error updating word {word}: {e}")
            final_skipped_words[word] = info
            continue

        if status:
            final_updated_words[word] = info
        else:
            # A failure reported through the return value must not count as updated.
            print(f"Error updating word {word}: {error}")
            final_skipped_words[word] = info

    # Save the output.
    final_output = {
        "skipped_words": final_skipped_words,
        "updated_words": final_updated_words,
    }

    save(final_output, f"final_anki@{retry_time}.json")