# Utils

## Settings

In [1]:
import sys
import time
from typing import Tuple
import requests
from tqdm import tqdm
from datetime import datetime
from joblib import Parallel, delayed
import os
import json
import re

from utils.scraper import get_content, extract_ipa_for_language
from utils.file import save
from config import OUTPUT_DIRECTORY, ANKI_CONNECT_URL, DECK_NAME, LANGUAGE, VOCAB_FIELD, N_CORES, N_JOBS_EXTRACT, N_JOBS_UPDATE, DATE_FORMAT

TEST_PARSING = True
TEST_CREATING = True
TEST_UPDATING = False
TEST_RETRY_UPDATING = False

# Main

## Parser Test

In [2]:
# possible words: "책", "저", "libro", "놀다", "오다", "돈", "돌"

if TEST_PARSING:
    word = "-되다"
    content = get_content(word, save_response=True)

    # ipa = extract_ipa_for_language(content, "italian", word)
    # if ipa:
    #     print(ipa)

    ipa = extract_ipa_for_language(content, "korean", word)
    if ipa:
        print(ipa)

Data saved to ./outputs/-되다_response_text_content.html as HTML


## Anki

### Fetch And Generate

In [3]:
if TEST_CREATING:
    # Fetch words
    words_ids = fetch_words_to_update()
    print(f"Words to update: {len(words_ids)}")

    # Process words in parallel
    results = Parallel(n_jobs=N_JOBS_EXTRACT)(
        delayed(extract_word_ipa__single)(word, note_id, ipa) 
        for word, (note_id, ipa) in tqdm(words_ids.items())
    )

    # Process results
    skipped_dict = {}
    updated_words = {}

    for word, (note_id, result), success in results:
        if not success:
            skipped_dict[word] = (note_id, result)
        else:
            try:
                ipa, extra_ipa = result
                updated_words[word] = {"note_id": note_id, "ipa": ipa, "extra_ipa": extra_ipa}
            except Exception as e:
                print(f"Error updating word {word}: {e}")
                skipped_dict[word] = (note_id, result)

    # Save the output
    output = {
        'skipped_words': skipped_dict,
        'updated_words': updated_words
    }

    current_time = datetime.now().strftime(DATE_FORMAT)
    save(output, f"anki@{current_time}.json")

    len(updated_words)

NameError: name 'fetch_words_to_update' is not defined

### Update

In [None]:
if TEST_UPDATING:
    # Usage
    anki_json, original_time = load_most_recent_anki_json()
    updated_words = anki_json.get('updated_words', {})

    # Prepare the arguments for parallel processing
    args = [(word, info['note_id'], info['ipa'], info['extra_ipa']) 
            for word, info in updated_words.items()]

    # Process in parallel with progress bar
    results = Parallel(n_jobs=N_JOBS_UPDATE)(
        delayed(update_card_ipa__single)(word, note_id, ipa, extra_ipa) 
        for word, note_id, ipa, extra_ipa in tqdm(args, desc="Updating IPAs")
    )

    # Process results
    success = []
    errors = []
    for word, status, error in results:
        if status:
            success.append(word)
        else:
            errors.append((word, error))

    # Save the output
    error_words = [word for word, error in errors]
    after_skipped_words = {word: info for word, info in updated_words.items() if word in error_words}
    after_updated_words = {word: info for word, info in updated_words.items() if word not in error_words}

    after_output = {
        "skipped_words": after_skipped_words,
        "updated_words": after_updated_words,
    }

    save(after_output, f"after_anki@{original_time}.json")

In [None]:
if TEST_RETRY_UPDATING:
    # Here we don't parallelize, since I only found errors originating from too many handles at the same time so far
    after_anki_json, _ = load_anki_json(f"after_anki@{original_time}.json")

    skipped_words = after_anki_json.get("skipped_words", {})

    args = [(word, info['note_id'], info['ipa'], info['extra_ipa']) 
            for word, info in skipped_words.items()]
    
    final_skipped_words = {}
    final_updated_words = {}

    for word, note_id, ipa, extra_ipa in tqdm(args, desc="Updating IPAs"):
        try:
            update_card_ipa__single(word, note_id, ipa, extra_ipa)
            final_updated_words[word] = {"note_id": note_id, "ipa": ipa, "extra_ipa": extra_ipa}
        except Exception as e:
            print(f"Error updating word {word}: {e}")
            final_skipped_words[word] = {"note_id": note_id, "ipa": ipa, "extra_ipa": extra_ipa}
            continue

    # Save the output
    final_output = {
        "skipped_words": final_skipped_words,
        "updated_words": final_updated_words,
    }

    save(final_output, f"final_anki@{original_time}.json")

In [None]:
after_anki_json, _ = load_anki_json(f"after_anki@20250217-171308.json")
if len(after_anki_json["skipped_words"]) == 0:
    print("There are still skipped words")
else :
    print("All words have been updated")
# skipped_words = after_anki_json.get("skipped_words", {})

There are still skipped words
