In [21]:
%%capture --no-stderr
%pip install --quiet -U langchain_core langgraph langchain_openai openpyxl pandas IPython

# First, we need to set the environment variables

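This notebook requires `OPENAI_API_KEY` and `LANGSMITH_API_KEY`. If either one is not already set in the environment, you will be prompted to enter it.

You also need to choose the models used to generate and to check the data (`gen_data_ai_chat` and `check_data_ai_chat` in the next cell).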

In [22]:
import os, getpass
from langchain_openai import ChatOpenAI


def _set_env(var: str):
    """Make sure the environment variable ``var`` holds a non-empty value.

    An existing non-empty value is left untouched. Otherwise the user is
    prompted through ``getpass`` (prompt text is ``"<var>: "``) and the
    answer is stored in ``os.environ`` under ``var``.

    Args:
        var: Name of the environment variable to check or populate.
    """
    already_set = bool(os.environ.get(var))
    if already_set:
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# API keys: the user is prompted only for the ones that are not already set.
_set_env("OPENAI_API_KEY")
_set_env("LANGSMITH_API_KEY")

# LangSmith settings: tracing switch and the project name.
os.environ.update(
    LANGSMITH_TRACING="true",
    LANGSMITH_PROJECT="anki_deck_generator",
)

# Two separate chat clients, one per role (generation and checking), so each
# can be pointed at a different model independently.
gen_data_ai_chat = ChatOpenAI(
    model="gpt-4.1",
    temperature=0,
)
check_data_ai_chat = ChatOpenAI(
    model="gpt-4.1",
    temperature=0,
)

# Second, we need to define the data structure

Define the fields of each Anki note with the `data_structure` variable, which maps each field name to a description of what that field should contain:

```python
data_structure = {
    "field_1_name": "field_1_description",
    "field_2_name": "field_2_description",
    "field_3_name": "field_3_description"
}
```

Example:

```python
data_structure = {
    "vocabulary": "The word to be learned",
    "meaning_en": "The meaning of the word in English",
    "meaning_vi": "The meaning of the word in Vietnamese",
    "hiragana": "The hiragana of the word",
    "kanji_vi": "Explain the kanji of the word in Vietnamese as they are related to Chinese characters",
    "sentence": "The sentence using the word",
    "sentence_translation_en": "The translation of the sentence in English",
    "sentence_translation_vi": "The translation of the sentence in Vietnamese",
    "note": "Any additional information about the word"
}
```



In [23]:
# Field name -> description of what the field should contain.
# Suffix convention used by the keys: `_en` = English, `_vi` = Vietnamese.
data_structure = {
    "vocabulary": "The word to be learned",
    "meaning_en": "The meaning of the word in English",
    # Fixed: this description previously said "Japanese", contradicting the
    # `_vi` suffix and the other Vietnamese fields.
    "meaning_vi": "The meaning of the word in Vietnamese",
    "hiragana": "The hiragana of the word",
    "kanji_vi": "Explain the kanji of the word in Vietnamese as they are related to Chinese characters",
    "sentence": "The sentence using the word",
    "sentence_translation_en": "The translation of the sentence in English",
    "sentence_translation_vi": "The translation of the sentence in Vietnamese",
    "note": "Any additional information about the word"
}

# Third, we need to list all the words to be learned, with an optional note for the AI

You can/should generate this list using AI.

```python
vocabularies = ["word_without_note_1", "word_without_note_2", "word_without_note_3"]
vocabularies_with_notes = {
    "word_with_note_1": "note_to_ai_for_word_with_note_1",
    "word_with_note_2": "note_to_ai_for_word_with_note_2",
    "word_with_note_3": "note_to_ai_for_word_with_note_3"
}
```

Example:

```python
vocabularies = ["猫", "電車", "公園"] 
vocabularies_with_notes = {
    "勉強": "I struggle to remember it, so please explain it in more detail with more examples",
    "学校": "This word is quite easy, so no need to explain it in detail",
    "食べ物": "", # Empty note is also fine
}
```

In [24]:
from typing import List, Dict

# Words that need no special instructions for the AI.
vocabularies: List[str] = [
    "猫",
    "電車",
    "公園",
]

# Words paired with a free-form note for the AI (word -> note).
vocabularies_with_notes: Dict[str, str] = {
    "勉強": "I am truggle to remember it, so please explain it more in detail with more examples",
    "学校": "This word is quite easy, so no need to explain it in detail",
    # An empty note is also fine.
    "食べ物": "",
}

# Finally, let's verify the data you added

In [25]:
def verify_vocabularies_data(vocabularies: List[str], vocabularies_with_notes: Dict[str, str]) -> bool:
    """
    Check that the two vocabulary containers have the expected shape.

    The checks run in a fixed order and stop at the first failure:
      1. ``vocabularies`` is a ``list``
      2. ``vocabularies_with_notes`` is a ``dict``
      3. every item of ``vocabularies`` is a ``str``
      4. every key, then value, of ``vocabularies_with_notes`` is a ``str``

    The outcome is reported on stdout: either the first error message or a
    success line.

    Args:
        vocabularies: List of vocabulary words
        vocabularies_with_notes: Dictionary mapping vocabulary words to notes

    Returns:
        bool: True if data is valid, False otherwise (including when an
        unexpected exception occurs while checking)
    """

    def _failures():
        # Lazily produces error messages in check order. Only the first one is
        # ever requested, so later checks never run after a failure.
        if not isinstance(vocabularies, list):
            yield "Error: vocabularies must be a list"
            return

        if not isinstance(vocabularies_with_notes, dict):
            yield "Error: vocabularies_with_notes must be a dictionary"
            return

        for i, word in enumerate(vocabularies):
            if not isinstance(word, str):
                yield f"Error: vocabularies[{i}] must be a string, got {type(word)}"
                return

        for word, note in vocabularies_with_notes.items():
            if not isinstance(word, str):
                yield f"Error: Key in vocabularies_with_notes must be a string, got {type(word)}"
                return
            if not isinstance(note, str):
                yield f"Error: Value in vocabularies_with_notes must be a string, got {type(note)}"
                return

    try:
        first_failure = next(_failures(), None)
        if first_failure is not None:
            print(first_failure)
            return False

        print("✓ Data verification passed!")
        return True

    except Exception as e:
        # Any unexpected error during the checks is reported as a failed verification.
        print(f"Error during verification: {e}")
        return False

# Stop the notebook here with an AssertionError if the verification returns False;
# the specific problem has already been printed by verify_vocabularies_data.
assert verify_vocabularies_data(vocabularies, vocabularies_with_notes)

✓ Data verification passed!


# All good, now we can start generating the Anki deck

You can find the generated CSV at `./output/anki_deck_data.csv`.

In [26]:
# Fold the plain word list into the notes mapping: a word that has no entry yet
# gets an empty note, while notes that already exist are never overwritten.
for word in vocabularies:
    vocabularies_with_notes.setdefault(word, "")

print(f"Total vocabularies after merging: {len(vocabularies_with_notes)}")
print("All vocabularies with notes:")
for word, note in vocabularies_with_notes.items():
    # An empty note is displayed as a placeholder instead of a blank.
    print(f"  {word}: {note}" if note else f"  {word}: (no note)")

print("\n✓ Vocabularies merged successfully!")


Total vocabularies after merging: 6
All vocabularies with notes:
  勉強: I am truggle to remember it, so please explain it more in detail with more examples
  学校: This word is quite easy, so no need to explain it in detail
  食べ物: (no note)
  猫: (no note)
  電車: (no note)
  公園: (no note)

✓ Vocabularies merged successfully!


In [27]:
# Define the data structure for the AI LangGraph

from typing import Annotated, TypedDict

class AnkiDeckData(TypedDict):
    """State dictionary taken and returned by the graph node functions in this notebook."""

    vocabularies_with_notes: Dict[str, str] # The vocabularies with notes (word -> note for the AI; the note may be empty)
    ai_gen_data: List[Dict[str, str]] # The data generated by the AI (presumably one dict per word, keyed by the `data_structure` field names — TODO confirm)

In [28]:
# Define the graph

from IPython.display import Image, display
from langgraph.graph import StateGraph, START, END

def node_generate_anki_deck_data(deck_data: AnkiDeckData) -> AnkiDeckData:
    """Placeholder for the node that generates the Anki deck data.

    Not implemented yet: the body is only ``pass``, so a call returns ``None``
    rather than the annotated ``AnkiDeckData``.

    TODO: implement the generation step (presumably filling ``ai_gen_data``
    from ``vocabularies_with_notes`` — confirm the intended design).

    Args:
        deck_data: Current state; currently unused.
    """
    pass

def node_check_anki_deck_data(deck_data: AnkiDeckData) -> AnkiDeckData:
    """Placeholder for the node that checks the generated Anki deck data.

    Not implemented yet: the body is only ``pass``, so a call returns ``None``
    rather than the annotated ``AnkiDeckData``.

    TODO: implement the checking step (presumably validating ``ai_gen_data``
    — confirm the intended design).

    Args:
        deck_data: Current state; currently unused.
    """
    pass