# Week 4 Exercise

## Goal
Create a script to translate Hiragana (ひらがな) to Katakana (カタカナ) to Kanji (漢字) using a specific LLM (closed or open source).


In [1]:
import os
from dotenv import load_dotenv

**Application-level configurations**

In [74]:
# Default model identifiers for each provider.
DEFAULT_OPENAI_MODEL = "gpt-4o"
DEFAULT_CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
# Toggle gradio auto-launching the UI.
DEFAULT_GRADIO_UI_AUTO_LAUNCH = True

# Application-level settings gathered into a single mapping.
_CONFIG = dict(
    OPENAI_MODEL=DEFAULT_OPENAI_MODEL,
    CLAUDE_MODEL=DEFAULT_CLAUDE_MODEL,
    GRADIO_UI_AUTO_LAUNCH=DEFAULT_GRADIO_UI_AUTO_LAUNCH,
)


**Load API Keys**

In [75]:
from translate.error import EnvironmentException


# Load API keys: read the .env file, then make sure each expected variable is
# present in the process environment. A variable that is still unset after
# loading is filled with a placeholder string rather than left missing.
try:
    load_dotenv(override=True)
    os.environ.update({
        key_name: os.getenv(key_name, 'your-key-if-not-using-env')
        for key_name in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'HF_TOKEN')
    })
except Exception as e:
    # Surface any setup failure as the project's environment error type.
    error_message = "Failure to setup environment variables, please check your configuration."
    print(error_message)
    raise EnvironmentException(message=error_message, cause=e)
print("API Keys loaded!")

API Keys loaded!


**Defining System Prompt**

In [76]:
# System prompt sent to the model. The four sentences are joined into a single
# string (each but the last ends with a space) so that the opening role
# description is part of the final prompt instead of being overwritten.
_SYSTEM_PROMPT = (
    "You are an assistant that translates Japanese hirigana (ひりがな) to its closest kanji (漢字). "
    "The input hirigana might be in romaji or actual hirigana characters. "
    "If you find multiple matches for the input hirigana, use your best reasoning skills to match the entire word or phrase. "
    "If you're unable to find any match for the input hirigana, let the user know."
)

**User Prompt**

In [77]:
def user_prompt_for(hirigana: str) -> str:
    """
    Build the user prompt that asks for a kanji translation of the input.

    param: hirigana: The input hirigana to translate to Kanji; must be non-empty.
    return: The prompt text with the input embedded at the end.
    raises: ValueError when the input is empty or otherwise falsy.
    """
    if hirigana:
        return f"Translate the following hirigana to kanji: {hirigana}"
    raise ValueError("hirigana value is required!")


## Translation Model Interface

In [79]:
from translate.clients.gpt import OpenAiApiClient, LanguageModel

def translate(hirigana: str, model: str):
    """
    Generator that relays the translation output for the chosen model.

    param: hirigana: The input text to translate.
    param: model: Name of the language model, forwarded to _select_model.
    yields: Each item produced by the response that _select_model returns
            (presumably the streamed text so far — confirm against the client).
    """
    for partial_output in _select_model(hirigana, language_model=model):
        yield partial_output

def _select_model(
        hirigana: str,
        language_model: str
):
    """
    Choose the desired language model and execute the translation.

    param: hirigana: The input text to translate.
    param: language_model: Model name; matched case-insensitively against the
                           LanguageModel members.
    return: Whatever execute_gpt returns when the GPT model is selected.
    raises: NotImplementedError when Claude is selected (not implemented yet).
    raises: ValueError when the name matches no known model.
    """
    requested = language_model.casefold()
    if requested == LanguageModel.GPT.casefold():
        return execute_gpt(hirigana=hirigana)
    if requested == LanguageModel.CLAUDE.casefold():
        raise NotImplementedError("todo")
    # Report the argument itself: this function has no local called `model`.
    raise ValueError(f"Invalid model: {language_model}")

def execute_gpt(hirigana: str):
    """
    Run a streaming GPT translation for the given input.

    A client is created with the module's system prompt, the user prompt built
    from the input is attached, and the client's streaming call is started.

    param: hirigana: The input text to translate.
    return: The object returned by the client's chat_stream() call
            (presumably an iterable of streamed output — confirm against the client).
    """
    client = OpenAiApiClient(system_prompt=_SYSTEM_PROMPT)
    client.set_user_prompt(user_prompt_for(hirigana))
    stream = client.chat_stream()
    print(f'response: {stream}')
    return stream

def test_execute_gpt():
    """
    Smoke-check that execute_gpt can be invoked with a sample input.

    The returned response is neither consumed nor asserted on.
    """
    sample_hirigana = "かんじ"
    execute_gpt(hirigana=sample_hirigana)


# Run the smoke check as soon as the cell executes.
test_execute_gpt()

## UI

In [66]:
import gradio as gr

# Two-row layout: input/output text areas on top, model picker and button below.
with gr.Blocks() as ui:
    # gr.Markdown('### Translate Hirigana (ひらがな) to Kanji（漢字）')
    with gr.Row():
        # Text to translate (left) and the translation result (right).
        input_hirigana = gr.Textbox(label="hirigana word or phrase", lines=10)
        output_kanji = gr.Textbox(label="kanji word or phrase", lines=10)
    with gr.Row():
        # Model selection
        model = gr.Dropdown(["GPT", "Claude"], label="Select model", value="GPT")
        # Translate Button
        translate_btn = gr.Button("Translate")
    # Clicking the button passes the text and the selected model to translate()
    # and writes what it yields into the output box.
    translate_btn.click(translate, inputs=[input_hirigana, model], outputs=[output_kanji])

# Launch the UI only when the application-level toggle allows it.
if _CONFIG['GRADIO_UI_AUTO_LAUNCH']:
    ui.launch(inbrowser=True)

In [50]:
def translate(hirigana: str, model: str):
    """
    Generator that relays the translation output for the chosen model.

    NOTE(review): a function with this name is also defined in an earlier cell;
    whichever cell runs last wins.

    param: hirigana: The input text to translate.
    param: model: Name of the language model, forwarded to _select_model.
    yields: Each item produced by the response that _select_model returns
            (presumably the streamed text so far — confirm against the client).
    """
    for partial_output in _select_model(hirigana, language_model=model):
        yield partial_output

def _select_model(
        hirigana: str,
        language_model: str
):
    """
    Choose the desired language model and execute the translation.

    NOTE(review): a function with this name is also defined in an earlier cell;
    whichever cell runs last wins.

    param: hirigana: The input text to translate.
    param: language_model: Model name; matched case-insensitively against the
                           LanguageModel members.
    return: Whatever execute_gpt returns when the GPT model is selected.
    raises: NotImplementedError when Claude is selected (not implemented yet).
    raises: ValueError when the name matches no known model.
    """
    requested = language_model.casefold()
    if requested == LanguageModel.GPT.casefold():
        return execute_gpt(hirigana=hirigana)
    if requested == LanguageModel.CLAUDE.casefold():
        raise NotImplementedError("todo")
    # Report the argument itself: this function has no local called `model`.
    raise ValueError(f"Invalid model: {language_model}")


### UI


In [72]:
import gradio as gr

# Two-row layout: input/output text areas on top, model picker and button below.
# NOTE(review): an earlier cell also builds and launches a UI bound to `ui`.
with gr.Blocks() as ui:
    # gr.Markdown('### Translate Hirigana (ひらがな) to Kanji（漢字）')
    with gr.Row():
        # Text to translate (left) and the translation result (right).
        input_hirigana = gr.Textbox(label="hirigana word or phrase", lines=10)
        output_kanji = gr.Textbox(label="kanji word or phrase", lines=10)
    with gr.Row():
        # Model selection
        model = gr.Dropdown(["GPT", "Claude"], label="Select model", value="GPT")
        # Translate Button
        translate_btn = gr.Button("Translate")
    # Clicking the button passes the text and the selected model to translate()
    # and writes what it yields into the output box.
    translate_btn.click(translate, inputs=[input_hirigana, model], outputs=[output_kanji])

# Launch the UI only when the application-level toggle allows it.
if _CONFIG['GRADIO_UI_AUTO_LAUNCH']:
    ui.launch(inbrowser=True)

### Deterministic Unit Tests
Unit tests for deterministic logic.

#### Prerequisites:
- `pytest`

In [41]:
import unittest
from unittest.mock import MagicMock, patch
import io
import sys

# Assume these are defined elsewhere or mock them for the test
# For a bare-bones test, we'll define simple versions.
class RoleName:
    """Role labels for chat messages."""
    SYSTEM, USER, ASSISTANT = "system", "user", "assistant"

# Default model version used by the OpenAiApiClient defined in this cell.
# NOTE(review): this rebinds DEFAULT_OPENAI_MODEL, which the configuration cell
# sets to "gpt-4o" — confirm that the different value here is intended.
DEFAULT_OPENAI_MODEL = "gpt-3.5-turbo"

# Placeholder for the OpenAI class.
# In a real application, this would be 'from openai import OpenAI'.
# We define it here so OpenAiApiClient can be defined without a NameError,
# and then we can patch it correctly.
class OpenAI:
    """Stand-in OpenAI class so OpenAiApiClient can be defined and later patched."""

    def __init__(self) -> None:
        # Deliberately empty: the stand-in keeps no state.
        return None

# The class to be tested (provided by the user)
class OpenAiApiClient:
    def __init__(
        self,
        system_prompt: str,
        # todo: model version should derive from application-level config.
        model_version: str = DEFAULT_OPENAI_MODEL,
        user_prompt: str | None = None,
    ) -> None:
        """Create an OpenAI API client wrapper.

        :param system_prompt: Content of the initial system message.
        :param model_version: Model identifier stored on the instance;
                              defaults to DEFAULT_OPENAI_MODEL.
        :param user_prompt: Optional content of the initial user message;
                            stored as None when omitted.
        """
        # Looked up through the module-level name `OpenAI`, which is what the
        # tests replace with a mock.
        self._client = OpenAI()
        self._model_version = model_version
        # Initial conversation: one system message followed by one user message.
        system_message = {"role": RoleName.SYSTEM, "content": system_prompt}
        user_message = {"role": RoleName.USER, "content": user_prompt}
        self._message_hashes = [system_message, user_message]

# Bare-bones unit test class
class TestOpenAiApiClient(unittest.TestCase):
    """
    A bare-bones unit test class for the OpenAiApiClient.
    This class uses unittest.mock.patch to mock the external OpenAI dependency,
    allowing for isolated testing of the OpenAiApiClient's initialization logic.
    """

    # Patch `OpenAI` in whichever module defines this class: `__main__` inside a
    # Jupyter kernel, or the real module name when the code is imported.
    @patch(f'{__name__}.OpenAI')
    def setUp(self, MockOpenAI):
        """
        Set up the test environment before each test method.
        This method is called automatically by the unittest framework.
        It builds an OpenAiApiClient with predefined prompts while `OpenAI` is
        replaced by a mock. The patch is only active for the duration of
        setUp; clients created inside test methods use the unpatched class.
        """
        # MockOpenAI is the MagicMock replacing the OpenAI class.
        # Calling it returns MockOpenAI.return_value, which is what
        # OpenAiApiClient.__init__ stores in self._client.
        self.mock_openai_instance = MockOpenAI.return_value

        self.system_prompt = "You are a helpful assistant."
        self.user_prompt = "Hello, world!"
        self.client = OpenAiApiClient(
            system_prompt=self.system_prompt,
            user_prompt=self.user_prompt
        )
        # The class mock is what gets called (with no arguments); the instance
        # it returns is never called, so the assertion belongs on the class.
        MockOpenAI.assert_called_once_with()

    def test_initialization(self):
        """
        Test that the OpenAiApiClient is initialized correctly.
        This test verifies:
        1. The internal _client attribute is the object returned by the mocked OpenAI class.
        2. The _message_hashes list is correctly populated with system and user prompts.
        3. The _model_version is set to the default if not specified.
        """
        # self._client = OpenAI() must have stored the mock's return value.
        self.assertIs(self.client._client, self.mock_openai_instance)

        expected_message_hashes = [
            {"role": RoleName.SYSTEM, "content": self.system_prompt},
            {"role": RoleName.USER, "content": self.user_prompt},
        ]
        self.assertEqual(self.client._message_hashes, expected_message_hashes)

        self.assertEqual(self.client._model_version, DEFAULT_OPENAI_MODEL)

    def test_initialization_no_user_prompt(self):
        """
        Test initialization when no user prompt is provided.
        Ensures that _message_hashes carries None as the user content.
        """
        client_no_user = OpenAiApiClient(system_prompt="Another system prompt.")

        expected_message_hashes = [
            {"role": RoleName.SYSTEM, "content": "Another system prompt."},
            {"role": RoleName.USER, "content": None},  # User prompt should be None
        ]
        self.assertEqual(client_no_user._message_hashes, expected_message_hashes)
        self.assertEqual(client_no_user._model_version, DEFAULT_OPENAI_MODEL)

    def test_initialization_custom_model_version(self):
        """
        Test initialization with a custom model version.
        Verifies that the provided model_version is correctly assigned.
        """
        custom_model = "gpt-4-turbo"
        client_custom_model = OpenAiApiClient(
            system_prompt="System for custom model.",
            model_version=custom_model
        )

        self.assertEqual(client_custom_model._model_version, custom_model)
        # Ensure other attributes are still correctly set
        expected_message_hashes = [
            {"role": RoleName.SYSTEM, "content": "System for custom model."},
            {"role": RoleName.USER, "content": None},
        ]
        self.assertEqual(client_custom_model._message_hashes, expected_message_hashes)

    def test_update_message_hashes(self):
        """Placeholder for a test that has not been written yet."""
        # Report the gap as a skip instead of a vacuous pass.
        self.skipTest("todo: test_update_message_hashes is not implemented yet")




# Run the tests from inside a Jupyter Notebook cell.
# An explicit argv keeps unittest from parsing the kernel's own command-line
# arguments, and exit=False stops it from calling sys.exit() when the run ends.
if __name__ == '__main__':
    test_program = unittest.main(argv=['notebook'], exit=False)
    # Only claim success when the run really finished without failures or errors.
    if test_program.result.wasSuccessful():
        print("All Unit Tests Passed")

Collecting pytest
  Downloading pytest-8.4.1-py3-none-any.whl.metadata (7.7 kB)
Collecting iniconfig>=1 (from pytest)
  Downloading iniconfig-2.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting pluggy<2,>=1.5 (from pytest)
  Downloading pluggy-1.6.0-py3-none-any.whl.metadata (4.8 kB)
Downloading pytest-8.4.1-py3-none-any.whl (365 kB)
Downloading pluggy-1.6.0-py3-none-any.whl (20 kB)
Downloading iniconfig-2.1.0-py3-none-any.whl (6.0 kB)
Installing collected packages: pluggy, iniconfig, pytest
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [pytest]
[1A[2KSuccessfully installed iniconfig-2.1.0 pluggy-1.6.0 pytest-8.4.1


platform darwin -- Python 3.11.13, pytest-8.4.1, pluggy-1.6.0 -- /opt/anaconda3/envs/llms/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/aponte/personal_workspace/llm_engineering/week4
plugins: anyio-4.9.0, langsmith-0.4.4, dash-3.0.4
collected 1 item                                                               [0m

tests/test_openai_client.py::test_client [32mPASSED[0m[32m                          [100%][0m

tests/test_openai_client.py::test_client
  Did you mean to use `assert` instead of `return`?
  See https://docs.pytest.org/en/stable/how-to/assert.html#return-not-none for more information.

