In [None]:
# | default_exp _code_generator.helper

In [None]:
# | export

from typing import *
import os
import re
import functools
import logging
from collections import defaultdict
from tempfile import TemporaryDirectory
from pathlib import Path
from contextlib import contextmanager
import unittest.mock
import zipfile
import importlib.util
import time

import typer
import requests
from langchain.schema.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

from faststream_gen._components.logger import get_logger, set_level
from faststream_gen._components.logger import suppress_timestamps
from faststream_gen._code_generator.constants import (
    OPENAI_KEY_EMPTY_ERROR,
    OPENAI_KEY_NOT_SET_ERROR,
    TOKEN_TYPES,
    MAX_RESTARTS,
    MAX_RETRIES,
    STEP_LOG_DIR_NAMES,
)
from faststream_gen._components.package_data import get_root_data_path

In [None]:
import sys
from unittest.mock import patch

from faststream_gen._code_generator.constants import FASTSTREAM_DOCS_DIR_SUFFIX, FASTSTREAM_REPO_ZIP_URL, OpenAIModel

import pytest
import openai

In [None]:
# | export

# Module-level logger for this helper module, created at WARNING level.
logger = get_logger(__name__, level=logging.WARNING)

In [None]:
# Notebook-only setup: re-create the logger at level 20 (logging.INFO) so the
# INFO messages emitted by the cells below are shown in their output.
suppress_timestamps()
logger = get_logger(__name__, level=20)
logger.info("ok")

[INFO] __main__: ok


In [None]:
# | export


def set_logger_level(func: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap ``func`` so that every call first selects the logging level.

    The level is derived from the ``verbose`` keyword argument of the call:
    ``logging.INFO`` when it is supplied and truthy, ``logging.WARNING`` in
    every other case.

    Args:
        func: The function to wrap.

    Returns:
        The wrapping function, which forwards all arguments to ``func``
        unchanged and returns its result.
    """

    @functools.wraps(func)
    def _with_level(*args, **kwargs): # type: ignore
        # Only a keyword argument named "verbose" is inspected; positional
        # arguments are never examined.
        level = logging.INFO if kwargs.get("verbose") else logging.WARNING
        set_level(level)
        return func(*args, **kwargs)

    return _with_level

In [None]:
# Called without a `verbose` keyword, the decorator is expected to leave the
# logger at WARNING.
@set_logger_level
def _test_logger():
    logger.debug("INFO")
    logger.info("WARNING")

    
_test_logger()
display(logger.getEffectiveLevel())
assert logger.getEffectiveLevel() == logging.WARNING

30

In [None]:
# Called with verbose=True, the decorator is expected to switch the logger to
# INFO.
@set_logger_level
def _test_logger(**kwargs):
    logger.debug("INFO")
    logger.info("WARNING")

    
_test_logger(verbose=True)
display(logger.getEffectiveLevel())
assert logger.getEffectiveLevel() == logging.INFO



20

In [None]:
# | export


def retry_on_error(max_retries: int = MAX_RESTARTS, delay: int = 1):  # type: ignore
    """Build a decorator that re-runs a step for as long as it raises ``ValueError``.

    The decorated function is always called with an extra ``attempt`` keyword
    argument holding the zero-based attempt number. Only ``ValueError``
    triggers a retry; any other exception propagates immediately.

    Args:
        max_retries: Maximum number of times the wrapped function is called.
        delay: Seconds to sleep between two consecutive attempts.

    Returns:
        A decorator. The decorated function returns the wrapped function's
        result from the first successful attempt. If every attempt fails, it
        returns the tuple ``(args[0], args[1])`` of the last ``ValueError``.

    Raises:
        ValueError: When the decorated function is called with
            ``max_retries`` smaller than 1, or (re-raised unchanged) when the
            last failure does not carry at least two arguments.
    """

    def decorator(func):  # type: ignore
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):  # type: ignore
            for i in range(max_retries):
                try:
                    kwargs["attempt"] = i
                    return func(*args, **kwargs)
                except ValueError as e:
                    logger.info(f"Attempt {i} failed. Restarting step.")
                    if i == max_retries - 1:
                        # Final attempt: hand back the payload carried by the
                        # exception. Without such a payload the original error
                        # is more useful than an IndexError, so re-raise it.
                        if len(e.args) < 2:
                            raise
                        return e.args[0], e.args[1]
                    # Pause only when another attempt will follow.
                    time.sleep(delay)
            # Reached only when the loop body never ran.
            raise ValueError(f"max_retries must be at least 1, got {max_retries}")

        return wrapper

    return decorator

In [None]:
# A step that fails on every attempt: after the last retry the decorator
# returns the two arguments carried by the ValueError.
@retry_on_error(max_retries=3)
def my_function(attempt):
    # Always raise, carrying the pair the decorator is expected to return
    raise ValueError([], False)


actual = my_function()
print(actual)
expected = ([], False) 
assert actual == expected 

[INFO] __main__: Attempt 0 failed. Restarting step.
[INFO] __main__: Attempt 1 failed. Restarting step.
[INFO] __main__: Attempt 2 failed. Restarting step.
([], False)


In [None]:
# A step that succeeds on the first attempt: its return value is passed
# through unchanged.
@retry_on_error(max_retries=3)
def my_function(attempt):
    # Succeeds immediately, so no retry is triggered
    return "hi"

# Call the decorated function
actual = my_function()
print(actual)

assert actual == "hi"

hi


In [None]:
# | export



def ensure_openai_api_key_set() -> None:
    """Check that the 'OPENAI_API_KEY' environment variable holds a non-empty value.

    Raises:
        KeyError: If 'OPENAI_API_KEY' is missing from the environment.
        ValueError: If 'OPENAI_API_KEY' is present but set to an empty string.
    """
    try:
        api_key = os.environ["OPENAI_API_KEY"]
    except KeyError:
        # Swap the bare lookup error for the user-facing explanation.
        raise KeyError(OPENAI_KEY_NOT_SET_ERROR)

    if api_key == "":
        raise ValueError(OPENAI_KEY_EMPTY_ERROR)

In [None]:
# An empty OPENAI_API_KEY must be rejected with ValueError and the
# OPENAI_KEY_EMPTY_ERROR message.
with patch.dict(os.environ, {"OPENAI_API_KEY": ""}):
    with pytest.raises(ValueError) as e:
        ensure_openai_api_key_set()

print(e.value)
assert str(e.value) == OPENAI_KEY_EMPTY_ERROR

Error: OPENAI_API_KEY cannot be empty. Please set a valid OpenAI API key in OPENAI_API_KEY environment variable and try again.
You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.


In [None]:
# With the environment cleared, the missing key must be reported as KeyError.
# str() of a KeyError wraps the message in quotes, hence the f-string below.
with patch.dict(os.environ, {}, clear=True):
    with pytest.raises(KeyError) as e:
        ensure_openai_api_key_set()
        
print(e.value)
assert str(e.value) == f"'{OPENAI_KEY_NOT_SET_ERROR}'"

'Error: OPENAI_API_KEY not found in environment variables. Set a valid OpenAI API key in OPENAI_API_KEY environment variable and try again. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.'


In [None]:
# Any non-empty value passes: the check only looks at presence and emptiness.
with patch.dict(os.environ, {"OPENAI_API_KEY": "INVALID_KEY"}):
    ensure_openai_api_key_set()

In [None]:
# | export


def add_tokens_usage(usage_list: List[Dict[str, int]]) -> Dict[str, int]:
    """Sum a list of OpenAI "usage" dictionaries per token type.

    Only the categories listed in TOKEN_TYPES are summed. A usage dictionary
    that lacks one of them contributes 0 for that category, and the input
    dictionaries are never modified.

    Args:
        usage_list: List of OpenAI "usage" dictionaries.

    Returns:
        A ``defaultdict(int)`` mapping each token type in TOKEN_TYPES to the
        sum of its values across ``usage_list``. It is empty when
        ``usage_list`` is empty.
    """
    added_tokens: Dict[str, int] = defaultdict(int)
    for usage in usage_list:
        for token_type in TOKEN_TYPES:
            # .get() rather than indexing: indexing a defaultdict would insert
            # the missing key into the caller's dictionary, and indexing a
            # plain dict would raise KeyError.
            added_tokens[token_type] += usage.get(token_type, 0)

    return added_tokens

In [None]:
# Two identical usage dictionaries: every category is doubled.
usage = {
    "prompt_tokens": 129,
    "completion_tokens": 1,
    "total_tokens": 130
  }
assert add_tokens_usage([usage, usage]) == {
    "prompt_tokens": 258,
    "completion_tokens": 2,
    "total_tokens": 260
}

In [None]:
# An empty defaultdict(int) in the list contributes nothing to the totals.
usage = {
    "prompt_tokens": 129,
    "completion_tokens": 1,
    "total_tokens": 130
  }
assert add_tokens_usage([defaultdict(int), usage]) == {
    "prompt_tokens": 129,
    "completion_tokens": 1,
    "total_tokens": 130
}

In [None]:
# | export

# Marker lines that open ("start") and close ("end") each file section inside
# a stored example document, keyed by section name.
examples_delimiter = dict(
    description=dict(
        start="==== description.txt starts ====",
        end="==== description.txt ends ====",
    ),
    skeleton=dict(
        start="==== app_skeleton.py starts ====",
        end="==== app_skeleton.py ends ====",
    ),
    app=dict(
        start="==== app.py starts ====",
        end="==== app.py ends ====",
    ),
    test_app=dict(
        start="==== test_app.py starts ====",
        end="==== test_app.py ends ====",
    ),
)


def _split_text(text: str, delimiter: Dict[str, str]) -> str:
    """Return the part of ``text`` enclosed by the delimiter's markers.

    The result starts right after the last ``delimiter["start"]`` found in
    ``text`` (or at the beginning when it is absent) and stops right before
    the first ``delimiter["end"]`` that follows (or at the end when absent).
    """
    tail = text.split(delimiter["start"])[-1]
    section, _, _ = tail.partition(delimiter["end"])
    return section


def _format_examples(parent_docs_str: List[str]) -> Dict[str, str]:
    """Turn stored example documents into prompt-ready example text.

    Args:
        parent_docs_str: Example documents, each holding the description,
            skeleton, app and test sections wrapped in the markers defined in
            ``examples_delimiter``.

    Returns:
        A dictionary with the keys "description_to_skeleton" and
        "skeleton_to_app_and_test". Each value is one string that concatenates
        the formatted examples in input order (empty for an empty input).
    """
    skeleton_examples: List[str] = []
    app_and_test_examples: List[str] = []

    for doc in parent_docs_str:
        description, skeleton, app, test_app = (
            _split_text(doc, examples_delimiter[section])
            for section in ("description", "skeleton", "app", "test_app")
        )
        skeleton_examples.append(
            f"\n==== EXAMPLE APP DESCRIPTION ====\n{description}\n\n==== YOUR RESPONSE ====\n\n{skeleton}"
        )
        app_and_test_examples.append(
            f"\n==== EXAMPLE APP DESCRIPTION ====\n{description}\n\n==== EXAMPLE APP SKELETON ====\n{skeleton}\n==== YOUR RESPONSE ====\n\n### application.py ###\n{app}\n### test.py ###\n{test_app}"
        )

    return {
        "description_to_skeleton": "".join(skeleton_examples),
        "skeleton_to_app_and_test": "".join(app_and_test_examples),
    }

In [None]:
# One example document containing all four sections. The newlines that
# surround each section inside its markers are kept in the formatted output.
fixture = [
    """
==== description.txt starts ====
description.txt
==== description.txt ends ====
==== app_skeleton.py starts ====
app_skeleton.py
==== app_skeleton.py ends ====
==== app.py starts ====
app.py
==== app.py ends ====
==== test_app.py starts ====
test_app.py
==== test_app.py ends ====
"""
]
expected = {
    "description_to_skeleton": "\n==== EXAMPLE APP DESCRIPTION ====\n\ndescription.txt\n\n\n==== YOUR RESPONSE ====\n\n\napp_skeleton.py\n",
    "skeleton_to_app_and_test": "\n==== EXAMPLE APP DESCRIPTION ====\n\ndescription.txt\n\n\n==== EXAMPLE APP SKELETON ====\n\napp_skeleton.py\n\n==== YOUR RESPONSE ====\n\n### application.py ###\n\napp.py\n\n### test.py ###\n\ntest_app.py\n",
}

actual = _format_examples(fixture)
print(actual)

assert actual == expected

{'description_to_skeleton': '\n==== EXAMPLE APP DESCRIPTION ====\n\ndescription.txt\n\n\n==== YOUR RESPONSE ====\n\n\napp_skeleton.py\n', 'skeleton_to_app_and_test': '\n==== EXAMPLE APP DESCRIPTION ====\n\ndescription.txt\n\n\n==== EXAMPLE APP SKELETON ====\n\napp_skeleton.py\n\n==== YOUR RESPONSE ====\n\n### application.py ###\n\napp.py\n\n### test.py ###\n\ntest_app.py\n'}


In [None]:
# | export

def get_relevant_prompt_examples(query: str) -> Dict[str, str]:
    """Retrieve the stored examples most similar to ``query`` and format them per step.

    The FAISS index is loaded from the "examples" folder under the package
    data root, searched with ``k=3`` and ``fetch_k=5``, and the page contents
    of the hits are passed to ``_format_examples``.

    Args:
        query: The text used for the similarity search.

    Returns:
        The dictionary of formatted examples produced by ``_format_examples``.
    """
    examples_db_path = get_root_data_path() / "examples"
    vector_store = FAISS.load_local(examples_db_path, OpenAIEmbeddings()) # type: ignore
    matches = vector_store.similarity_search(query, k=3, fetch_k=5)
    return _format_examples([match.page_content for match in matches])

In [None]:
# End-to-end check against the stored examples: the formatted text must
# contain the example header and must not leak the raw section markers.
query = """
Create a FastStream application using localhost broker for testing and use the default port number. 
It should consume messages from the "input_data" topic, where each message is a JSON encoded object containing a single attribute: 'data'. 
For each consumed message, create a new message object and increment the value of the data attribute by 1. Finally, send the modified message to the 'output_data' topic.
"""

actual = get_relevant_prompt_examples(query)



assert "==== EXAMPLE APP DESCRIPTION ====" in actual["description_to_skeleton"]
assert "==== app_skeleton.py starts ====" not in actual["description_to_skeleton"]
print(actual["description_to_skeleton"])

[INFO] faiss.loader: Loading faiss with AVX2 support.
[INFO] faiss.loader: Successfully loaded faiss with AVX2 support.

==== EXAMPLE APP DESCRIPTION ====

Develop a FastStream application using localhost kafka broker.
The app should consume messages from the input_data topic.
The input message is a JSON encoded object including two attributes:
    - x: float
    - y: float
    - time: datetime

input_data topic should use partition key.
While consuming the message, increment x and y attributes by 1 and publish that message to the output_data topic.
The same partition key should be used in the input_data and output_data topic.



==== YOUR RESPONSE ====


from datetime import datetime

from pydantic import BaseModel, Field

from faststream import Context, FastStream, Logger
from faststream.kafka import KafkaBroker


class Point(BaseModel):
    x: float = Field(
        ..., examples=[0.5], description="The X Coordinate in the coordinate system"
    )
    y: float = Field(
        ..., 

In [None]:
# | export


def strip_white_spaces(description: str) -> str:
    """Collapse every run of whitespace into one space and trim both ends.

    Args:
        description: The text to normalise.

    Returns:
        The text with each whitespace run (spaces, tabs, newlines, ...)
        replaced by a single space and no leading or trailing whitespace.
    """
    collapsed = re.sub(r"\s+", " ", description)
    return collapsed.strip()

In [None]:
# Mixed runs of spaces and newlines must collapse to single spaces, with the
# leading and trailing whitespace removed.
fixture = """
    I have   a                  lot
                of whitespaces
                
                
"""

expected = "I have a lot of whitespaces"
actual = strip_white_spaces(fixture)
print(actual)
assert actual == expected

I have a lot of whitespaces


In [None]:
# | export


def write_file_contents(output_file: str, contents: str) -> None:
    """Write the given contents to the specified output file.

    Missing parent directories are created first. The file is written as
    UTF-8 text and replaces any existing file at that path.

    Args:
        output_file: The path to the output file where the contents will be written.
        contents: The contents to be written to the output file.

    Raises:
        OSError: If the directories cannot be created or the file cannot be
            written. The underlying error is attached as ``__cause__``.
    """
    try:
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)

        with open(output_file, "w", encoding="utf-8") as f:
            f.write(contents)

    except OSError as e:
        # Chain explicitly so the traceback shows the low-level error as the
        # cause instead of as a failure inside this handler.
        raise OSError(
            f"Error: Failed to save file at '{output_file}' due to: '{e}'. Please ensure that the specified 'output_path' is valid and that you have the necessary permissions to write files to it."
        ) from e

In [None]:
# Writing into a nested directory that does not exist yet must create the
# directories and store the contents unchanged.
contents = """
print("Hello World")
"""


with TemporaryDirectory() as d:
    output_path = f"{str(d)}/grand-parent/parent/child"
    output_file = f"{output_path}/application.py"
    
    write_file_contents(output_file, contents)
    
    with open(output_file, 'r', encoding="utf-8") as f:
        actual = f.read()
    print(f"{output_file}\n\n{actual}")

assert actual == contents

/tmp/tmpt18_9eo2/grand-parent/parent/child/application.py


print("Hello World")



In [None]:
# | export


def read_file_contents(output_file: str) -> str:
    """Read and return the contents from the specified file.

    The file is read as UTF-8 text.

    Args:
        output_file: The path to the file to be read.

    Returns:
        The contents of the file as string.

    Raises:
        FileNotFoundError: If the specified file does not exist. The
            underlying error is attached as ``__cause__``.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError as e:
        # Chain explicitly so the traceback shows the low-level error as the
        # cause instead of as a failure inside this handler.
        raise FileNotFoundError(
            f"Error: The file '{output_file}' does not exist. Please ensure that the specified 'output_path' is valid and that you have the necessary permissions to access it."
        ) from e

In [None]:
# Round trip: what write_file_contents stores, read_file_contents returns.
contents = """
print("Hello World")
"""


with TemporaryDirectory() as d:
    output_path = f"{str(d)}/grand-parent/parent/child"
    output_file = f"{output_path}/application.py"
    
    write_file_contents(output_file, contents)
    
    actual = read_file_contents(output_file)
    print(f"{output_file}\n\n{actual}")

assert actual == contents

/tmp/tmp_720mamc/grand-parent/parent/child/application.py


print("Hello World")



In [None]:
# The file is never written here, so reading it must raise FileNotFoundError.
contents = """
print("Hello World")
"""

with pytest.raises(FileNotFoundError) as e:
    with TemporaryDirectory() as d:
        output_path = f"{str(d)}/grand-parent/parent/child"
        output_file = f"{output_path}/application.py"

        actual = read_file_contents(output_file)

print(str(e))

<ExceptionInfo FileNotFoundError("Error: The file '/tmp/tmp8t0kjf51/grand-parent/parent/child/application.py' does not exist. Please ensure that the specified 'output_path' is valid and that you have the necessary permissions to access it.") tblen=2>


In [None]:
# | export


@contextmanager
def mock_openai_create(test_response):
    """Patch ``openai.ChatCompletion`` so that ``create`` returns a canned response.

    Inside the ``with`` block, ``openai.ChatCompletion.create(...)`` returns a
    dictionary with a single choice whose message content is
    ``test_response`` and a fixed ``usage`` entry (129 prompt tokens,
    1 completion token, 130 total tokens).

    Args:
        test_response: The content to place in the mocked completion message.
    """
    canned_usage = {
        "prompt_tokens": 129,
        "completion_tokens": 1,
        "total_tokens": 130,
    }
    canned_completion = {
        "choices": [{"message": {"content": test_response}}],
        "usage": canned_usage,
    }

    with unittest.mock.patch("openai.ChatCompletion") as chat_completion_mock:
        chat_completion_mock.create.return_value = canned_completion
        yield

In [None]:
# Inside the context manager, ChatCompletion.create() must return the canned
# response carrying the given content.
test_response = "This is a mock response"

with mock_openai_create(test_response):
    response = openai.ChatCompletion.create()
    ret_val = response['choices'][0]['message']['content']
    print(ret_val)
    assert ret_val == test_response

This is a mock response


In [None]:
# | export


def _fetch_content(url: str) -> requests.models.Response: # type: ignore
    """Fetch content from a URL using an HTTP GET request.

    A request that times out is retried, with a one second pause between
    attempts, for up to four attempts in total. Each attempt uses a 50 second
    timeout. Any other request error, including an HTTP error status, is not
    retried.

    Args:
        url (str): The URL to fetch content from.

    Returns:
        Response: The response object containing the content and HTTP status.

    Raises:
        requests.exceptions.Timeout: If the last attempt times out.
        requests.exceptions.RequestException: If an error occurs during the request.
    """
    max_attempts = 4
    for attempt in range(max_attempts):
        try:
            response = requests.get(url, timeout=50)
            response.raise_for_status()  # Raises an exception for HTTP errors
            return response
        except requests.exceptions.Timeout as e:
            if attempt == max_attempts - 1:
                # Out of attempts: report the timeout, keeping the original
                # error attached as the explicit cause.
                raise requests.exceptions.Timeout(
                    "Request timed out. Please check your internet connection or try again later."
                ) from e
            time.sleep(1)  # Sleep for one second before retrying
        except requests.exceptions.RequestException as e:
            raise requests.exceptions.RequestException(f"An error occurred: {e}") from e

In [None]:
# Live network check: fetching a public page must return a non-empty body.
response = _fetch_content("https://fastkafka.airt.ai/")
print(response.content[:200])
assert len(response.content) > 0

b'<!doctype html>\n<html lang="en" dir="ltr" class="plugin-pages plugin-id-default">\n<head>\n<meta charset="UTF-8">\n<meta name="generator" content="Docusaurus v2.4.0">\n<title data-rh="true">Effortless Kaf'


In [None]:
# | export


@contextmanager
def download_and_extract_faststream_archive(url: str) -> Generator[Path, None, None]:
    """Download a zip archive and yield the directory it was extracted into.

    The archive is fetched with ``_fetch_content``, then saved and unpacked
    inside a ``TemporaryDirectory`` that is removed when the context exits.

    Because the ``yield`` sits inside the ``try`` block, an exception raised
    in the body of the caller's ``with`` statement is handled in the same way
    as a download or extraction failure.

    Args:
        url: The URL of the zip archive.

    Yields:
        The path of the directory holding the extracted archive contents.

    Raises:
        typer.Exit: With exit code 1, after the error message has been printed
            to stderr, whenever an ``Exception`` is caught.
    """
    with TemporaryDirectory() as tmp_dir:
        try:
            workdir = Path(tmp_dir)
            archive_file = workdir / "archive.zip"
            unpack_dir = workdir / "extrated_path"
            unpack_dir.mkdir(parents=True, exist_ok=True)

            archive_file.write_bytes(_fetch_content(url).content)

            with zipfile.ZipFile(archive_file, "r") as archive:
                archive.extractall(unpack_dir)

            yield unpack_dir

        except Exception as e:
            typer.secho(
                f"Unexpected internal error: {e}", err=True, fg=typer.colors.RED
            )
            raise typer.Exit(code=1)

In [None]:
# Live network check: the extracted archive must contain an "index" entry in
# its docs directory.
with download_and_extract_faststream_archive(FASTSTREAM_REPO_ZIP_URL) as extracted_path:
    files = [p.stem for p in list(Path(extracted_path/FASTSTREAM_DOCS_DIR_SUFFIX).glob("*"))]
    print(files)
    assert "index" in files

['api', 'kafka', 'getting-started', 'index', 'release', 'rabbit']


In [None]:
# | export


def validate_python_code(file_name: str, **kwargs: Dict[str, Any]) -> List[str]:
    """Validate a Python file by importing it and report the error it raises, if any.

    The file is executed as a module named "tmp_module", so all of its
    top-level code runs inside the current process. Only ``Exception``
    subclasses are caught; ``SystemExit`` and ``KeyboardInterrupt`` raised by
    the file propagate to the caller.

    Args:
        file_name: Python file to validate.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        A list holding a single "<ExceptionName>: <message>" string when the
        file cannot be loaded or raises while being executed. If no errors
        occur, an empty list is returned.
    """
    spec = importlib.util.spec_from_file_location("tmp_module", file_name)
    if spec is None or spec.loader is None:
        # No import loader matches this path (for example, it has no ".py"
        # suffix). Report that plainly instead of failing later on None.
        return [f"ImportError: Cannot load '{file_name}' as a Python module"]

    try:
        module = importlib.util.module_from_spec(spec)
        # NOTE(security): this executes the file's code. Only pass files
        # whose contents are acceptable to run in this process.
        spec.loader.exec_module(module)
    except Exception as e:
        return [f"{type(e).__name__}: {e}"]

    return []

In [None]:
# A file that imports cleanly must produce no error messages.
fixture = """
import os
def say_hello():
    print("hello")
"""

with TemporaryDirectory() as d:
    app_file = Path(d) / "application.py"
    write_file_contents(str(app_file), fixture)

    actual = validate_python_code(app_file)
    expected = []

    print(actual)
    assert actual == expected

[]


In [None]:
# Importing a module that does not exist must be reported as a
# ModuleNotFoundError message.
fixture = """
import os
import invalid_module
def say_hello():
    print("hello")
"""

with TemporaryDirectory() as d:
    app_file = Path(d) / "application.py"
    write_file_contents(str(app_file), fixture)


    actual = validate_python_code(app_file)
    expected = ["ModuleNotFoundError: No module named 'invalid_module'"]

    print(actual)
    assert actual == expected

["ModuleNotFoundError: No module named 'invalid_module'"]


In [None]:
# A missing colon after the function signature must be reported as a
# SyntaxError. The expected wording depends on the interpreter version, so
# the assertion below selects it by sys.version_info.
fixture = """
import os
def say_hello()
    print("hello")
"""

with TemporaryDirectory() as d:
    app_file = Path(d) / "application.py"
    write_file_contents(str(app_file), fixture)


    actual = validate_python_code(app_file)

    expected = (
        ["SyntaxError: invalid syntax (application.py, line 3)"]
        if sys.version_info < (3, 10)
        else ["SyntaxError: expected ':' (application.py, line 3)"]
    )

    print(actual)
    assert (
        actual == expected
    ), f"actual = {actual} - expected = {expected} - sys.version_info = {sys.version_info}"

["SyntaxError: expected ':' (application.py, line 3)"]
