In [None]:
# | default_exp _code_generator.chat

In [None]:
# | export

from typing import *
import random
import logging
import time

import openai
from langchain.schema.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

from faststream_gen._code_generator.constants import DEFAULT_PARAMS
from faststream_gen._components.logger import get_logger, set_level
from faststream_gen._code_generator.prompts import SYSTEM_PROMPT

from faststream_gen._components.package_data import get_root_data_path

[INFO] numexpr.utils: Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[INFO] numexpr.utils: NumExpr defaulting to 8 threads.


In [None]:
import pytest

from faststream_gen._components.logger import suppress_timestamps
from faststream_gen._code_generator.constants import OpenAIModel

In [None]:
# | export

# Module-level logger for the exported module, created at WARNING level.
# It is used by the retry decorator and by CustomAIChat to report retries and prompts via logger.info.
logger = get_logger(__name__, level=logging.WARNING)

In [None]:
# Notebook-only setup: rebind `logger` at level 20 (the numeric value of logging.INFO)
# so the logger.info messages emitted by the cells below show up in the recorded output.
suppress_timestamps()
logger = get_logger(__name__, level=20)
logger.info("ok")

[INFO] __main__: ok


In [None]:
# | export

# Reference: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_handle_rate_limits.ipynb


def _retry_with_exponential_backoff(
    initial_delay: float = 1,
    exponential_base: float = 2,
    jitter: bool = True,
    max_retries: int = 10,
    max_wait: float = 60,
    errors: tuple = (
        openai.error.RateLimitError,
        openai.error.ServiceUnavailableError,
        openai.error.APIError,
    ),
) -> Callable:
    """Build a decorator that retries a function with exponential backoff.

    Args:
        initial_delay: Starting delay in seconds. The delay is scaled *before* the
            first sleep, so the first wait is already at least
            ``initial_delay * exponential_base`` (capped by ``max_wait``).
        exponential_base: Factor the delay is multiplied by on every retry.
        jitter: If True, the growth factor is additionally multiplied by a random
            value in ``[1, 2)`` on every retry.
        max_retries: Number of retries allowed after the initial attempt.
        max_wait: Upper bound, in seconds, for a single sleep.
        errors: Exception types that trigger a retry. Any other exception
            propagates to the caller immediately.

    Returns:
        A decorator that wraps a callable with the retry loop. The wrapper forwards
        all positional and keyword arguments and returns the callable's result.

    Raises:
        Exception: Raised by the wrapped callable once more than ``max_retries``
            retryable errors have occurred. The last retryable error is attached
            as ``__cause__``.
    """
    # Local import keeps this block self-contained without touching the import cell.
    import functools

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        # Preserve the wrapped callable's name and docstring on the wrapper.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):  # type: ignore
            num_retries = 0
            delay = initial_delay

            while True:
                try:
                    return func(*args, **kwargs)

                except errors as e:
                    num_retries += 1
                    if num_retries > max_retries:
                        # Chain the last API error so the root cause stays visible.
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        ) from e
                    # `jitter` is a bool: False leaves the factor at 1, True adds a
                    # random fraction so concurrent callers do not retry in lockstep.
                    delay = min(
                        delay
                        * exponential_base
                        * (1 + jitter * random.random()),  # nosec
                        max_wait,
                    )
                    logger.info(
                        f"Note: OpenAI's API rate limit reached. Command will automatically retry in {int(delay)} seconds. For more information visit: https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits",
                    )
                    time.sleep(delay)

        return wrapper

    return decorator

In [None]:
# Happy path: the wrapped function raises nothing, so the decorator must return
# its result unchanged on the first attempt (no retry, no sleep).
@_retry_with_exponential_backoff()
def mock_func():
    return "Success"

actual = mock_func()
expected = "Success"

print(actual)
assert actual == expected

Success


In [None]:
# Test max retries exceeded
# With max_retries=1 the first RateLimitError triggers one sleep-and-retry; the
# second failure exceeds the budget and the decorator raises a plain Exception.
# Note: this cell really sleeps for the computed backoff delay before retrying.
@_retry_with_exponential_backoff(max_retries=1)
def mock_func_error():
    raise openai.error.RateLimitError


with pytest.raises(Exception) as e:
    mock_func_error()

print(e.value)
# The message embeds the configured max_retries value.
assert str(e.value) == "Maximum number of retries (1) exceeded."

[INFO] __main__: Note: OpenAI's API rate limit reached. Command will automatically retry in 2 seconds. For more information visit: https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits
Maximum number of retries (1) exceeded.


In [None]:
# | export

def _get_relevant_document(query: str) -> str:
    """Fetch the best-matching document text for a query from the local vector store.

    The FAISS index is loaded from the "docs" folder under the package's root data
    path and queried with a max-marginal-relevance search (k=1, fetch_k=3).

    Args:
        query: The query for relevance-based document retrieval.

    Returns:
        The page content of the retrieved documents, joined by newlines.
    """
    index_dir = get_root_data_path() / "docs"
    vector_store = FAISS.load_local(index_dir, OpenAIEmbeddings())  # type: ignore
    matches = vector_store.max_marginal_relevance_search(query, k=1, fetch_k=3)
    return "\n".join(match.page_content for match in matches)

In [None]:
# Smoke test for retrieval: loads the vector database and checks that a
# non-empty document comes back. Only the first 200 characters are printed
# to keep the cell output short.
query = "What is FastStream?"
actual = _get_relevant_document(query)
print(actual[:200])
assert len(actual) > 0

[INFO] faiss.loader: Loading faiss with AVX2 support.
[INFO] faiss.loader: Successfully loaded faiss with AVX2 support.
hide:
  - navigation
  - footer

Release Notes

FastStream is a new package based on the ideas and experiences gained from FastKafka and Propan. By joining our forces, we picked up the best from both 


In [None]:
# | export


class CustomAIChat:
    """Custom class for interacting with OpenAI

    Attributes:
        model: The OpenAI model to use.
        messages: The chat history sent to the model on every call. It starts with
            the system prompt (SYSTEM_PROMPT), the document retrieved for the
            semantic search query and, if given, the initial user prompt.
        params: Parameters to use for the OpenAI chat completion request.
            Only the "temperature" key is read by this class.
    """

    def __init__(
        self,
        model: str,
        user_prompt: Optional[str] = None,
        params: Dict[str, float] = DEFAULT_PARAMS,
        semantic_search_query: Optional[str] = None,
    ):
        """Instantiates a new CustomAIChat object.

        Args:
            model: The OpenAI model to use.
            user_prompt: The user prompt to the AI model. Skipped when None.
            params: Parameters to use while initiating the OpenAI chat model. DEFAULT_PARAMS used if not provided.
            semantic_search_query: A query string to fetch relevant documents from the database
        """
        self.model = model
        # NOTE(review): _get_doc returns "" (not None) when no query is given, so an
        # empty user message is still included by the `is not None` filter below.
        self.messages = [
            {"role": role, "content": content}
            for role, content in [
                ("system", SYSTEM_PROMPT),
                ("user", self._get_doc(semantic_search_query)),
                ("user", user_prompt),
            ]
            if content is not None
        ]
        self.params = params

    @staticmethod
    def _get_doc(semantic_search_query: Optional[str] = None) -> str:
        """Return the document matching the query, or an empty string when no query is given."""
        if semantic_search_query is None:
            return ""
        return _get_relevant_document(semantic_search_query)

    @_retry_with_exponential_backoff()
    def _create_completion(self) -> Tuple[str, Dict[str, int]]:
        """Send the current message history to the chat completion endpoint.

        Only this request is retried on transient API errors. It does not modify
        ``self.messages``, so a retry re-sends exactly the same conversation.

        Returns:
            A tuple with the content of the first choice's message and the
            ``usage`` entry of the API response.
        """
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=self.messages,
            temperature=self.params["temperature"],
        )

        return (
            response["choices"][0]["message"]["content"],
            response["usage"],
        )

    def __call__(self, user_prompt: str) -> Tuple[str, Dict[str, int]]:
        """Call OpenAI API chat completion endpoint and generate a response.

        The user prompt is appended to the message history exactly once, before
        the retried API request, so retries do not duplicate it.

        Args:
            user_prompt: A string containing user's input prompt.

        Returns:
            A tuple with AI's response message content and the usage data reported by the API for the request.
        """
        self.messages.append(
            {"role": "user", "content": f"{user_prompt}\n==== YOUR RESPONSE ====\n"}
        )
        prompt_str = "\n\n".join([f"===Role:{m['role']}===\n\nMessage:\n{m['content']}" for m in self.messages])
        logger.info(f"\n\nPrompt to the model: \n\n{prompt_str}")

        return self._create_completion()

In [None]:
# | notest

# Classification-style prompt used only by this live API check: the model is
# instructed to answer with a single digit (0, 1 or 2).
TEST_INITIAL_USER_PROMPT = """
You should respond with 0, 1 or 2 and nothing else. Below are your rules:

==== RULES: ====

If the ==== APP DESCRIPTION: ==== section is not related to FastKafka or contains violence, self-harm, harassment/threatening or hate/threatening information then you should respond with 0.

If the ==== APP DESCRIPTION: ==== section is related to FastKafka but focuses on what is it and its general information then you should respond with 1. 

If the ==== APP DESCRIPTION: ==== section is related to FastKafka but focuses how to use it and instructions to create a new app then you should respond with 2. 
"""

# No semantic_search_query is passed, so no document lookup is performed.
ai = CustomAIChat(user_prompt = TEST_INITIAL_USER_PROMPT, model=OpenAIModel.gpt3.value)
# This performs a real request to the OpenAI API and returns (content, usage).
response, usage = ai("Name the tallest mountain in the world")

print(response)
print(usage)

# An unrelated question is expected to be classified as "0" under the rules above.
assert response == "0"

[INFO] __main__: 

Prompt to the model: 

===Role:system===

Message:

You are an expert Python developer, tasked to generate executable Python code as a part of your work with the FastStream framework. 

You are to abide by the following guidelines:

1. You must never enclose the generated Python code with ``` python. It is mandatory that the output is a valid and executable Python code. Please ensure this rule is never broken.

2. Some prompts might require you to generate code that contains async functions. For example:

async def app_setup(context: ContextRepo):
    raise NotImplementedError()

In such cases, it is necessary to add the "import asyncio" statement at the top of the code. 

You will encounter sections marked as:

==== APP DESCRIPTION: ====

These sections contain the description of the FastStream app you need to implement. Treat everything below this line, until the end of the prompt, as the description to follow for the app implementation.


===Role:user===

Message: