In [1]:
import html
import textwrap

import ipywidgets as widgets
from IPython.display import display, clear_output
from langchain.agents import tool, Tool, initialize_agent, AgentType
from langchain.memory import ConversationBufferMemory
from langchain.output_parsers import PydanticOutputParser
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain.tools import DuckDuckGoSearchRun
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

from extract import (
    extract_function_names,
    extract_function_by_name,
    extract_imports,
)
from function_code_with_unit_tests import FunctionCodeWithUnitTests
from run_tests import run_unit_test


# Chat model shared by the agent built further down; temperature 0 is requested
# explicitly for the "gpt-4o" model.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)



In [3]:
# Sample tool input: imports, then one function (`add`), then two unit tests.
# NOTE(review): `add` returns `a - b` although its docstring says it adds, so
# both tests fail (this matches the output recorded under this cell). That is
# presumably deliberate, to exercise the failure-reporting path -- confirm
# before "fixing" the sample.
function_and_unit_tests_code_block_str = """
import numpy as np

def add(a, b):
    'add numbers together'
    return a - b

def test_add_basic():
    assert add(2,3) == 5, "basic test failed"

def test_add_negative_numbers():
    assert add(-1, -3) == -4, "this one failed"
"""


@tool
def find_all_failing_unit_tests(function_and_unit_tests_str: str) -> str:
    """
    function_and_unit_tests_str: str that is a python block of code. The
    function to test is the first function, followed by all the unit tests.

    returns a string with the failing unit tests and their error messages.
    """
    # NOTE: the docstring above doubles as the tool description handed to the
    # LLM by the `@tool` decorator, so it is kept short and names the real
    # parameter. Implementation notes live in comments instead.

    imports = extract_imports(function_and_unit_tests_str)

    # Convention: the first function found is the one under test, every
    # function after it is treated as a unit test.
    function_and_unit_test_names = extract_function_names(function_and_unit_tests_str)
    if not function_and_unit_test_names:
        # Without this guard the unpacking below would fail on empty input
        # instead of giving the agent something it can act on.
        return "No function definitions were found in the provided code block."

    function_name, *unit_test_names = function_and_unit_test_names
    if not unit_test_names:
        # Reporting "All unit tests pass." with zero tests would be misleading.
        return (
            f"No unit tests were found after the function '{function_name}'. "
            "Provide the function first, followed by its unit tests."
        )

    function_code_with_unit_tests = FunctionCodeWithUnitTests(
        function_code=[
            extract_function_by_name(
                code_string=function_and_unit_tests_str, function_name=function_name
            )
        ],
        unit_tests={
            name: [
                extract_function_by_name(
                    code_string=function_and_unit_tests_str, function_name=name
                )
            ]
            for name in unit_test_names
        },
        unit_test_imports="\n".join(imports),
    )

    # Run every unit test and keep the details of those that did not pass.
    failure_messages = []
    for unit_test_name in function_code_with_unit_tests.unit_tests:
        result = run_unit_test(
            function_code_with_unit_tests=function_code_with_unit_tests,
            unit_test_name=unit_test_name,
        )
        # Any status that is not truthy is reported as a failure.
        if not result.test_pass_status:
            failure_messages.append(result.message_details())

    if not failure_messages:
        return "All unit tests pass."
    return "\n".join(failure_messages)


# Smoke-test the tool on the sample block. `.invoke` is used because calling a
# tool object directly relies on the deprecated `BaseTool.__call__`.
find_all_failing_unit_tests.invoke(function_and_unit_tests_code_block_str)

'unit_test_name test_add_basic failed. Output: Assertion failed: basic test failed\nunit_test_name test_add_negative_numbers failed. Output: Assertion failed: this one failed'

In [7]:
# System prompt for the agent; it is added to the conversation memory as a
# SystemMessage in the agent-setup cell. It has five parts: guidelines for
# writing unit tests, guidelines for writing the function, the step-by-step
# workflow ("Instructions"), tool-usage rules, and a closing reminder.
# NOTE(review): "you're job is done" in step 3 should read "your job is done";
# it is left untouched here because this string is sent to the model verbatim.
system_message = """
When tasked with writing unit tests, consider the following;
    1. Carefully analyze the given description.
    2. Create diverse unit tests that cover all aspects of the function's expected behavior, including:
    - Basic functionality
    - Edge cases
    - Corner cases
    - Boundary conditions
    - Invalid inputs
    - Empty inputs
    - Large inputs
    - Special characters
    - Unicode characters (if applicable)
    - Case sensitivity (if applicable)
    - Multiline inputs (if applicable)
    - Non-matching scenarios
    3. If you need to use a unit test framework, use pytest.
    4. Write individual test functions, not a test class.
    5. Use descriptive test names that clearly indicate the specific scenario being tested.
    6. Include detailed assertions that thoroughly check expected outcomes.
    7. Each assert statement should have a message with the expected output and the observed output.
    8. Do not test input longer than 200 characters.
    9. For regex functions, consider testing:
    - Pattern matching accuracy
    - Capturing groups
    - Non-capturing groups
    - Lookahead and lookbehind assertions
    - Greedy vs. non-greedy quantifiers
    - Character classes and negated character classes
    - Anchors (start, end, word boundaries)
    - Flags (e.g., re.IGNORECASE, re.MULTILINE, re.DOTALL)
    - Escape sequences
    10. Do not write the actual function implementation or code to run the tests.
    11. Write the name of the function you are testing as a comment at the top of the code block (do not write a place holder function).
    12. Generate at least 10-15 diverse tests to ensure comprehensive coverage.

When tasked with writing a python function, consider the following;
    1. Consider the function description and the unit tests carefully to understand the function's required behavior.
    2. Write a single Python function that passes all the provided tests.
    3. Use type hints for parameters and return values.
    4. Include a clear and concise docstring explaining the function's purpose and parameters.
    5. Follow Python best practices and PEP 8 style guidelines.
    6. Do not include comments within the function body.
    7. Ensure the function handles all edge cases and scenarios covered in the tests.
    8. If the tests imply the use of regular expressions, import the 're' module and use it appropriately.

Instructions
You are a helpful assistant that assists the human user to write unit tests and a regex function.
The user will provide a description for the python function they want.
    1. Write the unit tests for that function, and only the unit tests. Do not use any tools yet! Confirm with the user the unit test coverage is sufficient. Write the unit tests in the chat.
    2. Write the function that will pass the unit tests, and only the function. Do not use any tools yet! Confirm with the user that they are ready to run the unit tests. Write the function in the chat.
    3. Use the 'find_all_failing_unit_tests' tool to check if all the unit tests pass.
        - If some unit tests fail, suggest changes that will fix the issues.
        - If the user agrees, fix the issues, whether they are in the function or the unit test.
    If all the tests pass, that's the end, you're job is done. If some unit tests still fail, repeat step 3 until they all pass.

Tool use;
    - The 'find_all_failing_unit_tests' tool requires the input to be python code with the function first, followed by the unit tests.
    - After using 'find_all_failing_unit_tests', tell the user which unit tests failed and why.

Reminder;
    - If you make changes to the unit tests or the function, write the new unit test or function in the chat!
    - DO NOT USE ANY TOOLS (INCLUDING 'find_all_failing_unit_tests') UNTIL THE USER GIVES PERMISSION!
"""


In [8]:
class CodeBlockModel(BaseModel):
    # Container model with a single string field holding a Python code block.
    # Documented with comments rather than a class docstring so the pydantic
    # schema generated from this model stays exactly as before.
    code_block: str = Field(
        description="A Python code block containing imports, a function and its unit tests",
    )

class CodeBlockOutputParser(PydanticOutputParser):
    """Output parser that wraps raw model text in a ``CodeBlockModel``.

    Markdown code-fence decoration (surrounding backticks and a leading
    ``python`` language tag) is removed before the text is stored.
    """

    def __init__(self):
        """Bind the parser to ``CodeBlockModel``."""
        super().__init__(pydantic_object=CodeBlockModel)

    def parse(self, text: str) -> CodeBlockModel:
        """Strip markdown fence syntax from ``text`` and return it as a model.

        Args:
            text: Raw text, optionally wrapped in a markdown code fence.

        Returns:
            A ``CodeBlockModel`` whose ``code_block`` is the cleaned text.
        """
        # Trim whitespace BEFORE stripping backticks: a fence that is preceded
        # or followed by a newline would otherwise keep its backticks, because
        # str.strip('`') only removes characters at the very ends.
        clean_text = text.strip().strip('`').strip()

        # After the backticks are gone, a fenced block starts with its
        # language tag on a line of its own; drop that tag line.
        language_tag = 'python\n'
        if clean_text.startswith(language_tag):
            clean_text = clean_text[len(language_tag):]
        return CodeBlockModel(code_block=clean_text)

# Parser instance that is passed to `initialize_agent` in the agent-setup cell.
output_parser = CodeBlockOutputParser()

In [9]:
# The agent is given exactly one tool: the unit-test runner defined earlier.
tools = [find_all_failing_unit_tests]

# Conversation memory, seeded with the system prompt followed by an opening
# assistant turn that asks for a function description.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
memory.chat_memory.add_message(SystemMessage(content=system_message))
memory.chat_memory.add_message(AIMessage(content="Please provide a regex function description."))
# Example first user message, kept for reference only (it is not sent):
# query = """For a given string, please extract all the email addresses.
# Including instances of 'name at email dot com', for example 'cat at gmail dot com' should extract 'cat@gmail.com'
# and 'dog at hotmail dot com' should extract 'dog@hotmail.com'. It should work for any email domain."""
# memory.chat_memory.add_message(HumanMessage(content=query))

# Conversational ReAct agent wired to the chat model, the tool list and the
# memory above.
# NOTE(review): the run log recorded under this cell still shows JSON action
# blobs and "Could not parse LLM output" messages, so it is unclear whether
# the custom `output_parser` is actually applied by the agent -- confirm how
# `initialize_agent` consumes this keyword.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    memory=memory,
    output_parser=output_parser,
    handle_parsing_errors=True,
    verbose=True,
)

# Widgets for the chat UI: a one-line text box, a send button, and an output
# area into which the conversation is rendered.
text_input = widgets.Text(placeholder="Type your message here...")
send_button = widgets.Button(description="Send")
output = widgets.Output()


def format_message(role, content):
    """Render one chat message as an HTML snippet.

    Args:
        role: Speaker label shown in bold (e.g. "Human", "AI").
        content: Message text; may span several lines and contain code.

    Returns:
        An HTML string: the bold role label followed by the message inside a
        ``<pre>`` element.
    """
    # Wrap each line on its own. Calling textwrap.fill on the whole message
    # would replace every newline with a space and flatten multi-line code
    # onto a single line; per-line wrapping keeps line breaks and leading
    # indentation while still capping lines at 120 characters.
    wrapped_content = "\n".join(
        textwrap.fill(line, width=120) for line in content.splitlines()
    )
    # Escape after wrapping so the width is measured on the visible text, and
    # so that '<', '>' and '&' in code or user input are displayed literally
    # instead of being interpreted as markup.
    safe_role = html.escape(str(role))
    safe_content = html.escape(wrapped_content)
    return f"<strong>{safe_role}:</strong> <pre style='white-space: pre-wrap; word-wrap: break-word;'>{safe_content}</pre>"


def on_send_button_clicked(b):
    """Send the typed message to the agent and re-render the conversation.

    Args:
        b: The button instance passed in by ``on_click``; not used.
    """
    user_input = text_input.value
    if not user_input.strip():
        # Nothing meaningful typed: do not spend an LLM call on a blank turn.
        return
    text_input.value = ""  # Clear the input field

    # Run the agent; the reply is read back from `memory` below, so the
    # return value is not needed here.
    error_html = ""
    try:
        agent.run(input=user_input)
    except Exception as exc:
        # A failure inside a widget callback would otherwise leave the UI
        # unchanged with the typed text already gone. Restore the text so the
        # user can retry, and show the error at the end of the conversation.
        text_input.value = user_input
        error_html = format_message("Error", f"{type(exc).__name__}: {exc}") + "<br>"

    # Message classes that are rendered, with the label shown for each.
    role_labels = (
        (SystemMessage, "System"),
        (HumanMessage, "Human"),
        (AIMessage, "AI"),
    )

    # Display the full conversation history
    with output:
        clear_output()
        html_parts = []
        for message in memory.chat_memory.messages:
            for message_type, role in role_labels:
                if isinstance(message, message_type):
                    html_parts.append(format_message(role, message.content))
                    break
            # A line break follows every message, including unlabelled types.
            html_parts.append("<br>")
        html_parts.append(error_html)

        display(widgets.HTML("".join(html_parts)))


# Run the send handler whenever the button is clicked.
send_button.on_click(on_send_button_clicked)

# Layout, top to bottom: a heading, a fixed-height scrollable pane holding the
# conversation output, and a row with the text box and the send button.
chat_container = widgets.VBox(
    children=[
        widgets.HTML(value="<h3>Chat Interface</h3>"),
        widgets.VBox(
            children=[output],
            layout=widgets.Layout(overflow_y="auto", height="400px"),
        ),
        widgets.HBox(children=[text_input, send_button]),
    ]
)

# Render the assembled chat interface in the cell output.
display(chat_container)

VBox(children=(HTML(value='<h3>Chat Interface</h3>'), VBox(children=(Output(),), layout=Layout(height='400px',…



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mCould not parse LLM output: Let's start by writing the unit tests for the function that extracts email addresses from a given string, including instances of 'name at email dot com' format.

Here are the unit tests:

```python
# Function to test: extract_emails

import re
import pytest

def test_basic_email_extraction():
    assert extract_emails("Contact us at support@example.com") == ["support@example.com"], "Failed to extract basic email"

def test_multiple_emails_extraction():
    assert extract_emails("Emails: first@example.com, second@example.com") == ["first@example.com", "second@example.com"], "Failed to extract multiple emails"

def test_no_emails():
    assert extract_emails("No emails here!") == [], "Failed to return empty list when no emails are present"

def test_at_dot_format():
    assert extract_emails("Contact: user at example dot com") == ["user@example.com"], "Failed to extract email in 'at' and 'dot' format



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "find_all_failing_unit_tests",
    "action_input": "import re\nimport pytest\n\n# Function to test: extract_emails\n\ndef test_basic_email_extraction():\n    assert extract_emails(\"Contact us at support@example.com\") == [\"support@example.com\"], \"Failed to extract basic email\"\n\ndef test_multiple_emails_extraction():\n    assert extract_emails(\"Emails: first@example.com, second@example.com\") == [\"first@example.com\", \"second@example.com\"], \"Failed to extract multiple emails\"\n\ndef test_no_emails():\n    assert extract_emails(\"No emails here!\") == [], \"Failed to return empty list when no emails are present\"\n\ndef test_at_dot_format():\n    assert extract_emails(\"Contact: user at example dot com\") == [\"user@example.com\"], \"Failed to extract email in 'at' and 'dot' format\"\n\ndef test_mixed_formats():\n    assert extract_emails(\"Emails: user@example.com, user at example dot com\"



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "find_all_failing_unit_tests",
    "action_input": "import re\nimport pytest\n\n# Function to test: extract_emails\n\ndef extract_emails(text: str) -> list[str]:\n    # Regex pattern to match standard email format\n    standard_email_pattern = r'[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}'\n    # Regex pattern to match 'name at email dot com' format\n    at_dot_pattern = r'([\\w\\.-]+)\\s+at\\s+([\\w\\.-]+)\\s+dot\\s+([a-zA-Z]{2,})'\n\n    # Find all matches for both patterns\n    standard_emails = re.findall(standard_email_pattern, text)\n    at_dot_emails = re.findall(at_dot_pattern, text)\n\n    # Convert 'name at email dot com' format to standard email format\n    converted_emails = [f'{user}@{domain}.{tld}' for user, domain, tld in at_dot_emails]\n\n    # Combine both lists of emails\n    all_emails = standard_emails + converted_emails\n\n    return all_emails\n\n\ndef test_basic_email_extraction():\n   



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "find_all_failing_unit_tests",
    "action_input": "import re\nfrom typing import List\n\n# Function to test: extract_emails\n\ndef extract_emails(text: str) -> List[str]:\n    # Regex pattern to match standard email format\n    standard_email_pattern = r'[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}'\n    # Regex pattern to match 'name at email dot com' format\n    at_dot_pattern = r'([\\w\\.-]+)\\s+at\\s+([\\w\\.-]+)\\s+dot\\s+([a-zA-Z]{2,})'\n\n    # Find all matches for both patterns\n    standard_emails = re.findall(standard_email_pattern, text)\n    at_dot_emails = re.findall(at_dot_pattern, text)\n\n    # Convert 'name at email dot com' format to standard email format\n    converted_emails = [f'{user}@{domain}.{tld}' for user, domain, tld in at_dot_emails]\n\n    # Combine both lists of emails\n    all_emails = standard_emails + converted_emails\n\n    return all_emails\n\nimport pytest\n\ndef test_basic



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "find_all_failing_unit_tests",
    "action_input": "import re\nfrom typing import List\n\n# Function to test: extract_emails\n\ndef extract_emails(text: str) -> List[str]:\n    # Regex pattern to match standard email format\n    standard_email_pattern = r'[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}'\n    # Regex pattern to match 'name at email dot com' format\n    at_dot_pattern = r'([\\w\\.-]+)\\s+at\\s+([\\w\\.-]+)\\s+dot\\s+([a-zA-Z]{2,})'\n\n    # Find all matches for both patterns\n    standard_emails = re.findall(standard_email_pattern, text)\n    at_dot_emails = re.findall(at_dot_pattern, text)\n\n    # Convert 'name at email dot com' format to standard email format\n    converted_emails = [f'{user}@{domain}.{tld}' for user, domain, tld in at_dot_emails]\n\n    # Combine both lists of emails\n    all_emails = standard_emails + converted_emails\n\n    return all_emails\n\nimport pytest\n\n\ndef test_bas



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mCould not parse LLM output: Here is the updated function to handle special characters in email addresses:

```python
import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts all email addresses from a given string, including instances of 'name at email dot com' format.

    Args:
    text (str): The input string containing email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Regex pattern to match standard email format
    standard_email_pattern = r'[\w\.\+\-]+@[\w\.-]+\.[a-zA-Z]{2,}'
    # Regex pattern to match 'name at email dot com' format
    at_dot_pattern = r'([\w\.\+\-]+)\s+at\s+([\w\.-]+)\s+dot\s+([a-zA-Z]{2,})'

    # Find all matches for both patterns
    standard_emails = re.findall(standard_email_pattern, text)
    at_dot_emails = re.findall(at_dot_pattern, text)

    # Convert 'name at email dot com' format to standard email f



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "import re\nfrom typing import List\n\n# Function to test: extract_emails\n\ndef extract_emails(text: str) -> List[str]:\n    \"\"\"\n    Extracts all email addresses from a given string, including instances of 'name at email dot com'.\n    \"\"\"\n    # Regex pattern to match standard email format\n    standard_email_pattern = r'[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}'\n    # Regex pattern to match 'name at email dot com' format\n    at_dot_pattern = r'([\\w\\.-]+)\\s+at\\s+([\\w\\.-]+)\\s+dot\\s+([a-zA-Z]{2,})'\n\n    # Find all matches for both patterns\n    standard_emails = re.findall(standard_email_pattern, text)\n    at_dot_emails = re.findall(at_dot_pattern, text)\n\n    # Convert 'name at email dot com' format to standard email format\n    converted_emails = [f'{user}@{domain}.{tld}' for user, domain, tld in at_dot_emails]\n\n    # Combine both lists of emails\