In [58]:
import re

from typing import List

from langchain_openai import ChatOpenAI
from langchain_experimental.tools import PythonREPLTool
from langchain_core.prompts import ChatPromptTemplate

from extract import (
    extract_function_names,
    extract_function_by_name,
    extract_imports,
    is_valid_python,
)

# REPL tool used further down (in `failing_unit_tests`) to execute the
# generated function together with its unit tests.
python_repl = PythonREPLTool()

# Single chat model shared by every chain in this notebook (test writer,
# function writer and fixer). Temperature is set to 0 — presumably to keep the
# generations as repeatable as possible.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

In [9]:
# System prompt for the test-writing step: it asks the model to produce
# pytest-style unit tests (individual test functions, no implementation) for a
# regex-based function, starting from a natural-language description, and to
# answer with a single Python code block.
unit_test_system_prompt_string = """
You are a specialized assistant designed to create thorough unit tests for functions involving regular expressions (regex). \
Your task is to generate comprehensive, self-contained unit tests based on a natural language \
description of the function's intended behavior.

Instructions:
1. Carefully analyze the given description.
2. Create diverse unit tests that cover all aspects of the function's expected behavior, including:
   - Basic functionality
   - Edge cases
   - Corner cases
   - Boundary conditions
   - Invalid inputs
   - Empty inputs
   - Large inputs
   - Special characters
   - Unicode characters (if applicable)
   - Case sensitivity (if applicable)
   - Multiline inputs (if applicable)
   - Overlapping matches
   - Non-matching scenarios
3. If you need to use a unit test framework, use pytest.
4. Write individual test functions, not a test class.
5. Use descriptive test names that clearly indicate the specific scenario being tested.
6. Include detailed assertions that thoroughly check expected outcomes.
7. For regex functions, consider testing:
   - Pattern matching accuracy
   - Capturing groups
   - Non-capturing groups
   - Lookahead and lookbehind assertions
   - Greedy vs. non-greedy quantifiers
   - Character classes and negated character classes
   - Anchors (start, end, word boundaries)
   - Flags (e.g., re.IGNORECASE, re.MULTILINE, re.DOTALL)
   - Escape sequences
8. Do not write the actual function implementation or code to run the tests.
9. Write the name of the function you are testing as a comment at the top of the code block (do not write a place holder function).
10. Generate at least 15-20 diverse tests to ensure comprehensive coverage.

Provide your response as a Python code block containing only the unit tests. \
Ensure that the tests are varied and cover a wide range of scenarios to thoroughly validate the regex function.

Example input: "Create tests for a function that extracts all valid email addresses from a given text."

Your task is to generate appropriate unit tests based on similar natural language descriptions, \
focusing on comprehensive testing of regex functionality.
"""

# Pair the system prompt with a user turn; the `{input}` placeholder is filled
# in when the chain is invoked.
unit_test_prompt = ChatPromptTemplate.from_messages(
    [("system", unit_test_system_prompt_string), ("user", "{input}")]
)

# Pipe the prompt into the shared `llm` and request tests for the e-mail
# extraction task. This statement calls the model every time the cell runs.
unit_test_chain = unit_test_prompt | llm
unit_test_llm_message = unit_test_chain.invoke(
    {
        "input": "Extract emails (including instances of 'name at email dot com') from a text."
    }
)

In [10]:
# Show the raw model reply; the generated tests arrive wrapped in a
# ```python fenced block, which the next cell strips off.
print(unit_test_llm_message.content)

```python
# Function to be tested: extract_emails

import re
import pytest

def test_extract_emails_basic():
    text = "Contact us at support@example.com for more info."
    expected = ["support@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_multiple():
    text = "Emails: first@example.com, second@example.org, third@example.net"
    expected = ["first@example.com", "second@example.org", "third@example.net"]
    assert extract_emails(text) == expected

def test_extract_emails_with_name_at_email_dot_com():
    text = "Please email john at example dot com for details."
    expected = ["john@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_mixed_formats():
    text = "Reach out at jane.doe@example.com or jane at example dot com."
    expected = ["jane.doe@example.com", "jane@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_no_emails():
    text = "There are no email addresses here."
  

In [11]:
def extract_python_code(text):
    """Return the body of the first ```python fenced block found in ``text``.

    Args:
        text: Raw text (here: an LLM reply) expected to contain a Markdown
            code fence opened with ```python.

    Returns:
        The code between the opening fence line and the closing ```; the
        fence markers themselves are not included.

    Raises:
        ValueError: If ``text`` contains no ```python fenced block, or if
            ``is_valid_python`` explicitly reports the block as invalid.
    """
    # Tolerate trailing spaces/tabs and a CRLF line ending after the language
    # tag; the lazy `.*?` with DOTALL stops at the first closing fence.
    pattern = r"```python[ \t]*\r?\n(.*?)```"
    match = re.search(pattern, text, re.DOTALL)
    if match is None:
        # Previously this surfaced as an opaque IndexError from `findall(...)[0]`.
        raise ValueError("No ```python code block found in the given text.")

    code_block = match.group(1)

    # The validation result used to be discarded. `is_valid_python` lives in
    # the local `extract` module and its contract is not visible here
    # (presumably it returns a bool — TODO confirm), so only an explicit
    # False is treated as a failure; a helper that returns None or raises on
    # its own keeps behaving exactly as before.
    if is_valid_python(code_block) is False:
        raise ValueError("Extracted code block is not valid Python.")

    return code_block

# Strip the Markdown fence from the model reply so only the test source
# remains, then print it for inspection.
unit_test_code_as_string = extract_python_code(unit_test_llm_message.content)
print(unit_test_code_as_string)

# Function to be tested: extract_emails

import re
import pytest

def test_extract_emails_basic():
    text = "Contact us at support@example.com for more info."
    expected = ["support@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_multiple():
    text = "Emails: first@example.com, second@example.org, third@example.net"
    expected = ["first@example.com", "second@example.org", "third@example.net"]
    assert extract_emails(text) == expected

def test_extract_emails_with_name_at_email_dot_com():
    text = "Please email john at example dot com for details."
    expected = ["john@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_mixed_formats():
    text = "Reach out at jane.doe@example.com or jane at example dot com."
    expected = ["jane.doe@example.com", "jane@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_no_emails():
    text = "There are no email addresses here."
    expected

In [12]:
# List the names of the functions defined in the generated test code, using
# the helper imported from the local `extract` module.
unit_test_function_names = extract_function_names(code_string=unit_test_code_as_string)
unit_test_function_names

['test_extract_emails_basic',
 'test_extract_emails_multiple',
 'test_extract_emails_with_name_at_email_dot_com',
 'test_extract_emails_mixed_formats',
 'test_extract_emails_no_emails',
 'test_extract_emails_invalid_emails',
 'test_extract_emails_edge_case',
 'test_extract_emails_with_special_characters',
 'test_extract_emails_with_unicode',
 'test_extract_emails_case_insensitive',
 'test_extract_emails_multiline',
 'test_extract_emails_overlapping',
 'test_extract_emails_with_lookahead',
 'test_extract_emails_with_lookbehind',
 'test_extract_emails_greedy_vs_nongreedy',
 'test_extract_emails_with_anchors',
 'test_extract_emails_with_flags',
 'test_extract_emails_escape_sequences',
 'test_extract_emails_large_input',
 'test_extract_emails_empty_input']

In [13]:
# Collect the import statements that appear in the generated test code
# (helper from the local `extract` module).
unit_test_imports = extract_imports(unit_test_code_as_string)
unit_test_imports

['import re', 'import pytest']

In [14]:
# System prompt for the implementation step: the model is given pytest unit
# tests and asked to answer with a single Python function (type hints,
# docstring, no inline comments) that makes all of them pass.
function_writer_system_prompt_string = """
You are a Python code generation assistant. Your task is to create a Python function that satisfies all the provided pytest unit tests. Follow these guidelines:

1. Analyze the given unit tests carefully to understand the function's required behavior.
2. Write a single Python function that passes all the provided tests.
3. Use type hints for parameters and return values.
4. Include a clear and concise docstring explaining the function's purpose and parameters.
5. Follow Python best practices and PEP 8 style guidelines.
6. Do not include comments within the function body.
7. Ensure the function handles all edge cases and scenarios covered in the tests.
8. If the tests imply the use of regular expressions, import the 're' module and use it appropriately.
9. Provide only the function definition and its implementation, nothing else.

Your response should be a Python code block containing only the requested function.
"""

# Same prompt shape as the test-writing step: system prompt plus a user turn
# whose `{input}` placeholder is filled in at invoke time.
function_writer_prompt = ChatPromptTemplate.from_messages(
    [("system", function_writer_system_prompt_string), ("user", "{input}")]
)

# Feed the generated test source to the model as the user message. This
# statement calls the model every time the cell runs.
function_writer_chain = function_writer_prompt | llm
function_writer_llm_message = function_writer_chain.invoke(
    {
        "input": unit_test_code_as_string
    }
)

In [15]:
# Pull the generated implementation out of the fenced block in the model
# reply and print it for inspection.
target_function_code_as_string = extract_python_code(function_writer_llm_message.content)
print(target_function_code_as_string)

import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports both standard email formats
    and the 'name at domain dot com' format.

    Args:
        text (str): The input text containing potential email addresses.

    Returns:
        List[str]: A list of extracted email addresses.
    """
    standard_emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
    converted_emails = re.findall(r'\b([A-Za-z0-9._%+-]+) at ([A-Za-z0-9.-]+) dot ([A-Z|a-z]{2,})\b', text)
    converted_emails = [f"{user}@{domain}.{tld}" for user, domain, tld in converted_emails]
    return standard_emails + converted_emails



In [83]:
# Concatenate the implementation and its tests into one source string.
# The implementation is placed first on purpose: `failing_unit_tests` treats
# the first function found in this string as the target function and every
# later one as a unit test.
target_function_and_unit_test_code_as_string = f"""
{target_function_code_as_string}

{unit_test_code_as_string}

"""
print(target_function_and_unit_test_code_as_string)


import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports both standard email formats
    and the 'name at domain dot com' format.

    Args:
        text (str): The input text containing potential email addresses.

    Returns:
        List[str]: A list of extracted email addresses.
    """
    standard_emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
    converted_emails = re.findall(r'\b([A-Za-z0-9._%+-]+) at ([A-Za-z0-9.-]+) dot ([A-Z|a-z]{2,})\b', text)
    converted_emails = [f"{user}@{domain}.{tld}" for user, domain, tld in converted_emails]
    return standard_emails + converted_emails


# Function to be tested: extract_emails

import re
import pytest

def test_extract_emails_basic():
    text = "Contact us at support@example.com for more info."
    expected = ["support@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_multip

In [72]:
def failing_unit_tests(
    target_function_and_unit_test_code_as_string: str,
) -> List[str]:
    """
    Run every unit test found in the given source and report the ones that
    did not pass.

    Args:
        target_function_and_unit_test_code_as_string: Python source holding
            the function under test followed by its unit-test functions. The
            first function defined in the source is taken to be the function
            under test; every later function is called as a zero-argument test.

    Returns:
        One string per failing test, formatted as ``"<test name>(): <output>"``
        where ``<output>`` is whatever ``python_repl.run`` returned for that
        test (normally the printed "Assertion failed: ..." / "Error: ..."
        line). If all tests pass, an empty list is returned.

    NOTE(security): the source is executed through ``python_repl`` as-is. In
    this notebook it is model-generated code, so only run it in an environment
    where executing untrusted code is acceptable.
    """

    # The first function is the target function
    unit_test_names = extract_function_names(
        target_function_and_unit_test_code_as_string
    )[1:]

    test_function_pass_status_and_message = []

    for t in unit_test_names:
        # Build the script line by line so that nothing is indented by
        # accident: the previous triple-quoted f-string put 8 spaces in front
        # of the first line of the interpolated source, which is only valid
        # Python if that first line is blank or the REPL tool strips leading
        # whitespace before executing.
        code_to_run = (
            f"{target_function_and_unit_test_code_as_string}\n"
            "\n"
            "try:\n"
            f"    {t}()\n"
            '    message = "test passed"\n'
            "except AssertionError as e:\n"
            '    message = "Assertion failed: " + str(e)\n'
            "except Exception as e:\n"
            '    message = "Error: " + str(e)\n'
            "\n"
            "print(message)\n"
        )

        # Execute the code
        output = python_repl.run(code_to_run)
        # Drop the single newline that `print` appends, but only when it is
        # really there, so output without a trailing newline is not truncated.
        if output.endswith("\n"):
            output = output[:-1]
        test_function_pass_status_and_message.append(t + "(): " + output)

    return [
        i
        for i in test_function_pass_status_and_message
        if not i.endswith("test passed")
    ]

In [84]:
# Run the generated tests against the generated implementation and keep only
# the failures (an empty list would mean every test passed).
failing_unit_tests_and_error_message = failing_unit_tests(target_function_and_unit_test_code_as_string)
failing_unit_tests_and_error_message


['test_extract_emails_with_unicode(): Assertion failed: ',
 'test_extract_emails_overlapping(): Assertion failed: ']

In [91]:
def fixer_llm(
    llm,
    target_function_and_unit_test_code_as_string: str,
    failing_unit_tests_and_error_message: List[str],
) -> str:
    """
    Ask the model to repair a function and/or its unit tests.

    Args:
        llm: Chat model (or any runnable) that the prompt is piped into.
        target_function_and_unit_test_code_as_string: Source containing the
            function under test followed by its unit tests.
        failing_unit_tests_and_error_message: One entry per failing test, as
            produced by ``failing_unit_tests``.

    Returns:
        The rewritten function and unit tests, extracted from the Python code
        block in the model reply. When there are no failing tests the input
        source is returned unchanged and the model is not called.
    """
    # Nothing failed, so there is nothing to fix: skip the model call rather
    # than risk an unrequested rewrite of code that already passes.
    if not failing_unit_tests_and_error_message:
        return target_function_and_unit_test_code_as_string

    # Fixed wording relative to the earlier prompt: "Instructions:" and
    # "Do not write any code to run the tests."
    code_fixer_system_prompt_string = """
    You are a Python code generation assistant.
    You will be given a function and many unit tests to test that function.
    You will be told which unit tests have failed.

    Instructions:

    1. Consider carefully the function and unit tests to determine what went wrong. The error could be caused by a bug in the function, or a bug in the unit test.
    2. Rewrite the function and all the unit tests correctly, so that all the unit tests will pass.
    3. Do not write any code to run the tests.

    Your response should be a single Python code block containing only the requested function and unit tests.
    """

    failed_unit_tests = "\n".join(failing_unit_tests_and_error_message)

    # Assemble the user message without extra indentation (leading whitespace
    # is significant in the embedded Python source) and label the failure
    # list so it cannot be mistaken for part of the code.
    code_fixer_input_string = (
        f"{target_function_and_unit_test_code_as_string}\n"
        "\n"
        "Failing unit tests:\n"
        f"{failed_unit_tests}\n"
    )

    # The source is passed as the value of `{input}`, so braces inside the
    # code are not interpreted as template placeholders.
    code_fixer_prompt = ChatPromptTemplate.from_messages(
        [("system", code_fixer_system_prompt_string), ("user", "{input}")]
    )

    code_fixer_chain = code_fixer_prompt | llm
    code_fixer_llm_message = code_fixer_chain.invoke({"input": code_fixer_input_string})
    code_as_string = extract_python_code(code_fixer_llm_message.content)
    return code_as_string

In [92]:
# Hand the combined source and the list of failing tests to the fixer model
# and print the rewritten function + tests it returns.
updated_target_function_and_unit_test_code_as_string = fixer_llm(
    llm=llm,
    target_function_and_unit_test_code_as_string=target_function_and_unit_test_code_as_string,
    failing_unit_tests_and_error_message=failing_unit_tests_and_error_message,
)
print(updated_target_function_and_unit_test_code_as_string)

import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports both standard email formats
    and the 'name at domain dot com' format.

    Args:
        text (str): The input text containing potential email addresses.

    Returns:
        List[str]: A list of extracted email addresses.
    """
    standard_emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
    converted_emails = re.findall(r'\b([A-Za-z0-9._%+-]+) at ([A-Za-z0-9.-]+) dot ([A-Z|a-z]{2,})\b', text)
    converted_emails = [f"{user}@{domain}.{tld}" for user, domain, tld in converted_emails]
    return standard_emails + converted_emails


# Function to be tested: extract_emails

import pytest

def test_extract_emails_basic():
    text = "Contact us at support@example.com for more info."
    expected = ["support@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_multiple():
    t

In [93]:
# Re-run the tests against the fixer's rewrite to see whether the failures
# are gone. NOTE: this overwrites the failure list computed for the original
# code in the earlier cell, so re-running the fixer cell after this one feeds
# it these newer results instead.
failing_unit_tests_and_error_message = failing_unit_tests(updated_target_function_and_unit_test_code_as_string)
failing_unit_tests_and_error_message


['test_extract_emails_with_unicode(): Assertion failed: ',
 'test_extract_emails_overlapping(): Assertion failed: ']