In [1]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

from extract import (
    extract_function_names,
    extract_function_by_name,
    extract_imports,
)

from output_parser import (
    PythonCodeExtractorParser,
)

from llm_calls import (
    llm_generate_unit_tests,
    llm_generate_function_from_unit_tests,
    llm_find_bug,
    llm_fix_bug,
)

from run_tests import (
    find_first_failing_unit_test,
    FindFirstFailingUnitTestFunctionMessage,
)

from function_code_with_unit_tests import FunctionCodeWithUnitTests

llm = ChatOpenAI(temperature=0, model="gpt-4o")

query = "Extract emails (including instances of 'name at email dot com') from a text."

In [2]:
unit_test_code_as_string = llm_generate_unit_tests(llm, query)
print(unit_test_code_as_string)

# Function to be tested: extract_emails

import re
import pytest

def test_extract_emails_basic():
    text = "Contact us at support@example.com for more info."
    expected = ["support@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_multiple():
    text = "Emails: first@example.com, second@example.org, third@example.net"
    expected = ["first@example.com", "second@example.org", "third@example.net"]
    assert extract_emails(text) == expected

def test_extract_emails_with_name_at_email_dot_com():
    text = "Reach out at name at email dot com or at support@example.com"
    expected = ["name at email dot com", "support@example.com"]
    assert extract_emails(text) == expected

def test_extract_emails_mixed_formats():
    text = "Emails: first@example.com, name at email dot com, second@example.org"
    expected = ["first@example.com", "name at email dot com", "second@example.org"]
    assert extract_emails(text) == expected

def test_extract_emails_no_

In [3]:
unit_test_function_names = extract_function_names(code_string=unit_test_code_as_string)
unit_test_function_names

['test_extract_emails_basic',
 'test_extract_emails_multiple',
 'test_extract_emails_with_name_at_email_dot_com',
 'test_extract_emails_mixed_formats',
 'test_extract_emails_no_emails',
 'test_extract_emails_empty_string',
 'test_extract_emails_special_characters',
 'test_extract_emails_unicode_characters',
 'test_extract_emails_case_sensitivity',
 'test_extract_emails_multiline',
 'test_extract_emails_overlapping_matches',
 'test_extract_emails_non_matching_scenarios',
 'test_extract_emails_with_subdomains',
 'test_extract_emails_with_hyphens',
 'test_extract_emails_with_plus_signs',
 'test_extract_emails_with_numbers',
 'test_extract_emails_with_long_text',
 'test_extract_emails_with_escaped_characters',
 'test_extract_emails_with_lookahead',
 'test_extract_emails_with_lookbehind']

In [4]:
unit_test_imports = extract_imports(unit_test_code_as_string)
unit_test_imports

['import re', 'import pytest']

In [5]:
target_function_code_as_string = llm_generate_function_from_unit_tests(
    llm, unit_test_code_as_string
)
print(target_function_code_as_string)

import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. The function identifies both standard email formats
    and the format 'name at email dot com'.

    Parameters:
    text (str): The input text from which to extract email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    email_pattern = re.compile(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b|'
        r'\b[A-Za-z0-9._%+-]+ at [A-Za-z0-9.-]+ dot [A-Z|a-z]{2,}\b'
    )
    return email_pattern.findall(text)



In [6]:
function_code_with_unit_tests = FunctionCodeWithUnitTests(
    function_code=[target_function_code_as_string],
    unit_tests={
        name: [
            extract_function_by_name(
                code_string=unit_test_code_as_string, function_name=name
            )
        ]
        for name in unit_test_function_names
    },
    unit_test_imports="\n".join(unit_test_imports)
)


In [7]:
first_failing_unit_test_message = find_first_failing_unit_test(
    function_code_with_unit_tests=function_code_with_unit_tests
)
first_failing_unit_test_message.print_message_details()

Python REPL can execute arbitrary code. Use with caution.


all_tests_pass: False
unit_test_name: test_extract_emails_overlapping_matches
output: Assertion failed: 


In [8]:
bug_report_dict = llm_find_bug(
    llm=llm,
    function_code_with_unit_tests=function_code_with_unit_tests,
    first_failing_unit_test_message=first_failing_unit_test_message,
)
bug_report_dict

{'Likely Bug Location': 'Unit Test',
 'Explanation': "The unit test `test_extract_emails_overlapping_matches` is testing for overlapping email matches, which is not a typical scenario for email extraction. The expected result `['first@example.com', 'first@example.com']` suggests that the function should identify overlapping email addresses, but standard email extraction should not consider overlapping matches as separate entities. The function `extract_emails` is correctly designed to find distinct email addresses based on the given pattern. The test case should be revised to test for distinct email addresses without overlap."}

In [10]:
updated_unit_test = llm_fix_bug(
    llm=llm,
    bug_report_dict=bug_report_dict,
    function_code_with_unit_tests=function_code_with_unit_tests,
    first_failing_unit_test_message=first_failing_unit_test_message,
)
print(updated_unit_test)

def test_extract_emails_overlapping_matches():
    text = 'Emails: first@example.comfirst@example.com'
    expected = ['first@example.com']
    assert extract_emails(text) == expected

