In [19]:
from langchain_openai import ChatOpenAI

from extract import (
    extract_function_names,
    extract_function_by_name,
    extract_imports,
)

from llm_calls import (
    llm_generate_unit_tests,
    llm_generate_function_from_unit_tests,
    llm_find_bug,
    llm_fix_bug,
)

from run_tests import (
    find_first_failing_unit_test,
    run_unit_test,
)

from function_code_with_unit_tests import FunctionCodeWithUnitTests

llm = ChatOpenAI(temperature=0, model="gpt-4o")

# query = "Extract emails (including instances of 'name at email dot com') from a text."
query = "Swap the words 'cat' and 'dog' in the text."

# query = "Please replace all instances of 'Alice' with 'Bob', 'Bob' with 'Carol', and 'Carol' with 'Alice'. \
#     Whatever the case in the input string, the output should all be lower case."

In [20]:
unit_test_code_as_string = llm_generate_unit_tests(llm, query)
print(unit_test_code_as_string)

# swap_cat_and_dog

import re
import pytest

def test_swap_cat_and_dog_basic():
    text = "I have a cat and a dog."
    expected = "I have a dog and a cat."
    result = swap_cat_and_dog(text)
    assert result == expected, f"Expected '{expected}', but got '{result}'"

def test_swap_cat_and_dog_multiple_occurrences():
    text = "The cat chased the dog, and the dog chased the cat."
    expected = "The dog chased the cat, and the cat chased the dog."
    result = swap_cat_and_dog(text)
    assert result == expected, f"Expected '{expected}', but got '{result}'"

def test_swap_cat_and_dog_no_occurrences():
    text = "There are no pets here."
    expected = "There are no pets here."
    result = swap_cat_and_dog(text)
    assert result == expected, f"Expected '{expected}', but got '{result}'"

def test_swap_cat_and_dog_only_cat():
    text = "The cat is sleeping."
    expected = "The dog is sleeping."
    result = swap_cat_and_dog(text)
    assert result == expected, f"Expected '{expecte

In [21]:
unit_test_function_names = extract_function_names(code_string=unit_test_code_as_string)
unit_test_function_names

['test_swap_cat_and_dog_basic',
 'test_swap_cat_and_dog_multiple_occurrences',
 'test_swap_cat_and_dog_no_occurrences',
 'test_swap_cat_and_dog_only_cat',
 'test_swap_cat_and_dog_only_dog',
 'test_swap_cat_and_dog_case_sensitivity',
 'test_swap_cat_and_dog_special_characters',
 'test_swap_cat_and_dog_unicode_characters',
 'test_swap_cat_and_dog_empty_string',
 'test_swap_cat_and_dog_large_input',
 'test_swap_cat_and_dog_multiline_input',
 'test_swap_cat_and_dog_word_boundaries',
 'test_swap_cat_and_dog_lookahead_lookbehind',
 'test_swap_cat_and_dog_non_matching_scenario',
 'test_swap_cat_and_dog_escape_sequences']

In [22]:
unit_test_imports = extract_imports(unit_test_code_as_string)
unit_test_imports

['import re', 'import pytest']

In [23]:
target_function_code_as_string = llm_generate_function_from_unit_tests(
    llm=llm,
    unit_test_code_as_string=unit_test_code_as_string,
    query=query,
)
print(target_function_code_as_string)

import re

def swap_cat_and_dog(text: str) -> str:
    """
    Swap the words 'cat' and 'dog' in the given text.

    Parameters:
    text (str): The input text where 'cat' and 'dog' need to be swapped.

    Returns:
    str: The text with 'cat' and 'dog' swapped.
    """
    return re.sub(r'\bcat\b|\bdog\b', lambda match: 'dog' if match.group() == 'cat' else 'cat', text)



In [24]:
function_code_with_unit_tests = FunctionCodeWithUnitTests(
    function_code=[target_function_code_as_string],
    unit_tests={
        name: [
            extract_function_by_name(
                code_string=unit_test_code_as_string, function_name=name
            )
        ]
        for name in unit_test_function_names
    },
    unit_test_imports="\n".join(unit_test_imports)
)


In [26]:
for i in range(10):

    first_failing_unit_test_message = find_first_failing_unit_test(
        function_code_with_unit_tests=function_code_with_unit_tests
    )

    first_failing_unit_test_message.print_message_details()

    if first_failing_unit_test_message.test_pass_status:
        print("All tests are passing")
        break

    print("Ask llm if the function or unit test needs to be updated.")
    bug_report_dict = llm_find_bug(
        llm=llm,
        query=query,
        function_code_with_unit_tests=function_code_with_unit_tests,
        first_failing_unit_test_message=first_failing_unit_test_message,
    )

    if bug_report_dict["Likely Bug Location"] == "Unit Test":
        
        print("The llm said the unit test should be updated.\nAsk the llm to fix the unit test.\n")
        updated_unit_test_code = llm_fix_bug(
            llm=llm,
            query=query,
            bug_report_dict=bug_report_dict,
            function_code_with_unit_tests=function_code_with_unit_tests,
            first_failing_unit_test_message=first_failing_unit_test_message,
        )
        
        print("Unit test before update;")
        print(function_code_with_unit_tests.get_unit_test_code(first_failing_unit_test_message.unit_test_name) + "\n")
        
        function_code_with_unit_tests.replace_unit_test(
            unit_test_name=first_failing_unit_test_message.unit_test_name,
            new_unit_test_code=updated_unit_test_code,
        )
        
        print("Unit test after update;")
        print(function_code_with_unit_tests.get_unit_test_code(first_failing_unit_test_message.unit_test_name) + "\n")

    elif bug_report_dict["Likely Bug Location"] == "Function":
        
        print("The llm said the function should be updated.\nAsk the llm to fix the function.\n")
        updated_function_code = llm_fix_bug(
            llm=llm,
            query=query,
            bug_report_dict=bug_report_dict,
            function_code_with_unit_tests=function_code_with_unit_tests,
            first_failing_unit_test_message=first_failing_unit_test_message,
        )
        
        print("Function before update;")
        print(function_code_with_unit_tests.get_function_code() + "\n")
        
        function_code_with_unit_tests.replace_function_code(
            new_function_code=updated_function_code,
        )
        
        print("Function after update;")
        print(function_code_with_unit_tests.get_function_code() + "\n")
        
    else:
        raise ValueError("bug_report_dict keys not as expected")

    unit_test_message = run_unit_test(
        function_code_with_unit_tests=function_code_with_unit_tests,
        unit_test_name=first_failing_unit_test_message.unit_test_name,
    )
    
    if unit_test_message.test_pass_status:
    
        print(f"Unit test {first_failing_unit_test_message.unit_test_name}() now passing.")
    
    else:
    
        unit_test_code_to_delete = function_code_with_unit_tests.get_unit_test_code(
            first_failing_unit_test_message.unit_test_name
        )
        print(
            f"Unable to make this unit test pass\n{unit_test_code_to_delete}It will be deleted\n"
        )
        function_code_with_unit_tests.delete_unit_test_code(
            first_failing_unit_test_message.unit_test_name
        )
        if bug_report_dict["Likely Bug Location"] == "Function":
            print("Roll back update to function.")
            function_code_with_unit_tests.delete_function_changes()


if first_failing_unit_test_message.test_pass_status == False:
    print("Some tests are still not passing")

unit test test_swap_cat_and_dog_basic() is currently passing.
unit test test_swap_cat_and_dog_multiple_occurrences() is currently passing.
unit test test_swap_cat_and_dog_no_occurrences() is currently passing.
unit test test_swap_cat_and_dog_only_cat() is currently passing.
unit test test_swap_cat_and_dog_only_dog() is currently passing.
unit test test_swap_cat_and_dog_case_sensitivity() is currently passing.
unit test test_swap_cat_and_dog_special_characters() is currently passing.
unit test test_swap_cat_and_dog_unicode_characters() is currently passing.
unit test test_swap_cat_and_dog_empty_string() is currently passing.
unit test test_swap_cat_and_dog_large_input() is currently passing.
unit test test_swap_cat_and_dog_multiline_input() is currently passing.
unit test test_swap_cat_and_dog_word_boundaries() is currently passing.

test_pass_status: False
unit_test_name: test_swap_cat_and_dog_lookahead_lookbehind
output: Assertion failed: Expected 'dogcat', but got 'catdog'

Ask llm i

In [18]:
import re
def test_extract_cat_and_dog_basic():
    text = 'I have a cat and a dog.'
    pattern = '\\b(cat|dog)\\b'
    matches = re.findall(pattern, text)
    assert matches == ['cat', 'dog'
        ], f"Expected ['cat', 'dog'], but got {matches}"



test_extract_cat_and_dog_basic()

In [8]:
print(function_code_with_unit_tests.get_function_code())

import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extract emails from a given text, including instances of 'name at email dot com'.

    Parameters:
    text (str): The input text from which to extract emails.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Convert 'name at email dot com' to 'name@email.com'
    text = re.sub(r'(\S+) at (\S+) dot (\S+)', r'\1@\2.\3', text)
    
    # Regular expression to match standard email formats
    email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')
    
    return email_pattern.findall(text)



In [9]:
for unit_test_name in function_code_with_unit_tests.unit_tests:
    print(
        function_code_with_unit_tests.get_unit_test_code(unit_test_name=unit_test_name)
    )

def test_extract_emails_basic():
    text = 'Contact us at support@example.com for more info.'
    expected = ['support@example.com']
    assert extract_emails(text) == expected

def test_extract_emails_multiple():
    text = 'Emails: first@example.com, second@example.org, third@example.net'
    expected = ['first@example.com', 'second@example.org', 'third@example.net']
    assert extract_emails(text) == expected

def test_extract_emails_with_name_at_email_dot_com():
    text = 'Reach out at john.doe at example dot com for details.'
    expected = ['john.doe@example.com']
    assert extract_emails(text) == expected

def test_extract_emails_mixed_formats():
    text = 'Emails: jane.doe@example.com, jane.doe at example dot com'
    expected = ['jane.doe@example.com', 'jane.doe@example.com']
    assert extract_emails(text) == expected

def test_extract_emails_no_emails():
    text = 'There are no emails in this text.'
    expected = []
    assert extract_emails(text) == expected

def test

Before the fix
```
import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports standard email formats
    and some common obfuscated formats like 'name at domain dot com'.

    Parameters:
    text (str): The input text from which to extract email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Regular expression to match standard email formats
    email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')
    # Regular expression to match obfuscated email formats like 'name at domain dot com'
    obfuscated_pattern = re.compile(r'\b([A-Za-z0-9._%+-]+)\s+at\s+([A-Za-z0-9.-]+)\s+dot\s+([A-Z|a-z]{2,})\b')

    # Find all standard email addresses
    emails = email_pattern.findall(text)
    
    # Find all obfuscated email addresses and convert them to standard format
    obfuscated_emails = obfuscated_pattern.findall(text)
    for obfuscated in obfuscated_emails:
        emails.append(f"{obfuscated[0]}@{obfuscated[1]}.{obfuscated[2]}")

    return emails
```

After the fix
```
import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports standard email formats
    and some common obfuscated formats like 'name at domain dot com'.

    Parameters:
    text (str): The input text from which to extract email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Regular expression to match standard email formats, including Unicode characters
    email_pattern = re.compile(r'\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{1,}\b', re.UNICODE)
    # Regular expression to match obfuscated email formats like 'name at domain dot com'
    obfuscated_pattern = re.compile(r'\b([\w.%+-]+)\s+at\s+([A-Za-z0-9.-]+)\s+dot\s+([A-Z|a-z]{1,})\b', re.UNICODE)

    # Find all standard email addresses
    emails = email_pattern.findall(text)
    
    # Find all obfuscated email addresses and convert them to standard format
    obfuscated_emails = obfuscated_pattern.findall(text)
    for obfuscated in obfuscated_emails:
        emails.append(f"{obfuscated[0]}@{obfuscated[1]}.{obfuscated[2]}")

    return emails
```


Before the fix
```
def test_extract_emails_overlapping():
    text = "Overlapping: user@example.comuser@example.com"
    expected = ["user@example.com", "user@example.com"]
    assert extract_emails(text) == expected
```

After the fix
```
def test_extract_emails_overlapping():
    text = 'Overlapping: user@example.com, user@example.com'
    expected = ['user@example.com', 'user@example.com']
    assert extract_emails(text) == expected
```

In [10]:
import difflib

def string_diff(string1, string2):
    d = difflib.Differ()
    diff = list(d.compare(string1.splitlines(), string2.splitlines()))
    return '\n'.join(diff)

# Example usage
string1 = '''
def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports standard email formats
    and some common obfuscated formats like 'name at domain dot com'.

    Parameters:
    text (str): The input text from which to extract email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Regular expression to match standard email formats
    email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')
    # Regular expression to match obfuscated email formats like 'name at domain dot com'
    obfuscated_pattern = re.compile(r'\b([A-Za-z0-9._%+-]+)\s+at\s+([A-Za-z0-9.-]+)\s+dot\s+([A-Z|a-z]{2,})\b')

    # Find all standard email addresses
    emails = email_pattern.findall(text)
    
    # Find all obfuscated email addresses and convert them to standard format
    obfuscated_emails = obfuscated_pattern.findall(text)
    for obfuscated in obfuscated_emails:
        emails.append(f"{obfuscated[0]}@{obfuscated[1]}.{obfuscated[2]}")

    return emails
'''

string2 = '''
def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports standard email formats
    and some common obfuscated formats like 'name at domain dot com'.

    Parameters:
    text (str): The input text from which to extract email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Regular expression to match standard email formats, including Unicode characters
    email_pattern = re.compile(r'\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{1,}\b', re.UNICODE)
    # Regular expression to match obfuscated email formats like 'name at domain dot com'
    obfuscated_pattern = re.compile(r'\b([\w.%+-]+)\s+at\s+([A-Za-z0-9.-]+)\s+dot\s+([A-Z|a-z]{1,})\b', re.UNICODE)

    # Find all standard email addresses
    emails = email_pattern.findall(text)
    
    # Find all obfuscated email addresses and convert them to standard format
    obfuscated_emails = obfuscated_pattern.findall(text)
    for obfuscated in obfuscated_emails:
        emails.append(f"{obfuscated[0]}@{obfuscated[1]}.{obfuscated[2]}")

    return emails
'''

print(string_diff(string1, string2))

  
  def extract_emails(text: str) -> List[str]:
      """
      Extracts email addresses from the given text. Supports standard email formats
      and some common obfuscated formats like 'name at domain dot com'.
  
      Parameters:
      text (str): The input text from which to extract email addresses.
  
      Returns:
      List[str]: A list of extracted email addresses.
      """
-     # Regular expression to match standard email formats
+     # Regular expression to match standard email formats, including Unicode characters
?                                                         ++++++++++++++++++++++++++++++

-     email_pattern = re.compile(r[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,')
?                                    ^^^^^^^^^ -                                ^

+     email_pattern = re.compile(r[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{1,', re.UNICODE)
?                                    ^^                                 ^    ++++++++++++

      # Regular expression t