In [1]:
from langchain_openai import ChatOpenAI

from extract import (
    extract_function_names,
    extract_function_by_name,
    extract_imports,
)

from llm_calls import (
    llm_generate_unit_tests,
    llm_generate_function_from_unit_tests,
    llm_find_bug,
    llm_fix_bug,
)

from run_tests import (
    find_first_failing_unit_test,
)

from function_code_with_unit_tests import FunctionCodeWithUnitTests

llm = ChatOpenAI(temperature=0, model="gpt-4o")

query = "Please replace all instances of 'Alice' with 'Bob', 'Bob' with 'Carol', and 'Carol' with 'Alice'. \
    Ignore case, except for Carol's name, which should only be changed if it's capitalised."

In [2]:
unit_test_code_as_string = llm_generate_unit_tests(llm, query)
print(unit_test_code_as_string)

# Function to test: replace_names(text)

import re
import pytest

def test_replace_names_basic_functionality():
    text = "Alice and Bob went to see Carol."
    expected = "Bob and Carol went to see Alice."
    assert replace_names(text) == expected

def test_replace_names_ignore_case_for_alice_and_bob():
    text = "alice and bob went to see carol."
    expected = "Bob and Carol went to see carol."
    assert replace_names(text) == expected

def test_replace_names_case_sensitive_for_carol():
    text = "Alice and Bob went to see carol."
    expected = "Bob and Carol went to see carol."
    assert replace_names(text) == expected

def test_replace_names_mixed_case():
    text = "aLiCe and bOb went to see Carol."
    expected = "Bob and Carol went to see Alice."
    assert replace_names(text) == expected

def test_replace_names_multiple_instances():
    text = "Alice, Bob, and Carol met Alice, Bob, and Carol."
    expected = "Bob, Carol, and Alice met Bob, Carol, and Alice."
    assert 

In [3]:
unit_test_function_names = extract_function_names(code_string=unit_test_code_as_string)
unit_test_function_names

['test_replace_names_basic_functionality',
 'test_replace_names_ignore_case_for_alice_and_bob',
 'test_replace_names_case_sensitive_for_carol',
 'test_replace_names_mixed_case',
 'test_replace_names_multiple_instances',
 'test_replace_names_no_match',
 'test_replace_names_empty_input',
 'test_replace_names_special_characters',
 'test_replace_names_unicode_characters',
 'test_replace_names_large_input',
 'test_replace_names_multiline_input',
 'test_replace_names_non_matching_scenarios']

In [4]:
unit_test_imports = extract_imports(unit_test_code_as_string)
unit_test_imports

['import re', 'import pytest']

In [5]:
target_function_code_as_string = llm_generate_function_from_unit_tests(
    llm, unit_test_code_as_string
)
print(target_function_code_as_string)

import re

def replace_names(text: str) -> str:
    """
    Replaces occurrences of 'Alice' with 'Bob', 'Bob' with 'Carol', and 'Carol' with 'Alice' in the given text.
    The replacement is case-insensitive for 'Alice' and 'Bob', but case-sensitive for 'Carol'.

    Parameters:
    text (str): The input text where names need to be replaced.

    Returns:
    str: The text with names replaced accordingly.
    """
    def replace(match):
        word = match.group(0)
        if re.match(r'(?i)alice', word):
            return 'Bob'
        elif re.match(r'(?i)bob', word):
            return 'Carol'
        elif word == 'Carol':
            return 'Alice'
        return word

    return re.sub(r'\b(Alice|Bob|Carol|alice|bob|carol)\b', replace, text)



In [7]:
function_code_with_unit_tests = FunctionCodeWithUnitTests(
    function_code=[target_function_code_as_string],
    unit_tests={
        name: [
            extract_function_by_name(
                code_string=unit_test_code_as_string, function_name=name
            )
        ]
        for name in unit_test_function_names
    },
    unit_test_imports="\n".join(unit_test_imports)
)


In [8]:
latest_unit_test_to_fix = ""

for i in range(10):

    first_failing_unit_test_message = find_first_failing_unit_test(
        function_code_with_unit_tests=function_code_with_unit_tests
    )
    print("")
    first_failing_unit_test_message.print_message_details()
    print("")

    if first_failing_unit_test_message.all_tests_pass:
        break

    # If it causes an error two times in a row, delete the unit test
    if latest_unit_test_to_fix == first_failing_unit_test_message.unit_test_name:
        unit_test_code_to_delete = function_code_with_unit_tests.get_unit_test_code(
            first_failing_unit_test_message.unit_test_name
        )
        print(
            f"Unable to fix unit test:\n{unit_test_code_to_delete}It will be deleted\n"
        )
        function_code_with_unit_tests.delete_unit_test_code(
            first_failing_unit_test_message.unit_test_name
        )
        continue

    latest_unit_test_to_fix = first_failing_unit_test_message.unit_test_name

    print("llm find bug call")
    bug_report_dict = llm_find_bug(
        llm=llm,
        function_code_with_unit_tests=function_code_with_unit_tests,
        first_failing_unit_test_message=first_failing_unit_test_message,
    )

    if bug_report_dict["Likely Bug Location"] == "Unit Test":
        print("llm fix unit test bug call")
        updated_unit_test_code = llm_fix_bug(
            llm=llm,
            bug_report_dict=bug_report_dict,
            function_code_with_unit_tests=function_code_with_unit_tests,
            first_failing_unit_test_message=first_failing_unit_test_message,
        )
        function_code_with_unit_tests.replace_unit_test(
            unit_test_name=first_failing_unit_test_message.unit_test_name,
            new_unit_test_code=updated_unit_test_code,
        )
    elif bug_report_dict["Likely Bug Location"] == "Function":
        print("llm fix function bug call")
        updated_function_code = llm_fix_bug(
            llm=llm,
            bug_report_dict=bug_report_dict,
            function_code_with_unit_tests=function_code_with_unit_tests,
            first_failing_unit_test_message=first_failing_unit_test_message,
        )
        function_code_with_unit_tests.replace_function_code(
            new_function_code=updated_function_code,
        )
    else:
        raise ValueError("bug_report_dict keys not as expected")


print("All tests are passing")

Python REPL can execute arbitrary code. Use with caution.



all_tests_pass: False
unit_test_name: test_replace_names_mixed_case
output: Assertion failed: 

llm find bug call
llm fix function bug call

all_tests_pass: False
unit_test_name: test_replace_names_ignore_case_for_alice_and_bob
output: Assertion failed: 

llm find bug call
llm fix unit test bug call

all_tests_pass: False
unit_test_name: test_replace_names_case_sensitive_for_carol
output: Assertion failed: 

llm find bug call
llm fix unit test bug call

all_tests_pass: False
unit_test_name: test_replace_names_mixed_case
output: Assertion failed: 

llm find bug call
llm fix function bug call

all_tests_pass: False
unit_test_name: test_replace_names_ignore_case_for_alice_and_bob
output: Assertion failed: 

llm find bug call
llm fix unit test bug call

all_tests_pass: False
unit_test_name: test_replace_names_ignore_case_for_alice_and_bob
output: Assertion failed: 

Unable to fix unit test:
def test_replace_names_ignore_case_for_alice_and_bob():
    text = 'alice and bob went to see carol

In [8]:
print(function_code_with_unit_tests.get_function_code())

import re

def replace_names(text: str) -> str:
    """
    Replaces occurrences of 'Fred' with 'George', 'George' with 'Percy', and 'Percy' with 'Fred' in the given text.
    The replacement is case-insensitive for 'Fred' and 'George', but case-sensitive for 'Percy'.

    Parameters:
    text (str): The input text where the replacements will be made.

    Returns:
    str: The text with the names replaced accordingly.
    """
    # First, replace 'Fred' with a temporary placeholder
    text = re.sub(r'\b(Fred|fred)\b', 'TEMP_FRED', text)
    # Then, replace 'George' with a different temporary placeholder
    text = re.sub(r'\b(George|george)\b', 'TEMP_GEORGE', text)
    # Then, replace 'Percy' with 'Fred'
    text = re.sub(r'\bPercy\b', 'Fred', text)
    # Replace the temporary placeholder for 'George' with 'Percy'
    text = re.sub(r'\bTEMP_GEORGE\b', 'Percy', text)
    # Finally, replace the temporary placeholder for 'Fred' with 'George'
    text = re.sub(r'\bTEMP_FRED\b', 'George',

In [9]:
for unit_test_name in function_code_with_unit_tests.unit_tests:
    print(
        function_code_with_unit_tests.get_unit_test_code(unit_test_name=unit_test_name)
    )

def test_replace_names_basic_functionality():
    text = 'Fred and George went to see Percy.'
    expected = 'George and Percy went to see Fred.'
    assert replace_names(text) == expected

def test_replace_names_ignore_case_for_fred_and_george():
    text = 'fred and george went to see percy.'
    expected = 'George and Percy went to see percy.'
    assert replace_names(text) == expected

def test_replace_names_case_sensitive_percy():
    text = 'Fred and George went to see percy.'
    expected = 'George and Percy went to see percy.'
    assert replace_names(text) == expected

def test_replace_names_multiple_instances():
    text = 'Fred, George, and Percy met Fred and George.'
    expected = 'George, Percy, and Fred met George and Percy.'
    assert replace_names(text) == expected

def test_replace_names_no_replacements():
    text = 'Harry and Ron went to see Hermione.'
    expected = 'Harry and Ron went to see Hermione.'
    assert replace_names(text) == expected

def test_replace_

Before the fix
```
def test_replace_names_overlapping_matches():
    text = "FredGeorgePercyFredGeorgePercy"
    expected = "GeorgePercyFredGeorgePercyFred"
    assert replace_names(text) == expected
```

After the fix
```
def test_replace_names_overlapping_matches():
    text = 'Fred George Percy Fred George Percy'
    expected = 'George Percy Fred George Percy Fred'
    assert replace_names(text) == expected

```


Before the fix
```
import re

def replace_names(text: str) -> str:
    """
    Replaces occurrences of 'Fred' with 'George', 'George' with 'Percy', and 'Percy' with 'Fred' in the given text.
    The replacement is case-insensitive for 'Fred' and 'George', but case-sensitive for 'Percy'.

    Parameters:
    text (str): The input text where the replacements will be made.

    Returns:
    str: The text with the names replaced accordingly.
    """
    def replace(match):
        word = match.group(0)
        if word.lower() == 'fred':
            return 'George'
        elif word.lower() == 'george':
            return 'Percy'
        elif word == 'Percy':
            return 'Fred'
        return word

    return re.sub(r'\b(Fred|George|Percy|fred|george)\b', replace, text)
```

After the fix
```
import re

def replace_names(text: str) -> str:
    """
    Replaces occurrences of 'Fred' with 'George', 'George' with 'Percy', and 'Percy' with 'Fred' in the given text.
    The replacement is case-insensitive for 'Fred' and 'George', but case-sensitive for 'Percy'.

    Parameters:
    text (str): The input text where the replacements will be made.

    Returns:
    str: The text with the names replaced accordingly.
    """
    # First, replace 'Fred' with a temporary placeholder
    text = re.sub(r'\b(Fred|fred)\b', 'TEMP_FRED', text)
    # Then, replace 'George' with a different temporary placeholder
    text = re.sub(r'\b(George|george)\b', 'TEMP_GEORGE', text)
    # Then, replace 'Percy' with 'Fred'
    text = re.sub(r'\bPercy\b', 'Fred', text)
    # Replace the temporary placeholder for 'George' with 'Percy'
    text = re.sub(r'\bTEMP_GEORGE\b', 'Percy', text)
    # Finally, replace the temporary placeholder for 'Fred' with 'George'
    text = re.sub(r'\bTEMP_FRED\b', 'George', text)
    
    return text```

In [40]:
import difflib

def string_diff(string1, string2):
    d = difflib.Differ()
    diff = list(d.compare(string1.splitlines(), string2.splitlines()))
    return '\n'.join(diff)

# Example usage
string1 = '''
def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports standard email formats
    and some common obfuscated formats like 'name at domain dot com'.

    Parameters:
    text (str): The input text from which to extract email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Regular expression to match standard email formats
    email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')
    # Regular expression to match obfuscated email formats like 'name at domain dot com'
    obfuscated_pattern = re.compile(r'\b([A-Za-z0-9._%+-]+)\s+at\s+([A-Za-z0-9.-]+)\s+dot\s+([A-Z|a-z]{2,})\b')

    # Find all standard email addresses
    emails = email_pattern.findall(text)
    
    # Find all obfuscated email addresses and convert them to standard format
    obfuscated_emails = obfuscated_pattern.findall(text)
    for obfuscated in obfuscated_emails:
        emails.append(f"{obfuscated[0]}@{obfuscated[1]}.{obfuscated[2]}")

    return emails
'''

string2 = '''
def extract_emails(text: str) -> List[str]:
    """
    Extracts email addresses from the given text. Supports standard email formats
    and some common obfuscated formats like 'name at domain dot com'.

    Parameters:
    text (str): The input text from which to extract email addresses.

    Returns:
    List[str]: A list of extracted email addresses.
    """
    # Regular expression to match standard email formats, including Unicode characters
    email_pattern = re.compile(r'\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{1,}\b', re.UNICODE)
    # Regular expression to match obfuscated email formats like 'name at domain dot com'
    obfuscated_pattern = re.compile(r'\b([\w.%+-]+)\s+at\s+([A-Za-z0-9.-]+)\s+dot\s+([A-Z|a-z]{1,})\b', re.UNICODE)

    # Find all standard email addresses
    emails = email_pattern.findall(text)
    
    # Find all obfuscated email addresses and convert them to standard format
    obfuscated_emails = obfuscated_pattern.findall(text)
    for obfuscated in obfuscated_emails:
        emails.append(f"{obfuscated[0]}@{obfuscated[1]}.{obfuscated[2]}")

    return emails
'''

print(string_diff(string1, string2))

  
  def extract_emails(text: str) -> List[str]:
      """
      Extracts email addresses from the given text. Supports standard email formats
      and some common obfuscated formats like 'name at domain dot com'.
  
      Parameters:
      text (str): The input text from which to extract email addresses.
  
      Returns:
      List[str]: A list of extracted email addresses.
      """
-     # Regular expression to match standard email formats
+     # Regular expression to match standard email formats, including Unicode characters
?                                                         ++++++++++++++++++++++++++++++

-     email_pattern = re.compile(r[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,')
?                                    ^^^^^^^^^ -                                ^

+     email_pattern = re.compile(r[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{1,', re.UNICODE)
?                                    ^^                                 ^    ++++++++++++

      # Regular expression t