In [24]:
from langchain_openai import ChatOpenAI

from extract import (
    extract_function_names,
    extract_function_by_name,
    extract_imports,
)

from llm_calls import (
    llm_generate_unit_tests,
    llm_generate_function_from_unit_tests,
    llm_find_bug,
    llm_fix_bug,
)

from run_tests import (
    find_first_failing_unit_test,
    run_unit_test,
)

from function_code_with_unit_tests import FunctionCodeWithUnitTests

# Chat-model client that is passed as `llm` to every llm_* helper call in the
# cells below. temperature=0 is presumably chosen to keep the generated code
# as repeatable as possible between runs -- TODO confirm intent.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# Alternative task descriptions (swap one in for `query` below to try it):
# query = "Extract emails (including instances of 'name at email dot com') from a text."
# query = "Swap the words 'cat' and 'dog' in the text."

# Natural-language task description. It is passed unchanged as `query` to the
# llm_* helpers in the following cells, so wording mistakes here end up in the
# prompt the model sees.
query = """
For a given string, please extract all the email addresses.
Including instances of 'name at email dot com', for example "cat at gmail dot com" should extract "cat@gmail.com"
and "dog at hotmail dot com" should extract "dog@hotmail.com". It should work for any email domain.
    """

In [25]:
# Ask the LLM for unit tests for the task described in `query`. The result is
# treated as one string of Python source: it is printed here and parsed by
# extract_function_names / extract_imports in the next cells.
unit_test_code_as_string = llm_generate_unit_tests(llm, query)
print(unit_test_code_as_string)

# Function to be tested: replace_names

import re
import pytest

def test_replace_names_alice_first_word():
    input_text = "Alice went to the park. Alice met Bob and Carol."
    expected_output = "Bob went to the park. Bob met Carol and Alice."
    assert replace_names(input_text) == expected_output, f"Expected: {expected_output}, but got: {replace_names(input_text)}"

def test_replace_names_bob_first_word():
    input_text = "Bob went to the park. Bob met Alice and Carol."
    expected_output = "Bobby went to the park. Bobby met Alice and Carol."
    assert replace_names(input_text) == expected_output, f"Expected: {expected_output}, but got: {replace_names(input_text)}"

def test_replace_names_david_present():
    input_text = "David went to the park. David met Alice and Bob."
    expected_output = "🦇 went to the park. 🦇 met Alice and Bob."
    assert replace_names(input_text) == expected_output, f"Expected: {expected_output}, but got: {replace_names(input_text)}"

def test_replace_

In [26]:
# Pull the function names out of the generated test source. These names are
# later used as the keys of the per-test mapping given to
# FunctionCodeWithUnitTests.
unit_test_function_names = extract_function_names(code_string=unit_test_code_as_string)
# Bare expression so the notebook displays the resulting list.
unit_test_function_names

['test_replace_names_alice_first_word',
 'test_replace_names_bob_first_word',
 'test_replace_names_david_present',
 'test_replace_names_alice_case_insensitive',
 'test_replace_names_bob_case_insensitive',
 'test_replace_names_david_case_insensitive',
 'test_replace_names_no_replacements',
 'test_replace_names_empty_string',
 'test_replace_names_special_characters',
 'test_replace_names_multiline',
 'test_replace_names_unicode_characters',
 'test_replace_names_partial_match',
 'test_replace_names_word_boundaries',
 'test_replace_names_mixed_case',
 'test_replace_names_no_names']

In [27]:
# Collect the import statements found in the generated test source. They are
# joined with newlines and stored alongside the tests further down.
unit_test_imports = extract_imports(unit_test_code_as_string)
# Bare expression so the notebook displays the resulting list.
unit_test_imports

['import re', 'import pytest']

In [28]:
# Ask the LLM to write the function under test, giving it both the generated
# unit tests and the original task description. The result is handled as a
# string of source code (printed here, stored in the container below).
target_function_code_as_string = llm_generate_function_from_unit_tests(
    llm=llm,
    unit_test_code_as_string=unit_test_code_as_string,
    query=query,
)
print(target_function_code_as_string)

import re

def replace_names(input_text: str) -> str:
    def replace_case_insensitive(text: str, old: str, new: str) -> str:
        return re.sub(r'\b' + re.escape(old) + r'\b', new, text, flags=re.IGNORECASE)
    
    if re.search(r'\bDavid\b', input_text, re.IGNORECASE):
        input_text = replace_case_insensitive(input_text, 'David', '🦇')
        return input_text

    first_word = re.match(r'\b\w+\b', input_text, re.IGNORECASE)
    if first_word:
        first_word = first_word.group(0).lower()
        if first_word == 'alice':
            input_text = replace_case_insensitive(input_text, 'Alice', 'TEMP_ALICE')
            input_text = replace_case_insensitive(input_text, 'Bob', 'Carol')
            input_text = replace_case_insensitive(input_text, 'Carol', 'Alice')
            input_text = replace_case_insensitive(input_text, 'TEMP_ALICE', 'Bob')
        elif first_word == 'bob':
            input_text = replace_case_insensitive(input_text, 'Bob', 'Bobby')
    
    return inpu

In [29]:
# Source of each generated unit test, keyed by test name. Every value is a
# one-element list holding the test's current source code.
unit_test_sources_by_name = {
    test_name: [
        extract_function_by_name(
            function_name=test_name,
            code_string=unit_test_code_as_string,
        )
    ]
    for test_name in unit_test_function_names
}

# Bundle the generated function, its unit tests and the tests' import lines
# into the single object the repair loop works on.
function_code_with_unit_tests = FunctionCodeWithUnitTests(
    unit_test_imports="\n".join(unit_test_imports),
    unit_tests=unit_test_sources_by_name,
    function_code=[target_function_code_as_string],
)


In [30]:
# Repair loop: repeatedly find the first failing unit test, ask the LLM whether
# the bug is in the test or in the function, apply the LLM's fix, and re-run
# that one test. A test that still fails after the fix is deleted (and a
# function fix is rolled back) so the loop can move on to the next failure.

# Upper bound on repair rounds, so a failure the LLM cannot fix does not keep
# the loop (and the LLM calls) running indefinitely.
MAX_REPAIR_ROUNDS = 10

# The only two answers the loop knows how to act on.
BUG_IN_UNIT_TEST = "Unit Test"
BUG_IN_FUNCTION = "Function"

for repair_round in range(MAX_REPAIR_ROUNDS):

    first_failing_unit_test_message = find_first_failing_unit_test(
        function_code_with_unit_tests=function_code_with_unit_tests
    )

    first_failing_unit_test_message.print_message_details()

    if first_failing_unit_test_message.test_pass_status:
        print("All tests are passing")
        break

    failing_test_name = first_failing_unit_test_message.unit_test_name

    print("Ask llm if the function or unit test needs to be updated.")
    bug_report_dict = llm_find_bug(
        llm=llm,
        query=query,
        function_code_with_unit_tests=function_code_with_unit_tests,
        first_failing_unit_test_message=first_failing_unit_test_message,
    )

    # Validate the verdict before spending another LLM call on a fix.
    bug_location = bug_report_dict["Likely Bug Location"]
    if bug_location not in (BUG_IN_UNIT_TEST, BUG_IN_FUNCTION):
        raise ValueError(
            "bug_report_dict['Likely Bug Location'] not as expected: "
            f"{bug_location!r}"
        )

    if bug_location == BUG_IN_UNIT_TEST:
        print("The llm said the unit test should be updated.\nAsk the llm to fix the unit test.\n")
    else:
        print("The llm said the function should be updated.\nAsk the llm to fix the function.\n")

    # The same call is used for both cases; the bug report passed in is what
    # differs between a unit-test fix and a function fix.
    updated_code = llm_fix_bug(
        llm=llm,
        query=query,
        bug_report_dict=bug_report_dict,
        function_code_with_unit_tests=function_code_with_unit_tests,
        first_failing_unit_test_message=first_failing_unit_test_message,
    )

    if bug_location == BUG_IN_UNIT_TEST:

        print("Unit test before update;")
        print(function_code_with_unit_tests.get_unit_test_code(failing_test_name) + "\n")

        function_code_with_unit_tests.replace_unit_test(
            unit_test_name=failing_test_name,
            new_unit_test_code=updated_code,
        )

        print("Unit test after update;")
        print(function_code_with_unit_tests.get_unit_test_code(failing_test_name) + "\n")

    else:

        print("Function before update;")
        print(function_code_with_unit_tests.get_function_code() + "\n")

        function_code_with_unit_tests.replace_function_code(
            new_function_code=updated_code,
        )

        print("Function after update;")
        print(function_code_with_unit_tests.get_function_code() + "\n")

    # Re-run only the test that was failing to see whether the fix worked.
    unit_test_message = run_unit_test(
        function_code_with_unit_tests=function_code_with_unit_tests,
        unit_test_name=failing_test_name,
    )

    if unit_test_message.test_pass_status:

        print(f"Unit test {failing_test_name}() now passing.")

    else:

        # Give up on this test: drop it, and undo a function change that did
        # not help so later rounds start from the previous function version.
        unit_test_code_to_delete = function_code_with_unit_tests.get_unit_test_code(
            failing_test_name
        )
        print(
            f"Unable to make this unit test pass\n{unit_test_code_to_delete}It will be deleted\n"
        )
        function_code_with_unit_tests.delete_unit_test_code(failing_test_name)
        if bug_location == BUG_IN_FUNCTION:
            print("Roll back update to function.")
            function_code_with_unit_tests.delete_function_changes()

else:
    # All rounds were used without hitting `break`. The message captured at the
    # top of the last round predates that round's fix/deletion, so re-run the
    # suite instead of reporting a stale status.
    first_failing_unit_test_message = find_first_failing_unit_test(
        function_code_with_unit_tests=function_code_with_unit_tests
    )
    if first_failing_unit_test_message.test_pass_status:
        print("All tests are passing")
    else:
        print("Some tests are still not passing")


test_pass_status: False
unit_test_name: test_replace_names_alice_first_word
output: Assertion failed: Expected: Bob went to the park. Bob met Carol and Alice., but got: Bob went to the park. Bob met Alice and Alice.

Ask llm if the function or unit test needs to be updated.
The llm said the function should be updated.
Ask the llm to fix the function.

Function before update;
import re

def replace_names(input_text: str) -> str:
    def replace_case_insensitive(text: str, old: str, new: str) -> str:
        return re.sub(r'\b' + re.escape(old) + r'\b', new, text, flags=re.IGNORECASE)
    
    if re.search(r'\bDavid\b', input_text, re.IGNORECASE):
        input_text = replace_case_insensitive(input_text, 'David', '🦇')
        return input_text

    first_word = re.match(r'\b\w+\b', input_text, re.IGNORECASE)
    if first_word:
        first_word = first_word.group(0).lower()
        if first_word == 'alice':
            input_text = replace_case_insensitive(input_text, 'Alice', 'TEMP_AL

In [8]:
# Show the function source currently held by the container (i.e. after any
# replacements or roll-backs performed by the repair loop).
print(function_code_with_unit_tests.get_function_code())

import re

def replace_names(input_text: str) -> str:
    def replace(match):
        name = match.group(0).capitalize()
        if name == 'Alice':
            return 'Bob'
        elif name == 'Bob':
            return 'Carol'
        elif name == 'Carol':
            return 'Alice'
        return name

    return re.sub(r'(Alice|Bob|Carol)', replace, input_text, flags=re.IGNORECASE)



In [9]:
# Print the current source of every unit test still held by the container;
# tests deleted by the repair loop no longer appear in `unit_tests`.
for unit_test_name in function_code_with_unit_tests.unit_tests:
    print(
        function_code_with_unit_tests.get_unit_test_code(unit_test_name=unit_test_name)
    )

def test_replace_names_basic():
    input_text = 'Alice went to see Bob and Carol.'
    expected_output = 'Bob went to see Carol and Alice.'
    assert replace_names(input_text
        ) == expected_output, f'Expected: {expected_output}, but got: {replace_names(input_text)}'

def test_replace_names_case_insensitive():
    input_text = 'alice went to see bob and carol.'
    expected_output = 'Bob went to see Carol and Alice.'
    assert replace_names(input_text
        ) == expected_output, f'Expected: {expected_output}, but got: {replace_names(input_text)}'

def test_replace_names_mixed_case():
    input_text = 'Alice went to see bob and CAROL.'
    expected_output = 'Bob went to see Carol and Alice.'
    assert replace_names(input_text
        ) == expected_output, f'Expected: {expected_output}, but got: {replace_names(input_text)}'

def test_replace_names_multiple_instances():
    input_text = 'Alice and Bob went to see Carol. Bob and Alice were happy.'
    expected_output = (
      