In [64]:
from langchain_openai import ChatOpenAI
import re

from langchain_experimental.tools import PythonREPLTool

from extract import (
    extract_function_names,
    extract_function_by_name,
    extract_imports,
    is_valid_python,
)

# Chat model used to generate the function and its unit tests; temperature 0
# is requested to keep the generated code as repeatable as possible.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# REPL tool used further down to execute the generated code.
python_repl = PythonREPLTool()

In [67]:
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the code-writing model. Adjacent string literals are
# used instead of backslash continuations inside one literal so that the
# indentation of this source file does not leak into the prompt text.
SYSTEM_PROMPT = (
    "You will be asked to write a python function. "
    "Use best practice python code that uses type hints and doc strings. "
    "Your response should be valid python code only. No extra explanations. "
    "You should write robust and thorough unit tests (pytest) to test the function you write. "
    "Each unit test should be self contained, concise, and only test one thing. "
    "Do not write code to run the unit tests."
)

prompt = ChatPromptTemplate.from_messages([
    ("system", SYSTEM_PROMPT),
    ("user", "{input}"),
])

# Compose the prompt template with the model, then ask for an email-extraction
# function together with its unit tests.
chain = prompt | llm
llm_message_regex_function_and_unit_tests = chain.invoke({"input": "Write a python function that uses regex to extract emails (including instances of 'name at email dot com') from a text."})

In [69]:
def extract_python_code(text: str) -> str:
    """Return the contents of every fenced Python code block found in ``text``.

    A block is recognised when it opens with a Markdown fence tagged
    ``python`` or ``py`` (case-insensitive), optionally followed by spaces or
    tabs, and then a newline (``\\n`` or ``\\r\\n``). Everything up to the next
    closing fence is captured verbatim.

    Args:
        text: Text that may contain fenced Python code blocks, e.g. an LLM reply.

    Returns:
        The captured block bodies joined by a blank line (``"\\n\\n"``), or an
        empty string when no Python block is present.
    """
    # Tolerate trailing whitespace after the language tag and Windows line
    # endings; an opener that is not exactly "```python\n" would otherwise
    # silently produce an empty result.
    pattern = r'```(?:python|py)[ \t]*\r?\n(.*?)```'
    # DOTALL lets the lazy group span multiple lines inside a block.
    code_blocks = re.findall(pattern, text, re.DOTALL | re.IGNORECASE)
    return '\n\n'.join(code_blocks)

# Pull the fenced Python source out of the model's reply and show it.
llm_response_text = llm_message_regex_function_and_unit_tests.content
code_from_llm_as_str = extract_python_code(llm_response_text)
print(code_from_llm_as_str)

import re
from typing import List

def extract_emails(text: str) -> List[str]:
    """
    Extracts emails from a given text. Supports standard email format and 'name at email dot com' format.

    Args:
        text (str): The input text containing emails.

    Returns:
        List[str]: A list of extracted email addresses.
    """
    # Regex for standard email format
    standard_email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    
    # Regex for 'name at email dot com' format
    at_dot_email_regex = r'([a-zA-Z0-9._%+-]+)\s+at\s+([a-zA-Z0-9.-]+)\s+dot\s+([a-zA-Z]{2,})'
    
    # Find all standard emails
    standard_emails = re.findall(standard_email_regex, text)
    
    # Find all 'name at email dot com' emails and convert them to standard format
    at_dot_emails = re.findall(at_dot_email_regex, text)
    converted_emails = [f"{user}@{domain}.{tld}" for user, domain, tld in at_dot_emails]
    
    return standard_emails + converted_emails


import pytest

def 

In [70]:
# Names of the functions defined in the extracted code. In the output below the
# function under test comes first, followed by the generated test functions.
function_names = extract_function_names(code_string=code_from_llm_as_str)
function_names

['extract_emails',
 'test_extract_standard_email',
 'test_extract_at_dot_email',
 'test_extract_multiple_emails',
 'test_extract_no_emails',
 'test_extract_mixed_emails']

In [71]:
# Import statements found in the extracted code; the output below shows one
# string per statement.
imports = extract_imports(code_from_llm_as_str)
imports

['import re', 'from typing import List', 'import pytest']

In [124]:
imports_as_one_string = "\n".join(imports)

# The first extracted name is treated as the function under test and every
# remaining name as a unit test for it.
function_to_test = function_names[0]
test_functions = function_names[1:]

# The source of the function under test is the same for every test, so it is
# extracted once here rather than once per loop iteration.
function_to_test_source = extract_function_by_name(
    code_string=code_from_llm_as_str, function_name=function_to_test
)

# One "<test name>(): <captured output>" entry per generated test.
test_function_pass_status_and_message = []

for test_name in test_functions:
    # Build a self-contained script: the imports, the function under test, a
    # single test function, and a small harness that prints the outcome.
    code_to_run = f"""
{imports_as_one_string}

{function_to_test_source}

{extract_function_by_name(code_from_llm_as_str, function_name=test_name)}

try:
    {test_name}()
    message = "test passed"
except AssertionError as e:
    message = "Assertion failed: " + str(e)
except Exception as e:
    message = "Error: " + str(e)

print(message)
"""

    # NOTE(security): this executes LLM-generated code through the Python REPL
    # tool. Only run it in an environment where arbitrary code execution is
    # acceptable (e.g. a sandbox or throwaway container).
    output = python_repl.run(code_to_run)
    test_function_pass_status_and_message.append(test_name + "(): " + output)

In [125]:
# Show the result string recorded for each generated test.
test_function_pass_status_and_message

['test_extract_standard_email(): test passed\n',
 'test_extract_at_dot_email(): test passed\n',
 'test_extract_multiple_emails(): test passed\n',
 'test_extract_no_emails(): test passed\n',
 'test_extract_mixed_emails(): test passed\n']