In [1]:
import dspy
from dspy import InputField, OutputField, Signature
from dspy.functional import TypedPredictor
import pydantic
from dspy import Example
from dspy.evaluate.evaluate import Evaluate
from dspy.teleprompt.random_search import BootstrapFewShotWithRandomSearch


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv

load_dotenv()

True

see https://chatgpt.com/share/cbecf607-78fb-484e-a3fb-788caae8dba9

In [7]:
from pydantic import BaseModel, Field
from typing import Optional

# 🛠️ ClassComponent — Represents a single class extracted from the Python file
class ClassComponent(BaseModel):
    class_name: str = Field(..., description="Name of the extracted Python class.")
    class_code: str = Field(..., description="The full code of the extracted class.")


In [8]:
# 🕵️ ClassExtraction — Extracts class definitions from the Python code
class ClassExtraction(dspy.Signature):
    file_content: str = dspy.InputField(desc="Full content of the Python file as a string.")
    classes: list[ClassComponent] = dspy.OutputField(desc="Extracted classes with names and code snippets.")

# 🔍 ClassVerification — Verifies the syntax of each extracted class
class ClassVerification(dspy.Signature):
    class_code: str = dspy.InputField(desc="Code of a single Python class.")
    is_valid: bool = dspy.OutputField(desc="Boolean indicating if the class is valid Python syntax.")
    error_message: Optional[str] = dspy.OutputField(desc="Error message if the class is invalid.")
    
# 🛠️ ClassReformatting — Reformats verified classes into dspy.Modules
class ClassReformatting(dspy.Signature):
    class_name: str = dspy.InputField(desc="Name of the Python class.")
    class_code: str = dspy.InputField(desc="Code of the Python class.")
    dspy_module_code: str = dspy.OutputField(desc="Reformatted code of the class as a dspy.Module.")


In [24]:
from dspy.predict.avatar import Avatar


# 📜 ClassExtractor — Extracts classes from the Python file content
class ClassExtractor(dspy.Module):
    def __init__(self):
        self.extract_classes = Avatar(ClassExtraction, tools=[])

    def forward(self, file_content: str):
        result = self.extract_classes(file_content=file_content)
        # 🔍 Suggest: Make sure we have extracted some classes
        dspy.Suggest(len(result.classes) > 0, "No classes found in the file content. Check your extraction logic.")
        return result

# ✅ ClassVerifier — Verifies each extracted class for valid Python syntax
class ClassVerifier(dspy.Module):
    def __init__(self):
        self.verify_class = dspy.ChainOfThought(ClassVerification)

    def forward(self, class_code: str):
        result = self.verify_class(class_code=class_code)
        # ❌ Suggest: Handle invalid classes with detailed feedback
        if not result.is_valid:
            dspy.Suggest(False, f"Class verification failed: {result.error_message}. Check class syntax.")
        return result

# 🛠️ ClassFormatter — Reformats verified classes into dspy.Module format
class ClassFormatter(dspy.Module):
    def __init__(self):
        self.reformat_class = dspy.ChainOfThought(ClassReformatting)

    def forward(self, class_name: str, class_code: str):
        result = self.reformat_class(class_name=class_name, class_code=class_code)
        # 📝 Suggest: Ensure the reformatting meets dspy.Module standards
        dspy.Suggest("dspy.Module" in result.dspy_module_code, "Reformatted class should be a valid dspy.Module.")
        return result

# 🎯 ClassProcessor — Main module that orchestrates extraction, verification, and reformatting
class ClassProcessor(dspy.Module):
    def __init__(self):
        self.extractor = ClassExtractor()
        self.verifier = ClassVerifier()
        self.formatter = ClassFormatter()

    def forward(self, file_content: str):
        # 📜 Step 1: Extract classes from the file
        extraction_result = self.extractor.forward(file_content=file_content)
        class_components = extraction_result.classes

        verified_classes = []
        for class_component in class_components:
            # ✅ Step 2: Verify the class syntax
            verification_result = self.verifier.forward(class_code=class_component.class_code)
            if verification_result.is_valid:
                # 🛠️ Step 3: Reformat into dspy.Module if verification passed
                reformat_result = self.formatter.forward(
                    class_name=class_component.class_name,
                    class_code=class_component.class_code
                )
                verified_classes.append(reformat_result.dspy_module_code)
            else:
                # Handle invalid classes — optionally log or discard
                print(f"Skipping invalid class: {class_component.class_name} - {verification_result.error_message}")

        # 🛑 Assert: Ensure we have at least one valid reformatted class
        dspy.Assert(len(verified_classes) > 0, "No valid dspy.Modules generated from the file content.")
        return dspy.Prediction(verified_modules=verified_classes)


In [10]:
gpt4omini = dspy.OpenAI(model='gpt-4o-mini', max_tokens=250)

ollama_local = dspy.OllamaLocal(model="llama3.1:8b")

dspy.settings.configure(lm=gpt4omini)

In [22]:
DSPY_SAMPLE_CODE_DIRTY = "\n".join(open('./DSPY_SAMPLE_CODE_DIRTY.txt').readlines())
len(DSPY_SAMPLE_CODE_DIRTY)


50392

In [25]:
class_extractor = ClassExtractor()
class_extractor(file_content=DSPY_SAMPLE_CODE_DIRTY)

ValueError: ('Too many retries trying to get the correct output format. Try simplifying the requirements.', {'classes': 'ValueError("Don\'t write anything after the final json ```")'})