In [1]:
import textgrad as tg
from textgrad.engine import get_engine
from textgrad.variable import Variable
from textgrad.optimizer import TextualGradientDescent
from textgrad.verifier import TextualVerifierWithTracker
from textgrad.loss import TextLoss

In [2]:
# Single source of truth for the model identifier: the same id is used for the
# engine object created here and for the backward engine registered on textgrad.
MODEL_NAME = "gemini-1.5-pro"

engine = get_engine(MODEL_NAME)
tg.set_backward_engine(MODEL_NAME, override=True)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Starting answer handed to the optimizer. It is left exactly as authored,
# including its arithmetic mistakes: it is the input to be critiqued and
# rewritten, not a reference solution.
initial_solution = (
    "To solve the equation 3x^2 - 7x + 2 = 0, we use the quadratic formula:\n"
    "x = (-b ± √(b^2 - 4ac)) / 2a\n"
    "a = 3, b = -7, c = 2\n"
    "x = (7 ± √((-7)^2 + 4(3)(2))) / 6\n"
    "x = (7 ± √73) / 6\n"
    "The solutions are:\n"
    "x1 = (7 + √73)\n"
    "x2 = (7 - √73)"
)

# The text that the optimizer will rewrite (it is the only variable later
# handed to TextualGradientDescent).
solution = Variable(
    initial_solution,
    requires_grad=True,
    role_description="solution to the math question",
)

# Instructions for the evaluator. Created with requires_grad=False, i.e. this
# prompt is a fixed input rather than something being optimized.
# NOTE: the first sentence ends with a space before the newline; it is kept so
# the prompt text is unchanged.
loss_system_prompt = Variable(
    "You will evaluate a solution to a math question. \n"
    "Do not attempt to solve it yourself, do not give a solution, only identify errors. Be super concise.",
    requires_grad=False,
    role_description="system prompt",
)


In [4]:
# Only `solution` is registered with the optimizer, so it is the only text
# that optimizer.step() is asked to update.
trainable_variables = [solution]
optimizer = TextualGradientDescent(trainable_variables)

In [5]:
# Build the textual loss from the evaluator prompt, using the shared engine.
loss = TextLoss(loss_system_prompt, engine=engine)

# Calling the loss object runs its forward pass on the current solution text.
loss_value = loss(solution)

print(f"INITIAL LOSS: {loss_value!s}")

INITIAL LOSS: The calculation of  b² - 4ac was incorrect: it should be (-7)² - 4 * 3 * 2 = 49 - 24 = 25, not 49 + 24 = 73.  Also, the division by 6 is missing in the final answers for x1 and x2.



In [6]:
# Single verification prompt for the loss text. The $instruction, $instance and
# $calculation placeholders match the keyword names passed to verify() below.
# Written line by line so the leading indentation and the trailing space after
# item 3 (both part of the prompt) are visible.
loss_verification_task_prompts = [
    (
        "\n"
        "    1. When using $instruction to $instance, I got the loss $calculation.\n"
        "    2. Evaluate the loss $calculation value correctly reflects the performance of the instance.\n"
        "    3. If the loss $calculation is incorrect or inconsistent, provide the corrected version of the loss $calculation. \n"
        "    4. Do NOT calculate the solution/instance, evaluate $calculation ONLY.\n"
        "    "
    )
]

# Verifier for the loss text: chain-of-thought generation and step breakdown
# are both switched off, and logging is on.
loss_verifier = TextualVerifierWithTracker(
    verifier_engine=engine,
    use_cot_generation=False,
    use_step_breakdown=False,
    verification_task_prompts=loss_verification_task_prompts,
    enable_logging=True,
)

# Check the loss text (`calculation`) against the solution it critiques
# (`instance`) and the evaluator prompt that produced it (`instruction`).
loss_verified_result = loss_verifier.verify(
    instance=solution,
    instruction=loss_system_prompt,
    calculation=loss_value,
)

print(f"VERIFIED RESULT: {loss_verified_result.value!s}")

INFO:textgrad:TextualVerifier: Start verification process...
INFO:textgrad:TextualVerifier: Ready to verify 1 calculation steps...
INFO:textgrad:TextualVerifier: Verifying step 1/1...
INFO:textgrad:TextualVerifier: Generating step 1 variant 1/1...
INFO:textgrad:TextualVerifier: Running majority voting for step 1...
VERIFIED RESULT: <VERIFIED>The calculation correctly identifies both errors in the provided solution.  The discriminant is calculated incorrectly (it should be 25, not 73) and the division by 6 is missing from the final answers.
</VERIFIED>


In [7]:
# Replace the loss text with the verifier's output so that the backward pass
# works from the verified text. The value is copied as-is (no post-processing).
verified_loss_text = loss_verified_result.value
loss_value.set_value(verified_loss_text)

In [8]:
# Optimize: propagate textual feedback from the loss, then let the optimizer
# rewrite the registered variable (`solution`).
loss_value.backward()
optimizer.step()

print(f"Optimized Solution: {solution.value!s}")

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \nDo not attempt to solve it yourself, do not give a solution, only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> To solve the equation 3x^2 - 7x + 2 = 0, we use the quadratic formula:\nx = (-b ± √(b^2 - 4ac)) / 2a\na = 3, b = -7, c = 2\nx = (7 ± √((-7)^2 + 4(3)(2))) / 6\nx = (7 ± √73) / 6\nThe solutions are:\nx1 = (7 + √73)\nx2 = (7 - √73) </LM_INPUT>\n\n<LM_OUTPUT> <VERIFIED>The calculation correctly identifies both errors in the provided solution.  The discriminant is calculated incorrectly (it should be 25, not 73) and the division by 6 is missing from the final answers.\n</VERIFIED> </LM_OUTPUT>\n\n</CONVERSATION>\n\nThis conversation is potentially part of a larger system. The output is used as response from the language model\n\nHere is the feedback we got for solution to the math question in the conversation:\n\n<FEEDBACK>The language model evalu

In [9]:
# Two verification prompts, each asking for the same check from a different
# point of view. Written line by line so the indentation and trailing spaces
# that are part of each prompt are visible.
optimizer_verification_task_prompts = [
    # Perspective 1: rule-based verifier (objective, procedural).
    (
        "\n"
        "    Evaluate the calculation step strictly based on mathematical correctness and procedural rules. \n"
        "    If the step violates any algebraic or logical principle, replace it with the corrected version of that step only. \n"
        "    Do not proceed to solve the full problem.\n"
        "    "
    ),
    # Perspective 2: teaching assistant (didactic, pedagogical).
    (
        "\n"
        "    Review the calculation step from the perspective of a teaching assistant helping a student learn. \n"
        "    If there's an error or suboptimal explanation, provide a corrected version that would best aid the student's understanding. \n"
        "    Focus only on the step in question, without solving the full problem.\n"
        "    "
    ),
]

# Verifier for the optimized solution: unlike the loss verifier, this one has
# chain-of-thought generation and step breakdown enabled.
optimizer_verifier = TextualVerifierWithTracker(
    verifier_engine=engine,
    use_cot_generation=True,
    use_step_breakdown=True,
    verification_task_prompts=optimizer_verification_task_prompts,
    enable_logging=True,
)

# Context for the verifier: the original (pre-optimization) solution text plus
# the current loss text, combined into one non-trainable variable.
instance_text = f"initial_solution: {initial_solution}\nloss_value: {loss_value}"
instance = Variable(
    instance_text,
    requires_grad=False,
    role_description="instance",
)

optimizer_instruction = Variable(
    "You will optimize $initial_solution based on $loss_value. Be super concise.",
    requires_grad=False,
    role_description="optimizer prompt",
)

# The text being verified (`calculation`) is the current `solution` variable.
verified_result = optimizer_verifier.verify(
    instance=instance,
    instruction=optimizer_instruction,
    calculation=solution,
)

print(f"VERIFIED RESULT: {verified_result.value!s}")

INFO:textgrad:TextualVerifier: Start verification process...
INFO:textgrad:TextualVerifier: Ready to verify 8 calculation steps...
INFO:textgrad:TextualVerifier: Verifying step 1/8...
INFO:textgrad:TextualVerifier: Generating step 1 variant 1/2...
INFO:textgrad:TextualVerifier: Generating step 1 variant 2/2...
INFO:textgrad:TextualVerifier: Running majority voting for step 1...
INFO:textgrad:TextualVerifier: Verifying step 2/8...
INFO:textgrad:TextualVerifier: Generating step 2 variant 1/2...
INFO:textgrad:TextualVerifier: Generating step 2 variant 2/2...
INFO:textgrad:TextualVerifier: Running majority voting for step 2...
INFO:textgrad:TextualVerifier: Verifying step 3/8...
INFO:textgrad:TextualVerifier: Generating step 3 variant 1/2...
INFO:textgrad:TextualVerifier: Generating step 3 variant 2/2...
INFO:textgrad:TextualVerifier: Running majority voting for step 3...
INFO:textgrad:TextualVerifier: Verifying step 4/8...
INFO:textgrad:TextualVerifier: Generating step 4 variant 1/2...
IN

In [10]:
import json
from pathlib import Path

# Gather what each verifier recorded during its verify() call.
loss_tracker_data = loss_verifier.get_tracker()
optimizer_tracker_data = optimizer_verifier.get_tracker()

tracker_data = {
    "loss_tracker": loss_tracker_data,
    "optimizer_tracker": optimizer_tracker_data,
}

# Create the output directory first: opening the file for writing raises
# FileNotFoundError when `tracker_results/` does not exist yet (e.g. on a
# fresh checkout), which would break Restart & Run All.
tracker_path = Path("tracker_results") / "loss_optimizer_verification.json"
tracker_path.parent.mkdir(parents=True, exist_ok=True)

# default=str: the tracker holds objects json cannot encode natively (the
# engine instance and Variable objects), so they are written as their str() form.
with tracker_path.open("w", encoding="utf-8") as f:
    json.dump(tracker_data, f, indent=4, default=str)

In [11]:
# Bare final expression: the notebook renders the combined tracker dict as this cell's output.
tracker_data

{'loss_tracker': {'setup': {'verifier_engine': <textgrad.engine.gemini.ChatGemini at 0x11dfb9c10>,
   'use_cot_generation': False,
   'use_step_breakdown': False,
   'verification_task_prompts': ['\n    1. When using $instruction to $instance, I got the loss $calculation.\n    2. Evaluate the loss $calculation value correctly reflects the performance of the instance.\n    3. If the loss $calculation is incorrect or inconsistent, provide the corrected version of the loss $calculation. \n    4. Do NOT calculate the solution/instance, evaluate $calculation ONLY.\n    '],
   'enable_logging': True},
  'variable': {'instance': Variable(value=To solve the equation 3x^2 - 7x + 2 = 0, we use the quadratic formula:
   x = (-b ± √(b^2 - 4ac)) / 2a
   Where a = 3, b = -7, and c = 2.
   
   So, x = (7 ± √((-7)^2 - 4 * 3 * 2)) / (2 * 3)
   x = (7 ± √(49 - 24)) / 6
   x = (7 ± √25) / 6
   x = (7 ± 5) / 6
   
   Therefore, the solutions are:
   x1 = (7 + 5) / 6 = 12/6 = 2
   x2 = (7 - 5) / 6 = 2/6 = 