In [1]:
import textgrad as tg
from textgrad.engine import get_engine
from textgrad.variable import Variable
from textgrad.optimizer import TextualGradientDescent
from textgrad.verifier import TextualVerifierWithTracker
from textgrad.loss import TextLoss

In [2]:
# Single source of truth for the model id, so the forward engine and the
# backward engine cannot drift apart when the model is changed.
MODEL_NAME = "gemini-1.5-pro"

# `engine` is passed explicitly to the loss and to the verifier in later cells.
engine = get_engine(MODEL_NAME)
# Register the same model as TextGrad's backward engine.
tg.set_backward_engine(MODEL_NAME, override=True)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Candidate solution text that the loss evaluates in the next cell.
initial_solution = """To solve the equation 3x^2 - 7x + 2 = 0, we use the quadratic formula:
x = (-b ± √(b^2 - 4ac)) / 2a
a = 3, b = -7, c = 2
x = (7 ± √((-7)^2 + 4(3)(2))) / 6
x = (7 ± √73) / 6
The solutions are:
x1 = (7 + √73)
x2 = (7 - √73)"""

# Wrap the solution text as a Variable with requires_grad=True.
solution = Variable(
    initial_solution,
    role_description="solution to the math question",
    requires_grad=True,
)

# Evaluator instruction, wrapped as a Variable with requires_grad=False.
loss_system_prompt = Variable(
    """You will evaluate a solution to a math question. 
Do not attempt to solve it yourself, do not give a solution, only identify errors. Be super concise.""",
    role_description="system prompt",
    requires_grad=False,
)


In [4]:
# Build the textual loss from the evaluator prompt and the shared engine.
loss = TextLoss(
    loss_system_prompt,
    engine=engine,
)

# Calling the loss object runs its forward method on the solution.
loss_value = loss(solution)

print("INITIAL LOSS:", loss_value)

INITIAL LOSS: The calculation of  b² - 4ac was incorrect: it should be (-7)² - 4 * 3 * 2 = 49 - 24 = 25, not 49 + 24 = 73.  Also, the division by 6 is missing in the final answers for x1 and x2.



In [5]:
# Prompt template for the verifier. The $instruction, $instance and
# $calculation placeholders share their names with the keyword arguments
# passed to verifier.verify() at the end of this cell.
verification_task_prompts = [
    """
    1. When using $instruction to $instance, I got the loss $calculation.
    2. Evaluate the loss $calculation value correctly reflects the performance of the instance.
    3. If the loss $calculation is incorrect or inconsistent, provide the corrected version of the loss $calculation. 
    4. Do NOT calculate the solution/instance, evaluate $calculation ONLY.
    """
]

# Verifier configured with the shared engine; CoT generation and step
# breakdown are switched off, logging is switched on.
verifier = TextualVerifierWithTracker(
    verifier_engine=engine,
    use_cot_generation=False,
    use_step_breakdown=False,
    verification_task_prompts=verification_task_prompts,
    enable_logging=True,
)

# Ask the verifier to check the loss text produced for the solution.
verified_result = verifier.verify(
    instance=solution,
    instruction=loss_system_prompt,
    calculation=loss_value,
)

print("VERIFIED RESULT:", verified_result.value)

INFO:textgrad:TextualVerifier: Start verification process...
INFO:textgrad:TextualVerifier: Ready to verify 1 calculation steps...
INFO:textgrad:TextualVerifier: Verifying step 1/1...
INFO:textgrad:TextualVerifier: Generating step 1 variant 1/1...
INFO:textgrad:TextualVerifier: Running majority voting for step 1...
VERIFIED RESULT: <VERIFIED>The calculation correctly identifies both errors in the provided solution.  The discriminant is calculated incorrectly (it should be 25, not 73) and the division by 6 is missing from the final answers.
</VERIFIED>


In [6]:
import json
from pathlib import Path

# Snapshot of what the verifier tracked during the verify() call.
tracker_data = verifier.get_tracker()

# Create the output directory first: open(..., 'w') raises FileNotFoundError
# when 'tracker_results/' does not exist, e.g. on a fresh checkout.
tracker_path = Path('tracker_results/loss_verification.json')
tracker_path.parent.mkdir(parents=True, exist_ok=True)

# default=str stringifies values json cannot serialise natively.
with tracker_path.open('w', encoding='utf-8') as f:
    json.dump(tracker_data, f, indent=4, default=str)

In [7]:
# Bare expression as the last line of the cell: the notebook shows tracker_data as the cell output.
tracker_data

{'setup': {'verifier_engine': <textgrad.engine.gemini.ChatGemini at 0x10e619b10>,
  'use_cot_generation': False,
  'use_step_breakdown': False,
  'verification_task_prompts': ['\n    1. When using $instruction to $instance, I got the loss $calculation.\n    2. Evaluate the loss $calculation value correctly reflects the performance of the instance.\n    3. If the loss $calculation is incorrect or inconsistent, provide the corrected version of the loss $calculation. \n    4. Do NOT calculate the solution/instance, evaluate $calculation ONLY.\n    '],
  'enable_logging': True},
 'variable': {'instance': Variable(value=To solve the equation 3x^2 - 7x + 2 = 0, we use the quadratic formula:
  x = (-b ± √(b^2 - 4ac)) / 2a
  a = 3, b = -7, c = 2
  x = (7 ± √((-7)^2 + 4(3)(2))) / 6
  x = (7 ± √73) / 6
  The solutions are:
  x1 = (7 + √73)
  x2 = (7 - √73), role=solution to the math question, grads=set()),
  'instruction': Variable(value=You will evaluate a solution to a math question. 
  Do not