# Solution Optimization Evaluation Raw TextGrad

In [2]:
import contextlib
import os
import sys
import threading

import pandas as pd
import textgrad as tg
from textgrad.engine import get_engine
from textgrad.loss import TextLoss
from textgrad.optimizer import TextualGradientDescent
from textgrad.variable import Variable
from textgrad.verifier import TextualVerifier

## Load Datasets

In [3]:
# Read the per-question starting solutions; the optimization cells below use the
# id, raw_solution, correct_answer, source and subject columns of each row.
initial_solution = pd.read_csv(filepath_or_buffer="csv/initial_solution.csv")
initial_solution

Unnamed: 0,id,formatted_question,raw_solution,correct_answer,source,subject
0,1,\n Two quantum states with energies E1 and ...,<Solution>\nThe energy-time uncertainty princi...,B,GPQA-Diamond,-
1,2,\n trans-cinnamaldehyde was treated with me...,<Solution>\n1. **Reaction 1:** trans-cinnamald...,A,GPQA-Diamond,-
2,8,\n You perform a high-throughput experiment...,<Solution>\nHere's how we can analyze the resu...,B,GPQA-Diamond,-
3,3,\n A spin-half particle is in a linear supe...,<Solution>\nLet the given state be $|\psi\rang...,D,GPQA-Diamond,-
4,6,\n A quantum mechanical particle of mass m ...,"<Solution>\nThe potential is given by:\nV(r, θ...",D,GPQA-Diamond,-
...,...,...,...,...,...,...
407,404,"\n Under certain conditions, a beam of elec...",<Solution>\nThe de Broglie wavelength of the e...,C,MMLU-CP,college_physics
408,384,\n Two spaceships approach Earth with equal...,<Solution>\nLet $v$ be the speed of each space...,C,MMLU-CP,college_physics
409,411,\n Positronium is an atom formed by an elec...,<Solution>\nThe energy levels of positronium a...,C,MMLU-CP,college_physics
410,396,\n A photon strikes an electron of mass m t...,<Solution>\nLet $E_\gamma$ be the energy of th...,C,MMLU-CP,college_physics


## Experiment

In [1]:
import os
import sys

# Single source of truth for the model name, so the forward (loss) engine and
# the backward (gradient) engine cannot silently drift apart.
MODEL_NAME = "gemini-1.5-pro"

# Engine used explicitly by the loss (and by the verifier) in the cells below.
engine = get_engine(MODEL_NAME)

# override=True keeps this cell re-runnable: textgrad raises if a backward
# engine is already set and override is left at its default of False.
tg.set_backward_engine(MODEL_NAME, override=True)

NameError: name 'get_engine' is not defined

In [5]:
_SUPPRESS_LOCK = threading.Lock()
# depth: number of callers currently inside _suppress_output()
# saved: the (stdout, stderr) pair to put back once depth returns to 0
# sink:  one shared handle on os.devnull, opened lazily and then reused
_suppress_state = {"depth": 0, "saved": None, "sink": None}


@contextlib.contextmanager
def _suppress_output():
    """Silence sys.stdout/sys.stderr while at least one caller is inside the block.

    sys.stdout and sys.stderr are process-wide, so when several worker threads
    suppress output at once a plain save/restore per thread would restore the
    wrong stream. A lock-protected depth counter makes the first entrant install
    the sink and the last one to leave restore the original streams.
    """
    with _SUPPRESS_LOCK:
        if _suppress_state["depth"] == 0:
            if _suppress_state["sink"] is None:
                _suppress_state["sink"] = open(os.devnull, "w")
            _suppress_state["saved"] = (sys.stdout, sys.stderr)
            sys.stdout = sys.stderr = _suppress_state["sink"]
        _suppress_state["depth"] += 1
    try:
        yield
    finally:
        with _SUPPRESS_LOCK:
            _suppress_state["depth"] -= 1
            if _suppress_state["depth"] == 0:
                sys.stdout, sys.stderr = _suppress_state["saved"]
                _suppress_state["saved"] = None
                # The sink is deliberately left open: other objects created
                # while output was suppressed (e.g. a progress bar) may still
                # hold a reference to it and write to it later.


def evaluate_with_raw_textgrad(row_data, num_iterations=5):
    """Optimize one row's solution with plain TextGrad and record every iterate.

    Args:
        row_data: Mapping with the keys "id", "raw_solution", "correct_answer",
            "source" and "subject" (one dataset row as a dict).
        num_iterations: Number of loss/backward/step rounds to run. Defaults to
            5, the value this experiment was originally hard-coded to.

    Returns:
        dict: The five input fields copied through unchanged, plus
        "solution_1" ... "solution_<num_iterations>" holding the solution text
        after each optimizer step.

    Notes:
        stdout/stderr are silenced only for the duration of the call and are
        restored afterwards, also when an exception propagates.
    """
    with _suppress_output():
        result = {
            key: row_data[key]
            for key in ("id", "raw_solution", "correct_answer", "source", "subject")
        }

        # The solution text is the only variable that receives gradients.
        solution = Variable(row_data["raw_solution"],
                            requires_grad=True,
                            role_description="Solution to the math question")
        # NOTE: the prompt text (including its embedded whitespace) is part of
        # the experiment and is intentionally left untouched.
        loss_system_prompt = Variable("""You will evaluate a solution to a math question. 
                                    Do not attempt to solve it yourself, do not give a solution, 
                                    only identify errors. Be super concise.""",
                                      requires_grad=False,
                                      role_description="system prompt")
        optimizer = TextualGradientDescent([solution])
        loss = TextLoss(loss_system_prompt, engine=engine)

        for i in range(1, num_iterations + 1):
            optimizer.zero_grad()  # Clean gradients from the previous round
            loss_result = loss(solution)

            loss_result.backward()
            optimizer.step()
            # Snapshot the text after this step so every iterate can be scored later.
            result[f"solution_{i}"] = solution.value

    return result

In [6]:
_SUPPRESS_LOCK = threading.Lock()
# depth: number of callers currently inside _suppress_output()
# saved: the (stdout, stderr) pair to put back once depth returns to 0
# sink:  one shared handle on os.devnull, opened lazily and then reused
_suppress_state = {"depth": 0, "saved": None, "sink": None}


@contextlib.contextmanager
def _suppress_output():
    """Silence sys.stdout/sys.stderr while at least one caller is inside the block.

    sys.stdout and sys.stderr are process-wide, so when several worker threads
    suppress output at once a plain save/restore per thread would restore the
    wrong stream. A lock-protected depth counter makes the first entrant install
    the sink and the last one to leave restore the original streams.
    """
    with _SUPPRESS_LOCK:
        if _suppress_state["depth"] == 0:
            if _suppress_state["sink"] is None:
                _suppress_state["sink"] = open(os.devnull, "w")
            _suppress_state["saved"] = (sys.stdout, sys.stderr)
            sys.stdout = sys.stderr = _suppress_state["sink"]
        _suppress_state["depth"] += 1
    try:
        yield
    finally:
        with _SUPPRESS_LOCK:
            _suppress_state["depth"] -= 1
            if _suppress_state["depth"] == 0:
                sys.stdout, sys.stderr = _suppress_state["saved"]
                _suppress_state["saved"] = None
                # The sink is deliberately left open: other objects created
                # while output was suppressed (e.g. a progress bar) may still
                # hold a reference to it and write to it later.


def evaluate_with_textual_verified_textgrad(row_data, num_iterations=5):
    """Optimize one row's solution with TextGrad, verifying each loss first.

    Identical to the plain TextGrad loop except that, in every round, the loss
    output is passed through a TextualVerifier and the loss value is replaced
    by the verifier's result before backpropagation.

    Args:
        row_data: Mapping with the keys "id", "raw_solution", "correct_answer",
            "source" and "subject" (one dataset row as a dict).
        num_iterations: Number of loss/verify/backward/step rounds to run.
            Defaults to 5, the value this experiment was originally hard-coded to.

    Returns:
        dict: The five input fields copied through unchanged, plus
        "solution_1" ... "solution_<num_iterations>" holding the solution text
        after each optimizer step.

    Notes:
        stdout/stderr are silenced only for the duration of the call and are
        restored afterwards, also when an exception propagates.
    """
    with _suppress_output():
        result = {
            key: row_data[key]
            for key in ("id", "raw_solution", "correct_answer", "source", "subject")
        }

        # The solution text is the only variable that receives gradients.
        solution = Variable(row_data["raw_solution"],
                            requires_grad=True,
                            role_description="Solution to the math question")
        # NOTE: the prompt text (including its embedded whitespace) is part of
        # the experiment and is intentionally left untouched.
        loss_system_prompt = Variable("""You will evaluate a solution to a math question. 
                                    Do not attempt to solve it yourself, do not give a solution, 
                                    only identify errors. Be super concise.""",
                                      requires_grad=False,
                                      role_description="system prompt")
        optimizer = TextualGradientDescent([solution])
        loss = TextLoss(loss_system_prompt, engine=engine)
        verifier = TextualVerifier(verifier_engine=engine, step_eval_iterations=3, logger=False)

        for i in range(1, num_iterations + 1):
            optimizer.zero_grad()  # Clean gradients from the previous round
            loss_result = loss(solution)

            # Replace the raw loss text with the verifier's output so that the
            # backward pass is driven by the verified feedback.
            verified_result = verifier.verify(instance=solution,
                                              prompt=loss_system_prompt,
                                              calculation=loss_result)
            loss_result.set_value(verified_result.value)

            loss_result.backward()
            optimizer.step()
            # Snapshot the text after this step so every iterate can be scored later.
            result[f"solution_{i}"] = solution.value

    return result

## Running Evaluation

### Raw TextGrad

In [7]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import sys
import time

# Worker calls redirect sys.stdout / sys.stderr, which are process-wide, while
# they run. Keep references to the notebook's own streams so the progress bar
# and the final summary stay visible, and so the streams are put back when the
# run ends (normally or with an error).
notebook_stdout, notebook_stderr = sys.stdout, sys.stderr

results = []
start_time = time.time()

try:
    with ThreadPoolExecutor(max_workers=8) as executor:
        # Submit all tasks
        futures = [
            executor.submit(evaluate_with_raw_textgrad, row.to_dict())
            for _, row in initial_solution.iterrows()
        ]

        # Progress is written to the notebook's real stderr explicitly, because
        # sys.stderr may already be redirected by a running worker.
        # Results arrive in completion order, not dataset order.
        for future in tqdm(as_completed(futures), total=len(futures),
                           desc="Processing", file=notebook_stderr):
            result = future.result()  # re-raises any exception from the worker
            if result is not None:
                results.append(result)
finally:
    sys.stdout, sys.stderr = notebook_stdout, notebook_stderr

raw_textgrad = pd.DataFrame(results)

print(f"Completed in {time.time() - start_time:.1f} seconds")

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nThe Michael reaction involves the addition of a nucleophile to an α,β-unsaturated carbonyl compound.\n\n**Reaction A:**\n* **Nucleophile:** methyl 2-oxocyclohexane-1-carboxylate (specifically, the enolate formed by deprotonation at the alpha position to the ester group)\n* **Michael acceptor:** 2,4-dimethyl-1-(vinylsulfinyl)benzene\nThe nucleophile attacks the β-carbon of the Michael acceptor. The resulting product is methyl 1-(2-((2,4-dimethylphenyl)sulfinyl)ethyl)-2-oxocyclohexane-1-carboxylate.\n\n**Reaction B:**\n* **Nucleophile:** ethyl 2-ethylbutanoate (specifically, the enolate formed by deprotonation at the alpha position)\n* **Michael acceptor:** me

In [None]:
# Persist the per-iteration solutions; the DataFrame index is not written.
raw_textgrad.to_csv(path_or_buf='csv/raw_textgrad.csv', index=False)

### TextGrad with TextualVerifier