# Solution Optimization Evaluation: Raw TextGrad

In [6]:
import pandas as pd
import textgrad as tg
from textgrad.engine import get_engine
from textgrad.variable import Variable
from textgrad.optimizer import TextualGradientDescent
from textgrad.verifier import TextualVerifier
from textgrad.loss import TextLoss

## Load Datasets

In [7]:
# Load the initial solutions; the relative path is resolved against the
# kernel's current working directory.
initial_solution_path = "csv/initial_solution.csv"
initial_solution = pd.read_csv(initial_solution_path)
initial_solution

Unnamed: 0,id,formatted_question,raw_solution,correct_answer,source,subject
0,1,\n Two quantum states with energies E1 and ...,<Solution>\nThe energy-time uncertainty princi...,B,GPQA-Diamond,-
1,2,\n trans-cinnamaldehyde was treated with me...,<Solution>\n1. **Reaction 1:** trans-cinnamald...,A,GPQA-Diamond,-
2,8,\n You perform a high-throughput experiment...,<Solution>\nHere's how we can analyze the resu...,B,GPQA-Diamond,-
3,3,\n A spin-half particle is in a linear supe...,<Solution>\nLet the given state be $|\psi\rang...,D,GPQA-Diamond,-
4,6,\n A quantum mechanical particle of mass m ...,"<Solution>\nThe potential is given by:\nV(r, θ...",D,GPQA-Diamond,-
...,...,...,...,...,...,...
407,404,"\n Under certain conditions, a beam of elec...",<Solution>\nThe de Broglie wavelength of the e...,C,MMLU-CP,college_physics
408,384,\n Two spaceships approach Earth with equal...,<Solution>\nLet $v$ be the speed of each space...,C,MMLU-CP,college_physics
409,411,\n Positronium is an atom formed by an elec...,<Solution>\nThe energy levels of positronium a...,C,MMLU-CP,college_physics
410,396,\n A photon strikes an electron of mass m t...,<Solution>\nLet $E_\gamma$ be the energy of th...,C,MMLU-CP,college_physics


In [8]:
# Evaluation subset: at most the first 50 rows of each source dataset
# (150 rows in total when every source has at least 50 rows).
source_column = initial_solution["source"]

df_gpqa = initial_solution.loc[source_column == "GPQA-Diamond"].head(50)
df_mmlu_ml = initial_solution.loc[source_column == "MMLU-ML"].head(50)
df_mmlu_cp = initial_solution.loc[source_column == "MMLU-CP"].head(50)

# Stack the three subsets in a fixed order and renumber the rows from 0.
df_test = pd.concat(
    [df_gpqa, df_mmlu_ml, df_mmlu_cp],
    ignore_index=True,
)

df_test

Unnamed: 0,id,formatted_question,raw_solution,correct_answer,source,subject
0,1,\n Two quantum states with energies E1 and ...,<Solution>\nThe energy-time uncertainty princi...,B,GPQA-Diamond,-
1,2,\n trans-cinnamaldehyde was treated with me...,<Solution>\n1. **Reaction 1:** trans-cinnamald...,A,GPQA-Diamond,-
2,8,\n You perform a high-throughput experiment...,<Solution>\nHere's how we can analyze the resu...,B,GPQA-Diamond,-
3,3,\n A spin-half particle is in a linear supe...,<Solution>\nLet the given state be $|\psi\rang...,D,GPQA-Diamond,-
4,6,\n A quantum mechanical particle of mass m ...,"<Solution>\nThe potential is given by:\nV(r, θ...",D,GPQA-Diamond,-
...,...,...,...,...,...,...
145,361,\n A grating spectrometer can just barely r...,<Solution>\nThe resolving power R of a grating...,C,MMLU-CP,college_physics
146,339,\n A proton moves in the +z-direction after...,<Solution>\nHere's how to solve this problem:\...,B,MMLU-CP,college_physics
147,364,"\n A net force F_A acts on object A, and a ...",<Solution>\nLet m_A be the mass of object A an...,B,MMLU-CP,college_physics
148,348,"\n The negative muon, mu^-, has properties ...",<Solution>\nA muon (μ) is a fundamental subato...,B,MMLU-CP,college_physics


## Experiment

In [9]:
# The same model name is used for the engine handed to TextLoss (see
# evaluate_with_raw_textgrad) and for TextGrad's global backward engine.
model_name = "gemini-1.5-pro"
engine = get_engine(model_name)
tg.set_backward_engine(model_name, override=True)

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
def evaluate_with_raw_textgrad(row_data, num_iterations=5):
    """Refine one solution with plain TextGrad and record every intermediate version.

    The question text is looked up in the module-level ``initial_solution``
    frame by ``row_data["id"]``. The raw solution is then wrapped in a trainable
    ``Variable`` and optimised with ``TextualGradientDescent`` against a
    ``TextLoss`` built on the module-level ``engine``.

    Args:
        row_data: Mapping with at least the keys ``"id"``, ``"raw_solution"``,
            ``"correct_answer"``, ``"source"`` and ``"subject"``.
        num_iterations: Number of optimisation steps to run. Defaults to 5,
            which matches the previously hard-coded behaviour.

    Returns:
        A dict holding the five input fields listed above plus one
        ``"solution_<i>"`` entry (``i`` starting at 1) with the solution text
        after each optimisation step, or ``None`` when the id is not present in
        ``initial_solution``.
    """
    # NOTE(review): the caller in this notebook passes rows of df_test, which
    # already carry "formatted_question"; the lookup is kept so that rows
    # without that field keep working exactly as before.
    match = initial_solution[initial_solution["id"] == row_data["id"]]
    if match.empty:
        return None  # unknown id: the caller skips None results
    formatted_question = match.iloc[0]["formatted_question"]

    result = {
        "id": row_data["id"],
        "raw_solution": row_data["raw_solution"],
        "correct_answer": row_data["correct_answer"],
        "source": row_data["source"],
        "subject": row_data["subject"]
    }

    # The solution is the only variable the optimizer is allowed to rewrite.
    solution = Variable(row_data["raw_solution"],
                    requires_grad=True,
                    role_description=f"Solution to the math question: {formatted_question}")
    # The loss prompt is fixed (requires_grad=False); its text is runtime data
    # and is kept verbatim, including the embedded indentation.
    loss_system_prompt = Variable("""You will evaluate a solution to a math question. 
                                    Do not attempt to solve it yourself, do not give a solution, 
                                    only identify errors. Be super concise. The result still on format
                                    <Solution></Solution><Answer>$LETTER</Answer>""",
                                    requires_grad=False,
                                    role_description="system prompt")
    optimizer = TextualGradientDescent([solution])
    loss = TextLoss(loss_system_prompt, engine=engine)

    # Steps are numbered from 1 so the result keys read solution_1 .. solution_N.
    for i in range(1, num_iterations + 1):
        optimizer.zero_grad()  # Clean gradients left over from the previous step
        loss_result = loss(solution)

        loss_result.backward()
        optimizer.step()
        # Snapshot the text after this step so every iteration can be scored later.
        result[f"solution_{i}"] = solution.value

    return result

## Running Evaluation

### Raw TextGrad

In [23]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

# Materialise the rows up front so each task can be tied back to its input
# position (currently only the first 10 rows of df_test are processed).
rows_to_process = [row.to_dict() for _, row in df_test[:10].iterrows()]

# One slot per input row: filling by position keeps the saved rows in input
# order, independent of which task happens to finish first.
ordered_results = [None] * len(rows_to_process)
failures = []
start_time = time.time()

with ThreadPoolExecutor(max_workers=32) as executor:
    # Submit all tasks, remembering which input position each future belongs to
    future_to_position = {
        executor.submit(evaluate_with_raw_textgrad, row_data): position
        for position, row_data in enumerate(rows_to_process)
    }

    # Use tqdm for progress tracking
    for future in tqdm(as_completed(future_to_position), total=len(future_to_position), desc="Processing"):
        position = future_to_position[future]
        try:
            ordered_results[position] = future.result()
        except Exception as error:
            # One failed task must not discard the work already completed by
            # the others; record it and report it after the run.
            failures.append((rows_to_process[position].get("id"), repr(error)))

# Skipped rows (None) and failed rows are left out of the output frame.
results = [result for result in ordered_results if result is not None]
raw_textgrad = pd.DataFrame(results)

print(f"Completed in {time.time() - start_time:.1f} seconds")
for failed_id, message in failures:
    print(f"Row id={failed_id} failed: {message}")
raw_textgrad.to_csv('csv/raw_textgrad.csv', index=False)

Processing:   0%|          | 0/10 [00:00<?, ?it/s]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nIn our universe, Maxwell's equations for magnetism are:\n\n1. ∇ ⋅ B = 0 (Divergence of B is zero - no magnetic monopoles)\n2. ∇ × B = μ₀J + μ₀ε₀ ∂E/∂t (Ampere-Maxwell's Law - relates the curl of B to current density and changing electric field)\n\nIn a universe with magnetic monopoles, magnetic field lines can start and end at these monopoles, similar to how electric field lines start and end at charges. This means the divergence of the magnetic field would no longer be zero.  Instead, it would be proportional to the magnetic charge density (ρₘ), ana

Processing:  10%|█         | 1/10 [00:15<02:22, 15.83s/it]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nIn our universe, Maxwell's equations for magnetism are:\n\n1. ∇ ⋅ B = 0 (Divergence of B is zero - no magnetic monopoles)\n2. ∇ × B = μ₀J + μ₀ε₀ ∂E/∂t (Ampere-Maxwell's Law - relates the curl of B to current density and changing electric field)\n\nIn a universe with magnetic monopoles, magnetic field lines can start and end at these monopoles, similar to how electric field lines start and end at charges. This means the divergence of the magnetic field would no longer be zero.  Instead, it would be proportional to the magnetic charge density (ρₘ), ana

Processing:  30%|███       | 3/10 [01:02<02:08, 18.41s/it]

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nThe operator $\\vec{P}$ can be written as $P_x \\hat{x} + P_y \\hat{y} + P_z \\hat{z}$.\nGiven that $\\vec{n}$ lies in the x-z plane, we can write $\\vec{n} = \\sin\\theta \\hat{x} + \\cos\\theta \\hat{z}$, where $\\theta$ is the angle $\\vec{n}$ makes with the z-axis.\nThe operator along $\\vec{n}$ is given by\n$\\vec{P}\\cdot\\vec{n} = P_x \\sin\\theta + P_z \\cos\\theta$.\nSubstituting the given matrices for $P_x$ and $P_z$, we get\n$\\vec{P}\\cdot\\vec{n} = \\frac{\\hbar}{2} \\begin{pmatrix} 0 & \\sin\\theta \\\\ \\sin\\theta & 0 \\end{pmatrix} +

Processing:  40%|████      | 4/10 [01:03<01:09, 11.53s/it]

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nHere\'s how we can analyze the results and determine the relationship between the genes:\n\n1. **G2\'s Importance:** The g2 mutant (and any double mutant including g2) shows 0% resistance. This indicates that G2 is essential for resistance to anthracnose. Since the problem states one gene is a transcription factor acting upstream of others, G2 is likely that transcription factor.\n\n2. **G1 and G3\'s Roles:** Individually, g1 and g3 mutants show some resistance (75% and 50%, respectively). This suggests they contribute to resistance but aren\'t as cr

Processing:  60%|██████    | 6/10 [01:06<00:23,  5.98s/it]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nThe probability density function is given by the square of the magnitude of the wave function. Let $\\psi(x)$ be the wave function. Then the probability density $P(x)$ is given by $|\\psi(x)|^2$.\n\nIn our case, $\\psi(x) = \\frac{a}{\\sqrt{1+x}} - 0.5i$.\nSo,\n$$ P(x) = |\\psi(x)|^2 = \\left(\\frac{a}{\\sqrt{1+x}} - 0.5i\\right)\\left(\\frac{a}{\\sqrt{1+x}} + 0.5i\\right) = \\frac{a^2}{1+x} + 0.25 $$\nSince the particles are only found between x=1 and x=3, the integral of the probability density function over this interval must be equal to 1.  Since

Processing:  70%|███████   | 7/10 [01:24<00:29,  9.95s/it]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nThe eigenvalue equation is given by Ay(φ) = a(φ), where Ay = cS.\nThe matrix S is given by:\n```\nS = [[0, -i],\n     [i,  0]]\n```\nwhere i is the imaginary unit.\nWe have c = h/4π. Thus,\n```\nAy = (h/4π) * [[0, -i],\n               [i,  0]]\n```\nTo find the eigenvalues, we need to solve the characteristic equation det(Ay - aI) = 0, where I is the identity matrix.\n```\ndet(Ay - aI) = det([[(-a), (-ih/4π)],\n                   [(ih/4π), (-a)]])\n             = a^2 - (ih/4π)(-ih/4π)\n             = a^2 - (-h^2/16π^2)\n             = a^2 + (h^2/16π^

Processing:  90%|█████████ | 9/10 [01:33<00:07,  7.17s/it]

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nThe probability density function is given by the square of the magnitude of the wave function. Let $\\psi(x)$ be the wave function. Then the probability density $P(x)$ is given by $|\\psi(x)|^2$.\n\nThe wave function is given by $\\psi(x) = \\frac{a}{\\sqrt{1+x}} - 0.5i$.\nThe magnitude of $\\psi(x)$ is $|\\psi(x)| = \\sqrt{\\left(\\frac{a}{\\sqrt{1+x}}\\right)^2 + (-0.5)^2}$.\nSo,\n$$ P(x) = |\\psi(x)|^2 = \\left(\\frac{a}{\\sqrt{1+x}}\\right)^2 + (-0.5)^2 = \\frac{a^2}{1+x} + 0.25 $$\nSince the particles are only found between x=1 and x=3, the inte

Processing: 100%|██████████| 10/10 [01:51<00:00, 11.20s/it]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. The result still on format\n                                    <Solution></Solution><Answer>$LETTER</Answer> </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> <Solution>\nThe probability density function is given by the square of the magnitude of the wave function. Let $\\psi(x)$ be the wave function. Then the probability density $P(x)$ is given by $|\\psi(x)|^2$.\n\nThe wave function is given by $\\psi(x) = \\frac{a}{\\sqrt{1+x}} - 0.5i$.\nThe magnitude of $\\psi(x)$ is $|\\psi(x)| = \\sqrt{\\left(\\frac{a}{\\sqrt{1+x}}\\right)^2 + (-0.5)^2} = \\sqrt{\\frac{a^2}{1+x} + 0.25}$.\nSo,\n$$ P(x) = |\\psi(x)|^2 = \\frac{a^2}{1+x} + 0.25 $$\nSince the particles are only found between x=1 and x=3, the integral of the probabi


