# Solution Optimization Evaluation: Raw TextGrad

In [48]:
import pandas as pd
import textgrad as tg
from textgrad.engine import get_engine
from textgrad.variable import Variable
from textgrad.optimizer import TextualGradientDescent
from textgrad.verifier import TextualVerifier
from textgrad.loss import TextLoss

## Load Datasets

In [49]:
# Read the initial solutions table from disk. The file's header has an unnamed
# first column, which pandas surfaces as "Unnamed: 0" in the rendered frame.
initial_solution = pd.read_csv(filepath_or_buffer="csv/initial_solution.csv")
# A bare name on the last line makes the notebook render the frame.
initial_solution

Unnamed: 0,id,formatted_question,raw_solution,correct_answer,source,subject
0,4,Answer the following multiple choice question....,Maxwell's equations in our universe are:\n\n1....,A,GPQA-Diamond,-
1,2,Answer the following multiple choice question....,Here's how we can determine the number of carb...,B,GPQA-Diamond,-
2,10,Answer the following multiple choice question....,We need to determine which planet has the high...,C,GPQA-Diamond,-
3,1,Answer the following multiple choice question....,The energy-time uncertainty principle states t...,C,GPQA-Diamond,-
4,9,Answer the following multiple choice question....,Let's analyze the symmetry of each molecule:\n...,D,GPQA-Diamond,-
5,6,Answer the following multiple choice question....,"The potential is given by:\nV(r, θ) = (1/2)kr^...",B,GPQA-Diamond,-
6,8,Answer the following multiple choice question....,Here's how we can analyze the results and dete...,B,GPQA-Diamond,-
7,3,Answer the following multiple choice question....,The given state is $|\psi\rangle = 0.5|\uparro...,B,GPQA-Diamond,-
8,7,Answer the following multiple choice question....,The process described is $\gamma\gamma\rightar...,C,GPQA-Diamond,-
9,5,Answer the following multiple choice question....,Here's how we can find the eigenvector:\n\n1. ...,A,GPQA-Diamond,-


In [50]:
# Build the test set: keep at most 50 rows from each dataset
# (GPQA-Diamond, MMLU-ML, MMLU-CP), i.e. up to 150 rows in total.

df_gpqa = initial_solution.loc[initial_solution['source'].eq('GPQA-Diamond')].iloc[:50]
df_mmlu_ml = initial_solution.loc[initial_solution['source'].eq('MMLU-ML')].iloc[:50]
df_mmlu_cp = initial_solution.loc[initial_solution['source'].eq('MMLU-CP')].iloc[:50]

# Stack the three slices and renumber the index from 0.
df_test = pd.concat(objs=[df_gpqa, df_mmlu_ml, df_mmlu_cp], ignore_index=True)

df_test

Unnamed: 0,id,formatted_question,raw_solution,correct_answer,source,subject
0,4,Answer the following multiple choice question....,Maxwell's equations in our universe are:\n\n1....,A,GPQA-Diamond,-
1,2,Answer the following multiple choice question....,Here's how we can determine the number of carb...,B,GPQA-Diamond,-
2,10,Answer the following multiple choice question....,We need to determine which planet has the high...,C,GPQA-Diamond,-
3,1,Answer the following multiple choice question....,The energy-time uncertainty principle states t...,C,GPQA-Diamond,-
4,9,Answer the following multiple choice question....,Let's analyze the symmetry of each molecule:\n...,D,GPQA-Diamond,-
5,6,Answer the following multiple choice question....,"The potential is given by:\nV(r, θ) = (1/2)kr^...",B,GPQA-Diamond,-
6,8,Answer the following multiple choice question....,Here's how we can analyze the results and dete...,B,GPQA-Diamond,-
7,3,Answer the following multiple choice question....,The given state is $|\psi\rangle = 0.5|\uparro...,B,GPQA-Diamond,-
8,7,Answer the following multiple choice question....,The process described is $\gamma\gamma\rightar...,C,GPQA-Diamond,-
9,5,Answer the following multiple choice question....,Here's how we can find the eigenvector:\n\n1. ...,A,GPQA-Diamond,-


## Experiment

In [51]:
# One model name drives both engines: `engine` is handed to the loss below,
# and the same model is registered as TextGrad's backward engine.
BACKBONE_MODEL = "gemini-1.5-pro"

engine = get_engine(BACKBONE_MODEL)
tg.set_backward_engine(BACKBONE_MODEL, override=True)

In [52]:
def evaluate_with_raw_textgrad(row_data, n_iterations=5):
    """Iteratively refine one raw solution with TextGrad and record every iterate.

    Args:
        row_data: Mapping for one dataset row (e.g. ``row.to_dict()``) with the
            keys ``id``, ``raw_solution``, ``correct_answer``, ``source`` and
            ``subject``. If it also carries ``formatted_question``, that text is
            used directly; otherwise the question is looked up in the
            notebook-level ``initial_solution`` frame.
        n_iterations: Number of optimisation steps to run. Defaults to 5.

    Returns:
        A dict with the identifying fields copied from ``row_data`` plus the
        keys ``solution_1`` ... ``solution_<n_iterations>`` holding the solution
        text after each optimiser step, or ``None`` when no question can be
        found for the row.
    """
    # Prefer the question that travels with the row. Looking it up by "id"
    # alone takes the first hit, which is only correct if ids are unique across
    # all sources; the fallback therefore narrows by "source" when it can.
    formatted_question = row_data.get("formatted_question")
    if formatted_question is None:
        candidates = initial_solution[initial_solution["id"] == row_data["id"]]
        same_source = candidates[candidates["source"] == row_data["source"]]
        if not same_source.empty:
            candidates = same_source
        if candidates.empty:
            return None
        formatted_question = candidates.iloc[0]["formatted_question"]

    result = {
        "id": row_data["id"],
        "raw_solution": row_data["raw_solution"],
        "correct_answer": row_data["correct_answer"],
        "source": row_data["source"],
        "subject": row_data["subject"]
    }

    # NOTE(review): both prompts say "math question" although the sources used
    # in this notebook are GPQA-Diamond / MMLU; the wording is kept unchanged so
    # results stay comparable with earlier runs.
    solution = Variable(row_data["raw_solution"],
                    requires_grad=True,
                    role_description=f"Solution to the math question: {formatted_question}")
    loss_system_prompt = Variable("""You will evaluate a solution to a math question. 
                                    Do not attempt to solve it yourself, do not give a solution, 
                                    only identify errors. Be super concise.""",
                                    requires_grad=False,
                                    role_description="system prompt")
    optimizer = TextualGradientDescent([solution])
    loss = TextLoss(loss_system_prompt, engine=engine)

    for i in range(1, n_iterations + 1):
        optimizer.zero_grad()  # Clean gradients
        loss_result = loss(solution)

        loss_result.backward()
        optimizer.step()
        # Snapshot the solution text after this step (keys are 1-based).
        result[f"solution_{i}"] = solution.value

    return result

## Running Evaluation

### Raw TextGrad

In [53]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from tqdm import tqdm
import time

# Materialise the rows once so a failure can be traced back to its input row.
rows = [row.to_dict() for _, row in df_test.iterrows()]

results_by_position = {}  # submission position -> result dict
failures = []             # one entry per row whose evaluation raised
start_time = time.time()

with ThreadPoolExecutor(max_workers=32) as executor:
    # Submit all tasks, remembering where each one sits in df_test
    future_to_position = {
        executor.submit(evaluate_with_raw_textgrad, row_data): position
        for position, row_data in enumerate(rows)
    }

    # Use tqdm for progress tracking
    for future in tqdm(as_completed(future_to_position), total=len(future_to_position), desc="Processing"):
        position = future_to_position[future]
        try:
            result = future.result()
        except Exception as exc:
            # future.result() re-raises the worker's exception; record it and
            # continue so one bad row does not discard every finished result.
            failures.append({"id": rows[position].get("id"), "error": repr(exc)})
            continue
        if result is not None:
            results_by_position[position] = result

# Restore df_test order: as_completed yields in completion order, which
# differs from run to run.
results = [results_by_position[position] for position in sorted(results_by_position)]
raw_textgrad = pd.DataFrame(results)

print(f"Completed in {time.time() - start_time:.1f} seconds")
if failures:
    print(f"{len(failures)} row(s) failed: {failures}")

# Create the output folder first so the finished run is not lost on a
# missing directory.
output_path = Path('results/raw_textgrad.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
raw_textgrad.to_csv(output_path, index=False)

Processing:   0%|          | 0/10 [00:00<?, ?it/s]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> The energy-time uncertainty principle states that the product of the uncertainty in energy (ΔE) and the uncertainty in time (Δt) is greater than or equal to h-bar/2, where h-bar is the reduced Planck constant (h/2π).  We can approximate this as ΔE * Δt ≈ ħ.\n\nTo distinguish between two energy levels, their energy difference must be greater than the uncertainty in their energies. The lifetimes of the states give us the uncertainty in time.\n\nThe shorter lifetime, 10^-9 sec, corresponds to the larger energy uncertainty.  We can use this lifetime to determine the minimum energy difference needed to resolve the two states.\n\nΔE ≈ ħ / Δt\n\nħ ≈ 6.582 * 10^-16 eV*s\nΔt = 1

Processing:  10%|█         | 1/10 [00:15<02:16, 15.18s/it]

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> The given state is $|\\psi\\rangle = 0.5|\\uparrow\\rangle + \\frac{\\sqrt{3}}{2}|\\downarrow\\rangle$.\nThe operators are $\\sigma_z$ and $\\sigma_x$. In the $z$ basis,\n$$ \\sigma_z = \\begin{pmatrix} 1 & 0 \\\\ 0 & -1 \\end{pmatrix}, \\quad \\sigma_x = \\begin{pmatrix} 0 & 1 \\\\ 1 & 0 \\end{pmatrix} $$\nWe want to find the expectation value of $10\\sigma_z + 5\\sigma_x$.\n$$ \\langle\\psi|(10\\sigma_z + 5\\sigma_x)|\\psi\\rangle = 10\\langle\\psi|\\sigma_z|\\psi\\rangle + 5\\langle\\psi|\\sigma_x|\\psi\\rangle $$\nFirst, let\'s calculate $\\langle\\psi|\\sigma_z|\\psi\\rangle$:\n$$ \\sigma_z|\\psi\\rangle = 0.5|\\uparrow\\rangle - \\frac{\\sqrt{3}}{2}|\\downarrow\\r

Processing:  20%|██        | 2/10 [00:47<03:21, 25.21s/it]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> If magnetic monopoles exist, Gauss's law for magnetism is modified to include a magnetic charge density term, changing the divergence of the magnetic field.  Faraday's law, relating to the curl of the electric field, remains unchanged. Ampere-Maxwell's law is modified with the addition of a magnetic current term, which alters the curl of the magnetic field. Therefore, the equations related to the divergence and the curl of the magnetic field are different.\n\nAnswer: B </LM_INPUT>\n\n<LM_OUTPUT> The prompt mentions a magnetic *charge* density, implying the divergence of **B** is affected, not the curl.  The answer incorrectly states the curl of **B** is changed by the m

Processing:  30%|███       | 3/10 [00:50<01:44, 14.94s/it]

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> The process described is $\\gamma\\gamma\\rightarrow e^{+}e^{-}$. This process can occur if the center-of-mass energy of the two photons is greater than or equal to the rest mass energy of the electron-positron pair.\n\nThe rest mass energy of an electron (or positron) is $m_e c^2 = 0.511$ MeV.  Therefore, the center-of-mass energy squared, $(E_{cm})^2$, must be greater than or equal to $(2m_e c^2)^2 = (2 \\times 0.511 \\text{ MeV})^2 = (1.022 \\text{ MeV})^2 = 1.044484 \\text{ MeV}^2$.\n\nLet $E$ be the energy of the high-energy gamma ray and $\\epsilon$ be the energy of the CMB photon.  The energy of CMB photons scales with redshift as $\\epsilon = \\epsilon_0 (1+z)$,

Processing:  60%|██████    | 6/10 [01:04<00:25,  6.26s/it]

["Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> Let $\\rho$ be the density, $M$ be the mass, and $R$ be the radius of a planet.  We know that $\\rho = \\frac{M}{V}$, where $V = \\frac{4}{3}\\pi R^3$ is the volume.  Thus, $\\rho = \\frac{3M}{4\\pi R^3}$.\n\na) An Earth-mass and Earth-radius planet has the same density as Earth, which is approximately 5.5 g/cm$^3$.\n\nb) This planet has a density of 5.5 g/cm$^3$, as stated.\n\nc) This planet has the same composition as Earth, so its density is also approximately 5.5 g/cm$^3$.\n\nd) This planet has the same composition as Earth, so its density is also approximately 5.5 g/cm$^3$.\n\nComparing the densities:\n\na) $\\rho_a \\approx 5.5$ g/cm$^3$\nb) $\\rho_b \\approx 5.5$

Processing:  70%|███████   | 7/10 [01:10<00:18,  6.21s/it]

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> Here\'s how we find the eigenvector:\n\n1. **Define the operator:** The operator $\\vec{P}$ along $\\vec{n}$ in the x-z plane is given by $\\vec{P} = P_x \\cos\\theta + P_z \\sin\\theta$, where $\\theta$ is the angle between $\\vec{n}$ and the x-axis.\n\n2. **Substitute the given matrices:**\n   $P_x = \\frac{\\hbar}{2} \\begin{pmatrix} 0 & 1 \\\\ 1 & 0 \\end{pmatrix}$\n   $P_z = \\frac{\\hbar}{2} \\begin{pmatrix} 1 & 0 \\\\ 0 & -1 \\end{pmatrix}$\n\n   So, $\\vec{P} = \\frac{\\hbar}{2} \\begin{pmatrix} \\sin\\theta & \\cos\\theta \\\\ \\cos\\theta & -\\sin\\theta \\end{pmatrix}$\n\n3. **Eigenvalue equation:** We seek the eigenvector corresponding to $+\\hbar/2$. Let th

Processing:  80%|████████  | 8/10 [01:26<00:18,  9.37s/it]

['Here is a conversation:\n\n<CONVERSATION><LM_SYSTEM_PROMPT> You will evaluate a solution to a math question. \n                                    Do not attempt to solve it yourself, do not give a solution, \n                                    only identify errors. Be super concise. </LM_SYSTEM_PROMPT>\n\n<LM_INPUT> Here\'s how we find the eigenvector:\n\n1. **Define the operator:** The operator $\\vec{P}$ along $\\vec{n}$ in the x-z plane is given by $\\vec{P} = P_x \\cos\\theta + P_z \\sin\\theta$, where $\\theta$ is the angle between $\\vec{n}$ and the x-axis.\n\n2. **Substitute the given matrices:**\n   $P_x = \\frac{\\hbar}{2} \\begin{pmatrix} 0 & 1 \\\\ 1 & 0 \\end{pmatrix}$\n   $P_z = \\frac{\\hbar}{2} \\begin{pmatrix} 1 & 0 \\\\ 0 & -1 \\end{pmatrix}$\n\n   So, $\\vec{P} = \\frac{\\hbar}{2} \\begin{pmatrix} \\sin\\theta & \\cos\\theta \\\\ \\cos\\theta & -\\sin\\theta \\end{pmatrix}$\n\n3. **Eigenvalue equation:** We seek the eigenvector corresponding to $+\\hbar/2$. Let th

Processing: 100%|██████████| 10/10 [01:50<00:00, 11.05s/it]

Completed in 110.6 seconds



