# Solution Optimization Analysis

In [72]:
import re
import pandas as pd
from collections import Counter

## Load Datasets

In [73]:
raw_textgrad = pd.read_csv("results/raw_textgrad.csv")
raw_textgrad

Unnamed: 0,id,raw_solution,correct_answer,source,subject,solution_1,solution_2,solution_3,solution_4,solution_5
0,2,Here's how we can determine the number of carb...,A,GPQA-Diamond,-,Here's how we can determine the number of carb...,Here's how we can determine the number of carb...,Here's how we can determine the number of carb...,Here's how we can determine the number of carb...,Here's how we can determine the number of carb...
1,1,The energy-time uncertainty principle states t...,D,GPQA-Diamond,-,The energy-time uncertainty principle states t...,The energy-time uncertainty principle states t...,The energy-time uncertainty principle states t...,The energy-time uncertainty principle states t...,The energy-time uncertainty principle states t...


## Functions

In [74]:
def extract_answer(text):
    # Now extract correctly
    match = re.search(r"(?i)Answer\s*:\s*([A-D])", text, re.IGNORECASE)
    if match:
        return match.group(1).upper()
    return None


In [75]:
def process_answer(row_data):
    data = {
        "correct_answer": row_data["correct_answer"],
        "initial_answer": extract_answer(row_data["raw_solution"]),
        "solution_answer_1": extract_answer(row_data["solution_1"]),
        "solution_answer_2": extract_answer(row_data["solution_2"]),
        "solution_answer_3": extract_answer(row_data["solution_3"]),
        "solution_answer_4": extract_answer(row_data["solution_4"]),
        "final_answer": extract_answer(row_data["solution_5"])
    }
    # Get zero-shot & final result
    data["zero_shot_result"] = data["correct_answer"] == data["initial_answer"]
    data["final_result"] = data["correct_answer"] == data["final_answer"]

    # Majority voting among solution_1 to solution_5
    voted_answers = [
        data["solution_answer_1"],
        data["solution_answer_2"],
        data["solution_answer_3"],
        data["solution_answer_4"],
        data["final_answer"]
    ]
    
    # Filter out None values
    voted_answers = [a for a in voted_answers if a is not None]

    # Get majority answer
    if voted_answers:
        majority_vote = Counter(voted_answers).most_common(1)[0][0]
    else:
        majority_vote = None

    data["majority_answer"] = majority_vote
    data["majority_result"] = data["correct_answer"] == majority_vote

    return data


In [76]:
def process_result_data(result_df):
    processed_answer = []
    for index, row in result_df.iterrows():
        processed_answer.append(process_answer(row))
    return processed_answer

In [77]:
def calculate_true_percentages(df, columns):
    result = {}
    total = len(df)
    for col in columns:
        true_count = df[col].sum()  # Since True == 1 and False == 0
        result[col] = round((true_count / total) * 100, 2)
    return result

## 1. Analysis Raw TextGrad

In [78]:
columns_to_check = ["zero_shot_result", "final_result", "majority_result"]

In [79]:
# Raw TextGrad
raw_textgrad_processed_answer_list = process_result_data(raw_textgrad)
raw_textgrad_processed_answer = pd.DataFrame(raw_textgrad_processed_answer_list)
raw_textgrad_processed_answer

Unnamed: 0,correct_answer,initial_answer,solution_answer_1,solution_answer_2,solution_answer_3,solution_answer_4,final_answer,zero_shot_result,final_result,majority_answer,majority_result
0,A,,,,,,,False,False,,False
1,D,,,,,,,False,False,,False


In [80]:
# Drop None Rows
clean_raw_textgrad_processed_answer = raw_textgrad_processed_answer.dropna()
clean_raw_textgrad_processed_answer

Unnamed: 0,correct_answer,initial_answer,solution_answer_1,solution_answer_2,solution_answer_3,solution_answer_4,final_answer,zero_shot_result,final_result,majority_answer,majority_result


In [81]:

percentages = calculate_true_percentages(clean_raw_textgrad_processed_answer, columns_to_check)
percentages

  result[col] = round((true_count / total) * 100, 2)


{'zero_shot_result': np.float64(nan),
 'final_result': np.float64(nan),
 'majority_result': np.float64(nan)}