# Generate Initial Solution

In [None]:
import pandas as pd
import textgrad as tg
from textgrad.engine import get_engine

## Load Datasets
- GPQA Diamond
- MMLU Machine Learning
- MMLU College Physics

In [None]:
# Login using e.g. `huggingface-cli login` to access this dataset

# Each benchmark is read directly through an `hf://` URL into a pandas DataFrame:
# GPQA Diamond is a CSV file, the two MMLU subjects are single-file parquet test shards.
dataset_gpqa = pd.read_csv("hf://datasets/Idavidrein/gpqa/gpqa_diamond.csv")
dataset_mmlu_ml = pd.read_parquet("hf://datasets/cais/mmlu/machine_learning/test-00000-of-00001.parquet")
dataset_mmlu_cp = pd.read_parquet("hf://datasets/cais/mmlu/college_physics/test-00000-of-00001.parquet")

In [None]:
# Display the raw GPQA Diamond table for a quick visual check.
dataset_gpqa

In [None]:
# Display the raw MMLU Machine Learning table for a quick visual check.
dataset_mmlu_ml

In [None]:
# Display the raw MMLU College Physics table for a quick visual check.
dataset_mmlu_cp

## Build GeneralDatasets & Result Tables

In [None]:
# GeneralDatasets schema, one row per multiple-choice question:
#   id                       integer - unique identifier
#   question                 string  - the question text
#   correct_answer           string  - correct answer
#   incorrect_answer_1..3    string  - the three incorrect options
#   source                   string  - dataset source (GPQA, MMLU, etc.)
#   subject                  string  - subject/topic area
general_datasets_columns = (
    ['id', 'question', 'correct_answer']
    + [f'incorrect_answer_{n}' for n in (1, 2, 3)]
    + ['source', 'subject']
)

# Start from an empty table that already carries the schema above.
general_datasets = pd.DataFrame(columns=general_datasets_columns)
general_datasets

In [None]:
# Result schema, one row per question:
#   id                integer - links to GeneralDatasets.id
#   raw_solution      string  - initial solution before optimization
#   solution_1..5     string  - TextGrad iterations one through five (five is final)
#   correct_answer    string  - ground truth answer
#   source            string  - dataset source (GPQA, MMLU)
#   subject           string  - subject area
result_columns = (
    ['id', 'raw_solution']
    + [f'solution_{n}' for n in range(1, 6)]
    + ['correct_answer', 'source', 'subject']
)

# Start from an empty table that already carries the schema above.
result = pd.DataFrame(columns=result_columns)
result

In [None]:
# Rebuild general_datasets from scratch so re-running this cell never duplicates rows.
#
# Rows are collected as plain dicts and turned into a DataFrame once at the end;
# appending with `.loc[len(df)] = ...` grows the frame one row at a time, which
# gets slower with every row added.


def _gpqa_records(dataset):
    """Return one GeneralDatasets-shaped dict (without ``id``) per GPQA row."""
    return [
        {
            "question": row["Question"],
            "correct_answer": row["Correct Answer"],
            "incorrect_answer_1": row["Incorrect Answer 1"],
            "incorrect_answer_2": row["Incorrect Answer 2"],
            "incorrect_answer_3": row["Incorrect Answer 3"],
            "source": "GPQA-Diamond",
            "subject": "-",
        }
        for _, row in dataset.iterrows()
    ]


def _mmlu_records(dataset, source):
    """Return one GeneralDatasets-shaped dict (without ``id``) per MMLU row.

    Each row is read through its ``question``, ``choices``, ``answer`` and
    ``subject`` fields; ``answer`` is used as the index of the correct entry
    in ``choices``. ``source`` is the label stored in the ``source`` column.
    """
    records = []
    for _, row in dataset.iterrows():
        answer = row["answer"]
        choices = row["choices"]
        # The three remaining options, in their original order. Unpacking
        # raises ValueError when ``answer`` is not one of 0..3.
        wrong_1, wrong_2, wrong_3 = (choices[k] for k in range(4) if k != answer)
        records.append({
            "question": row["question"],
            "correct_answer": choices[answer],
            "incorrect_answer_1": wrong_1,
            "incorrect_answer_2": wrong_2,
            "incorrect_answer_3": wrong_3,
            "source": source,
            "subject": row["subject"],
        })
    return records


_records = [
    *_gpqa_records(dataset_gpqa),                  # GPQA Diamond
    *_mmlu_records(dataset_mmlu_ml, "MMLU-ML"),    # MMLU Machine Learning
    *_mmlu_records(dataset_mmlu_cp, "MMLU-CP"),    # MMLU College Physics
]

# ids are 1-based and run consecutively across the three sources.
for _next_id, _record in enumerate(_records, start=1):
    _record["id"] = _next_id

# `columns=` fixes the column order and keeps the schema even if there are no rows.
general_datasets = pd.DataFrame(_records, columns=general_datasets_columns)


In [None]:
# Display the combined question table for a quick visual check.
general_datasets

## Populate Experiment Subject

In [None]:
# Module-level engine handle; generate_initial_solution hands it to tg.BlackboxLLM.
engine = get_engine("gemini-1.5-pro")

In [None]:
import random

# Prompt skeleton for a four-option question. The placeholders {Question}, {A}, {B},
# {C} and {D} are filled in with str.format; .strip() removes the newlines that
# bracket the literal so the prompt starts and ends on text.
QUERY_TEMPLATE_MULTICHOICE = """
Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.

{Question}

A) {A}
B) {B}
C) {C}
D) {D}
""".strip()

# Build the multiple-choice prompt for one question and query the model once
def generate_initial_solution(
    question, 
    correct_answer, 
    incorrect_answer_1, 
    incorrect_answer_2, 
    incorrect_answer_3
    ):
    """Format one question as an A-D prompt and return the model's first answer.

    The correct answer is placed at a uniformly random position among the four
    options; the three incorrect answers keep their relative order.

    Returns:
        A tuple ``(formatted_question, initial_solution, correct_answer_letter)``:
        the prompt text sent to the model, the object returned by the
        ``tg.BlackboxLLM`` call, and the letter (A-D) of the correct option.
    """
    # One random draw decides which slot the correct answer occupies.
    correct_slot = random.randint(0, 3)
    options = [incorrect_answer_1, incorrect_answer_2, incorrect_answer_3]
    options.insert(correct_slot, correct_answer)
    correct_answer_letter = "ABCD"[correct_slot]

    formatted_question = QUERY_TEMPLATE_MULTICHOICE.format(
        Question=question,
        A=options[0],
        B=options[1],
        C=options[2],
        D=options[3],
    )

    STARTING_SYSTEM_PROMPT = f"""
        You are Gemini, a large language model trained by Google, based on the Gemini-1.5-Pro architecture.
        \nKnowledge cutoff: 2024-12\nCurrent date: 2025-05-01
    """

    # Neither the system prompt nor the question is optimised here, hence requires_grad=False.
    prompt_variable = tg.Variable(
        STARTING_SYSTEM_PROMPT,
        requires_grad=False,
        role_description="System prompt to the language model",
    )
    question_variable = tg.Variable(
        formatted_question,
        requires_grad=False,
        role_description="Question to the language model",
    )
    llm = tg.BlackboxLLM(engine, prompt_variable)
    initial_solution = llm(question_variable)
    return formatted_question, initial_solution, correct_answer_letter

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_single_row(row_data):
    """Generate the initial solution for one question row (thread-pool worker).

    ``row_data`` is a mapping with the keys ``id``, ``question``,
    ``correct_answer``, ``incorrect_answer_1`` to ``incorrect_answer_3``,
    ``source`` and ``subject``.

    Returns a dict holding the id, the formatted prompt, the raw solution, the
    correct answer letter, the source and the subject. Any exception raised
    while building that dict is printed together with the row id and ``None``
    is returned instead, so one failing row does not abort the whole batch.
    """
    answer_fields = (
        'question',
        'correct_answer',
        'incorrect_answer_1',
        'incorrect_answer_2',
        'incorrect_answer_3',
    )
    try:
        solver_kwargs = {field: row_data[field] for field in answer_fields}
        formatted_question, solution, correct_answer_letter = generate_initial_solution(**solver_kwargs)
        record = {
            "id": row_data["id"],
            "formatted_question": formatted_question,
            "raw_solution": solution,
            "correct_answer": correct_answer_letter,
            "source": row_data["source"],
            "subject": row_data["subject"],
        }
    except Exception as e:
        print(f"Error processing problem {row_data['id']}: {e}")
        return None
    return record

In [None]:
# Multithreaded processing: one process_single_row task per question.
all_rows = []

with ThreadPoolExecutor(max_workers=32) as executor:
    # Submit all tasks
    futures = [
        executor.submit(process_single_row, row.to_dict())
        for _, row in general_datasets.iterrows()
    ]
    total = len(futures)

    # Collect results as they finish. The loop variable is deliberately not
    # called `result`, so it cannot overwrite the `result` DataFrame that
    # holds the Result schema.
    for completed, future in enumerate(as_completed(futures), start=1):
        row_result = future.result()
        # process_single_row returns None for rows that failed; those are skipped.
        if row_result is not None:
            all_rows.append(row_result)

        if completed % 10 == 0:
            print(f"Progress: {completed}/{total} ({completed/total*100:.1f}%)")

# as_completed yields futures in finish order, which varies from run to run;
# sorting by id makes the resulting table reproducible.
all_rows.sort(key=lambda row_result: row_result["id"])

initial_solution = pd.DataFrame(all_rows)
initial_solution

In [None]:
# Write the generated solutions to disk. The target folder is created first
# because DataFrame.to_csv does not create missing directories.
from pathlib import Path

output_path = Path('datasets/initial_solution.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
initial_solution.to_csv(output_path, index=False)