Pipeline: import MathQA questions --> decontextualize (strip the story) --> add a new context

In [None]:
import json
import os
import pandas as pd
from openai import OpenAI
from typing import List, Dict

# Shared OpenAI client used by the GPT helper functions in this file; the API
# key is taken from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [None]:
def load_mathqa_dataset(file_path: str) -> List[Dict]:
    """Load a MathQA dataset from a JSONL file.

    Each non-blank line of the file is parsed as one JSON document. Blank or
    whitespace-only lines (for example a trailing newline at the end of the
    file) are skipped instead of aborting the load.

    Args:
        file_path: Path to the JSONL file, read as UTF-8.

    Returns:
        The parsed records, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message includes the file path and the 1-based line number.
    """
    questions = []
    # Explicit encoding: the platform default is locale-dependent and can
    # mis-decode non-ASCII text in the problems.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, raw_line in enumerate(f, start=1):
            record = raw_line.strip()
            if not record:
                continue
            try:
                questions.append(json.loads(record))
            except json.JSONDecodeError as err:
                # Same exception type as before, so existing handlers still
                # match; only the message gains location information.
                raise json.JSONDecodeError(
                    f"{err.msg} (in {file_path}, line {line_number})",
                    err.doc,
                    err.pos,
                ) from err
    return questions

In [None]:
def remove_context(question: str) -> str:
    """Use GPT to transform a word problem into a context-free numerical question.

    Sends the word problem to the chat completions endpoint through the
    module-level ``client`` and asks the model to strip all narrative
    elements while keeping the numbers and operations.

    Args:
        question: The original math word problem.

    Returns:
        The model's rewritten question with surrounding whitespace removed,
        or an empty string if the response contained no text.
    """
    prompt = f"""You are a math problem simplifier. 
        Transform the given math word problem into a pure math question without any story or context. 
        Keep the same numbers and mathematical operations, but remove all narrative elements. 
        The result should be a direct mathematical question. 
    
        Original question: {question}

        Output ONLY the transformed question, nothing else."""

    response = client.chat.completions.create(
        # NOTE(review): an earlier comment here suggested falling back to
        # "gpt-4" if this model is unavailable -- confirm the model name and
        # that it accepts `temperature` / `max_tokens` for your account.
        model="gpt-5.1",
        messages=[
            {"role": "system", "content": "You are a math problem transformer that removes context from word problems."},
            {"role": "user", "content": prompt}
        ],
        temperature=0,
        max_tokens=500
    )

    # Chat Completions responses carry the text in choices[0].message.content;
    # `output_text` only exists on Responses API objects and would raise
    # AttributeError here. The content may be None, so normalise to "".
    choices = response.choices
    content = choices[0].message.content if choices else None
    return (content or "").strip()

In [None]:
def add_context(context_free_question: str, topic: str) -> str:
    """Use GPT to transform a context-free question into a story problem.

    Sends the pure math question to the chat completions endpoint through the
    module-level ``client`` and asks the model to wrap it in a story about
    ``topic`` while keeping the numbers and mathematical structure.

    Args:
        context_free_question: The pure math question to dress up.
        topic: The theme the new word problem should be about.

    Returns:
        The model's word problem with surrounding whitespace removed, or an
        empty string if the response contained no text.
    """
   
    prompt = f"""You are a creative writer. 
    Transform the given pure math question into a word problem about {topic}. 
    Keep the same numbers and mathematical structure, but add a story/context around it. 
    Preserve all the original mathematical relationships.
    
    Pure math question: {context_free_question}

    Output ONLY the transformed question, nothing else."""

    # The system message previously ended mid-sentence ("You expertly
    # transform "); it is completed here so the instruction is well-formed.
    system_message = (
        f"You are a math problem writer specializing in {topic}. "
        "You expertly transform pure math questions into word problems."
    )

    response = client.chat.completions.create(
        model="gpt-5.1",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt}
        ],
        temperature=0,
        max_tokens=500
    )

    # Chat Completions responses carry the text in choices[0].message.content;
    # `output_text` only exists on Responses API objects and would raise
    # AttributeError here. The content may be None, so normalise to "".
    choices = response.choices
    content = choices[0].message.content if choices else None
    return (content or "").strip()

In [None]:

def transform_question(original_question: str, target_contexts: List[str]) -> Dict:
    """Run one question through the whole rewrite pipeline.

    The question is first reduced to a context-free form with
    ``remove_context``; that form is then re-dressed once per entry of
    ``target_contexts`` with ``add_context``. A preview (first 100
    characters) of every intermediate result is printed along the way.

    Args:
        original_question: The word problem to transform.
        target_contexts: Topics to generate new word problems for.

    Returns:
        A dict with the keys ``"original"``, ``"context_free"`` and
        ``"new_contexts"``; the last maps each topic to its rewritten
        question.
    """
    print(f"\nOriginal: {original_question[:100]}...")

    # Stage 1: strip the story from the problem.
    stripped = remove_context(original_question)
    print(f"Context-free: {stripped[:100]}...")

    # Stage 2: wrap the bare question in each requested topic.
    rewritten_by_topic = {}
    for topic in target_contexts:
        rewritten = add_context(stripped, topic)
        rewritten_by_topic[topic] = rewritten
        print(f"{topic.capitalize()}: {rewritten[:100]}...")

    return {
        "original": original_question,
        "context_free": stripped,
        "new_contexts": rewritten_by_topic,
    }
