## load

In [2]:
import pandas as pd
from openai import OpenAI
import json
import configparser

def get_openai_key():
    config = configparser.ConfigParser()
    config.read("oai-config.ini")
    return config["openai"]["key"]

# Initialize OpenAI client
client = OpenAI(api_key=get_openai_key())

## helpers

In [22]:
def create_prompt(row):
    question = row['question']
    facts = row['facts']
    answer = row['model_answer']
    
    prompt = f"""Given this question and its supporting facts, create a variant by changing one subject/term that would result in the opposite answer:

Original Question: {question}
Original Answer: {answer}
Supporting Facts:
1. {facts[0]}
2. {facts[1]}

Create a new version where changing one subject/term would lead to the opposite conclusion.
Provide your response in this format:

--==--
Original Term: [original term to change]
New Term: [new term/subject]
New Question: [rewritten question]
New Facts:
1. [new fact 1 about the new term]
2. [new fact 2 about the new term]
New Answer: [opposite of original answer]

Ensure the new question follows the same logical pattern but leads to the opposite conclusion."""
    
    return prompt

In [23]:
def parse_gpt_response(response_text, original_question, original_facts, original_answer):
    try:
        if "--==--" not in response_text:
            return None
        
        formatted_section = response_text.split("--==--")[1].strip()
        
        lines = formatted_section.split("\n")
        parsed = {}
        
        for line in lines:
            if line.startswith("Original Term:"):
                parsed['old_subject'] = line.replace("Original Term:", "").strip()
            elif line.startswith("New Term:"):
                parsed['new_subject'] = line.replace("New Term:", "").strip()
            elif line.startswith("New Question:"):
                parsed['new_question'] = line.replace("New Question:", "").strip()
        
        parsed['old_question'] = original_question
        parsed['old_answer'] = original_answer
        parsed['new_answer'] = not original_answer  # Ensure opposite answer
        
        facts_start = formatted_section.find("New Facts:")
        facts_text = formatted_section[facts_start:].split("\n")[1:3]
        parsed['new_facts'] = [
            fact.strip().replace("1. ", "").replace("2. ", "")
            for fact in facts_text
        ]
        parsed['old_facts'] = original_facts
        
        return parsed
    except Exception as e:
        print(f"Error parsing response: {e}")
        return None

In [24]:
def generate_variants(input_data):
    """Generates variants for each question in the input data."""
    variants = []
    
    for _, row in pd.DataFrame(input_data).iterrows():
        prompt = create_prompt(row)
        
        try:
            completion = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "user", "content": prompt}
                ],
                max_tokens=500
            )
            
            response = completion.choices[0].message.content
            parsed = parse_gpt_response(response, row['question'], row['facts'], row['answer'])
            
            if parsed:
                parsed['qid'] = row['qid']
                variants.append(parsed)
                print(f"Generated variant for question {row['qid']}")
                
        except Exception as e:
            print(f"Error processing question {row['qid']}: {e}")
            continue
    
    return variants

## load data

In [27]:
#df = pd.read_json("https://raw.githubusercontent.com/eladsegal/strategyqa/refs/heads/main/data/strategyqa/dev.json")

df2 = pd.read_json("model-reasons.jsonl", lines=True)

filtered_df = df2[df2['facts'].str.len() == 2]

input_data = filtered_df.to_dict('records')

variants = generate_variants(input_data)

Generated variant for question e0044a7b4d146d611e73
Generated variant for question c69397b4341b65ed080f
Generated variant for question 1932e05f10680ece229f
Generated variant for question c91eafafed5a8f80bb5a
Generated variant for question 2047c0c34383f8014820
Generated variant for question 1e9d59987a695898808f
Generated variant for question 869bbd1c4e3c0bf02527
Generated variant for question 9635db8809b449470dd6
Generated variant for question addf92ab71aca4e783b1
Generated variant for question 99eb858b4c9624a71b40
Generated variant for question b1e1256007b0a4a341a7
Generated variant for question 3545982eb15f96652e1b
Generated variant for question cbebe1a0113581f37141
Generated variant for question e5ffcc7b22a58df8952d
Generated variant for question e144fef6c590823af46a
Generated variant for question 8d95e7d922024a684ac0
Generated variant for question 427fdafa9e7047587d75
Generated variant for question 2025983d427a9d3d5bab
Generated variant for question 0edac4af92465027fe27
Generated va

In [28]:
# with open("question_variants.jsonl", 'w') as f:
#     for variant in variants:
#         f.write(json.dumps(variant) + '\n')

# print("Results saved to question_variants.jsonl")

variants_df = pd.DataFrame(variants)
display(variants_df)

Unnamed: 0,old_subject,new_subject,new_question,old_question,old_answer,new_answer,new_facts,old_facts,qid
0,Albany in Georgia,Augusta in Georgia,Will the Augusta in Georgia reach a hundred th...,Will the Albany in Georgia reach a hundred tho...,False,True,[The Augusta in Georgia has a larger populatio...,[The Albany in Georgia has a smaller populatio...,e0044a7b4d146d611e73
1,Saint Vincent and the Grenadines,Haiti,Is the language used in Haiti rooted in English?,Is the language used in Saint Vincent and the ...,True,False,[Haitian Creole and French are the official la...,[English is the official language of Saint Vin...,c69397b4341b65ed080f
2,Mount Fuji,Mount Aso,Would the top of Mount Aso stick out of the Se...,Would the top of Mount Fuji stick out of the S...,True,False,"[Mount Aso is located in Kyushu, which is much...","[Mount Fuji is located on Honshu Island, which...",1932e05f10680ece229f
3,Miami,Los Angeles,Is Los Angeles a city on the American West Coast?,Is Miami a city on the American West Coast?,False,True,[Los Angeles is located in the Pacific Time Zo...,[Miami is not located in the Pacific Time Zone...,c91eafafed5a8f80bb5a
4,Swiss Guard,Virginia general assembly members,Can the Virginia General Assembly members fill...,Can the Swiss Guard fill the Virginia General ...,False,True,[The Virginia General Assembly members are ele...,[The Swiss Guard is a military corps that serv...,2047c0c34383f8014820
...,...,...,...,...,...,...,...,...,...
123,Al Unser Jr.,Neil Armstrong,Did Neil Armstrong win the Space Race?,Did Al Unser Jr. win the Space Race?,False,True,[Neil Armstrong was an astronaut who was part ...,"[Al Unser Jr. is a racing driver, not an astro...",5f290c4202a54bf6aa71
124,Amy Winehouse,Adele,Did Adele always perform live perfectly?,Did Amy Winehouse always perform live perfectly?,False,True,"[She is known for her powerful, clear voice an...",[She struggled with substance abuse and its ef...,6e95d89ccd3256bde343
125,Anchovy,Chicken,Are there bones in a chicken pizza?,Are there bones in an anchovy pizza?,True,False,"[Chicken meat often contains bones., Bones are...","[Anchovies are fish and have bones., Bones are...",7d1f1c2a9d554a017a22
126,basketball,pebble,Can you hide a pebble in a sand cat's ear?,Can you hide a basketball in a sand cat's ear?,False,True,[Sand cats have ears that can easily accommoda...,[Sand cats do not have ears that can accommoda...,4070e4485b1bcc9e04d8


In [8]:
df = variants_df.copy().head(10)

In [29]:
def create_final_facts(row):
    prompt = f"""Replace "{row['new_subject']}" with "{row['old_subject']}" in the following facts, but ONLY when it refers to the main subject of the fact. 
    Do NOT replace any other location names, even if they are similar. Preserve all comparison locations exactly as they are.
    
    For example, if replacing "Augusta in Georgia" with "Albany in Georgia", in the fact:
    "Augusta, GA has more people than Augusta, NY"
    You should get:
    "Albany, GA has more people than Augusta, NY"
    
    Here are the facts to transform:
    1. {row['new_facts'][0]}
    2. {row['new_facts'][1]}
    
    Original question for context: {row['old_question']}
    New question for context: {row['new_question']}
    
    Return only the transformed facts in this exact format:
    1. [first fact]
    2. [second fact]"""
    
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200
        )
        
        response = completion.choices[0].message.content
        
        # Extract the transformed facts
        facts = [line.split('. ')[1].strip() for line in response.split('\n') 
                if line.strip() and line[0].isdigit()]
        
        if len(facts) == 2:
            return facts
        else:
            print(f"Warning: Could not properly parse facts for qid {row['qid']}")
            return row['new_facts']
            
    except Exception as e:
        print(f"Error processing final facts for qid {row['qid']}: {e}")
        return row['new_facts']

variants_df['final_facts'] = variants_df.apply(create_final_facts, axis=1)



In [30]:
variants_df

Unnamed: 0,old_subject,new_subject,new_question,old_question,old_answer,new_answer,new_facts,old_facts,qid,final_facts
0,Albany in Georgia,Augusta in Georgia,Will the Augusta in Georgia reach a hundred th...,Will the Albany in Georgia reach a hundred tho...,False,True,[The Augusta in Georgia has a larger populatio...,[The Albany in Georgia has a smaller populatio...,e0044a7b4d146d611e73,[The Albany in Georgia has a larger population...
1,Saint Vincent and the Grenadines,Haiti,Is the language used in Haiti rooted in English?,Is the language used in Saint Vincent and the ...,True,False,[Haitian Creole and French are the official la...,[English is the official language of Saint Vin...,c69397b4341b65ed080f,[Haitian Creole and French are the official la...
2,Mount Fuji,Mount Aso,Would the top of Mount Aso stick out of the Se...,Would the top of Mount Fuji stick out of the S...,True,False,"[Mount Aso is located in Kyushu, which is much...","[Mount Fuji is located on Honshu Island, which...",1932e05f10680ece229f,"[Mount Fuji is located in Kyushu, which is muc..."
3,Miami,Los Angeles,Is Los Angeles a city on the American West Coast?,Is Miami a city on the American West Coast?,False,True,[Los Angeles is located in the Pacific Time Zo...,[Miami is not located in the Pacific Time Zone...,c91eafafed5a8f80bb5a,"[Miami is located in the Pacific Time Zone., M..."
4,Swiss Guard,Virginia general assembly members,Can the Virginia General Assembly members fill...,Can the Swiss Guard fill the Virginia General ...,False,True,[The Virginia General Assembly members are ele...,[The Swiss Guard is a military corps that serv...,2047c0c34383f8014820,[The Swiss Guard are elected officials who ser...
...,...,...,...,...,...,...,...,...,...,...
123,Al Unser Jr.,Neil Armstrong,Did Neil Armstrong win the Space Race?,Did Al Unser Jr. win the Space Race?,False,True,[Neil Armstrong was an astronaut who was part ...,"[Al Unser Jr. is a racing driver, not an astro...",5f290c4202a54bf6aa71,"[Al Unser Jr, The Space Race was a competition..."
124,Amy Winehouse,Adele,Did Adele always perform live perfectly?,Did Amy Winehouse always perform live perfectly?,False,True,"[She is known for her powerful, clear voice an...",[She struggled with substance abuse and its ef...,6e95d89ccd3256bde343,"[Amy Winehouse is known for her powerful, clea..."
125,Anchovy,Chicken,Are there bones in a chicken pizza?,Are there bones in an anchovy pizza?,True,False,"[Chicken meat often contains bones., Bones are...","[Anchovies are fish and have bones., Bones are...",7d1f1c2a9d554a017a22,"[Anchovy meat often contains bones., Bones are..."
126,basketball,pebble,Can you hide a pebble in a sand cat's ear?,Can you hide a basketball in a sand cat's ear?,False,True,[Sand cats have ears that can easily accommoda...,[Sand cats do not have ears that can accommoda...,4070e4485b1bcc9e04d8,[Sand cats have ears that can easily accommoda...


In [31]:
variants_df.to_json("swapped-model.jsonl", orient = 'records', lines = True)