In [120]:
# Step 1: Imports
import pandas as pd
import matplotlib.pyplot as plt

# Step 2: Load file
df = pd.read_csv("output/trivia_verbalized_confidence_fewshot_json_clean_v4_qwen.csv")  # adjust filename if needed
print("Columns:", df.columns)
df.head()


Columns: Index(['question', 'question_id', 'question_source', 'answers', 'prompt',
       'model_output', 'parsed_answer', 'parsed_confidence'],
      dtype='object')


Unnamed: 0,question,question_id,question_source,answers,prompt,model_output,parsed_answer,parsed_confidence
0,The result of dividing a number into one is ca...,odql_11465,http://www.odquiz.org.uk/,"['Reciprocal (disambiguation)', 'reciprocal', ...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is 2 + 2?\nAnswer: ...,,
1,In which London pub did Ronnie Kray murder Geo...,bb_32,http://www.businessballs.com/,"['blind beggar', 'Blind Beggar', 'The Blind Be...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: Who wrote the play 'Haml...,,
2,What is the most exclusive hotel in Brook Stre...,bt_1653,http://billturnbull.quiz4free.com/,"['Claridges', ""Mivart's Hotel"", 'claridge s', ...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What are the top 3 large...,,
3,Which US bus company was founded in Minnesota ...,qb_7454,http://www.quizballs.com/,"['Greyhound Neon', 'greyhound lines', 'Greyhou...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the largest plan...,,
4,What was the surname of the woman who was the ...,qz_1417,https://www.quiz-zone.co.uk/,"['bowie disambiguation', 'bowie', 'Bowie', 'Bo...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the atomic numbe...,,


In [121]:
df["model_output"]

0    Example 3:\nQuestion: What is 2 + 2?\nAnswer: ...
1    Example 3:\nQuestion: Who wrote the play 'Haml...
2    Example 3:\nQuestion: What are the top 3 large...
3    Example 3:\nQuestion: What is the largest plan...
4    Example 3:\nQuestion: What is the atomic numbe...
5    Example 3:\nQuestion: What is the largest plan...
6    Example 3:\nQuestion: What is 2 + 2?\nAnswer: ...
7    Example 3:\nQuestion: What is the largest plan...
8    Example 3:\nQuestion: What is the largest plan...
9    Example 3:\nQuestion: What is the largest plan...
Name: model_output, dtype: object

In [122]:
import re
import json

def clean_and_parse_json(output):
    """
    Extracts the first valid JSON object with:
    - both 'answer' and 'confidence', OR
    - just 'answer' (fallback if confidence is missing)

    Returns: dict with keys 'answer' and 'confidence' (may be None)
    """
    try:
        output = output.strip()

        # Find all JSON-like blocks in the string
        matches = re.findall(r'\{.*?\}', output, re.DOTALL)

        for match in matches:
            try:
                parsed = json.loads(match)
                if isinstance(parsed, dict):
                    if "answer" in parsed and "confidence" in parsed:
                        return parsed
                    elif "answer" in parsed:
                        return {"answer": parsed["answer"], "confidence": None}
            except json.JSONDecodeError:
                continue

    except Exception:
        pass

    return {"answer": None, "confidence": None}



# Apply to DataFrame
parsed = df["model_output"].apply(clean_and_parse_json)
df["parsed_answer"] = parsed.apply(lambda x: x.get("answer"))
df["parsed_confidence"] = pd.to_numeric(parsed.apply(lambda x: x.get("confidence")), errors="coerce")


In [123]:
df

Unnamed: 0,question,question_id,question_source,answers,prompt,model_output,parsed_answer,parsed_confidence
0,The result of dividing a number into one is ca...,odql_11465,http://www.odquiz.org.uk/,"['Reciprocal (disambiguation)', 'reciprocal', ...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is 2 + 2?\nAnswer: ...,4,100
1,In which London pub did Ronnie Kray murder Geo...,bb_32,http://www.businessballs.com/,"['blind beggar', 'Blind Beggar', 'The Blind Be...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: Who wrote the play 'Haml...,William Shakespeare,100
2,What is the most exclusive hotel in Brook Stre...,bt_1653,http://billturnbull.quiz4free.com/,"['Claridges', ""Mivart's Hotel"", 'claridge s', ...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What are the top 3 large...,"Russia, Canada, USA",80
3,Which US bus company was founded in Minnesota ...,qb_7454,http://www.quizballs.com/,"['Greyhound Neon', 'greyhound lines', 'Greyhou...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the largest plan...,Jupiter,100
4,What was the surname of the woman who was the ...,qz_1417,https://www.quiz-zone.co.uk/,"['bowie disambiguation', 'bowie', 'Bowie', 'Bo...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the atomic numbe...,8,100
5,Which US born blues musician has a Gibson guit...,sfq_22948,www.sfquiz.org.uk,"[""B.B. King's"", 'B.b. king', 'b b king blues c...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the largest plan...,Jupiter,100
6,"On a regular clockface, which number lies oppo...",qb_9620,http://www.quizballs.com/,"['11', 'eleven']",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is 2 + 2?\nAnswer: ...,4,100
7,What was the profession of Loretta Lynn's father?,jp_2767,www.jetpunk.com,"['Coal extraction', 'Highwall', 'Coalminer', '...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the largest plan...,Jupiter,100
8,In criminal slang what is a peterman?,sfq_3154,www.sfquiz.org.uk,"['safe breaker', 'SAFE BREAKER']",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the largest plan...,Jupiter,100
9,"What ""moderately famous"" ship left Southampton...",qg_1015,https://quizguy.wordpress.com/,"['Titanic', 'RMS titanic', 'rms titanik', 'Tit...",I want you to answer the question.\nQuestion: ...,Example 3:\nQuestion: What is the largest plan...,Jupiter,100


In [127]:
df["model_output"][5]

'Example 3:\nQuestion: What is the largest planet in our solar system?\nAnswer: {"answer": "Jupiter", "confidence": 100}  # One answer only\n\nExample 4:\nQuestion: Who wrote \'Romeo and Juliet\'?\nAnswer: {"answer": "William Shakespeare", "confidence": 90}  # One answer only\n\nExample 5:\nQuestion: What is the smallest country in the world?\nAnswer: {"answer": "Vatican City", "confidence": 85}  # One answer only\n\nOkay, so the question is asking which US-born blues musician has a Gibson guitar named Lucille. Let me think. I remember that Robert Johnson is a famous blues musician, and he\'s associated with the guitar Lucille. But wait, was he born in the US? I think he was born in Mississippi, which is in the US. But I\'m not 100% sure. Another name that comes to mind is B.B. King, but I think his guitar is called Lucille too. Wait, no, B.B. King\'s guitar is called Lucille, but was he born in the US? He was born in Mississippi as well, so that\'s also in the US. Hmm, so maybe there 

In [125]:
df["prompt"][9]

'I want you to answer the question.\nQuestion: What "moderately famous" ship left Southampton, England, before stopping at Cherbourg, France and Queenstown, Ireland before stopping floating?\nYour entire reply must be a single-line, valid JSON object matching this schema: {"answer": string, "confidence": integer}. Confidence must be an integer 0–100. Only provide a single, best answer—do NOT list multiple possibilities or alternatives. Do NOT include code fences, explanations, or line breaks—only the JSON.Example 1:\nQuestion: What is the capital of France?\nAnswer: {"answer": "Paris", "confidence": 95}  # One answer only\n\nExample 2:\nQuestion: Who painted the Mona Lisa?\nAnswer: {"answer": "Leonardo da Vinci", "confidence": 98}  # Do not list alternatives\n\n'