In [20]:
import json
import os
import textwrap

# Define a function to pretty-print JSON with wrapped lines
def pretty_print_json(data, max_width=80):
    json_str = json.dumps(data, indent=4)
    wrapped_lines = []
    for line in json_str.splitlines():
        wrapped_lines.extend(textwrap.wrap(line, width=max_width))
    return "\n".join(wrapped_lines)

In [21]:
file_path = os.path.join("eval_question_only_emrqa", "eval_predictions.jsonl")

# Read the file line by line
with open(file_path, "r") as file:
    predictions = [json.loads(line) for line in file]

for i, prediction in enumerate(predictions[:5]):
    print(f"Prediction {i + 1}:")
    print(pretty_print_json(predictions[i], max_width=100))

Prediction 1:
{
    "context": "Mrs. Wetterauer is a 54-year-old female with coronary artery disease status post
inferior myocardial infarction in March of 1997, with sick sinus syndrome, status post permanent
pacemaker placement, and paroxysmal atrial fibrillation controlled with amiodarone; also with
history of diabetes mellitus and hypertension. On 1/11, she experienced severe respiratory distress
and was unable to be intubated on the field. She was ultimately intubated at Sirose, and an
echocardiogram showed an ejection fraction of 25 to 30 percent with flat CKs. She was diuresed six
liters and a right heart catheterization showed a pulmonary artery pressure of 40/15, wedge of 12,
and cardiac output of 5.2. Hemodynamics indicated her cardiac output was dependent on her SVR. At
the outside hospital, a right upper lobe infiltrate was noted and she was given gentamicin 250 mg
times one, and clindamycin 600 mg. She was diagnosed with pneumonia and treated with clindamycin,
which caused

In [22]:
# Filter failed examples where the predicted answer does not match the expected answer
failed_examples = [
    prediction for prediction in predictions
    if prediction['predicted_answer'] is not prediction['answers']['text']
]
len(failed_examples)

2500

In [23]:
for i, prediction in enumerate(failed_examples[:5]):
    print(f"Prediction {i + 1}:")
    print(pretty_print_json(failed_examples[i], max_width=100))

Prediction 1:
{
    "context": "Mrs. Wetterauer is a 54-year-old female with coronary artery disease status post
inferior myocardial infarction in March of 1997, with sick sinus syndrome, status post permanent
pacemaker placement, and paroxysmal atrial fibrillation controlled with amiodarone; also with
history of diabetes mellitus and hypertension. On 1/11, she experienced severe respiratory distress
and was unable to be intubated on the field. She was ultimately intubated at Sirose, and an
echocardiogram showed an ejection fraction of 25 to 30 percent with flat CKs. She was diuresed six
liters and a right heart catheterization showed a pulmonary artery pressure of 40/15, wedge of 12,
and cardiac output of 5.2. Hemodynamics indicated her cardiac output was dependent on her SVR. At
the outside hospital, a right upper lobe infiltrate was noted and she was given gentamicin 250 mg
times one, and clindamycin 600 mg. She was diagnosed with pneumonia and treated with clindamycin,
which caused

In [None]:
import pandas as pd

data = [
    {
        "Question": example["question"],
        "Predicted Answer": example["predicted_answer"],
        "Actual Answer": ", ".join(example["answers"]["text"])
    }
    for example in failed_examples
]
comparison_df = pd.DataFrame(data)

In [None]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_colwidth', None)
comparison_df

Unnamed: 0,Question,Predicted Answer,Actual Answer
0,Has the patient ever taken glyburide for their diabetes mellitus:,"Lovenox 60 mg b.i.d., aspirin 325 p.o","Glyburide 5 mg p.o. q.d.,"
1,has there been a prior protonix,"max 70 mg weekly, Dulcolax p.r. 10 mg as","daily, CellCept 1500 mg b.i.d., Protonix 20 mg daily, Pravachol 40 mg daily,"
2,Was the patient ever given medication for wheezing.,vastatin) 20 mg PO QHS with,"lasix, and nebs for wheezing, and was monitored for lytes."
3,What medication has the patient take for dvt prophylaxis,"units, Tylenol p.r.n., Dilaudid 2-4 mg p.o",Hematology: He received heparin for DVT prophylaxis
4,Why was the patient prescribed percocet,Kayexalate and his potassium level returned to,result of Percocet taken for his left hip pain with resulting decreased p.o. intake.
...,...,...,...
2495,What was the indication for my patient's fresh frozen plasma,ONDE MEDICAL CENTER trial drug. Her headache was treated with Tylenol to,"The patient's Lovenox was reversed with protamine and her hematoma continued to expand overnight,"
2496,Has this patient ever been prescribed trazodone,417100958) with a potentially serious interaction with Benztropine Mesylate and Chlorpromazine HC,TRAZODONE 50 MG PO BEDTIME PRN Insomnia.
2497,has there been a prior loperamide,Isordil 40 mg p.o. t.i,"Loperamide 2 tabs p.o. four times a day,"
2498,Has a patient had zantac ( ranitidine hcl ),Maleate 20 mg PO BID (hold if,"Zantac (Ranitidine HCl) 150 mg PO BID,"


### No. of questions in the dataset that start with "has" is 14319. Seems like its a pattern in the failed examples. Let's look at those examples more closely.

In [31]:
has_questions_df = comparison_df[comparison_df['Question'].str.lower().str.startswith('why')]
has_questions_df

Unnamed: 0,Question,Predicted Answer,Actual Answer
4,Why was the patient prescribed percocet,Kayexalate and his potassium level returned to,result of Percocet taken for his left hip pain with resulting decreased p.o. intake.
45,Why is the patient on coumadin,"ture repair, as well as coronary artery bypass graft times","maintain INR between 2 and 3, aspirin, Diltiazem 30 mg t.i.d.,"
50,Why was percocet originally prescribed,dur 120 mg p.o. b.i,"Percocet 1 to 2 tablets p.o. q3 - 4h p.r.n. pain, Zantac 150 mg p.o. b.i.d.,"
52,Why is the patient on lovenox,"USATE SODIUM) 100 MG PO BID, NEXIUM (",He received Lovenox for DVT prophylaxis during his admission.
73,Why was gentle hydration therapy prescribed,laja is an,"He was given gentle hydration therapy with 2 liters of IV fluids,"
...,...,...,...
2428,Why was the patient on acetylsalicylic acid,history of CAD and prior MI leading to CABG in 5/23 was admitted to RCH 6/,instructions to increased dose for costochondritis as pt.
2441,Why is the patient on toradol,"LIMA to LAD, SVG1 to PDA, SVG2-OM2 with a","given Toradol initially for pain and Percocet for break through pain,"
2451,Why did the patient have ciprofloxacin,"ozalone 5 milligrams q.d., Lasix 160 mill",and Ciprofloxacin 500 milligrams b.i.d.
2452,Why is the patient taking lopressor,"units, Tylenol p.r.n., Dilaudid 2-4 mg p","event prophylaxis. Blood pressure was controlled with isosorbide dinitrate,"
