In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from med_llm_bias import *

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import json
import os
import re
import time

import numpy as np
from tqdm import tqdm

from models import llm_model
# Star import stays last so any names it re-exports shadow the ones above, as before.
from med_llm_bias import *

In [4]:
def generate_prompt_dataset(bias_type, mitigation_strategy, question_set):
    """Write one prompt text file per question for a bias/mitigation/split combination.

    Questions are loaded with ``load_usmle_questions`` and turned into prompts by
    ``USMLEQuestionProcessor.generate_full_prompt``. Each prompt is written to

        biased_data/prompts/{bias_type}/{label}/{question_set}/
            bias_{bias_type}_{label}_{question_set}_{i}.txt

    where ``i`` is the question's index in the loaded sequence and ``label`` is
    ``mitigation_strategy``, or ``"no-mitigation"`` when that argument is ``None``.

    Args:
        bias_type: Bias name forwarded to ``USMLEQuestionProcessor``.
        mitigation_strategy: Mitigation name forwarded to the processor, or ``None``.
        question_set: Split name forwarded to ``load_usmle_questions``.

    Returns:
        None. The only effect is the files written under ``biased_data/prompts``.
    """
    questions = load_usmle_questions(question_set=question_set)
    q_proc = USMLEQuestionProcessor(None, bias_type=bias_type, mitigation_strategy=mitigation_strategy)

    # The processor receives the raw value (possibly None); only the path label
    # substitutes a readable placeholder.
    strategy_label = "no-mitigation" if mitigation_strategy is None else mitigation_strategy

    out_folder = f"biased_data/prompts/{bias_type}/{strategy_label}/{question_set}"
    # exist_ok avoids the check-then-create race and makes re-runs idempotent.
    os.makedirs(out_folder, exist_ok=True)

    for i, q in enumerate(questions):
        # generate_full_prompt returns a (prompt, info) pair; info is not needed here.
        prompt, _info = q_proc.generate_full_prompt(q)
        out_file = os.path.join(
            out_folder, f"bias_{bias_type}_{strategy_label}_{question_set}_{i}.txt"
        )

        # Explicit UTF-8 so non-ASCII characters in the prompt text do not depend
        # on (or fail under) the platform's default encoding.
        with open(out_file, "w", encoding="utf-8") as fout:
            fout.write(prompt)

In [5]:
# Bias types to generate prompts for; this list is reused by the JSON export cell.
biases = [
    "self_diagnosis",
    "recency",
    "confirmation",
    "frequency",
    "cultural",
    "status_quo",
    "false_consensus",
]
# None selects the unmitigated variant (stored under the "no-mitigation" folder).
mitigation_strategies = [None, "education", "one-shot", "few-shot"]

# Walk every (bias, mitigation, split) combination in the same order as three
# nested loops would, writing the prompt files and logging each finished combo.
for bias_type, mitigation_strategy, question_set in [
    (bias, strategy, split)
    for bias in biases
    for strategy in mitigation_strategies
    for split in ("train", "test", "dev")
]:
    generate_prompt_dataset(bias_type, mitigation_strategy, question_set)
    print(f"Generated prompts for {bias_type}, {mitigation_strategy}, {question_set}")

Generated prompts for self_diagnosis, None, train
Generated prompts for self_diagnosis, None, test
Generated prompts for self_diagnosis, None, dev
Generated prompts for self_diagnosis, education, train
Generated prompts for self_diagnosis, education, test
Generated prompts for self_diagnosis, education, dev
Generated prompts for self_diagnosis, one-shot, train
Generated prompts for self_diagnosis, one-shot, test
Generated prompts for self_diagnosis, one-shot, dev
Generated prompts for self_diagnosis, few-shot, train
Generated prompts for self_diagnosis, few-shot, test
Generated prompts for self_diagnosis, few-shot, dev
Generated prompts for recency, None, train
Generated prompts for recency, None, test
Generated prompts for recency, None, dev
Generated prompts for recency, education, train
Generated prompts for recency, education, test
Generated prompts for recency, education, dev
Generated prompts for recency, one-shot, train
Generated prompts for recency, one-shot, test
Generated pro

In [6]:
def generate_json_dataset(bias_type, question_set):
    """Dump the per-question ``info`` records for one bias type and split to a JSON file.

    Every question returned by ``load_usmle_questions`` is passed through
    ``USMLEQuestionProcessor.generate_full_prompt`` (constructed with
    ``mitigation_strategy=None``). The second element of each returned pair is
    collected and the resulting list is written to

        biased_data/json/{bias_type}/bias_{bias_type}_{question_set}.json

    Args:
        bias_type: Bias name forwarded to ``USMLEQuestionProcessor``.
        question_set: Split name forwarded to ``load_usmle_questions``.

    Returns:
        None. The only effect is the JSON file written under ``biased_data/json``.
    """
    out_folder = f"biased_data/json/{bias_type}"
    # Create the target directory up front (idempotently) so a path problem
    # surfaces before any prompts are generated.
    os.makedirs(out_folder, exist_ok=True)
    out_file = os.path.join(out_folder, f"bias_{bias_type}_{question_set}.json")

    questions = load_usmle_questions(question_set=question_set)
    q_proc = USMLEQuestionProcessor(None, bias_type=bias_type, mitigation_strategy=None)

    all_qs = []
    for q in questions:
        # Only the info record is exported; the prompt text itself is discarded.
        # NOTE(review): info must be JSON-serializable for json.dump below — confirm.
        _prompt, info = q_proc.generate_full_prompt(q)
        all_qs.append(info)

    # Explicit encoding keeps the output independent of the platform default.
    with open(out_file, "w", encoding="utf-8") as fout:
        json.dump(all_qs, fout)

# Export one JSON file per (bias, split) pair, in the same order as two nested
# loops would, logging each pair once its file has been written.
for bias_type, question_set in [
    (bias, split)
    for bias in biases
    for split in ("train", "test", "dev")
]:
    generate_json_dataset(bias_type, question_set)
    print(f"Generated json for {bias_type}, {question_set}")

Generated json for self_diagnosis, train
Generated json for self_diagnosis, test
Generated json for self_diagnosis, dev
Generated json for recency, train
Generated json for recency, test
Generated json for recency, dev
Generated json for confirmation, train
Generated json for confirmation, test
Generated json for confirmation, dev
Generated json for frequency, train
Generated json for frequency, test
Generated json for frequency, dev
Generated json for cultural, train
Generated json for cultural, test
Generated json for cultural, dev
Generated json for status_quo, train
Generated json for status_quo, test
Generated json for status_quo, dev
Generated json for false_consensus, train
Generated json for false_consensus, test
Generated json for false_consensus, dev


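Output layout notes (these templates do not match what the cells above currently write, which is `biased_data/prompts/[bias]/[mitigation]/[train/test/dev]/bias_[bias]_[mitigation]_[train/test/dev]_[i].txt` and `biased_data/json/[bias]/bias_[bias]_[train/test/dev].json`):

biased_data/[bias]/[mitigation]/[bias]_[mitigation]_[train/test/val/all].json
biased_data/[bias]/[bias]_[mitigation]_[train/test/val/all].json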