## Setting

In [1]:
!pip -q install -U pandas pyarrow scikit-learn eli5 matplotlib

In [2]:
import os, re, time
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

import eli5
from IPython.display import display

SEED = 42
rng = np.random.default_rng(SEED)

In [3]:
from huggingface_hub import login
login()

## Load data

In [4]:
DATA_PATH = "compiled_df.parquet"
df = pd.read_parquet(DATA_PATH)
print("Rows:", len(df))
print("Columns:", df.columns.tolist())

qt = df["question_type"].astype(str).str.strip().str.lower()
qt = qt.replace({"yn":"y/n", "yes/no":"y/n", "y\n":"y/n"})
df["question_type_norm"] = qt

print("\nquestion_type counts:")
print(df["question_type_norm"].value_counts(dropna=False))

assert "question" in df.columns
assert "answer_label" in df.columns

Rows: 15642
Columns: ['dataset_name', 'id_in_dataset', 'question', 'options', 'answer_label', 'question_type', 'prompt_text']

question_type counts:
question_type_norm
mcq    15040
y/n      602
Name: count, dtype: int64


In [5]:
mcq = df[df["question_type_norm"].isin(["mcq","multiple choice","multiple-choice","mcq "])].copy()
yn  = df[df["question_type_norm"].isin(["y/n","yes/no","yn"])].copy()

if len(mcq)==0 and "options" in df.columns:
    mcq = df[df["options"].notna()].copy()
if len(yn)==0:
    yn = df[df["answer_label"].astype(str).str.lower().isin(["yes","no"])].copy()

print("MCQ rows:", len(mcq))
print("YN  rows:", len(yn))

if len(mcq):
    mcq["answer_label"] = mcq["answer_label"].astype(str).str.strip().str.upper()
    mcq = mcq[mcq["answer_label"].isin(["A","B","C","D"])].copy()

if len(yn):
    yn["answer_label"] = yn["answer_label"].astype(str).str.strip().str.lower()
    yn = yn[yn["answer_label"].isin(["yes","no"])].copy()

print("MCQ label dist:", Counter(mcq["answer_label"]) if len(mcq) else {})
print("YN  label dist:", Counter(yn["answer_label"]) if len(yn) else {})

MCQ rows: 15040
YN  rows: 602
MCQ label dist: Counter({'A': 4160, 'B': 3818, 'C': 3752, 'D': 3310})
YN  label dist: Counter({'yes': 421, 'no': 181})


## Prompt builder

In [6]:
def extract_abcd_from_options(options_text: str):
    if options_text is None:
        return None
    s = str(options_text)

    matches = re.findall(r"(?is)\b([ABCD])\s*[\.\)]\s*(.*?)(?=\n\s*[ABCD]\s*[\.\)]|\Z)", s)
    if not matches or len(matches) < 4:
        return None
    d = {k.upper(): v.strip() for k, v in matches}
    if all(k in d for k in ["A","B","C","D"]):
        return d
    return None

def render_mcq_prompt(question: str, options_text: str):
    opts = extract_abcd_from_options(options_text)
    if not opts:
        return (
            "You are a careful medical question-answering assistant.\n"
            "Choose the single best option.\n\n"
            f"Question: {question}\n"
            f"{options_text}\n"
            "Answer: "
        )
    return (
        "You are a careful medical question-answering assistant.\n"
        "Choose the single best option.\n\n"
        f"Question: {question}\n"
        "Answer Choices:\n"
        f"A. {opts['A']}\n"
        f"B. {opts['B']}\n"
        f"C. {opts['C']}\n"
        f"D. {opts['D']}\n"
        "Answer: "
    )

def render_yn_prompt(question: str):
    return (
        "You are a careful medical question-answering assistant.\n"
        "Answer Yes or No.\n"
        "Use A for Yes and B for No.\n\n"
        f"Question: {question}\n"
        "Answer: "
    )

def parse_mcq_answer_strict(text: str):
    if text is None:
        return None
    t = str(text).strip()

    m = re.search(r"(?im)^\s*Final:\s*([ABCD])\s*$", t)
    if m: return m.group(1).upper()

    m = re.search(r"(?im)^\s*Answer:\s*([ABCD])\s*$", t)
    if m: return m.group(1).upper()

    lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
    for ln in reversed(lines):
        if re.fullmatch(r"[ABCD]", ln, flags=re.IGNORECASE):
            return ln.upper()
    return None

def parse_yn_answer_strict(text: str):
    if text is None:
        return None
    t = str(text).strip()

    m = re.search(r"(?im)^\s*Final:\s*([AB])\s*$", t)
    if m: return m.group(1).upper()

    m = re.search(r"(?im)^\s*Answer:\s*([AB])\s*$", t)
    if m: return m.group(1).upper()

    lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
    for ln in reversed(lines):
        if re.fullmatch(r"[AB]", ln, flags=re.IGNORECASE):
            return ln.upper()

    return None

def normalize_gold_yn(val):
    g = str(val).strip().lower()
    if g in {"yes", "y", "true", "t", "1", "a"}:
        return "A"  # Yes
    if g in {"no", "n", "false", "f", "0", "b"}:
        return "B"  # No
    return None

def reasoning_suffix_mcq():
    return (
        "\n\nExplain briefly (1-3 sentences).\n"
        "Then output the final answer on the last line ONLY as:\n"
        "Final: A\n"
        "or Final: B\n"
        "or Final: C\n"
        "or Final: D\n"
        "Do not write anything after the Final line."
    )

def reasoning_suffix_yn():
    return (
        "\n\nExplain briefly (1-3 sentences).\n"
        "Then output the final answer on the last line ONLY as:\n"
        "Final: A   (A = Yes)\n"
        "or Final: B   (B = No)\n"
        "Do not write anything after the Final line."
    )

In [7]:
ex = mcq.iloc[0]
print(render_mcq_prompt(ex["question"], ex["options"])[:600])

You are a careful medical question-answering assistant.
Choose the single best option.

Question: Urogenital Diaphragm is made up of the following, except:
Answer Choices:
A. Deep transverse Perineus
B. Perinial membrane
C. Colle's fascia
D. Sphincter Urethrae
Answer: 


## Load medical wrapper

In [8]:
from google.colab import userdata
HF_TOKEN = userdata.get('HF_TOKEN')

In [9]:
from medical_llm_wrapper_fixed import MedicalLLMWrapper

MODEL_ID = "BioMistral/BioMistral-7B"
DEVICE = "cuda"

llm = MedicalLLMWrapper(
    model_name=MODEL_ID,
    device=DEVICE,
    token=HF_TOKEN,
    torch_dtype=None,
)

# For accuracy evaluation: answer_only (fast)
llm.set_mode("answer_only")

llm.get_model_info()

[MedicalLLMWrapper] Loading model: BioMistral/BioMistral-7B


Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

Exception in thread Thread-auto_conversion:
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.12/threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.12/dist-packages/transformers/safetensors_conversion.py", line 116, in auto_conversion
    raise e
  File "/usr/local/lib/python3.12/dist-packages/transformers/safetensors_conversion.py", line 95, in auto_conversion
    sha = get_conversion_pr_reference(api, pretrained_model_name_or_path, **cached_file_kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/transformers/safetensors_conversion.py", line 71, in get_conversion_pr_reference
    spawn_conversion(token, private, model_id)
  File "/usr/local/lib/python3.12/dist-packages/transformers/safetensors_conversion.py", line 47, in spawn_con

[MedicalLLMWrapper] ✓ Model loaded successfully
[MedicalLLMWrapper]   Device: cuda
[MedicalLLMWrapper]   Dtype: torch.bfloat16
[MedicalLLMWrapper]   Option token IDs - AB: [330, 365], ABCD: [330, 365, 334, 384]


{'model_name': 'BioMistral/BioMistral-7B',
 'device': 'cuda',
 'dtype': 'torch.bfloat16',
 'task_type': 'free',
 'mode': 'answer_only',
 'num_parameters': 7241732096,
 'AB_token_ids': [330, 365],
 'ABCD_token_ids': [330, 365, 334, 384]}

## Inference

In [10]:
def eval_mcq_answer_only(df_mcq: pd.DataFrame, n=1000, seed=7):
    df_s = df_mcq.sample(n=min(n, len(df_mcq)), random_state=seed).reset_index(drop=True)
    golds, preds = [], []

    for r in df_s.to_dict("records"):
        gold = str(r["answer_label"]).strip().upper()
        if gold not in {"A","B","C","D"}:
            continue

        prompt = render_mcq_prompt(r["question"], r["options"])
        llm.set_task("mcq"); llm.set_mode("answer_only")
        out = llm.generate(prompt)
        pred = parse_mcq_answer_strict(out)

        if pred not in {"A","B","C","D"}:
            continue

        golds.append(gold); preds.append(pred)

    acc = accuracy_score(golds, preds) if golds else 0.0
    print(f"MCQ answer_only accuracy: {acc:.4f} (n={len(golds)}/{len(df_s)})")
    print(classification_report(golds, preds, labels=["A","B","C","D"]))
    return acc

def eval_yn_answer_only(df_yn: pd.DataFrame, n=600, seed=7):
    df_s = df_yn.sample(n=min(n, len(df_yn)), random_state=seed).reset_index(drop=True)
    golds, preds = [], []

    for r in df_s.to_dict("records"):
        gold = normalize_gold_yn(r["answer_label"])
        if gold not in {"A","B"}:
            continue

        prompt = render_yn_prompt(r["question"])
        llm.set_task("yn"); llm.set_mode("answer_only")
        out = llm.generate(prompt)
        pred = parse_yn_answer_strict(out)

        if pred not in {"A","B"}:
            continue

        golds.append(gold); preds.append(pred)

    acc = accuracy_score(golds, preds) if golds else 0.0
    print(f"Y/N answer_only accuracy: {acc:.4f} (n={len(golds)}/{len(df_s)})")
    print(classification_report(golds, preds, labels=["A","B"]))

    return acc

In [11]:
mcq_ans_acc = eval_mcq_answer_only(mcq, n=1000)
yn_ans_acc  = eval_yn_answer_only(yn, n=600) if len(yn) else None

MCQ answer_only accuracy: 0.4750 (n=1000/1000)
              precision    recall  f1-score   support

           A       0.42      0.64      0.50       263
           B       0.54      0.34      0.42       251
           C       0.54      0.41      0.47       263
           D       0.48      0.51      0.49       223

    accuracy                           0.47      1000
   macro avg       0.49      0.47      0.47      1000
weighted avg       0.49      0.47      0.47      1000

Y/N answer_only accuracy: 0.7017 (n=600/600)
              precision    recall  f1-score   support

           A       0.70      1.00      0.82       421
           B       0.00      0.00      0.00       179

    accuracy                           0.70       600
   macro avg       0.35      0.50      0.41       600
weighted avg       0.49      0.70      0.58       600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


## build a surrogate + show eli5 explainations

In [12]:
def clean_text(t: str):
    t = str(t or "")
    t = re.sub(r"\s+", " ", t).strip()
    return t

def mcq_surrogate_text(question: str, options_text: str):
    opts = extract_abcd_from_options(options_text)
    if not opts:
        return clean_text(question + " " + str(options_text))
    return clean_text(
        f"{question} "
        f"A {opts['A']} "
        f"B {opts['B']} "
        f"C {opts['C']} "
        f"D {opts['D']}"
    )

def yn_surrogate_text(question: str):
    return clean_text(question + " (A=Yes, B=No)")


def train_surrogate_return_parts(X_texts, y, title="", top=30, seed=7):
    if len(set(y)) < 2:
        print(f"[{title}] Not enough class variety.")
        return None, None, None

    Xtr, Xte, ytr, yte = train_test_split(
        X_texts, y, test_size=0.2, random_state=seed, stratify=y
    )

    tf = TfidfVectorizer(ngram_range=(1,2), min_df=2, max_df=0.95)
    clf = LogisticRegression(max_iter=4000)

    Xtr_t = tf.fit_transform(Xtr)
    clf.fit(Xtr_t, ytr)

    score = clf.score(tf.transform(Xte), yte)
    print(f"[{title}] heldout score: {score:.3f} (n_test={len(yte)})")

    pipe = make_pipeline(tf, clf)
    feat_names = tf.get_feature_names_out()
    try:
        display(eli5.show_weights(clf, top=top, feature_names=feat_names))
    except Exception as e:
        # fallback
        print("[eli5.show_weights] error:", e)

    return pipe, tf, clf

def eli5_explain_case_parts(vectorizer, classifier, text, top=20, display_html=True):
    X = vectorizer.transform([text])
    x_dense = X.toarray()[0]

    feature_names = vectorizer.get_feature_names_out()

    try:
        disp = eli5.show_prediction(classifier, x_dense, feature_names=feature_names, top=top)
        if display_html:
            display(disp)
        else:
            return disp
    except Exception as e:
        print("[eli5.show_prediction] error:", e)
        print("Falling back to top weighted features from classifier.coef_")
        coefs = classifier.coef_
        if coefs.ndim == 1 or coefs.shape[0] == 1:
            coef = coefs.ravel()
        else:
            coef = coefs.max(axis=0)
        top_idx_pos = np.argsort(-coef)[:top]
        top_idx_neg = np.argsort(coef)[:top]
        names = feature_names
        print("\nTop positive features:")
        for i in top_idx_pos:
            print(f"{coef[i]:+.3f}\t{names[i]}")
        print("\nTop negative features:")
        for i in top_idx_neg:
            print(f"{coef[i]:+.3f}\t{names[i]}")

def collect_mcq_for_surrogates(df_mcq, n=1500, seed=7):
    df_s = df_mcq.sample(n=min(n, len(df_mcq)), random_state=seed).reset_index(drop=True)
    rows = []
    for r in df_s.to_dict("records"):
        gold = str(r["answer_label"]).strip().upper()
        if gold not in {"A","B","C","D"}:
            continue

        prompt = render_mcq_prompt(r["question"], r["options"])
        llm.set_task("mcq"); llm.set_mode("answer_only")
        out = llm.generate(prompt)
        pred = parse_mcq_answer_strict(out)
        if pred not in {"A","B","C","D"}:
            continue

        rows.append({
            "text": mcq_surrogate_text(r["question"], r["options"]),
            "gold": gold,
            "pred": pred,
            "wrong": int(pred != gold),
            "question": r["question"],
            "options": r["options"],
        })
    return pd.DataFrame(rows)

def collect_yn_for_surrogates(df_yn, n=1500, seed=7):
    df_s = df_yn.sample(n=min(n, len(df_yn)), random_state=seed).reset_index(drop=True)
    rows = []
    for r in df_s.to_dict("records"):
        gold = normalize_gold_yn(r["answer_label"])
        if gold not in {"A","B"}:
            continue

        prompt = render_yn_prompt(r["question"])
        llm.set_task("yn"); llm.set_mode("answer_only")
        out = llm.generate(prompt)
        pred = parse_yn_answer_strict(out)
        if pred not in {"A","B"}:
            continue

        rows.append({
            "text": yn_surrogate_text(r["question"]),
            "gold": gold,
            "pred": pred,
            "wrong": int(pred != gold),
            "question": r["question"],
        })
    return pd.DataFrame(rows)

def explain_case(bundle, text, top=20):
    if bundle.get("mimic"):
        print("\n[ELI5] mimic_llm (why the model predicts what it predicts):")
        display(eli5.show_prediction(bundle["mimic"], text, top=top))
    if bundle.get("gold"):
        print("\n[ELI5] predict_gold (features associated with correct labels):")
        display(eli5.show_prediction(bundle["gold"], text, top=top))
    if bundle.get("error"):
        print("\n[ELI5] predict_error (features associated with failures):")
        display(eli5.show_prediction(bundle["error"], text, top=top))

In [17]:
# MCQ bundle
mcq_bundle = None
if len(mcq):
    mcq_sur = collect_mcq_for_surrogates(mcq, n=1500)
    print("MCQ surrogate rows:", len(mcq_sur))

    mcq_bundle = {
        "mimic": train_surrogate_return_parts(mcq_sur["text"], mcq_sur["pred"], title="MCQ mimic_llm", top=30),
        "gold":  train_surrogate_return_parts(mcq_sur["text"], mcq_sur["gold"], title="MCQ predict_gold", top=30),
        "error": train_surrogate_return_parts(mcq_sur["text"], mcq_sur["wrong"], title="MCQ predict_error", top=30),
    }

MCQ surrogate rows: 1500
[MCQ mimic_llm] heldout score: 0.387 (n_test=300)


Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.952,reaction,,
+0.901,<BIAS>,,
+0.618,cyst,,
+0.618,25,,
+0.580,level,,
+0.575,disorder,,
+0.574,posterior,,
+0.567,body,,
+0.557,16,,
+0.556,neonatal,,

Weight?,Feature
+0.952,reaction
+0.901,<BIAS>
+0.618,cyst
+0.618,25
+0.580,level
+0.575,disorder
+0.574,posterior
+0.567,body
+0.557,16
+0.556,neonatal

Weight?,Feature
+0.729,drug
+0.641,used
+0.615,fever
+0.579,deficiency
+0.561,prostate
+0.561,femoral
+0.539,aoic
+0.531,recommended
+0.528,lithium
+0.522,cell carcinoma

Weight?,Feature
+0.826,aery
+0.643,withdrawal
+0.634,receptors
+0.623,to
+0.611,hypoxia
+0.609,ray
+0.594,duct
+0.590,anemia
+0.587,cholesterol
+0.553,periodontal

Weight?,Feature
+0.827,the
+0.796,her
+0.758,disease
+0.734,all
+0.731,management
+0.579,positive
+0.571,she
+0.559,chromosome
+0.556,flexor
+0.556,protein


[MCQ predict_gold] heldout score: 0.230 (n_test=300)


Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.766,dna,,
+0.675,common,,
+0.658,tube,,
+0.608,group,,
+0.600,lithium,,
+0.566,been,,
+0.531,neonatal,,
+0.521,arthritis,,
+0.519,cyst,,
+0.514,reactions,,

Weight?,Feature
+0.766,dna
+0.675,common
+0.658,tube
+0.608,group
+0.600,lithium
+0.566,been
+0.531,neonatal
+0.521,arthritis
+0.519,cyst
+0.514,reactions

Weight?,Feature
+0.656,type
+0.634,hiv
+0.613,rotation
+0.592,aery
+0.583,lobe
+0.578,anti
+0.567,excessive
+0.546,effective against
+0.541,degeneration
+0.533,10

Weight?,Feature
+0.684,except
+0.657,joint
+0.655,hernia
+0.653,serum
+0.599,only
+0.591,stab
+0.582,to
+0.580,weeks
+0.578,calcium
+0.567,flexor

Weight?,Feature
+0.925,all of
+0.783,form
+0.717,all
+0.691,in
+0.675,above
+0.625,nerve
+0.624,chromosome
+0.574,vocal
+0.573,esophagus
+0.572,acid


[MCQ predict_error] heldout score: 0.537 (n_test=300)


Weight?,Feature
+0.940,all
+0.809,serum
+0.753,artery
+0.738,he
+0.696,except
+0.666,muscle
+0.630,children
+0.596,he has
+0.589,nerve
+0.583,lateral


In [19]:
# Y/N bundle
yn_bundle = None
if len(yn):
    yn_sur = collect_yn_for_surrogates(yn, n=1500)
    print("Y/N surrogate rows:", len(yn_sur))

    yn_bundle = {
        "mimic": train_surrogate_return_parts(yn_sur["text"], yn_sur["pred"], title="Y/N mimic_llm", top=30),
        "gold":  train_surrogate_return_parts(yn_sur["text"], yn_sur["gold"], title="Y/N predict_gold", top=30),
        "error": train_surrogate_return_parts(yn_sur["text"], yn_sur["wrong"], title="Y/N predict_error", top=30),
    }

Y/N surrogate rows: 602
[Y/N mimic_llm] Not enough class variety.
[Y/N predict_gold] heldout score: 0.702 (n_test=121)


Weight?,Feature
+0.984,age
+0.942,treatment
+0.794,necessary yes
+0.777,necessary
+0.768,does the
+0.713,women
+0.711,really
+0.674,with
+0.654,equivalent
+0.642,stage


[Y/N predict_error] heldout score: 0.702 (n_test=121)


Weight?,Feature
+0.984,age
+0.942,treatment
+0.794,necessary yes
+0.777,necessary
+0.768,does the
+0.713,women
+0.711,really
+0.674,with
+0.654,equivalent
+0.642,stage


In [20]:
from IPython.display import display
import numpy as np

def get_tf_clf_from_bundle(bundle, key):
    if bundle is None:
        return None, None

    part = bundle.get(key, None) if isinstance(bundle, dict) else bundle

    # If part is a dict with tf/clf
    if isinstance(part, dict):
        tf = part.get("tf", None)
        clf = part.get("clf", None)
        if tf is not None and clf is not None:
            return tf, clf

        pipe = part.get("pipe", None)
        if pipe is not None and hasattr(pipe, "named_steps"):
            tf = pipe.named_steps.get("tfidfvectorizer") or pipe.named_steps.get("tfidfvectorizer".replace("_",""))
            clf = pipe.named_steps.get("logisticregression") or pipe.named_steps.get("logisticregression".replace("_",""))
            return tf, clf

    # If part is a sklearn Pipeline directly
    if hasattr(part, "named_steps"):
        pipe = part
        tf = pipe.named_steps.get("tfidfvectorizer") or pipe.named_steps.get("tfidfvectorizer".replace("_",""))
        if tf is None:
            first = list(pipe.named_steps.items())[0][1]
            tf = first if hasattr(first, "vocabulary_") or hasattr(first, "get_feature_names_out") else None
        clf = pipe.named_steps.get("logisticregression") or pipe.named_steps.get("logisticregression".replace("_",""))
        if clf is None:
            last = list(pipe.named_steps.items())[-1][1]
            clf = last if hasattr(last, "coef_") else None
        return tf, clf

    return None, None


def show_wrong_cases_with_eli5(bundle, wrong_cases_df, which="gold", top=20):
    tf, clf = get_tf_clf_from_bundle(bundle, which)
    if tf is None or clf is None:
        print(f"[ERROR] Could not extract tf/clf for '{which}' from bundle. Inspect bundle keys:", list(bundle.keys()) if bundle else None)
        return

    for _, r in wrong_cases_df.iterrows():
        print("\n============================")
        print(f"{which.upper()} GOLD: {r.get('gold')} PRED: {r.get('pred')}")
        print("Q:", r.get("question"))
        text = r.get("text") or r.get("question")
        try:
            # eli5_explain_case_parts expects
            eli5_explain_case_parts(tf, clf, text, top=top, display_html=True)
        except Exception as e:
            print("[eli5_explain_case_parts] error:", e)
            # fallback
            try:
                X = tf.transform([text])
                x_dense = X.toarray()[0]
                feature_names = tf.get_feature_names_out()
                if hasattr(clf, "coef_"):
                    coefs = clf.coef_
                    if coefs.ndim == 1 or coefs.shape[0] == 1:
                        coef = coefs.ravel()
                        contrib = coef * x_dense
                        top_idx = np.argsort(-contrib)[:top]
                        print("\nTop contributing features (positive):")
                        for i in top_idx:
                            if x_dense[i] != 0:
                                print(f"{contrib[i]:+.3f}\t{feature_names[i]}")
                        neg_idx = np.argsort(contrib)[:top]
                        print("\nTop contributing features (negative):")
                        for i in neg_idx:
                            if x_dense[i] != 0:
                                print(f"{contrib[i]:+.3f}\t{feature_names[i]}")
                    else:
                        contribs = (coefs * x_dense).max(axis=0)
                        top_idx = np.argsort(-contribs)[:top]
                        print("\nTop features by max-class contribution:")
                        for i in top_idx:
                            print(f"{contribs[i]:+.3f}\t{feature_names[i]}")
                else:
                    print("Classifier has no coef_, cannot compute contributions.")
            except Exception as e2:
                print("Fallback failed:", e2)

In [21]:
import numpy as np
from IPython.display import display

def debug_inspect_bundle(bundle):
    print("BUNDLE KEYS:", list(bundle.keys()) if isinstance(bundle, dict) else "bundle not dict")
    for k in (bundle.keys() if isinstance(bundle, dict) else []):
        val = bundle[k]
        print(f"\n--- key: {k} ---")
        print("type:", type(val))
        # small repr
        rep = repr(val)
        print("repr:", rep[:400] + ("..." if len(rep) > 400 else ""))
        # show attributes that might help
        attrs = []
        for a in ("named_steps","steps","get_params","tf","clf","pipe"):
            if hasattr(val, a):
                attrs.append(a)
        if attrs:
            print("has attrs:", attrs)
        else:
            print("no obvious attrs")

def extract_tf_clf_from_obj(obj):
    # None
    if obj is None:
        return None, None

    if isinstance(obj, dict):
        tf = obj.get("tf") or obj.get("vectorizer") or obj.get("tfidf") or obj.get("tfidfvectorizer")
        clf = obj.get("clf") or obj.get("classifier") or obj.get("model") or obj.get("logisticregression")
        if tf is not None and clf is not None:
            return tf, clf
        pipe = obj.get("pipe")
        if pipe is not None:
            obj = pipe  # fall through

    if hasattr(obj, "named_steps"):
        ns = obj.named_steps
        tf = None; clf = None
        for name in ("tfidfvectorizer","tfidf","vectorizer","tfidf_vectorizer","tfidfvectoriser"):
            if name in ns:
                tf = ns[name]
                break
        for name in ("logisticregression","classifier","clf","logreg","logistic"):
            if name in ns:
                clf = ns[name]
                break

        if tf is None:
            try:
                first = list(ns.items())[0][1]
                if hasattr(first, "get_feature_names_out") or hasattr(first, "vocabulary_"):
                    tf = first
            except Exception:
                pass
        if clf is None:
            try:
                last = list(ns.items())[-1][1]
                if hasattr(last, "coef_") or hasattr(last, "decision_function"):
                    clf = last
            except Exception:
                pass
        if tf is not None and clf is not None:
            return tf, clf

    if hasattr(obj, "steps"):
        try:
            steps = obj.steps
            if len(steps) >= 2:
                first = steps[0][1]
                last = steps[-1][1]
                tf = first if (hasattr(first,"get_feature_names_out") or hasattr(first,"vocabulary_")) else None
                clf = last if (hasattr(last,"coef_") or hasattr(last,"decision_function")) else None
                if tf is not None and clf is not None:
                    return tf, clf
        except Exception:
            pass

    if isinstance(obj, (list, tuple)):
        if len(obj) >= 2:
            a, b = obj[0], obj[1]
            if hasattr(a, "get_feature_names_out") and (hasattr(b, "coef_") or hasattr(b, "decision_function")):
                return a, b
            if len(obj) >= 3:
                a2, b2, c2 = obj[0], obj[1], obj[2]
                if hasattr(b2, "get_feature_names_out") and (hasattr(c2, "coef_") or hasattr(c2, "decision_function")):
                    return b2, c2
    if hasattr(obj, "get_feature_names_out") and hasattr(obj, "transform"):
        return obj, None
    if hasattr(obj, "coef_") or hasattr(obj, "decision_function"):
        return None, obj

    return None, None

def explain_with_fallback(tf, clf, text, top=20):
    try:
        if tf is None or clf is None:
            raise ValueError("tf or clf is None")
        eli5_explain_case_parts(tf, clf, text, top=top, display_html=True)
        return
    except Exception as e:
        print("[fallback explain] eli5_explain_case_parts failed:", e)
        try:
            X = tf.transform([text])
            x_dense = X.toarray()[0]
            feature_names = tf.get_feature_names_out()
            if hasattr(clf, "coef_"):
                coefs = clf.coef_
                if coefs.ndim == 1 or coefs.shape[0] == 1:
                    coef = coefs.ravel()
                    contrib = coef * x_dense
                    top_idx_pos = np.argsort(-contrib)[:top]
                    top_idx_neg = np.argsort(contrib)[:top]
                    print("\nTop positive contributions:")
                    for i in top_idx_pos:
                        if x_dense[i] != 0:
                            print(f"{contrib[i]:+.3f}\t{feature_names[i]}")
                    print("\nTop negative contributions:")
                    for i in top_idx_neg:
                        if x_dense[i] != 0:
                            print(f"{contrib[i]:+.3f}\t{feature_names[i]}")
                else:
                    # multiclass fallback
                    contribs = (coefs * x_dense).max(axis=0)
                    top_idx = np.argsort(-contribs)[:top]
                    print("\nTop features by max-class contribution:")
                    for i in top_idx:
                        print(f"{contribs[i]:+.3f}\t{feature_names[i]}")
            else:
                print("Classifier has no coef_; cannot compute contributions.")
        except Exception as e2:
            print("Fallback failed:", e2)

# Run inspection
print("Inspecting mcq_bundle contents:")
debug_inspect_bundle(mcq_bundle)

# Prepare wrong cases df
try:
    wrong_df = mcq_sur[mcq_sur["wrong"] == 1].head(5)
except Exception:
    try:
        wrong_df = wrong_cases
    except Exception:
        wrong_df = None

if wrong_df is None or len(wrong_df) == 0:
    print("No wrong cases found to explain.")
else:
    for which in ("mimic","gold","error"):
        print(f"\n\n=== Explanations for '{which}' ===")
        obj = mcq_bundle.get(which) if isinstance(mcq_bundle, dict) else None
        tf, clf = extract_tf_clf_from_obj(obj)
        if tf is None or clf is None:
            print(f"[WARN] could not extract tf/clf automatically for '{which}'. Attempting additional heuristics...")
            # try if mcq_bundle[which] is a Pipeline and pull first/last step
            val = mcq_bundle.get(which)
            try:
                if hasattr(val, "named_steps"):
                    steps = val.named_steps
                    print("named_steps keys:", list(steps.keys()))
                if hasattr(val, "steps"):
                    print("steps keys:", [s[0] for s in val.steps])
            except Exception:
                pass
            # attempt to see if bundle contains tf/clf separately at top-level (unlikely)
            tf_alt = getattr(mcq_bundle, "tf", None) or mcq_bundle.get("tf") if isinstance(mcq_bundle, dict) else None
            clf_alt = getattr(mcq_bundle, "clf", None) or mcq_bundle.get("clf") if isinstance(mcq_bundle, dict) else None
            if tf_alt is not None and clf_alt is not None:
                tf, clf = tf_alt, clf_alt

        if tf is None or clf is None:
            print(f"[ERROR] still no tf/clf for '{which}'. Repr of mcq_bundle['{which}'] shown above. Falling back to best-effort per-example contributions using any available classifier in bundle.")
            # try to find any classifier in the whole bundle dict
            found_tf, found_clf = None, None
            for k in (mcq_bundle.keys() if isinstance(mcq_bundle, dict) else []):
                t_tmp, c_tmp = extract_tf_clf_from_obj(mcq_bundle[k])
                if c_tmp is not None and found_clf is None:
                    found_tf, found_clf = t_tmp, c_tmp
            if found_clf is not None:
                print(f"Using classifier found under key (first match).")
                tf, clf = found_tf, found_clf
            else:
                print("No classifier found anywhere in bundle. Can't compute contributions.")
                tf, clf = None, None

        # Explain each wrong example with the extracted tf/clf (or fallback)
        for _, r in wrong_df.iterrows():
            print("\n-----------------------------")
            print("Q:", r.get("question"))
            text = r.get("text") or r.get("question")
            if tf is None or clf is None:
                print("No tf/clf available — skipping eli5, printing raw text snippet:")
                print(text[:400])
            else:
                explain_with_fallback(tf, clf, text, top=20)

Inspecting mcq_bundle contents:
BUNDLE KEYS: ['mimic', 'gold', 'error']

--- key: mimic ---
type: <class 'tuple'>
repr: (Pipeline(steps=[('tfidfvectorizer',
                 TfidfVectorizer(max_df=0.95, min_df=2, ngram_range=(1, 2))),
                ('logisticregression', LogisticRegression(max_iter=4000))]), TfidfVectorizer(max_df=0.95, min_df=2, ngram_range=(1, 2)), LogisticRegression(max_iter=4000))
no obvious attrs

--- key: gold ---
type: <class 'tuple'>
repr: (Pipeline(steps=[('tfidfvectorizer',
                 TfidfVectorizer(max_df=0.95, min_df=2, ngram_range=(1, 2))),
                ('logisticregression', LogisticRegression(max_iter=4000))]), TfidfVectorizer(max_df=0.95, min_df=2, ngram_range=(1, 2)), LogisticRegression(max_iter=4000))
no obvious attrs

--- key: error ---
type: <class 'tuple'>
repr: (Pipeline(steps=[('tfidfvectorizer',
                 TfidfVectorizer(max_df=0.95, min_df=2, ngram_range=(1, 2))),
                ('logisticregression', LogisticRegression(max_

Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.901,<BIAS>,,
+0.057,arch,,
+0.037,swelling,,
+0.030,medial,,
+0.028,knee,,
+0.024,when,,
+0.021,border,,
+0.021,marked,,
+0.020,he is,,
+0.019,64,,

Contribution?,Feature
+0.901,<BIAS>
+0.057,arch
+0.037,swelling
+0.030,medial
+0.028,knee
+0.024,when
+0.021,border
+0.021,marked
+0.020,he is
+0.019,64

Contribution?,Feature
+0.028,foot
+0.026,the
+0.016,examination shows
+0.016,to show
+0.014,examination
+0.013,is
+0.012,second
… 37 more positive …,… 37 more positive …
… 47 more negative …,… 47 more negative …
-0.012,weakness

Contribution?,Feature
+0.067,to
+0.060,of
+0.040,the
+0.028,his
+0.023,is
+0.023,space
+0.022,of the
+0.019,man with
+0.018,further
+0.017,weakness

Contribution?,Feature
+0.110,the
+0.041,evaluation of
+0.040,of
+0.030,in his
+0.028,evaluation
+0.021,in
+0.020,of the
+0.017,his
+0.017,the foot
+0.015,sole



-----------------------------
Q: Incidence of choriocarcinoma is seen more after:


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.901,<BIAS>,,
+0.070,is seen,,
+0.051,spontaneous abortion,,
+0.029,cesarean section,,
+0.028,section,,
+0.026,choriocarcinoma,,
+0.024,cesarean,,
+0.023,ectopic,,
+0.014,ectopic pregnancy,,
+0.011,of choriocarcinoma,,

Contribution?,Feature
+0.901,<BIAS>
+0.070,is seen
+0.051,spontaneous abortion
+0.029,cesarean section
+0.028,section
+0.026,choriocarcinoma
+0.024,cesarean
+0.023,ectopic
+0.014,ectopic pregnancy
+0.011,of choriocarcinoma

Contribution?,Feature
+0.044,ectopic
+0.040,ectopic pregnancy
+0.040,normal
+0.021,spontaneous
+0.010,is
… 2 more positive …,… 2 more positive …
-0.006,abortion
-0.006,more
-0.010,cesarean section
-0.012,cesarean

Contribution?,Feature
+0.056,seen
+0.051,delivery
+0.047,normal
+0.045,incidence of
+0.045,incidence
+0.039,of choriocarcinoma
+0.030,choriocarcinoma
+0.023,of
+0.023,after
+0.018,is

Contribution?,Feature
+0.035,abortion
+0.026,seen
+0.015,of
+0.007,incidence of
+0.007,incidence
+0.002,cesarean section
… 2 more positive …,… 2 more positive …
-0.002,cesarean
-0.011,delivery
-0.013,section



-----------------------------
Q: If somebody develops resistance to INH, patient will develop simultaneously resistance to which drug?


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.901,<BIAS>,,
+0.038,develops,,
+0.016,resistance,,
+0.016,resistance to,,
+0.015,streptomycin,,
+0.015,pyrazinamide,,
+0.015,ethambutol,,
+0.006,which drug,,
+0.005,patient,,
-0.013,to which,,

Contribution?,Feature
0.901,<BIAS>
0.038,develops
0.016,resistance
0.016,resistance to
0.015,streptomycin
0.015,pyrazinamide
0.015,ethambutol
0.006,which drug
0.005,patient
-0.013,to which

Contribution?,Feature
0.106,drug
0.047,will
0.017,which drug
0.016,which
0.012,ethambutol
0.012,pyrazinamide
0.012,streptomycin
0.006,to which
-0.001,if
-0.008,patient will

Contribution?,Feature
0.068,to
0.062,if
0.03,patient will
0.022,develop
0.009,will
0.005,patient
0.003,which
-0.005,develops
-0.01,which drug
-0.012,resistance to

Contribution?,Feature
0.066,resistance
0.021,develop
0.02,to which
0.009,resistance to
0.004,patient
-0.006,which
-0.008,to
-0.008,patient will
-0.01,develops
-0.013,which drug



-----------------------------
Q: A 24-year-old woman with no past medical history is post operative day 2 from a cesarean section that resulted in the birth of her first child. She begins to cry when she's told that today's lunch will be gluten-free. Although the patient feels "exhausted" and has had trouble sleeping, she deeply desires to return home and take care of her newborn. The patient denies any changes in concentration or suicidal thoughts now or during the pregnancy. What is the diagnosis and likely outcome?


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.901,<BIAS>,,
+0.060,postpartum,,
+0.033,depression,,
+0.029,likely,,
+0.020,when,,
+0.020,that,,
+0.020,from,,
+0.020,risk,,
… 55 more positive …,… 55 more positive …,,
… 64 more negative …,… 64 more negative …,,

Contribution?,Feature
+0.901,<BIAS>
+0.060,postpartum
+0.033,depression
+0.029,likely
+0.020,when
+0.020,that
+0.020,from
+0.020,risk
… 55 more positive …,… 55 more positive …
… 64 more negative …,… 64 more negative …

Contribution?,Feature
+0.057,will
+0.046,lithium
+0.022,the
+0.018,diagnosis and
+0.017,diagnosis
+0.017,will be
+0.016,although
+0.015,that
… 48 more positive …,… 48 more positive …
… 71 more negative …,… 71 more negative …

Contribution?,Feature
+0.072,or
+0.059,she
+0.045,and
+0.034,the
+0.033,high
+0.028,to
+0.025,of
+0.019,is
+0.019,the pregnancy
+0.018,first

Contribution?,Feature
+0.091,the
+0.090,her
+0.086,she
+0.048,and
+0.045,psychosis
+0.029,the patient
+0.027,in
+0.023,with
+0.020,denies
+0.020,now



-----------------------------
Q: Iron and folic acid supplement during pregnancy:


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.901,<BIAS>,,
+0.077,folic acid,,
+0.077,folic,,
+0.075,iron,,
+0.025,20,,
+0.022,500,,
+0.008,supplement,,
+0.007,100 mg,,
+0.003,pregnancy,,
+0.002,20 mg,,

Contribution?,Feature
0.901,<BIAS>
0.077,folic acid
0.077,folic
0.075,iron
0.025,20
0.022,500
0.008,supplement
0.007,100 mg
0.003,pregnancy
0.002,20 mg

Contribution?,Feature
0.072,folic acid
0.072,folic
0.023,100
0.01,acid
0.006,mcg
0.004,during pregnancy
0.003,500
0.002,during
0.001,pregnancy
-0.004,and

Contribution?,Feature
0.142,iron
0.052,mg
0.016,100 mg
0.012,and
0.004,20 mg
0.003,supplement
0.002,pregnancy
0.001,during pregnancy
-0.0,during
-0.003,20

Contribution?,Feature
0.124,acid
0.039,mg
0.032,100
0.013,and
0.005,during
-0.002,20 mg
-0.003,20
-0.004,mcg
-0.006,pregnancy
-0.006,supplement




=== Explanations for 'gold' ===

-----------------------------
Q: A 64-year-old man with osteoarthritis of the knee comes to the physician for evaluation of weakness in his foot. Physical examination shows a swelling in the popliteal fossa. There is marked weakness when attempting to invert his right foot. He is unable to curl his toes. Further evaluation of this patient is most likely to show decreased sensation over which of the following locations?


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.175,<BIAS>,,
+0.038,to,,
+0.035,his,,
+0.034,evaluation,,
+0.030,lateral,,
+0.019,is most,,
+0.019,right,,
+0.018,is,,
+0.013,toes,,
+0.013,arch,,

Contribution?,Feature
+0.175,<BIAS>
+0.038,to
+0.035,his
+0.034,evaluation
+0.030,lateral
+0.019,is most
+0.019,right
+0.018,is
+0.013,toes
+0.013,arch

Contribution?,Feature
+0.117,<BIAS>
+0.041,second
+0.026,arch
+0.025,popliteal
+0.022,the
+0.020,medial
+0.020,weakness in
+0.016,64 year
+0.015,the popliteal
… 21 more positive …,… 21 more positive …

Contribution?,Feature
+0.070,of
+0.066,to
+0.046,the
+0.031,knee
+0.026,osteoarthritis of
+0.023,the knee
+0.020,of the
+0.020,following locations
+0.020,locations
+0.019,to the

Contribution?,Feature
+0.126,foot
+0.083,the foot
+0.040,in
+0.038,dorsal
+0.033,evaluation of
+0.030,sole
+0.022,weakness
+0.022,right foot
+0.021,evaluation
+0.017,physical



-----------------------------
Q: Incidence of choriocarcinoma is seen more after:


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.175,<BIAS>,,
+0.072,ectopic,,
+0.068,more,,
+0.046,ectopic pregnancy,,
+0.031,is seen,,
+0.028,seen,,
+0.013,after,,
+0.013,is,,
… 1 more positive …,… 1 more positive …,,
… 2 more negative …,… 2 more negative …,,

Contribution?,Feature
+0.175,<BIAS>
+0.072,ectopic
+0.068,more
+0.046,ectopic pregnancy
+0.031,is seen
+0.028,seen
+0.013,after
+0.013,is
… 1 more positive …,… 1 more positive …
… 2 more negative …,… 2 more negative …

Contribution?,Feature
+0.117,<BIAS>
+0.079,incidence of
+0.079,incidence
+0.043,pregnancy spontaneous
+0.043,spontaneous abortion
+0.036,spontaneous
+0.032,cesarean section
+0.030,of choriocarcinoma
+0.029,cesarean
+0.024,delivery

Contribution?,Feature
+0.041,choriocarcinoma
+0.038,after
+0.036,of choriocarcinoma
+0.030,is seen
+0.025,of
+0.007,section
… 3 more positive …,… 3 more positive …
-0.008,delivery
-0.008,ectopic pregnancy
-0.010,normal

Contribution?,Feature
+0.041,abortion
+0.005,section
+0.002,normal
… 1 more positive …,… 1 more positive …
… 2 more negative …,… 2 more negative …
-0.004,seen
-0.005,pregnancy
-0.008,cesarean
-0.009,choriocarcinoma
-0.014,delivery



-----------------------------
Q: If somebody develops resistance to INH, patient will develop simultaneously resistance to which drug?


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.175,<BIAS>,,
+0.044,drug,,
+0.037,to,,
+0.026,streptomycin,,
+0.020,ethambutol,,
+0.020,pyrazinamide,,
+0.009,if,,
+0.002,develop,,
+0.000,patient,,
-0.002,which,,

Contribution?,Feature
0.175,<BIAS>
0.044,drug
0.037,to
0.026,streptomycin
0.02,ethambutol
0.02,pyrazinamide
0.009,if
0.002,develop
0.0,patient
-0.002,which

Contribution?,Feature
0.133,resistance to
0.117,<BIAS>
0.078,drug
0.075,will
0.049,resistance
0.037,simultaneously
0.03,patient will
0.028,to which
0.024,which drug
0.02,streptomycin

Contribution?,Feature
0.064,to
0.03,to which
0.007,resistance
0.006,develop
0.001,which drug
-0.004,which
-0.007,patient will
-0.01,patient
-0.015,simultaneously
-0.024,streptomycin

Contribution?,Feature
0.076,develops
0.03,patient
0.023,if
0.012,will
0.006,which
-0.008,develop
-0.008,pyrazinamide
-0.008,ethambutol
-0.013,resistance
-0.014,simultaneously



-----------------------------
Q: A 24-year-old woman with no past medical history is post operative day 2 from a cesarean section that resulted in the birth of her first child. She begins to cry when she's told that today's lunch will be gluten-free. Although the patient feels "exhausted" and has had trouble sleeping, she deeply desires to return home and take care of her newborn. The patient denies any changes in concentration or suicidal thoughts now or during the pregnancy. What is the diagnosis and likely outcome?


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.175,<BIAS>,,
+0.107,postpartum,,
+0.047,lithium,,
+0.026,at,,
+0.024,more,,
+0.022,24,,
+0.021,risk of,,
+0.021,at high,,
+0.021,high risk,,
+0.019,her,,

Contribution?,Feature
+0.175,<BIAS>
+0.107,postpartum
+0.047,lithium
+0.026,at
+0.024,more
+0.022,24
+0.021,risk of
+0.021,at high
+0.021,high risk
+0.019,her

Contribution?,Feature
+0.117,<BIAS>
+0.085,will
+0.025,and
+0.020,feels
+0.016,the
+0.014,treatment with
… 48 more positive …,… 48 more positive …
… 90 more negative …,… 90 more negative …
-0.014,is
-0.014,no

Contribution?,Feature
+0.054,she
+0.049,or
+0.034,the
+0.026,of
+0.026,high
+0.024,to
+0.016,the pregnancy
+0.015,home
+0.014,that
… 58 more positive …,… 58 more positive …

Contribution?,Feature
+0.045,patient
+0.044,in
+0.034,her
+0.031,symptoms
+0.026,the patient
+0.022,post
+0.020,at
+0.016,major
+0.015,symptoms are
+0.015,symptoms will



-----------------------------
Q: Iron and folic acid supplement during pregnancy:


Contribution?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0
Contribution?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1
Contribution?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2
Contribution?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3
+0.175,<BIAS>,,
+0.056,iron,,
+0.036,folic,,
+0.036,folic acid,,
+0.031,mg,,
+0.015,during pregnancy,,
+0.012,during,,
+0.001,pregnancy,,
-0.001,and,,
-0.004,20 mg,,

Contribution?,Feature
0.175,<BIAS>
0.056,iron
0.036,folic
0.036,folic acid
0.031,mg
0.015,during pregnancy
0.012,during
0.001,pregnancy
-0.001,and
-0.004,20 mg

Contribution?,Feature
0.132,100
0.117,<BIAS>
0.096,folic
0.096,folic acid
0.044,500
0.027,mcg
0.023,100 mg
0.018,20 mg
0.007,pregnancy
0.007,and

Contribution?,Feature
0.082,mg
0.024,iron
0.006,20
0.0,and
-0.003,acid
-0.004,supplement
-0.007,pregnancy
-0.008,20 mg
-0.008,during pregnancy
-0.01,mcg

Contribution?,Feature
0.189,acid
0.014,100
0.012,100 mg
0.005,20
0.004,supplement
-0.002,during
-0.002,pregnancy
-0.006,and
-0.006,mcg
-0.007,20 mg




=== Explanations for 'error' ===

-----------------------------
Q: A 64-year-old man with osteoarthritis of the knee comes to the physician for evaluation of weakness in his foot. Physical examination shows a swelling in the popliteal fossa. There is marked weakness when attempting to invert his right foot. He is unable to curl his toes. Further evaluation of this patient is most likely to show decreased sensation over which of the following locations?


Contribution?,Feature
+0.148,foot
+0.058,the foot
+0.050,lateral
+0.048,evaluation
+0.047,evaluation of
+0.042,of
+0.037,weakness
+0.033,decreased
+0.032,he
+0.031,<BIAS>



-----------------------------
Q: Incidence of choriocarcinoma is seen more after:


Contribution?,Feature
0.089,more
0.063,normal
0.057,is seen
0.052,abortion
0.039,incidence
0.038,delivery
0.029,pregnancy
0.026,incidence of
0.02,seen
0.018,choriocarcinoma



-----------------------------
Q: If somebody develops resistance to INH, patient will develop simultaneously resistance to which drug?


Contribution?,Feature
0.053,develop
0.045,to
0.04,drug
0.039,resistance to
0.024,to which
0.021,develops
0.021,if
0.008,patient will
0.006,pyrazinamide
0.006,ethambutol



-----------------------------
Q: A 24-year-old woman with no past medical history is post operative day 2 from a cesarean section that resulted in the birth of her first child. She begins to cry when she's told that today's lunch will be gluten-free. Although the patient feels "exhausted" and has had trouble sleeping, she deeply desires to return home and take care of her newborn. The patient denies any changes in concentration or suicidal thoughts now or during the pregnancy. What is the diagnosis and likely outcome?


Contribution?,Feature
+0.077,will
+0.033,in
+0.031,will be
+0.031,<BIAS>
+0.028,patient
+0.020,treatment
+0.020,symptoms
+0.019,the patient
+0.019,resolve
+0.019,newborn



-----------------------------
Q: Iron and folic acid supplement during pregnancy:


Contribution?,Feature
0.114,folic
0.114,folic acid
0.031,<BIAS>
0.027,500
0.013,supplement
0.013,20
0.008,mcg
0.007,20 mg
0.004,and
0.003,100 mg
