In [None]:
# Upgrade the `transformers` package in the runtime; it is imported by the model-loading cell below.
!pip install -U transformers



In [None]:
import os
from google.colab import userdata

# Export HF_TOKEN from Colab secrets when it is not already in the environment.
# The lookup is best-effort: a missing secret (or one this notebook has not been
# granted access to) must not abort the cell, because the token only matters if
# the Hub asks for authentication when the checkpoint is downloaded.
if "HF_TOKEN" not in os.environ:
    try:
        hf_token = userdata.get("HF_TOKEN")
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        hf_token = None
    if hf_token:
        os.environ["HF_TOKEN"] = hf_token
    else:
        print("HF_TOKEN secret not available; continuing without Hub authentication.")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Hub id of the token-classification checkpoint used for NER in this notebook.
MODEL_ID = "blaze999/Medical-NER"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

tokenizer_config.json: 0.00B [00:00, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]



model.safetensors:   0%|          | 0.00/736M [00:00<?, ?B/s]

In [None]:
import pandas as pd
import ast
import re
from typing import List, Tuple, Dict, Any, Optional

In [None]:
df = pd.read_csv("/content/output.csv")

# Positional (end-exclusive) window of CSV rows processed by this notebook.
ROW_WINDOW = slice(33333, 33734)

# Column 0 holds the report text: drop a leading "FINDINGS:" label and
# squeeze every whitespace run down to a single space.
report_text = df.iloc[ROW_WINDOW, 0].astype(str)
report_text = report_text.str.replace(r'^FINDINGS:\s*', '', regex=True)
report_text = report_text.str.replace(r"\s+", " ", regex=True)
clean = report_text.str.strip().reset_index(drop=True)

# Column 1 holds the reference concepts; re-indexed from 0 so it lines up with `clean`.
concepts = df.iloc[ROW_WINDOW, 1].reset_index(drop=True)

print(concepts[0:3])
print(clean[0:3])

0    ['single portable view', 'superior traction of...
1    ['mildly enlarged heart with left ventricular ...
2    ['PA and lateral views', 'moderately enlarged ...
Name: concepts, dtype: object
0    Single portable view of the chest. There is su...
1    The heart is mildly enlarged with a left ventr...
2    PA and lateral views of the chest are obtained...
Name: mimic_findings, dtype: object


In [None]:
import torch
# NOTE(review): `endswith` is not referenced anywhere in the visible notebook and looks
# like an editor auto-import. It is kept in case another cell relies on it, but is now
# taken from the public `numpy.char` namespace, because the private
# `numpy._core.defchararray` path is not importable before NumPy 2.0.
from numpy.char import endswith

# Label-id -> tag-name mapping from the model config; constant for the whole run.
id2label = model.config.id2label

# Special tokens that are dropped from the stored output.
SPECIAL_TOKENS = {'[CLS]', '[SEP]', '[PAD]'}

rows = []
for i in range(len(clean)):
    text = clean[i]

    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: disable autograd so no computation graph is built per report.
    with torch.no_grad():
        outputs = model(**inputs)

    # Highest-scoring label id at every token position
    predictions = torch.argmax(outputs.logits, dim=-1)

    tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
    predicted_labels = [id2label[label_id] for label_id in predictions[0].tolist()]

    # Fold WordPiece continuation pieces ("##xx") into the preceding token; the
    # merged token keeps the label of its first piece. Tokens carrying a
    # SentencePiece "▁" prefix are stored as-is and resolved by the post-processing.
    results = []
    for token, label in zip(tokens, predicted_labels):
        if token.startswith("##") and results:
            results[-1][0] += token.replace("##", "")
        else:
            results.append([token, label])

    # Filter out special tokens like [CLS] and [SEP]
    final_results = [[token, label] for token, label in results if token not in SPECIAL_TOKENS]

    rows.append({
        "report": text,
        "ner_output": repr(final_results),        # stored as a string; ast.literal_eval can read it back
        "concepts": concepts[i]
    })

df_optA = pd.DataFrame(rows)
df_optA.to_csv("option_a.csv", index=False)



In [None]:


# --------- Negation + uncertainty cues (generic, not medical) ----------
# Tokens that open a negation scope. IMPORTANT: "not" is deliberately left out.
NEG_CUES = set("no without absent denies deny none".split())
# Words that close an open negation scope.
CONTRAST = set("but however though although".split())
# Hedging words looked for in a small window around a head span.
UNCERTAIN_CUES = set("likely probably possibly".split())

# Tokens that end a segment.
SENT_END = set(".!?")
# Punctuation that ends a negation scope.
HARD_BREAK = set(".;")

# Words that are never glued to a neighbour and never used as a structure suffix.
STOPWORDS = set(
    """
    the a an is are was were be been being of to in on for with
    and or as by at from that this these those it its there appears
    appear seen noted present provided demonstrate demonstrates detected given
    which who whom whose into over under below above within without
    """.split()
)

# Modifier / direction words that are never glued to a neighbour.
NON_GLUE_WORDS = set(
    """
    left right upper lower middle bilateral bilaterally
    small large moderate mild severe acute chronic old new
    free air below above midline frontal lateral views view
    worse better stable unchanged likely probably possibly
    """.split()
)

# Single-character punctuation tokens that are always kept as their own token.
PUNCT = set(".,;:!?()[]{}/\\")

# ----------------------------
# Parsing NER output
# ----------------------------
def _as_obj(x):
    if isinstance(x, list):
        return x
    if isinstance(x, str):
        return ast.literal_eval(x)
    raise TypeError(f"Unsupported ner_output type: {type(x)}")

def _pair_from_item(item) -> Tuple[str, str]:
    # Your new format: [token, label]
    if isinstance(item, (list, tuple)) and len(item) == 2 and isinstance(item[0], str) and isinstance(item[1], str):
        return item[0], item[1]

    # legacy formats (keep)
    if isinstance(item, dict):
        k, v = next(iter(item.items()))
        if isinstance(k, str) and (k == "O" or re.match(r"^(B|I)-", k)):
            return v, k
        return k, v

    if isinstance(item, (set, tuple, list)) and len(item) == 2:
        a, b = list(item)
        if isinstance(a, str) and (a == "O" or re.match(r"^(B|I)-", a)):
            return b, a
        if isinstance(b, str) and (b == "O" or re.match(r"^(B|I)-", b)):
            return a, b
        return a, b

    raise ValueError(f"Unsupported item format: {item}")

def parse_ner_output(ner_output) -> List[Tuple[str, str]]:
    """Parse raw NER output (a list or its string repr) into ``(token, label)`` pairs."""
    pairs: List[Tuple[str, str]] = []
    for entry in _as_obj(ner_output):
        pairs.append(_pair_from_item(entry))
    return pairs

# ----------------------------
# Token rebuild (NO word splitting, NO punctuation glue)
# Works for WordPiece (##) and SentencePiece (▁)
# ----------------------------
def rebuild_tokens(tokens_tags: List[Tuple[str, str]], punct: Optional[set] = None) -> Tuple[List[str], List[str]]:
    """Rebuild a clean token list, and the tag aligned with each token.

    Rules applied to every ``(token, tag)`` pair, in order:
      * ``None`` tokens are treated as empty strings;
      * punctuation tokens are always kept as their own token;
      * WordPiece continuations ("##xx") are appended to the previous token
        (which keeps its own tag), unless there is no previous token or it is
        punctuation, in which case the piece starts a new token;
      * a leading SentencePiece marker "▁" is stripped; the token is never
        appended to the previous one.
    Tokens that end up empty are dropped together with their tag.

    Args:
        tokens_tags: ``(token, tag)`` pairs.
        punct: set of punctuation tokens; defaults to the module-level ``PUNCT``.

    Returns:
        ``(toks, tags)``, two lists of equal length.
    """
    if punct is None:
        punct = PUNCT

    toks: List[str] = []
    tags: List[str] = []

    for tok, tag in tokens_tags:
        tok = "" if tok is None else tok

        # If punctuation token, keep separate always
        if tok in punct:
            toks.append(tok)
            tags.append(tag)
            continue

        # WordPiece continuation
        if tok.startswith("##"):
            piece = tok[2:]
            if toks and toks[-1] not in punct:
                toks[-1] += piece
            else:
                toks.append(piece)
                tags.append(tag)
            continue

        # SentencePiece: ▁ indicates new word boundary
        if tok.startswith("▁"):
            tok = tok[1:]

        # Normal new token (do NOT append to previous)
        toks.append(tok)
        tags.append(tag)

    # Drop empty tokens (e.g. a bare "▁") together with their own tag. Filtering
    # the two lists as pairs keeps every remaining tag on the token it belongs to;
    # truncating `tags` from the end would shift all later tags by one position.
    kept = [(t, g) for t, g in zip(toks, tags) if t != ""]
    return [t for t, _ in kept], [g for _, g in kept]

# ----------------------------
# Sub-word glue heuristics and BIO span extraction
# ----------------------------
VOWELS = set("aeiou")

def _vowel_ratio(w: str) -> float:
    w = w.lower()
    letters = [c for c in w if c.isalpha()]
    if not letters:
        return 0.0
    return sum(c in VOWELS for c in letters) / len(letters)

def _looks_like_word(w: str) -> bool:
    # normal word: alphabetic, decent length, has vowels
    wl = w.lower()
    return _is_alphaish(w) and len(wl) >= 5 and _vowel_ratio(wl) >= 0.30

def _looks_like_fragment(w: str) -> bool:
    wl = w.lower()
    if not _is_alphaish(wl):
        return False
    if len(wl) <= 4:
        return True
    # fragments often have low vowel ratio (e.g., "stin", "meg", "cty"-like)
    if _vowel_ratio(wl) < 0.30:
        return True
    # allow medium fragments (like "otomy", "stinal", "asis") but not full words like "valve"
    if 5 <= len(wl) <= 7 and _vowel_ratio(wl) < 0.45:
        return True
    return False

def _should_glue(prev: str, cur: str) -> bool:
    if not prev or not cur:
        return False

    pl = prev.lower()
    cl = cur.lower()

    # never glue stopwords/modifiers/directions
    if pl in STOPWORDS or cl in STOPWORDS:
        return False
    if pl in NON_GLUE_WORDS or cl in NON_GLUE_WORDS:
        return False

    # keep hyphen logic
    if prev in {"-", "–"} or cur in {"-", "–"}:
        return True

    if not (_is_alphaish(prev) and _is_alphaish(cur)):
        return False

    # if BOTH look like real words, don't glue (prevents aortic+valve, soft+tissue, heart+size)
    if _looks_like_word(prev) and _looks_like_word(cur):
        return False

    # glue when the right side looks like a fragment (most common)
    if _looks_like_fragment(cur):
        return True

    # also glue when the left side looks like a fragment (rare)
    if _looks_like_fragment(prev):
        return True

    return False


def _join_span_tokens(span_tokens):
    out: List[str] = []
    for t in span_tokens:
        if not out:
            out.append(t)
        else:
            if _should_glue(out[-1], t):
                out[-1] = out[-1] + t
            else:
                out.append(t)
    return " ".join(out)

def extract_spans(toks, tags):
    """Group BIO-tagged tokens into entity spans.

    A span opens on any ``B-<type>`` or ``I-<type>`` tag and extends over the
    following tokens tagged ``I-<type>`` with the same type. Tags that are
    "O", not strings, or not in B-/I- form are skipped.

    Returns:
        A list of dicts ``{"type", "text", "start", "end"}``, where ``start`` /
        ``end`` are token indices (end exclusive) and ``text`` is built from the
        span's non-blank, non-punctuation tokens.
    """
    spans = []
    total = len(tags)
    pos = 0
    while pos < total:
        tag = tags[pos]
        opener = None
        if isinstance(tag, str) and tag != "O":
            opener = re.match(r"^(B|I)-(.+)$", tag)
        if opener is None:
            pos += 1
            continue

        etype = opener.group(2)

        # extend over the run of I- tags carrying the same entity type
        stop = pos + 1
        while stop < total:
            nxt = tags[stop]
            cont = re.match(r"^I-(.+)$", nxt if isinstance(nxt, str) else "")
            if cont is None or cont.group(1) != etype:
                break
            stop += 1

        # join tokens with smart glue (instead of always " ".join)
        candidates = (toks[k] for k in range(pos, stop))
        words = [t for t in candidates if t.strip() != "" and t not in PUNCT]
        text = re.sub(r"\s+", " ", _join_span_tokens(words)).strip()

        spans.append({"type": etype, "text": text, "start": pos, "end": stop})
        pos = stop

    return spans


# ----------------------------
# Sentence segmentation
# ----------------------------
def split_into_segments(toks: List[str]) -> List[Tuple[int, int]]:
    """Split the token list into ``(start, end)`` index ranges, end exclusive.

    Each segment ends right after a token found in ``SENT_END``; tokens left
    over after the last terminator form a final segment.
    """
    # exclusive end index of every segment closed by a terminator
    bounds = [idx + 1 for idx, tok in enumerate(toks) if tok in SENT_END]

    # trailing tokens without a terminator
    last_closed = bounds[-1] if bounds else 0
    if last_closed < len(toks):
        bounds.append(len(toks))

    starts = [0] + bounds[:-1]
    return list(zip(starts, bounds))

# ----------------------------
# Negation scope per sentence
# ----------------------------
def compute_negated_span_ids(toks, spans, seg_start, seg_end) -> set:
    """Return the indices (into ``spans``) of spans negated inside one segment.

    Every token of the segment found in ``NEG_CUES`` opens a scope that runs
    up to the first following ``HARD_BREAK`` token or ``CONTRAST`` word, or to
    the end of the segment. A span is negated when its start index lies
    strictly after the cue and before the end of that scope.
    """
    negated = set()
    for cue in range(seg_start, seg_end):
        if toks[cue].lower() not in NEG_CUES:
            continue

        # first scope-closing token after the cue, else the segment end
        scope_end = next(
            (j for j in range(cue + 1, seg_end)
             if toks[j] in HARD_BREAK or toks[j].lower() in CONTRAST),
            seg_end,
        )

        negated.update(si for si, sp in enumerate(spans) if cue < sp["start"] < scope_end)

    return negated

# ----------------------------
# Normalization helpers (generic)
# ----------------------------
def norm_phrase(s: str) -> str:
    """Normalise a phrase for comparison and de-duplication.

    Collapses whitespace, trims surrounding `` ,;:.`` characters, lower-cases,
    then rewrites a few fixed expressions ("within normal limits",
    "unremarkable", "top-normal"/"top normal" -> "normal"; "no evidence of" -> "no").
    """
    rewrites = (
        (r"\bwithin normal limits\b", "normal"),
        (r"\bunremarkable\b", "normal"),
        (r"\btop[- ]normal\b", "normal"),
        (r"\bno evidence of\b", "no"),
    )

    phrase = re.sub(r"\s+", " ", s.strip()).strip(" ,;:.").lower()
    for pattern, replacement in rewrites:
        phrase = re.sub(pattern, replacement, phrase)

    # the rewrites can leave doubled or trailing spaces behind
    return re.sub(r"\s+", " ", phrase).strip()

def dedup_preserve_order(items: List[str]) -> List[str]:
    """Normalise every item with `norm_phrase`, drop empties and repeats, keep first-seen order."""
    unique: Dict[str, None] = {}  # dict keys keep insertion order
    for raw in items:
        key = norm_phrase(raw)
        if key:
            unique.setdefault(key, None)
    return list(unique)

def token_window(toks, a, b):
    """Return ``toks[a:b]`` as a list, ignoring the part of ``[a, b)`` outside the token list."""
    lo = max(a, 0)
    hi = min(b, len(toks))
    return [toks[i] for i in range(lo, hi)]

def has_uncertainty(toks, a, b) -> bool:
    """True when any token in the window ``[a, b)`` is, case-insensitively, in ``UNCERTAIN_CUES``."""
    lowered = [w.lower() for w in token_window(toks, a, b)]
    return not UNCERTAIN_CUES.isdisjoint(lowered)

# ----------------------------
# Concept building (no medical vocabulary)
# ----------------------------
def spans_in_range(spans, a, b):
    """Return the spans whose ``start`` index falls in ``[a, b)``, in their original order."""
    selected = []
    for sp in spans:
        if a <= sp["start"] < b:
            selected.append(sp)
    return selected

def collect_left_mods(seg_spans, head_span, mod_types, max_gap=6):
    """Collect the texts of modifier spans that sit to the left of a head span.

    A span qualifies when its type is in ``mod_types``, it ends at or before
    the head's start, and at most ``max_gap`` token positions separate its end
    from the head's start. Texts are returned ordered by span start; empty
    texts are skipped.
    """
    head_start = head_span["start"]

    def qualifies(sp) -> bool:
        return (
            sp["type"] in mod_types
            and sp["end"] <= head_start
            and head_start - sp["end"] <= max_gap
        )

    ordered = sorted(filter(qualifies, seg_spans), key=lambda sp: sp["start"])
    return [sp["text"] for sp in ordered if sp["text"]]

def build_concepts_from_segment(toks, spans, seg, neg_ids):
    """Build the concept phrases for one segment of the token list.

    Five independent rules (A-E below) each append phrases to one list, which
    is normalised and de-duplicated before it is returned.

    Args:
        toks: full token list of the report.
        spans: all span dicts of the report, each with "type", "text",
            "start" and "end" keys.
        seg: ``(seg_start, seg_end)`` token-index range of this segment.
        neg_ids: indices into ``spans`` of the spans considered negated.

    Returns:
        List of normalised concept strings in first-seen order.
    """
    seg_start, seg_end = seg
    seg_spans = spans_in_range(spans, seg_start, seg_end)

    # Entity-type groups, compared against each span's "type".
    VALUE_TYPES  = {"LAB_VALUE"}
    MOD_TYPES    = {"DETAILED_DESCRIPTION", "SEVERITY"}
    HEAD_TYPES   = {"SIGN_SYMPTOM", "DISEASE_DISORDER"}
    STRUCT_TYPES = {"BIOLOGICAL_STRUCTURE"}
    PROC_TYPES   = {"DIAGNOSTIC_PROCEDURE", "THERAPEUTIC_PROCEDURE"}

    concepts = []

    # (A) Merge neighbouring procedure spans joined only by connectors (e.g. "X and Y ...").
    #     Each merged run becomes one concept; an unmerged procedure is a concept by itself.
    proc = [sp for sp in seg_spans if sp["type"] in PROC_TYPES and sp["text"]]
    proc.sort(key=lambda x: x["start"])
    i = 0
    while i < len(proc):
        cur = proc[i]
        parts = [cur["text"]]
        end = cur["end"]
        j = i + 1
        while j < len(proc):
            # merge when the gap holds at most 3 non-punctuation tokens, all of them "and"/"of"/"the"
            # (an empty gap also qualifies)
            gap_tokens = [t.lower() for t in token_window(toks, end, proc[j]["start"])]
            gap_tokens = [t for t in gap_tokens if t not in PUNCT and t != ""]
            if len(gap_tokens) <= 3 and all(t in {"and","of","the"} for t in gap_tokens):
                parts.append(proc[j]["text"])
                end = proc[j]["end"]
                j += 1
            else:
                break
        concepts.append(" ".join(parts))
        i = j

    # (B) Value + Structure: only for value spans whose normalised text is one of NORMAL_LIKE.
    #     No medical vocabulary is hardcoded; the value is just constrained to common adjectives.
    NORMAL_LIKE = {"normal", "clear", "stable", "intact"}
    values = [sp for sp in seg_spans if sp["type"] in VALUE_TYPES and norm_phrase(sp["text"]) in NORMAL_LIKE]
    structs = [sp for sp in seg_spans if sp["type"] in STRUCT_TYPES and sp["text"]]

    for v in values:
        vtxt = norm_phrase(v["text"])
        for st in structs:
            # pair a value with every structure of this segment that starts within 10 tokens of it
            if abs(st["start"] - v["start"]) <= 10:
                # optional suffix: the token right after the structure, if it is inside the
                # segment and is neither punctuation nor a stopword
                suffix = ""
                after = st["end"]
                if after < seg_end:
                    w = toks[after].strip().lower()
                    if w and w not in PUNCT and w not in STOPWORDS:
                        suffix = " " + toks[after].strip()
                concepts.append(f"{vtxt} {st['text']}{suffix}")

    # (C) Head spans with left modifiers; attach a nearby structure if present.
    #     `si` is the span's index in the full `spans` list, the same index space as `neg_ids`.
    # NOTE(review): `sp in seg_spans` compares dicts by equality and scans the list for every span.
    heads = [(si, sp) for si, sp in enumerate(spans) if sp in seg_spans and sp["type"] in HEAD_TYPES and sp["text"]]
    for si, h in heads:
        mods = collect_left_mods(seg_spans, h, MOD_TYPES, max_gap=6)
        phrase = " ".join([*mods, h["text"]]).strip()

        # If there is a nearby structure, attach it (helps "calcified <structure>", etc.)
        # Only done while the phrase built so far (modifiers + head) has at most two words.
        if len(phrase.split()) <= 2:
            # Prefer the first listed structure that ends 0-3 tokens BEFORE the head
            # (e.g. "pleural abnormality")
            left_struct = None
            for st in structs:
                if 0 <= (h["start"] - st["end"]) <= 3:
                    left_struct = st
                    break
            if left_struct is not None:
                phrase = f"{left_struct['text']} {phrase}"
            else:
                # Otherwise the first listed structure that starts 0-3 tokens AFTER the head
                right_struct = None
                for st in structs:
                    if 0 <= (st["start"] - h["end"]) <= 3:
                        right_struct = st
                        break
                if right_struct is not None:
                    phrase = f"{phrase} {right_struct['text']}"


        # uncertainty cue within 4 tokens either side of the head, clamped to the segment
        if has_uncertainty(toks, max(seg_start, h["start"] - 4), min(seg_end, h["end"] + 4)):
            phrase = "likely " + phrase

        phrase = norm_phrase(phrase)

        # negated heads get a "no " prefix unless the phrase already starts with one
        if si in neg_ids and not phrase.startswith("no "):
            phrase = "no " + phrase

        concepts.append(phrase)

    # (D) Generic "status post ..." => "status post surgery", emitted only when the segment
    #     contains a THERAPEUTIC_PROCEDURE span (diagnostic procedures do not count here)
    seg_text = " ".join([t.lower() for t in token_window(toks, seg_start, seg_end)])
    if "status post" in seg_text and any(sp["type"] == "THERAPEUTIC_PROCEDURE" for sp in seg_spans):
        concepts.append("status post surgery")

    # (E) "difficult to assess <structure> due to <head>" pattern (generic)
    if "difficult to assess" in seg_text:
        # the target is the first structure span of this segment, if any
        if structs:
            target = structs[0]["text"]
            # use the first head span starting after a "due to" token pair;
            # without one, the "due to ..." part is omitted
            due_idx = None
            for idx in range(seg_start, seg_end - 1):
                if toks[idx].lower() == "due" and toks[idx+1].lower() == "to":
                    due_idx = idx
                    break
            due_head = None
            if due_idx is not None:
                for _, h in heads:
                    if h["start"] > due_idx:
                        due_head = h["text"]
                        break
            if due_head:
                concepts.append(f"difficult to assess {target} due to {due_head}")
            else:
                concepts.append(f"difficult to assess {target}")

    return dedup_preserve_order(concepts)

def postprocess(ner_output) -> List[str]:
    """Run the full post-processing chain on one report's NER output.

    Steps: parse -> rebuild tokens -> extract spans -> split into segments ->
    per-segment negation and concept building -> de-duplicate across segments.
    """
    toks, tags = rebuild_tokens(parse_ner_output(ner_output))
    spans = extract_spans(toks, tags)

    gathered: List[str] = []
    for seg_start, seg_end in split_into_segments(toks):
        negated = compute_negated_span_ids(toks, spans, seg_start, seg_end)
        gathered += build_concepts_from_segment(toks, spans, (seg_start, seg_end), negated)

    return dedup_preserve_order(gathered)

# CSV runner
import pandas as pd
def run_on_csv(path: str, ner_col: str = "ner_output") -> pd.DataFrame:
    """Read a CSV, post-process its NER column and return a copy with a ``pred_concepts`` column.

    Args:
        path: CSV file to read.
        ner_col: column holding the NER output; each cell is cast to ``str``
            and parsed with ``ast.literal_eval``.
    """
    df = pd.read_csv(path)
    raw_cells = df[ner_col].astype(str).tolist()

    out = df.copy()
    out["pred_concepts"] = [postprocess(ast.literal_eval(cell)) for cell in raw_cells]
    return out


In [None]:


# Load Option-A CSV
df = pd.read_csv("/content/option_a.csv")   # change path if needed

# ner_output was written as a string repr; turn it back into Python lists,
# then derive the predicted concepts from the parsed output.
df["ner_output_parsed"] = df["ner_output"].map(ast.literal_eval)
df["pred_concepts"] = df["ner_output_parsed"].map(postprocess)

# Optional: parse ground-truth concepts if you want to print them
def parse_gt(x):
    """Parse one ground-truth cell into a list of concepts.

    * a list is returned unchanged;
    * a missing value (NaN/None) becomes ``[]``;
    * a string holding a list/tuple literal is parsed and returned as a list;
    * anything else (unparsable text, or a literal that is not a sequence,
      such as ``"42"``) falls back to the single-item list ``[str(x)]``.

    The result is always a list, so downstream code can iterate over it.
    """
    if isinstance(x, list):
        return x
    if pd.isna(x):
        return []
    try:
        parsed = ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # the failure modes of ast.literal_eval; keep the raw text as a single concept
        return [str(x)]
    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    return [str(x)]

df["gt_concepts"] = df["concepts"].apply(parse_gt)

# Print a few examples; never ask for more rows than the frame holds,
# otherwise df.loc raises KeyError on a short file.
N_EXAMPLES = 10
for i in range(min(N_EXAMPLES, len(df))):
    print("REPORT:")
    print(df.loc[i, "report"])
    print("\nPREDICTED CONCEPTS:")
    print(df.loc[i, "pred_concepts"])
    print("\nGROUND TRUTH:")
    print(df.loc[i, "gt_concepts"])
    print("=" * 80)


REPORT:
Single portable view of the chest. There is superior traction of the left hilum. Subtle opacity projects over the left scapula in the region of the overlying cardiac lead. Findings are suggestive of underlying scarring. Elsewhere the lungs are clear. Cardiac silhouette is top-normal in size. For technique. No acute osseous abnormality seen, hypertrophic changes seen spine.

PREDICTED CONCEPTS:
['superior traction left hilum', 'subtle opacity left scapula', 'scarring', 'lungs clear', 'silhouette', 'no acute osseous abnormality seen', 'no acute changes seen']

GROUND TRUTH:
['single portable view', 'superior traction of left hilum', 'opacity over left scapula', 'suggestive of underlying scarring', 'clear lungs', 'normal cardiac silhouette', 'no acute osseous abnormality']
REPORT:
The heart is mildly enlarged with a left ventricular configuration. The mediastinal and hilar contours appear unchanged. The lungs appear clear. Blunting of the right posterior costophrenic sulcus may re

In [None]:
def similarity1(a: str, b: str) -> float:
    """Jaccard similarity of the lower-cased, whitespace-split word sets of ``a`` and ``b``.

    Two empty strings score 1.0; exactly one empty string scores 0.0.
    """
    left, right = set(a.lower().split()), set(b.lower().split())

    if not (left or right):
        return 1.0
    if not (left and right):
        return 0.0

    return len(left & right) / len(left | right)


In [None]:
def similarity2(a: str, b: str) -> float:
    """Dice coefficient of the lower-cased, whitespace-split word sets of ``a`` and ``b``.

    Computed as ``2 * |A & B| / (|A| + |B|)``. Two empty strings score 1.0;
    exactly one empty string scores 0.0.
    """
    left, right = set(a.lower().split()), set(b.lower().split())

    if not (left or right):
        return 1.0
    if not (left and right):
        return 0.0

    shared = len(left & right)
    return (2 * shared) / (len(left) + len(right))


In [None]:
def fuzzy_counts_one(gt_concepts, pred_concepts, threshold, similarity=None):
    """Count fuzzy true/false positives and false negatives for one report.

    A prediction may be matched to a ground-truth concept when their
    similarity is positive and at least ``threshold``. Each concept takes part
    in at most one match. TP is the size of a *maximum* one-to-one matching,
    so the result does not depend on the order of the predictions. A greedy
    "best free candidate" pass can undercount: an early prediction may take the
    only ground-truth concept a later prediction could match, even though the
    early one had another acceptable partner.

    Args:
        gt_concepts: ground-truth concept strings; non-strings and blanks are ignored.
        pred_concepts: predicted concept strings; non-strings and blanks are ignored.
        threshold: minimum similarity for a pair to be matchable.
        similarity: ``f(pred, gt) -> float``; defaults to ``similarity2``.

    Returns:
        ``(TP, FP, FN)`` with ``FP = #predictions - TP`` and ``FN = #ground truth - TP``.
    """
    if similarity is None:
        similarity = similarity2  # looked up at call time

    gt = [g.strip().lower() for g in gt_concepts if isinstance(g, str) and g.strip()]
    pr = [p.strip().lower() for p in pred_concepts if isinstance(p, str) and p.strip()]

    # candidates[i] = ground-truth indices that prediction i is allowed to match
    candidates = []
    for p in pr:
        allowed = []
        for j, g in enumerate(gt):
            score = similarity(p, g)
            if score > 0.0 and score >= threshold:
                allowed.append(j)
        candidates.append(allowed)

    gt_owner = {}  # ground-truth index -> prediction index currently matched to it

    def _try_assign(i, visited):
        # Augmenting-path step: give prediction i a partner, re-homing the
        # current owner of a contested ground-truth concept when possible.
        for j in candidates[i]:
            if j in visited:
                continue
            visited.add(j)
            if j not in gt_owner or _try_assign(gt_owner[j], visited):
                gt_owner[j] = i
                return True
        return False

    TP = sum(1 for i in range(len(pr)) if _try_assign(i, set()))

    FP = len(pr) - TP
    FN = len(gt) - TP

    return TP, FP, FN


In [None]:
def fuzzy_prf(gt_norm, pred_norm, threshold):
    """Micro-averaged fuzzy precision / recall / F1 over paired concept lists.

    Per-report ``(TP, FP, FN)`` counts from `fuzzy_counts_one` are summed over
    all reports before the ratios are computed; a ratio with a zero
    denominator is reported as 0.

    Returns:
        ``(precision, recall, f1, (TP, FP, FN))``.
    """
    totals = [0, 0, 0]  # running TP, FP, FN
    for gt, pr in zip(gt_norm, pred_norm):
        for slot, count in enumerate(fuzzy_counts_one(gt, pr, threshold)):
            totals[slot] += count
    TP, FP, FN = totals

    precision = TP / (TP + FP) if TP + FP else 0
    recall = TP / (TP + FN) if TP + FN else 0
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0

    return precision, recall, f1, (TP, FP, FN)


In [None]:

# Similarity thresholds at which precision / recall / F1 are reported.
THRESHOLDS = [0.7, 0.75, 0.8, 0.85, 0.9]

gt_lists = df["gt_concepts"].tolist()
pred_lists = df["pred_concepts"].tolist()

for th in THRESHOLDS:
    P, R, F1, counts = fuzzy_prf(gt_lists, pred_lists, threshold=th)
    print(th, P, R, F1)

0.7 0.4228429203539823 0.5366795366795367 0.47300850734725447
0.75 0.41841814159292035 0.5310635310635311 0.4680587780355762
0.8 0.382466814159292 0.4854334854334854 0.4278422273781903
0.85 0.3235619469026549 0.41067041067041066 0.3619489559164733
0.9 0.2696349557522124 0.34222534222534223 0.3016241299303944
