# Installing Necessary Packages

In [19]:
!pip -q install zai-sdk
!pip -q install openai
!pip -q install google-generativeai

# Import Packages

In [20]:
import os
import sys
import re
import time
import json
import random
import logging
import warnings
import requests

import numpy as np
import pandas as pd

from google import genai
from google.genai.types import GenerateContentConfig

from abc import ABC, abstractmethod

from zai import ZaiClient
from openai import OpenAI
from tqdm import tqdm
from kaggle_secrets import UserSecretsClient

# Do not truncate long cell contents when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)
# Suppress all warnings for the rest of the session (this also hides
# deprecation notices from the SDKs imported above).
warnings.filterwarnings('ignore')

# Logging

In [21]:
# Configure the root logger once: WARNING and above go to stdout so that the
# messages show up in the notebook cell output.
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

# Re-running this cell must not stack a second handler (duplicated lines).
if len(logger.handlers) == 0:
    formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

# Prompt Template

In [22]:
# Extraction prompt sent to the model. The `<<<REPORT_TEXT>>>` marker is a
# placeholder that `build_prompt` replaces with the text of one report.
# The model is asked to answer with a bare JSON array of 7-key objects.
PROMPT_TEMPLATE = """
You are a clinical NLP annotator extracting entities from CHEST CT reports.

OUTPUT ONLY valid JSON. No markdown, no commentary.

SCHEMA:
[
  {
    "general_finding": "string or None",
    "specific_finding": "string or None",
    "finding_presence": "present OR absent OR uncertain OR None",
    "location": ["anatomical sites"],
    "degree": ["qualifiers: mild, moderate, severe, small, large, etc"],
    "measurement": "exact value with unit OR None",
    "comparison": "stable OR improved OR worsened OR None"
  }
]

CORE EXTRACTION RULES:

1. MERGE RELATED INFO
   - Device + position: "catheter in right IJ with tip in right atrium" → ONE entity, location: ["right internal jugular", "right atrium"]
   - Coreferences ("this", "that", "these", "it"): merge into previous finding
   - Multi-sentence descriptions: combine into ONE entity

2. HANDLE UNCERTAINTY
   "versus" OR "and/or" → finding_presence: "uncertain"
   Examples:
   • "consolidation versus atelectasis" → ONE entity, uncertain
   • "atelectasis and/or scarring" → specific_finding: "atelectasis versus scarring", uncertain
   Keep exact wording in specific_finding.

3. GROUPED NEGATIVES
   "No consolidation, effusion or edema" → ONE entity:
   • general_finding: "parenchymal abnormality"
   • specific_finding: "consolidation, effusion, or edema"
   • finding_presence: "absent"

4. NO DUPLICATES
   If FINDINGS and IMPRESSION say the same thing → extract ONCE
   Add IMPRESSION's clinical interpretation to degree if it adds context.

5. EXTRACT ALL FINDINGS
   Include:
   • Normal findings: "heart is normal in size" → extract as present, degree: ["normal"]
   • Negations: "without pericardial effusion" → extract as absent
   • Unremarkable: "thyroid unremarkable" → extract as present, degree: ["unremarkable"]
   Be consistent - don't skip findings.

6. NAMING CONVENTIONS
   Use specific organ names for general_finding:
   • Good: "thyroid gland", "heart", "lung parenchyma", "aorta"
   • Avoid: "thyroid abnormality", "cardiac abnormality", "vascular abnormality"
   • Exception: use "parenchymal abnormality" ONLY for grouped negatives

7. MEASUREMENTS & LOCATIONS
   • Copy measurements EXACTLY: "2.5 cm above carina", "8 mm"
   • Use precise anatomical terms from text
   • Include laterality: "right lower lobe" not "lower lobe"
   • Devices: include insertion site AND final position

8. COMPARISON
   ONLY set if comparing to prior imaging ("stable", "increased", "new vs prior")
   Otherwise → "None"

9. LYMPH NODES
   • "no lymphadenopathy" → absent
   • "subcentimeter nodes" → DO NOT extract lymphadenopathy
   • ONLY extract if "enlarged", "prominent", or "pathologic"

PRESENCE KEYWORDS:
- Present: "seen", "demonstrates", "present", "identified", "noted", "appreciated"
- Absent: "no", "without", "absent", "negative for", "no evidence of"
- Uncertain: "versus", "possible", "suspicious for", "cannot exclude", "and/or"

QUICK CHECK:
✓ Valid JSON (no markdown)?
✓ All 7 keys in every object?
✓ Handled "versus" and "and/or" as uncertain?
✓ No duplicates between FINDINGS and IMPRESSION?
✓ Merged devices, coreferences, grouped negatives?
✓ Extracted ALL findings including normal/negated ones?
✓ Used specific organ names (not "abnormality")?

EXTRACT FROM THIS REPORT:

<<<REPORT_TEXT>>>

Output ONLY the JSON array.
"""

# API Keys

In [23]:
# Secrets are read from the Kaggle secret store rather than hard-coded.
user_secrets = UserSecretsClient()

# Provider id -> API key. `get_ai_model` indexes this dict with its `model`
# argument, so a provider whose entry is commented out below raises KeyError
# there. The "gemma" entry reuses the Gemini key; `get_ai_model` itself only
# accepts "gemini", "glm" and "deepseek" as provider ids.
API_KEYS = dict(
    gemini=user_secrets.get_secret("gemini_api_key_0"),
    gemma=user_secrets.get_secret("gemini_api_key_0"),
    #glm=user_secrets.get_secret("glm_api_key"),
    #deepseek=user_secrets.get_secret("deepseek_api_key"),
)

# LLM Classes

In [24]:
class AIBaseModel(ABC):
    """Abstract base for the LLM backends used in this notebook.

    Keeps the API key and the model identifier; concrete subclasses must
    implement ``invoke``.
    """

    def __init__(self, api_key: str, model_name: str):
        """Store the credential and the name of the model to call."""
        self.model_name = model_name
        self.api_key = api_key

    @abstractmethod
    def invoke(self, prompt: str, **kwargs):
        """Send ``prompt`` to the backend; every subclass overrides this."""
        raise NotImplementedError

In [25]:
class GeminiModel(AIBaseModel):
    """Backend that calls Google models through the ``google.genai`` client.

    ``sleep_time`` is the pause (in seconds) callers should wait between
    requests; it is derived from a per-model requests-per-minute figure.
    """

    # Requests-per-minute figures keyed by exact model name.
    _REQUESTS_PER_MINUTE = {
        "gemini-2.5-flash": 5,
        "gemini-3-flash-preview": 5,
        "gemini-2.5-flash-lite": 10,
        "gemini-1.5-flash": 15,
    }

    def __init__(self, api_key: str, model_name: str = "gemini-2.5-flash"):
        # Go through the base initializer so `api_key` is stored too, exactly
        # like the other backends in this notebook.
        super().__init__(api_key, model_name)
        self.client = genai.Client(api_key=api_key)
        self.sleep_time = self._get_time_to_sleep()

    def _get_time_to_sleep(self):
        """Return the number of seconds to wait between two requests."""
        if self.model_name in self._REQUESTS_PER_MINUTE:
            requests_per_minute = self._REQUESTS_PER_MINUTE[self.model_name]
        elif "gemma" in self.model_name:
            requests_per_minute = 30
        else:
            requests_per_minute = 15  # default

        return 60 / requests_per_minute

    def invoke(
        self,
        prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0,
        top_p: float = 1,
        max_tokens: int = 8192,
    ):
        """Generate a completion for ``prompt``.

        Args:
            prompt: User content sent to the model.
            system_prompt: Optional system instruction. ``None`` (the default)
                sends no system instruction.
                NOTE(review): some models (e.g. Gemma) may reject system
                instructions - confirm before passing one for those models.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling threshold.
            max_tokens: Upper bound on generated tokens.

        Returns:
            The text parts of all candidates joined with newlines, or ``None``
            when the call fails or the response carries no text.
        """
        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=prompt,
                config=GenerateContentConfig(
                    system_instruction=system_prompt,
                    temperature=temperature,
                    top_p=top_p,
                    max_output_tokens=max_tokens,
                ),
            )

            # `candidates`, `content` and `parts` can each be missing or None,
            # so every level falls back to an empty sequence.
            texts = []
            for candidate in getattr(response, "candidates", None) or []:
                content = getattr(candidate, "content", None)
                for part in getattr(content, "parts", None) or []:
                    text = getattr(part, "text", None)
                    if text:
                        texts.append(text)
            return "\n".join(texts) if texts else None
        except Exception as e:
            # Best effort: a failed request must not abort a long batch run.
            logger.error(f"Gemini API error: {e}")
            return None

In [26]:
class GLMModel(AIBaseModel):
    """Backend that calls GLM models through the ``ZaiClient`` chat API."""

    # System message used when the caller does not supply `system_prompt`.
    _DEFAULT_SYSTEM_PROMPT = "You are a medical NLP system specialized in medical entity extraction from a given radiology report."

    def __init__(self, api_key: str, model_name: str = "glm-4.5-flash"):
        super().__init__(api_key, model_name)
        self.client = ZaiClient(api_key=api_key)
        # Seconds callers should wait between two requests.
        self.sleep_time = 6

    def invoke(
        self,
        prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.0,
        top_p: float = 1.0,
        max_tokens: int = 8192,
    ):
        """Generate a completion for ``prompt``.

        Args:
            prompt: User message content.
            system_prompt: Optional system message; when ``None`` the built-in
                medical-NLP system message is used.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling threshold.
            max_tokens: Upper bound on generated tokens.

        Returns:
            The stripped text of the first choice, or ``None`` when the call
            fails or the response has no usable content.
        """
        if system_prompt is None:
            system_prompt = self._DEFAULT_SYSTEM_PROMPT

        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ]

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=max_tokens,
                stream=False,
            )

            if response.choices:
                content = response.choices[0].message.content
                if content:
                    return content.strip()
                logger.warning("GLM returned a choice without text content")

            return None
        except Exception as e:
            # Best effort: log what the SDK exposes and keep the batch running.
            logger.error(f"GLM API error: {e}")
            if hasattr(e, "status_code"):
                logger.error(f"Status code: {e.status_code}")
            if hasattr(e, "body"):
                logger.error(f"Error body: {e.body}")
            return None

In [27]:
class DeepSeekModel(AIBaseModel):
    """Backend that calls DeepSeek through the OpenAI-compatible client."""

    # System message used when the caller does not supply `system_prompt`.
    _DEFAULT_SYSTEM_PROMPT = "You are a medical NLP system specialized in medical entity extraction from a given radiology report."

    def __init__(self, api_key: str, model_name: str = "deepseek-chat"):
        super().__init__(api_key, model_name)
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com"
        )
        # Seconds callers should wait between two requests.
        self.sleep_time = 3

    def invoke(
        self,
        prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.0,
        top_p: float = 1.0,
        max_tokens: int = 8192,
    ):
        """Generate a completion for ``prompt``.

        Args:
            prompt: User message content.
            system_prompt: Optional system message; when ``None`` the built-in
                medical-NLP system message is used.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling threshold.
            max_tokens: Upper bound on generated tokens.

        Returns:
            The stripped text of the first choice, or ``None`` when the call
            fails or the response has no usable content.
        """
        if system_prompt is None:
            system_prompt = self._DEFAULT_SYSTEM_PROMPT

        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ]

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=max_tokens,
                stream=False,
            )

            # Same guards as the GLM backend: no choices or empty content
            # yields None instead of relying on the except branch.
            if response.choices:
                content = response.choices[0].message.content
                if content:
                    return content.strip()
                logger.warning("DeepSeek returned a choice without text content")

            return None
        except Exception as e:
            # Best effort: a failed request must not abort a long batch run.
            logger.error(f"DeepSeek API error: {e}")
            return None


## Get Model

In [28]:
def get_ai_model(model: str, model_name: str):
    """Build the backend wrapper for a provider.

    Args:
        model: Provider id; one of "gemini", "glm", "deepseek".
        model_name: Model identifier passed on to the backend class.

    Returns:
        An instance of the backend class registered for ``model``.

    Raises:
        ValueError: ``model`` is not a known provider id.
        KeyError: no API key is configured in ``API_KEYS`` for ``model``.
    """
    model_map = {
        "gemini": GeminiModel,
        "glm": GLMModel,
        "deepseek": DeepSeekModel
    }

    if model not in model_map:
        raise ValueError(f"Invalid model: {model}. Choices: {list(model_map.keys())}")

    # Same exception type as a plain dict lookup, but with a message that
    # says what to fix.
    if model not in API_KEYS:
        raise KeyError(f"No API key configured for '{model}'; add an entry to API_KEYS.")

    return model_map[model](API_KEYS[model], model_name)

In [29]:
def load_jsonl(path: str):
    """Read a UTF-8 JSON Lines file and return its records as a list.

    Lines that are empty or contain only whitespace are skipped.
    """
    records = []
    with open(path, encoding="utf-8") as file:
        for raw_line in file:
            if not raw_line.strip():
                continue
            records.append(json.loads(raw_line))
    return records

def load_json(path: str):
    """Read the UTF-8 file at ``path`` and return its decoded JSON document."""
    with open(path, encoding="utf-8") as file:
        content = file.read()
    return json.loads(content)

In [30]:
def build_prompt(report: str) -> str:
    """Return ``PROMPT_TEMPLATE`` with the report text filled in.

    Every occurrence of the ``<<<REPORT_TEXT>>>`` placeholder is substituted
    with ``report``.
    """
    placeholder = "<<<REPORT_TEXT>>>"
    filled_prompt = PROMPT_TEMPLATE.replace(placeholder, report)
    return filled_prompt


In [31]:
def safe_parse_json(text: str):
    """Best-effort extraction of a JSON value from raw model output.

    Steps, in order (the first one that succeeds wins):
      1. Strip Markdown code fences and parse the whole text.
      2. Parse the span from the first ``[`` to the last ``]``.
      3. Parse the span from the first ``{`` to the last ``}``.
      4. Scan for the first ``[`` or ``{`` from which a complete JSON value
         can be decoded, ignoring whatever follows it. This covers replies
         whose trailing commentary itself contains brackets or braces.

    Args:
        text: Raw model output; may be ``None`` or empty.

    Returns:
        The decoded JSON value, or ``None`` when ``text`` is empty, is not a
        string, or contains no decodable JSON.
    """
    if not text or not isinstance(text, str):
        return None

    # Drop Markdown code fences (with or without a "json" tag).
    cleaned = re.sub(r"```json|```", "", text, flags=re.IGNORECASE).strip()

    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Try an array first, then an object; both patterns are greedy.
    for pattern in (r"\[.*\]", r"\{.*\}"):
        match = re.search(pattern, cleaned, re.S)
        if match:
            try:
                return json.loads(match.group())
            except Exception:
                pass

    # Last resort: decode the first complete JSON array/object found at any
    # opening bracket, tolerating extra text after it.
    decoder = json.JSONDecoder()
    for position, char in enumerate(cleaned):
        if char in "[{":
            try:
                value, _end = decoder.raw_decode(cleaned, position)
            except json.JSONDecodeError:
                continue
            return value

    return None


In [32]:
def _run_inference(
    dataset,
    output_path: str,
    model_id: str,
    model_name: str,
    id_fields,
    echo_parsed: bool,
    checkpoint_every: int = 5,
):
    """Shared inference loop behind the dataset-specific entry points.

    For every sample: build the prompt from ``sample["report"]``, call the
    model, parse the reply, print a progress echo, and collect a result row.
    A checkpoint (JSON array) is written every ``checkpoint_every`` samples;
    the final file is written as one JSON object per line.

    Args:
        dataset: Sequence of dict samples; each needs ``"report"`` plus every
            key listed in ``id_fields``.
        output_path: Destination of the final JSON Lines output.
        model_id: Provider id passed to ``get_ai_model``.
        model_name: Model identifier; also stored in each result row.
        id_fields: Sample keys copied to the front of each result row.
        echo_parsed: Print the parsed output when True, the raw output
            otherwise.
        checkpoint_every: Number of samples between checkpoint writes.

    Returns:
        The list of result rows.
    """
    model = get_ai_model(model_id, model_name)

    # Build the checkpoint name from the extension only, so it can never be
    # the final output path and directory names are left untouched.
    root, ext = os.path.splitext(output_path)
    temp_path = f"{root}_temp{ext}"

    results = []
    for idx, sample in tqdm(enumerate(dataset), total=len(dataset), desc="Processing Samples"):
        prompt = build_prompt(sample["report"])

        raw_output = model.invoke(
            prompt=prompt,
            system_prompt=None
        )

        parsed = safe_parse_json(raw_output)

        print(f"Report: {sample['report']}")
        if echo_parsed:
            print(f"Parsed Output: {parsed}")
        else:
            print(f"Raw Output: {raw_output}")

        # Identifier fields first, then the common fields.
        result = {field: sample[field] for field in id_fields}
        result["report"] = sample["report"]
        result["model"] = model_name
        result["entities"] = parsed  # None when the reply could not be parsed

        results.append(result)

        if (idx + 1) % checkpoint_every == 0:
            with open(temp_path, "w", encoding="utf-8") as f:
                f.write(json.dumps(results))

        # Stay below the provider's request rate.
        time.sleep(model.sleep_time + 0.2)

    with open(output_path, "w", encoding="utf-8") as f:
        for r in results:
            f.write(json.dumps(r) + "\n")

    return results


def run_inference_radgraph(
    dataset,
    output_path: str,
    model_id: str,
    model_name: str,
):
    """Run extraction over RadGraph-style samples.

    Each sample must provide ``"dataset"``, ``"doc_key"`` and ``"report"``.
    The parsed output is echoed for every sample. Results are written to
    ``output_path`` as JSON Lines and returned as a list of dicts.
    """
    return _run_inference(
        dataset,
        output_path,
        model_id,
        model_name,
        id_fields=("dataset", "doc_key"),
        echo_parsed=True,
    )


def run_inference_ratener(
    dataset,
    output_path: str,
    model_id: str,
    model_name: str,
):
    """Run extraction over samples keyed by ``"note_id"``.

    Each sample must provide ``"note_id"`` and ``"report"``. The raw model
    output is echoed for every sample. Results are written to ``output_path``
    as JSON Lines and returned as a list of dicts.
    """
    return _run_inference(
        dataset,
        output_path,
        model_id,
        model_name,
        id_fields=("note_id",),
        echo_parsed=False,
    )

In [33]:
# Input: a JSON file of chest CT reports. Output: written by
# run_inference_radgraph as one JSON object per line (JSON Lines), even though
# the file name ends in ".json".
INPUT_PATH = "/kaggle/input/chest-ct2/radgraphxl-chest-ct-reports.json"
OUTPUT_PATH = "/kaggle/working/chest-ct-schema.json"

# The first two records are skipped.
# NOTE(review): the reason for the [2:] offset is not visible here - confirm.
dataset = load_json(INPUT_PATH)[2:]

# model_id "gemini" selects GeminiModel, which is also used to call the Gemma
# model named below.
results = run_inference_radgraph(
    dataset=dataset,
    output_path=OUTPUT_PATH,
    model_id="gemini",
    model_name="gemma-3-27b-it",
)

Processing Samples:   0%|          | 0/3 [00:00<?, ?it/s]

Report: FINDINGS: Evaluation of the pulmonary vasculature demonstrates no evidence of filling defects to suggest pulmonary emboli. Main pulmonary artery is at the upper limits of normal size. Right internal jugular dual-lumen catheter is in place with the tip in the proximal right atrium. Left internal jugular catheter is in place with the tip in the left innominate vein. Small subcutaneous hematoma is appreciated at the left internal jugular skin entry site. Thoracic aorta demonstrates normal contour and caliber with moderate atherosclerotic plaque. Four-vessel aortic arch is incidentally noted with the left vertebral artery arising directly from the aorta. Mild cardiomegaly is appreciated without evidence of pericardial effusion. Coronary artery calcifications are appreciated involving the left anterior descending and circumflex coronary arteries. Endotracheal tube is in place with the tip approximately 2.5 cm above the carina. Trachea and central bronchi otherwise patent. Multifocal

Processing Samples:  33%|███▎      | 1/3 [00:39<01:18, 39.46s/it]

Report: FINDINGS: Visualized thyroid gland is unremarkable. The heart is normal in size with a trace physiologic pericardial effusion. No coronary artery calcification is seen. No mediastinal, axillary, or hilar lymphadenopathy is present. The ascending aorta is normal in caliber. Bovine aortic arch anatomy is present. No surface irregularity is seen along thoracic aorta to suggest intimal flap, dissection, or atheromatous ulcer. Lack of noncontrast images limits ability to assess for intramural hematoma. The main pulmonary arteries normal in caliber. While not a dedicated evaluation for pulmonary embolus, no filling defects are seen within the pulmonary arterial vasculature. The trachea and central airways are patent. No focal consolidation, effusion or edema is present. Visualized portions of the upper abdomen demonstrate a 8-mm hypodense lesion at the hepatic dome, which is too small to characterize. Fatty liver is present. Cholelithiasis is present without evidence of cholecystitis

Processing Samples:  67%|██████▋   | 2/3 [01:08<00:33, 33.44s/it]

Report: FINDINGS: The thyroid gland is heterogeneous with multiple small nodules seen in the right lobe. The heart size is normal without a pericardial effusion. Although this was a non-gated examination, moderate to severe coronary arterial calcification is noted of the LAD and proximal circumflex vessels. The aorta and great vessels are normal in course and caliber. The main pulmonary artery is normal in course and caliber. While not a dedicated pulmonary embolism study, no filling defects are seen in the main or lobar pulmonary arteries to suggest pulmonary embolism. The lungs demonstrate bibasilar atelectasis and / or scarring. No focal consolidations or pleural effusions. No pneumothorax. An 8mm subpleural cyst is present in the right lower lobe (Series 3, Image 132) A 3 mm pulmonary nodule is seen in the right upper lobe (Series 3, Image 117). The airways are patent and of normal course and caliber. No mediastinal, hilar or axillary lymphadenopathy. A bilobed soft tissue lesion o

Processing Samples: 100%|██████████| 3/3 [01:40<00:00, 33.59s/it]


## Saving as Pretty JSON 

In [34]:
import json

# Re-binds INPUT_PATH / OUTPUT_PATH: the input here is the JSON Lines file
# produced by the inference cell above.
INPUT_PATH = "/kaggle/working/chest-ct-schema.json"
OUTPUT_PATH = "/kaggle/working/chest-ct-schema.pretty.jsonl"

with open(INPUT_PATH, "r", encoding="utf-8") as fin, open(OUTPUT_PATH, "w", encoding="utf-8") as fout:
    for line in fin:
        line = line.strip()
        if not line:
            continue

        obj = json.loads(line)

        # Write each record indented. Every record then spans several lines,
        # so the output is for reading, not one-record-per-line JSON Lines.
        fout.write(json.dumps(obj, ensure_ascii=False, indent=2))
        fout.write("\n\n")  # blank line between records

print("Saved ->", OUTPUT_PATH)


Saved -> /kaggle/working/chest-ct-schema.pretty.jsonl


In [35]:
# INPUT_PATH = "/kaggle/input/radgraph/stanford-radgraph-XL-sentence.jsonl"
# OUTPUT_PATH = "/kaggle/working/stanford-radgraph-XL-mapped.jsonl"

# dataset = load_jsonl(INPUT_PATH)[1:]

# results = run_inference_radgraph(
#     dataset=dataset,
#     output_path=OUTPUT_PATH,
#     model_id="deepseek",
#     model_name="deepseek-chat",
# )