In [2]:
import os
import torch
from torch.utils.data import DataLoader
from utils.train_comparison import *
from utils.processing import image_transform
from utils.data.chexpert_dataset import CheXpertDataset

In [3]:
# Build the GPT-2 flavoured tokenizer once and cache the special-token ids
# that later cells read (pad/eos are passed to generation; bos is kept too).
tok = build_tokenizer_from_labels(gpt2=True)
pad_id, eos_id, bos_id = tok.pad_token_id, tok.eos_token_id, tok.bos_token_id

Using GPT2 tokenizer.


In [4]:
from utils.data.dataloaders import create_dataloaders

# # CheXpert
# CHEXPERT_DIR = "Datasets/CheXpertPlus"
# chexpert_paths = {
#     "chexpert_data_path": f"{CHEXPERT_DIR}/PNG",  # base PNG folder
#     "chexpert_data_csv": f"{CHEXPERT_DIR}/df_chexpert_plus_240401_findings.csv",
# }

# # MIMIC
# MIMIC_DIR = "Datasets/MIMIC"
# mimic_paths = {
#     "mimic_data_path": MIMIC_DIR,
#     "mimic_splits_csv": f"{MIMIC_DIR}/mimic-cxr-2.0.0-split.csv.gz",
#     "mimic_metadata_csv": f"{MIMIC_DIR}/mimic-cxr-2.0.0-metadata-findings-only.csv",
#     "mimic_reports_path": f"{MIMIC_DIR}/cxr-record-list.csv.gz",  # must contain 'path'
#     "mimic_images_dir": f"{MIMIC_DIR}/matched_images_and_masks_mimic_224/images",
# }

# Root folders of the two datasets (relative to the notebook's working dir).
CHEXPERT_DIR = "Datasets/CheXpertPlus"
MIMIC_DIR = "Datasets/MIMIC"

# CheXpert: the images come from a separate folder, while the findings CSV
# still lives under CHEXPERT_DIR.
chexpert_paths = dict(
    chexpert_data_path="Datasets/CHEXPERT516",  # base PNG folder
    chexpert_data_csv=f"{CHEXPERT_DIR}/df_chexpert_plus_240401_findings.csv",
)

# MIMIC: split/metadata/record-list files sit under MIMIC_DIR; the images are
# read from their own directory.
mimic_paths = dict(
    mimic_data_path=MIMIC_DIR,
    mimic_splits_csv=f"{MIMIC_DIR}/mimic-cxr-2.0.0-split.csv.gz",
    mimic_metadata_csv=f"{MIMIC_DIR}/mimic-cxr-2.0.0-metadata-findings-only.csv",
    mimic_reports_path=f"{MIMIC_DIR}/cxr-record-list.csv.gz",  # must contain 'path'
    mimic_images_dir="Datasets/MIMIC516/datos",
)

# `os` is only referenced by the commented-out num_workers option below.
import os
# Extra keyword arguments forwarded verbatim to create_dataloaders. Every
# option is currently commented out, so the dict is empty and nothing extra
# is passed; uncomment entries to tune loading for the machine at hand.
kwargs = {
    # "num_workers": os.cpu_count() // 2 if os.cpu_count() else 4,  # adjust on your VM
    # "persistent_workers": True,           # reuses workers between iterations
    # "prefetch_factor": 4,                 # each worker prefetches batches
    # "pin_memory": True,                   # if using CUDA
    # "drop_last": False
}

# Build the loader for the "test" split from both path dictionaries.
# NOTE(review): sampling_ratio presumably controls the CheXpert/MIMIC mix -
# confirm against create_dataloaders before relying on it.
test_loader = create_dataloaders(
    chexpert_paths, 
    mimic_paths, 
    batch_size=4,
    split="test", 
    sampling_ratio=0.7,
    **kwargs
)

Filtering rows with missing PNGs...
[INFO] Kept 63/63 rows with existing PNGs


In [None]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from utils.text_metrics import evaluate_all_metrics, save_metrics_to_json
# Model factory helpers; the checkpoint itself is read with map_location="cpu" and copied into the model by load_state_dict
from utils.models.complete_model import create_complete_model, load_complete_model

# --- Model setup ------------------------------------------------------------
# Use CUDA when torch can see a GPU, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Segmenter weight files handed to create_complete_model. These are plain
# strings: the previous f-prefixes had no placeholders to interpolate.
SEGMENTER_MODEL_PATH_LUNG = "models/dino_unet_decoder_finetuned.pth"
SEGMENTER_MODEL_PATH_HEART = "models/dino_unet_organos_best.pth"
model = create_complete_model(
    device=DEVICE,
    SEGMENTER_MODEL_PATH_LUNG=SEGMENTER_MODEL_PATH_LUNG,
    SEGMENTER_MODEL_PATH_HEART=SEGMENTER_MODEL_PATH_HEART,
    freeze_encoder=False,
    mask_implementation="hidden",
)

best_model_path = "checkpoints/model_best7.pth"
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. Tensors are mapped to CPU here and copied into the
# already-constructed model by load_state_dict below.
ckpt = torch.load(best_model_path, map_location="cpu")

# strict=False tolerates key mismatches between checkpoint and model. Report
# them instead of discarding the result, so an evaluation run on partially
# initialised weights does not go unnoticed.
load_result = model.load_state_dict(ckpt["model_state_dict"], strict=False)
missing_keys = list(getattr(load_result, "missing_keys", None) or [])
unexpected_keys = list(getattr(load_result, "unexpected_keys", None) or [])
if missing_keys or unexpected_keys:
    print(
        f"[WARN] Checkpoint {best_model_path} does not match the model exactly: "
        f"{len(missing_keys)} missing key(s), {len(unexpected_keys)} unexpected key(s)."
    )
    if missing_keys:
        print(f"[WARN]   first missing keys: {missing_keys[:5]}")
    if unexpected_keys:
        print(f"[WARN]   first unexpected keys: {unexpected_keys[:5]}")

# Switch to inference behaviour before generating.
model.eval()

from tqdm import tqdm

# --- Generation over the test loader ----------------------------------------
# Accumulators for the final metric pass: one generated string and one
# reference entry per sample.
generated_text, target_text = [], []
iteration = 0

# Optional cap on the number of batches for quick smoke tests.
# None (the default) processes the whole loader.
MAX_EVAL_BATCHES = None

with torch.inference_mode():
    for pixel_values, ids_loader, paths, raw_labels in tqdm(test_loader):
        iteration += 1

        pixel_values = pixel_values.to(model.device, non_blocking=True)

        # Visual path
        patches = model.encoder(pixel_values)                           # [B,Np,Cenc]
        projected_patches = model.linear_projection(patches)            # [B,Np,n_embd]

        # Segmentation path per layer
        segmented_layers = model.segmenter(pixel_values, model.num_layers) # [B,n_layers,H,W] (per current decoder)

        # Greedy decoding (do_sample=False, num_beams=1). The sampling-only
        # flags top_k / top_p / temperature were removed: they have no effect
        # without sampling and only triggered the "generation flags are not
        # valid and may be ignored" warning.
        # NOTE(review): generate() also warns that no attention_mask is passed
        # (pad and eos share an id). Left unchanged because the decoder's
        # custom masking arguments are not visible from this notebook.
        # All plotting/diagnostic options stay disabled for speed.
        gen_ids = model.decoder.generate(
            inputs_embeds=projected_patches,
            max_new_tokens=150,
            do_sample=False,
            repetition_penalty=1.2,
            num_beams=1,
            eos_token_id=eos_id,
            pad_token_id=pad_id,
            use_cache=True,
            segmentation_mask=segmented_layers,
            prefix_allowed_length=0,
            plot_attention_mask=False,
            plot_attention_mask_layer=[],
            plot_attention_map=False,
            plot_attention_map_layer=[],
            plot_attention_map_generation=0,
        )
        # Move only the ids needed for decoding to CPU
        texts = model.tokenizer.batch_decode(gen_ids.detach().cpu(), skip_special_tokens=True)

        # Accumulate for the final metric pass.
        # NOTE(review): despite its name, ids_loader is used as the reference
        # text for the metrics and the JSON dump - confirm the loader yields
        # strings in this position.
        generated_text.extend(texts)
        target_text.extend(ids_loader)

        if MAX_EVAL_BATCHES is not None and iteration >= MAX_EVAL_BATCHES:
            break

import json
import os

# --- Persist raw generations --------------------------------------------------
# Keep generated and reference texts side by side so metrics can be recomputed
# later without re-running generation.
data_to_save = {
    "generated": generated_text,
    "target": target_text,
}
save_json_path = "lstm-vs-gpt/results_complete/bestmodelcloud_generated_texts.json"
# Create the results folder if needed; otherwise open() raises
# FileNotFoundError and the generations from the loop above are lost.
os.makedirs(os.path.dirname(save_json_path), exist_ok=True)
with open(save_json_path, "w") as f:
    json.dump(data_to_save, f, indent=4)

# --- Metrics ------------------------------------------------------------------
# Evaluate once per model, over all accumulated generations and references.
eval_results = evaluate_all_metrics(
    generated=generated_text,
    original=target_text,
    evaluation_mode="CheXagent"
)

# Label used in the report header only.
# NOTE(review): hard-coded, not read from the checkpoint - keep it in sync
# with the model actually loaded.
TRAINED_EPOCHS = 100

print(f"\nOverall results for model trained {TRAINED_EPOCHS} epochs:")
for metric, scores in eval_results.items():
    print(f"{metric}: {scores}")

# Placeholder for the training wall time; 0 means "not tracked" in this run.
eval_results["training_time_seconds"] = 0

# Save metrics (plain string: the path has nothing to interpolate).
save_metrics_to_json(
    eval_results,
    "lstm-vs-gpt/results_complete/cloudbestmodel10_20_MIMIC.json"
)

🧠 Cargando encoder DINOv3...
Loaded segmenter weights from models/dino_unet_organos_best.pth
Loaded segmenter weights from models/dino_unet_decoder_finetuned.pth
Set use_cache=False for training.


  0%|          | 0/616 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
100%|██████████| 616/616 [10:24<00:00,  1.01s/it]


Using device: cuda:0

Overall results for model trained 100 epochs:
chexbert_f1_weighted: 0.33270280325896656
chexbert_f1_micro: 0.33335252461281595
chexbert_f1_macro: 0.2295067626545221
chexbert_f1_micro_5: 0.40838974586616356
chexbert_f1_macro_5: 0.37603624030106914
radgraph_f1_RG_E: 0.1700120508539722
radgraph_f1_RG_ER: 0.1495862823696185
