In [1]:
import sys
from pathlib import Path

import torch
from torch.utils.data import DataLoader
import os 

# --- GPU selection ----------------------------------------------------------
# Enumerate GPUs by PCI bus id and expose only physical device 5 to this
# process. CUDA reads these variables when it initialises, so they have to be
# in place before the first CUDA call made by this kernel.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "5"

# --- Instruction-encoder configuration --------------------------------------
TEXT_ENCODER_MODE = "llm"  # "clip" | "llm"
QWEN_MODEL_PATH = "/home/intern4/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/7ae557604adf67be50417f59c2c2f167def9a775"
# NOTE(review): this constant is defined here but the model-loading cell passes
# qwen_lora_path=None -- confirm whether the adapter is meant to be ignored.
QWEN_LORA_PATH = "/home/intern4/fm2026/mrfm-intern/code_it_llm_ca/dapt/outputs/lora-0.5B/checkpoint-606"

# --- Make the project importable --------------------------------------------
# The parent of the current working directory is treated as the project root
# and is put at the front of sys.path (only once).
project_root = Path.cwd().parent
project_root_entry = str(project_root)
if project_root_entry not in sys.path:
    sys.path.insert(0, project_root_entry)

# --- Neutralise the kernel's command line -----------------------------------
# Keep a copy of the original argv, then leave only the program name so that
# argument parsing triggered by the imports below does not see Jupyter's flags.
if hasattr(sys, 'argv'):
    original_argv = sys.argv.copy()
    sys.argv = [original_argv[0]]

# --- Project imports (must come after the sys.path / argv adjustments) ------
from model.listfm_it import load_from_ckpt
from datawrapper.datawrapper import LoaderConfig, get_data_wrapper_loader
from core_funcs import test_part
from params import config

# --- Optional Qwen tokenizer ------------------------------------------------
# Only needed for the LLM text encoder; stays None in CLIP mode.
qwen_tokenizer = None
use_llm_encoder = TEXT_ENCODER_MODE == "llm"
if use_llm_encoder:
    from transformers import AutoTokenizer

    qwen_tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_PATH, use_fast=True)
    # Fall back to the EOS token for padding when the tokenizer defines no pad token.
    if qwen_tokenizer.pad_token is None:
        qwen_tokenizer.pad_token = qwen_tokenizer.eos_token
    print("Qwen tokenizer loaded")

# --- Summary ----------------------------------------------------------------
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Imports successful")
print(f"Text Encoder Mode: {TEXT_ENCODER_MODE}")
print(f"Device: {torch.device(device_name)}")

Qwen tokenizer loaded
Imports successful
Text Encoder Mode: llm
Device: cuda


In [2]:
# Which training run and which checkpoint of that run to evaluate.
run_idx = 74  # index of the run to evaluate
run_ep = 24   # epoch number (or 'best')

# Run directories live under log_root and are named "<5-digit run index>_train".
log_root = Path("/home/intern4/fm2026/fm_flow/code_it/logs_past")
run_dir = log_root / f"{run_idx:05d}_train"

# The checkpoint file is either the "best" one or the one for a specific epoch.
ckpt_filename = (
    "checkpoint_best.ckpt" if run_ep == 'best' else f"checkpoint_{run_ep}.ckpt"
)
ckpt_path = run_dir / "checkpoints" / ckpt_filename

print(f"Checkpoint path: {ckpt_path}")
# Existence is only reported here, not enforced.
print(f"Exists: {ckpt_path.exists()}")

# Test dataset roots (copied from the training log file).
test_dataset = [
    '/fast_storage/intern/data/instruction_tuning/fastmri_acceleration_mat/test',
    '/fast_storage/intern/data/instruction_tuning/brats_crossmodal_mat_simple/test',
]
print(f"Test datasets: {test_dataset}")

# Directory in which the re-run results are expected, one sub-folder per epoch.
output_dir = run_dir / "test_rerun" / f"ep_{run_ep}"
print(f"Output directory: {output_dir}")

Checkpoint path: /home/intern4/fm2026/fm_flow/code_it/logs_past/00074_train/checkpoints/checkpoint_24.ckpt
Exists: True
Test datasets: ['/fast_storage/intern/data/instruction_tuning/fastmri_acceleration_mat/test', '/fast_storage/intern/data/instruction_tuning/brats_crossmodal_mat_simple/test']
Output directory: /home/intern4/fm2026/fm_flow/code_it/logs_past/00074_train/test_rerun/ep_24


In [4]:
# Record the compute device on the shared project config (CUDA when available).
config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {config.device}")

# Arguments common to both text-encoder modes.
print("Loading model from checkpoint...")
load_kwargs = dict(
    ckpt_path=ckpt_path,
    from_scratch=False,
    use_vision_decoder=True,
    use_vision_decoder_weights=True,
)

if TEXT_ENCODER_MODE == "llm":
    # LLM mode additionally points the loader at the Qwen base model.
    # NOTE(review): no LoRA adapter is passed (qwen_lora_path=None) even though
    # the setup cell defines QWEN_LORA_PATH -- confirm this is intentional.
    load_kwargs.update(
        qwen_model_path=QWEN_MODEL_PATH,
        qwen_lora_path=None,
        qwen_trainable=False,
    )
    encoder_label = "Qwen LLM"
else:
    encoder_label = "CLIP"

model = load_from_ckpt(**load_kwargs)
print(f"Model loaded with {encoder_label} instruction encoder")

# Move the model to the selected device and call eval() on it
# (presumably a torch.nn.Module, i.e. this disables training-only behaviour).
model = model.to(config.device)
model.eval()
print("Model loaded successfully!")

Using device: cuda
Loading model from checkpoint...
2026-02-11 00:54:34 [32m[1m[SUCCESS][0m Checkpoint loaded successfully.
2026-02-11 00:54:34 [34m[1m[DEBUG][0m Width check success
2026-02-11 00:54:34 [34m[1m[DEBUG][0m Head check success
2026-02-11 00:54:34 [34m[1m[DEBUG][0m BPE file exists.
2026-02-11 00:54:40 [34m[1m[DEBUG][0m QC start.
2026-02-11 00:54:40 [34m[1m[DEBUG][0m Image size: torch.Size([2, 1, 512, 512])
2026-02-11 00:54:40 [34m[1m[DEBUG][0m Text size: torch.Size([2, 1536])
2026-02-11 00:54:43 [34m[1m[DEBUG][0m img_full_feature size: torch.Size([2, 1025, 512])
2026-02-11 00:54:43 [34m[1m[DEBUG][0m text_full_feature size: torch.Size([2, 1536, 512])
2026-02-11 00:54:43 [34m[1m[DEBUG][0m img mean std max min: 0.0659 0.9905 7.4911 -4.3442
2026-02-11 00:54:43 [34m[1m[DEBUG][0m text mean std max min: 0.0473 1.0504 3.1438 -18.0297
2026-02-11 00:54:43 [34m[1m[DEBUG][0m stack_feature[0] size: torch.Size([2, 64, 512, 512])
2026-02-11 00:54:43 [34

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 44.30 GiB of which 16.69 MiB is free. Process 145172 has 23.06 GiB memory in use. Process 148764 has 20.38 GiB memory in use. Including non-PyTorch memory, this process has 852.00 MiB memory in use. Of the allocated memory 659.38 MiB is allocated by PyTorch, and 30.62 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Print the loader-related values currently stored on the project config.
# Several of them (subject_num, train_percent, slice_per_subject, debug mode)
# are overridden with literals below, so the printed values are informational.
print("Config values:")
for config_field in (
    "valid_batch",
    "debugmode",
    "data_type",
    "subject_num",
    "train_percent",
    "slice_per_subject",
):
    print(f"  {config_field}: {getattr(config, config_field)}")

# Loader settings for evaluation.
test_loader_cfg = LoaderConfig(
    # -- taken from the project config --
    batch=config.valid_batch,
    num_workers=config.num_workers,
    acs_num=config.acs_num,
    parallel_factor=config.parallel_factor,
    data_type=config.data_type,
    # NOTE(review): the tokenizer path comes from config here, while the model
    # cell uses the notebook-level QWEN_MODEL_PATH -- confirm they match.
    qwen_model_path=config.qwen_model_path,
    qwen_max_length=config.qwen_max_length,
    qwen_use_fast=config.qwen_use_fast,
    # -- fixed for this re-run --
    shuffle=False,            # keep test samples in a fixed order
    debug_mode=False,         # presumably limits samples per dataset when True -- TODO confirm
    subject_num=10000,        # large cap so that no subject is dropped
    train_percent=1.0,
    slice_per_subject=10000,  # large cap so that no slice is dropped
)

# Build the test loader over the manually specified dataset roots,
# single-process (non-distributed).
print("\nCreating test data loader...")
test_loader, dataset, test_len = get_data_wrapper_loader(
    file_path=test_dataset,
    training_mode=False,
    loader_cfg=test_loader_cfg,
    split="test",
    distributed=False,
    rank=0,
    world_size=1,
)

print(f"\nTest dataset size: {test_len}")
print(f"Number of batches: {len(test_loader)}")
print(f"Actual file_list length: {len(dataset.file_list)}")

## Run Test Part

test_part 함수를 실행하여 결과를 생성합니다.

In [None]:
# Evaluate the loaded model on the test loader via the project's test_part().
separator = "=" * 80
print("Running test_part...")
# NOTE(review): output_dir is only printed; test_part receives run_dir and
# tb_prefix, not output_dir -- confirm that results really land in output_dir.
print(f"Results will be saved to: {output_dir}")
print(separator)

# test_part is given an integer epoch; a non-integer run_ep (e.g. 'best') maps to 0.
epoch_for_test = run_ep if isinstance(run_ep, int) else 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    primary_metric = test_part(
        epoch=epoch_for_test,
        data_loader=test_loader,
        network=model,
        run_dir=run_dir,
        save_val=True,   # ask test_part to save results (.mat files, per the original note)
        tb_writer=None,  # no TensorBoard writer
        tb_prefix="test_rerun",
    )

print(separator)
# The returned value is reported as PSNR and must support ":.4f" formatting.
print(f"Test completed! Primary metric (PSNR): {primary_metric:.4f}")
print(f"Results saved to: {output_dir}")

## Verify Results

생성된 결과 파일 확인

In [None]:
# Report how many .mat result files exist in output_dir, plus the first and
# last file names in sorted order.
if not output_dir.exists():
    print(f"Output directory not found: {output_dir}")
else:
    mat_files = sorted(output_dir.glob("*.mat"))
    print(f"Total .mat files generated: {len(mat_files)}")
    if mat_files:
        first_mat, last_mat = mat_files[0], mat_files[-1]
        print(f"First file: {first_mat.name}")
        print(f"Last file: {last_mat.name}")

## Optional: Quick Metric Check

생성된 결과의 간단한 메트릭 확인

In [None]:
import numpy as np
from scipy.io import loadmat

# Load the first saved result file (in sorted order) and report its contents.
#
# Every expected key is checked before it is read, so a file saved under a
# different key set produces a readable message instead of a KeyError, and a
# missing or empty output directory is reported instead of silently printing
# nothing.
if output_dir.exists():
    mat_files = sorted(output_dir.glob("*.mat"))
    if mat_files:
        print(f"Loading first result file: {mat_files[0].name}")
        sample = loadmat(mat_files[0])
        # loadmat returns a dict; besides the stored variables it also holds
        # metadata entries such as __header__, __version__ and __globals__.
        print(f"Keys in .mat file: {list(sample.keys())}")

        # (printed label, key in the .mat file) for the arrays we expect.
        expected_arrays = (("Input", "input"), ("Output", "out"), ("Label", "label"))
        for shape_label, mat_key in expected_arrays:
            if mat_key in sample:
                print(f"{shape_label} shape: {sample[mat_key].shape}")
            else:
                print(f"{shape_label} shape: <key '{mat_key}' not found in file>")

        # The instruction text is optional.
        if 'instruction' in sample:
            print(f"Instruction: {sample['instruction']}")
    else:
        print(f"No .mat files found in: {output_dir}")
else:
    print(f"Output directory not found: {output_dir}")