# BLIP VQA Labeled Test
## True/False 비율 기반 랜드마크 인식 평가

기존의 YES 비율 평가에서 개선하여, 각 질문마다 예상 답변(yes/no)을 라벨링하고
BLIP 모델의 답변이 예상과 일치하는지 True/False로 평가합니다.

In [1]:
import torch
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image
import glob
import os
import json
from tqdm.auto import tqdm

# Select the compute device: CUDA when a GPU is visible to torch, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the BLIP VQA processor and model from the checkpoint named below,
# then move the model's weights onto the selected device.
model_name = "ybelkada/blip-vqa-base"
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForQuestionAnswering.from_pretrained(model_name)
model = model.to(device)

print(f"BLIP 모델 로드 완료. (사용 장치: {device})")

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/445 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.54G [00:00<?, ?B/s]

BLIP 모델 로드 완료. (사용 장치: cuda)


In [2]:
# Mount Google Drive when running inside Google Colab. On a local machine or a
# server the `google.colab` package is not installed, so the mount is skipped
# automatically instead of requiring this cell to be commented out by hand.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
    print("Google Colab 환경이 아니므로 Drive 마운트를 건너뜁니다.")
else:
    IN_COLAB = True
    drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:
import os

# --- 1. Candidate project roots, one per supported environment ---
# (Path 1: Colab with Google Drive mounted)
COLAB_DRIVE_ROOT = "/content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github"
# (Path 2: the "server" or another Colab environment)
SERVER_ROOT = "/content/group5_project"

# --- 2. Detect the environment and set PROJECT_ROOT ---
if os.path.exists(COLAB_DRIVE_ROOT):
    PROJECT_ROOT = COLAB_DRIVE_ROOT
    print("환경 감지: Google Drive Colab")
elif os.path.exists(SERVER_ROOT):
    PROJECT_ROOT = SERVER_ROOT
    print("환경 감지: Server (group5_project)")
else:
    # Neither known root exists (e.g. a local checkout). Fall back to the
    # current working directory so PROJECT_ROOT is always bound; without this
    # branch the DATA_DIR line below fails with a NameError.
    PROJECT_ROOT = os.getcwd()
    print(f"환경 감지 실패: 현재 작업 디렉터리를 사용합니다 ({PROJECT_ROOT})")

# --- 3. Derive the final DATA_DIR ---
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
print(f"최종 DATA_DIR: {DATA_DIR}")

# --- 4. Path of the labeled Q&A file ---
LANDMARK_QA_LABELED_FILE = os.path.join(DATA_DIR, "landmark_qa_labeled.json")
print(f"JSON 파일 경로: {LANDMARK_QA_LABELED_FILE}")

환경 감지: Google Drive Colab
최종 DATA_DIR: /content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github/data
JSON 파일 경로: /content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github/data/landmark_qa_labeled.json


In [4]:
# Load landmark_qa_labeled.json into `labeled_qa_data`. The loop below treats
# it as a mapping of landmark name -> list of question entries.
with open(LANDMARK_QA_LABELED_FILE, 'r', encoding='utf-8') as f:
    labeled_qa_data = json.load(f)
print(f"Labeled Q&A data loaded from '{LANDMARK_QA_LABELED_FILE}'.")

# Quick sanity check: question count per landmark plus the first entry.
for landmark, qa_list in labeled_qa_data.items():
    print(f"\n{landmark}: {len(qa_list)}개 질문")
    # A landmark may have no questions yet; skip the example instead of
    # failing with IndexError on qa_list[0].
    if qa_list:
        print(f"  예시: {qa_list[0]}")

Labeled Q&A data loaded from '/content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github/data/landmark_qa_labeled.json'.

네모탑: 40개 질문
  예시: ['Is the main object a sculpture?', 'yes']

지혜의숲 조각상: 40개 질문
  예시: ['Is the main object a sculpture?', 'yes']


In [53]:
# In-notebook definition of the labeled question set.
# Structure: landmark name -> list of [question, expected answer] pairs, where
# the expected answer is the string "yes" or "no". For every landmark the
# "yes" questions are listed first, then a blank line, then the "no" questions
# (20 of each per landmark).
# NOTE(review): this rebinds `labeled_qa_data`, replacing whatever the
# JSON-loading cell assigned to the same name — confirm which source is
# meant to be authoritative.
labeled_qa_data = {
    "활판공방 인쇄기": [
        ["Is the main object a sculpture?", "yes"],
        ["Is the sculpture located outdoors?", "yes"],
        ["Is the sculpture made of metal?", "yes"],
        ["Does the sculpture appear old and rusted?", "yes"],
        ["Does the sculpture look like a large machine?", "yes"],
        ["Does the sculpture resemble a printing press?", "yes"],
        ["Is the sculpture complex and mechanical?", "yes"],
        ["Does the sculpture have many gears?", "yes"],
        ["Does the sculpture have rollers?", "yes"],
        ["Are there many small metal pieces on the sculpture?", "yes"],
        ["Are the small metal pieces clustered together?", "yes"],
        ["Are the small metal pieces in a large pile?", "yes"],
        ["Is the sculpture standing on a platform?", "yes"],
        ["Is the platform gray?", "yes"],
        ["Is the top surface of the platform a metal grate?", "yes"],
        ["Is the sculpture dark in color?", "yes"],
        ["Is the sculpture's texture rough?", "yes"],
        ["Is the sculpture reddish-brown in color?", "yes"],
        ["Is the main body of the sculpture rectangular?", "yes"],
        ["Are there buildings in the background?", "yes"],

        ["Is the sculpture a real, living animal?", "no"],
        ["Is the sculpture a plant or a tree?", "no"],
        ["Is the sculpture soft?", "no"],
        ["Is the sculpture covered in many bright colors?", "no"],
        ["Is the sculpture blue in color?", "no"],
        ["Is the sculpture white in color?", "no"],
        ["Is the sculpture made of wood?", "no"],
        ["Is the sculpture made of glass?", "no"],
        ["Is this a picture of a food dish?", "no"],
        ["Is the object a piece of fruit?", "no"],
        ["Is the object flying in the sky?", "no"],
        ["Is the object in the water?", "no"],
        ["Is the object a computer?", "no"],
        ["Is the object a toy?", "no"],
        ["Is the main object a person?", "no"],
        ["Is the sculpture wearing clothes?", "no"],
        ["Is the sculpture a boat?", "no"],
        ["Is the sculpture a car?", "no"],
        ["Is the sculpture a flower?", "no"],
        ["Is this a picture of a kitchen appliance?", "no"]
    ],
    "활돌이": [
        ["Is the main object a statue?", "yes"],
        ["Is the statue a cartoon character?", "yes"],
        ["Is the statue's main color pink?", "yes"],
        ["Does the statue have a plump body?", "yes"],
        ["Does the statue have arms?", "yes"],
        ["Does the statue have a face?", "yes"],
        ["Does the statue have large, dark eyes?", "yes"],
        ["Does the statue have a smiling mouth?", "yes"],
        ["Is the statue's mouth open?", "yes"],
        ["Is the statue's head rounded?", "yes"],
        ["Is the statue holding an object?", "yes"],
        ["Is the object it's holding a rectangular block?", "yes"],
        ["Does the object have white characters on it?", "yes"],
        ["Is the statue standing on a base?", "yes"],
        ["Is the statue's body one large piece?", "yes"],
        ["Is the statue's face a different color than its body?", "yes"],
        ["Is the statue located outdoors?", "yes"],
        ["Is the statue in front of a building?", "yes"],
        ["Does the building have large glass windows?", "yes"],
        ["Is the ground made of wood decking?", "yes"],

        ["Is the main object a car?", "no"],
        ["Is the main object a tree?", "no"],
        ["Is this a picture of a real animal?", "no"],
        ["Is the object a piece of fruit?", "no"],
        ["Is the object a boat?", "no"],
        ["Is the object a real, living person?", "no"],
        ["Is this a picture of a mountain?", "no"],
        ["Is the object a piece of furniture?", "no"],
        ["Is the object a food dish?", "no"],
        ["Is the object an airplane?", "no"],
        ["Is the object flying?", "no"],
        ["Is the object in the water?", "no"],
        ["Is the object a computer?", "no"],
        ["Is the object a kitchen appliance?", "no"],
        ["Is the object a musical instrument?", "no"],
        ["Is the object a flower?", "no"],
        ["Is this a picture of the sky at night?", "no"],
        ["Is the object a bicycle?", "no"],
        ["Is the object a planet?", "no"],
        ["Is the object on fire?", "no"]
    ],
    "지혜의숲 고양이": [
        ["Is the main object a sculpture?", "yes"],
        ["Is the sculpture an animal?", "yes"],
        ["Is the animal a cat?", "yes"],
        ["Is the sculpture a bust (head and shoulders)?", "yes"],
        ["Is the cat wearing a shirt?", "yes"],
        ["Is the shirt a shade of green?", "yes"],
        ["Does the shirt have a collar?", "yes"],
        ["Does the shirt have buttons?", "yes"],
        ["Does the shirt have a small circular patch on it?", "yes"],
        ["Does the cat have pointed ears?", "yes"],
        ["Does the cat have a smiling mouth?", "yes"],
        ["Is there a teardrop on the cat's face?", "yes"],
        ["Is the teardrop coming from one eye?", "yes"],
        ["Is the cat's fur a mix of dark and light shades?", "yes"],
        ["Is the sculpture located indoors?", "yes"],
        ["Is the sculpture inside a display case?", "yes"],
        ["Is the display case transparent (clear)?", "yes"],
        ["Is the sculpture standing on a white base?", "yes"],
        ["Does the cat have a wide smile?", "yes"],
        ["Does the cat have wide eyes?", "yes"],

        ["Is the main object an airplane?", "no"],
        ["Is the main object a piece of fruit?", "no"],
        ["Is the main object a food dish?", "no"],
        ["Is this a picture of a car?", "no"],
        ["Is this a boat on the water?", "no"],
        ["Is the object a tree?", "no"],
        ["Is this a picture of a computer?", "no"],
        ["Is the main object a person running?", "no"],
        ["Is the object flying in the sky?", "no"],
        ["Is the object a musical instrument?", "no"],
        ["Is the object a table?", "no"],
        ["Is the object a chair?", "no"],
        ["Is this a picture of a road?", "no"],
        ["Is the object a cell phone?", "no"],
        ["Is the object a kitchen appliance?", "no"],
        ["Is the object a bridge?", "no"],
        ["Is the object on fire?", "no"],
        ["Is the object a planet?", "no"],
        ["Is the object a mountain?", "no"],
        ["Is the object a real, living animal?", "no"]
    ],
    "네모탑": [
        ["Is the main object a sculpture?", "yes"],
        ["Is the sculpture located outdoors?", "yes"],
        ["Is the sculpture made of metal?", "yes"],
        ["Is the sculpture reddish-brown in color?", "yes"],
        ["Does the sculpture's surface appear rusted?", "yes"],
        ["Is the sculpture taller than it is wide?", "yes"],
        ["Does the sculpture have a vertical, tower-like shape?", "yes"],
        ["Is the sculpture an abstract piece of art?", "yes"],
        ["Is the sculpture composed of stacked sections?", "yes"],
        ["Are the sections box-like?", "yes"],
        ["Are there more than three stacked sections?", "yes"],
        ["Do the stacked sections decrease in size towards the top?", "yes"],
        ["Is the sculpture hollow?", "yes"],
        ["Does the sculpture have large, square openings?", "yes"],
        ["Are these openings on the sides of the sections?", "yes"],
        ["Can you see through the openings to the other side?", "yes"],
        ["Does the bottom-most section have openings that form 'legs'?", "yes"],
        ["Is the sculpture standing directly on the ground?", "yes"],
        ["Does the sculpture have flat surfaces?", "yes"],
        ["Does the sculpture have sharp edges and corners?", "yes"],

        ["Is the sculpture located indoors?", "no"],
        ["Is the main object a human-shaped figure?", "no"],
        ["Is the sculpture in a sitting position?", "no"],
        ["Is the sculpture covered in many bright colors?", "no"],
        ["Does the sculpture have patterns like flowers?", "no"],
        ["Is the sculpture holding binoculars?", "no"],
        ["Is there a suitcase next to the sculpture?", "no"],
        ["Is the sculpture sitting on a platform made of clear blocks?", "no"],
        ["Does the sculpture have text written on it?", "no"],
        ["Are there tall, wooden bookshelves in the background?", "no"],
        ["Is the floor made of wood?", "no"],
        ["Is the sculpture made of wood?", "no"],
        ["Is the sculpture an animal?", "no"],
        ["Is the sculpture wearing clothes?", "no"],
        ["Does the sculpture have a long nose?", "no"],
        ["Is the sculpture holding a book?", "no"],
        ["Is the sculpture blue in color?", "no"],
        ["Is the sculpture white in color?", "no"],
        ["Is the sculpture made of glass?", "no"],
        ["Is the object a building for people?", "no"]
    ],
"지혜의숲 조각상": [
        ["Is the main object a sculpture?", "yes"],
        ["Is the sculpture human-shaped?", "yes"],
        ["Is the sculpture in a sitting position?", "yes"],
        ["Is the sculpture multi-colored?", "yes"],
        ["Does the sculpture have colorful patterns?", "yes"],
        ["Is the sculpture wearing clothes?", "yes"],
        ["Does the sculpture have a head?", "yes"],
        ["Does the sculpture have arms?", "yes"],
        ["Does the sculpture have legs?", "yes"],
        ["Is the sculpture holding an object?", "yes"],
        ["Is the sculpture holding an object to its eyes?", "yes"],
        ["Is there a second, smaller object next to the sculpture?", "yes"],
        ["Is this second object a bag ?", "yes"],
        ["Is the bag also multi-colored?", "yes"],
        ["Is the sculpture sitting on a platform?", "yes"],
        ["Is the platform made of many blocks?", "yes"],
        ["Is the platform made of stacked blocks?", "yes"],
        ["Is the sculpture located indoors?", "yes"],
        ["Are there bookshelves in the room?", "yes"],
        ["Is the sculpture in a room with many books?", "yes"],

        ["Is the sculpture located outdoors?", "no"],
        ["Is the sculpture standing up?", "no"],
        ["Is the sculpture running?", "no"],
        ["Is the sculpture monochromatic (a single color)?", "no"],
        ["Is the sculpture an animal?", "no"],
        ["Is the sculpture a plant?", "no"],
        ["Is the sculpture a vehicle?", "no"],
        ["Is the sculpture black in color?", "no"],
        ["Is the sculpture white in color?", "no"],
        ["Is the sculpture blue in color?", "no"],
        ["Is the sculpture flying?", "no"],
        ["Is the sculpture in the water?", "no"],
        ["Does the sculpture have wings?", "no"],
        ["Is the sculpture made of wood?", "no"],
        ["Is the sculpture made of glass?", "no"],
        ["Is the sculpture transparent?", "no"],
        ["Does the sculpture have a tail?", "no"],
        ["Is the sculpture wearing a hat?", "no"],
        ["Is the sculpture holding a weapon?", "no"],
        ["Is the sculpture made of food?", "no"]
    ]
}

In [54]:
# Landmark under test and the per-image pass/fail cut-off.
LANDMARK_NAME = "활판공방 인쇄기"  # also used as the image sub-folder name under DATA_DIR
SUCCESS_THRESHOLD = 0.70  # accuracy cut-off for a single image (70%)

# Folder that holds this landmark's images.
landmark_dir = os.path.join(DATA_DIR, LANDMARK_NAME)

# Echo the configuration, one setting per line.
print(
    f"테스트 랜드마크: {LANDMARK_NAME}",
    f"이미지 폴더: {landmark_dir}",
    f"성공 임계값: {SUCCESS_THRESHOLD:.0%}",
    sep="\n",
)

테스트 랜드마크: 활판공방 인쇄기
이미지 폴더: /content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github/data/활판공방 인쇄기
성공 임계값: 70%


In [55]:
# --- 1. Collect the image files ---
# The patterns stay in glob form for readability, but matching is done on the
# lower-cased file name so that files such as "IMG_0001.JPG" are found on
# case-sensitive file systems, where glob("*.jpg") would silently skip them.
image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.webp", "*.jfif"]
_image_suffixes = tuple(pattern.lstrip("*").lower() for pattern in image_extensions)

if os.path.isdir(landmark_dir):
    # Sorted so images are processed in a reproducible order. Dot-files are
    # skipped, which is also what the "*.ext" glob patterns did.
    image_files = sorted(
        os.path.join(landmark_dir, entry)
        for entry in os.listdir(landmark_dir)
        if not entry.startswith(".") and entry.lower().endswith(_image_suffixes)
    )
else:
    # Missing folder: behave like an empty folder so the warning below fires.
    image_files = []

if not image_files:
    print(f"경고: '{landmark_dir}' 폴더에서 이미지를 찾을 수 없습니다. 경로를 확인해주세요.")
else:
    print(f"'{LANDMARK_NAME}' 폴더에서 총 {len(image_files)}개의 이미지를 찾았습니다.")

# --- 2. Fetch the labeled Q&A list for this landmark ---
# An unknown landmark name yields an empty list rather than a KeyError.
labeled_questions = labeled_qa_data.get(LANDMARK_NAME, [])
total_questions_count = len(labeled_questions)

if total_questions_count == 0:
    print(f"경고: '{LANDMARK_NAME}'에 대한 질문 리스트가 비어있습니다.")
else:
    print(f"'{LANDMARK_NAME}'에 대한 질문: {total_questions_count}개")
    # Report how the expected answers split between "yes" and "no".
    yes_label_count = sum(1 for _, label in labeled_questions if label == "yes")
    no_label_count = sum(1 for _, label in labeled_questions if label == "no")
    print(f"  - Positive (yes): {yes_label_count}개")
    print(f"  - Negative (no): {no_label_count}개")

'활판공방 인쇄기' 폴더에서 총 12개의 이미지를 찾았습니다.
'활판공방 인쇄기'에 대한 질문: 40개
  - Positive (yes): 20개
  - Negative (no): 20개


In [56]:
# --- 3. Run VQA and score every answer as True/False ---
# For each image, every labeled question is put to the BLIP model and the
# decoded answer is compared (exact string match) with the expected label.
image_results_list = []  # one summary dict per successfully processed image

# Inline previews need IPython (Colab/Jupyter). Resolve the import once here
# instead of retrying it for every image.
try:
    from IPython.display import display
except ImportError:
    display = None

if total_questions_count > 0 and image_files:
    # tqdm shows overall progress across images.
    for img_path in tqdm(image_files, desc=f"'{LANDMARK_NAME}' 이미지 처리 중"):
        try:
            # Open inside a context manager so the file handle is released as
            # soon as the RGB copy has been created.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
            img_name = os.path.basename(img_path)

            print("\n" + "="*50)
            print(f"{img_name} 처리 중")
            print("="*50)

            # Best-effort thumbnail: a failed preview must not abort the
            # evaluation, but it is reported instead of silently swallowed.
            if display is not None:
                try:
                    display(image.resize((300, 300)))
                except Exception as preview_error:
                    print(f"  (미리보기 표시 실패: {preview_error})")

            true_count = 0
            false_questions_list = []  # details of every mismatched answer

            # The image is preprocessed once and reused for all questions.
            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)

            for question, expected_label in labeled_questions:
                inputs = processor(text=question, return_tensors="pt").to(device)

                out = model.generate(
                    pixel_values=pixel_values,
                    input_ids=inputs.input_ids,
                    attention_mask=inputs.attention_mask,
                    max_new_tokens=10
                )
                answer = processor.decode(out[0], skip_special_tokens=True).strip().lower()

                # True/False verdict: any answer other than the exact expected
                # label (including free-form answers such as "tree") is False.
                is_correct = (answer == expected_label)
                status_icon = "✅" if is_correct else "❌"

                # Detailed per-question log.
                print(f"  {status_icon} Q: {question}")
                print(f"     Expected: {expected_label}, Got: {answer}")

                if is_correct:
                    true_count += 1
                else:
                    false_questions_list.append({
                        "question": question,
                        "expected": expected_label,
                        "got": answer
                    })

            # --- Compute and store this image's result ---
            false_count = len(false_questions_list)
            # total_questions_count > 0 is guaranteed by the enclosing `if`.
            true_ratio = true_count / total_questions_count
            is_success = true_ratio >= SUCCESS_THRESHOLD

            print(f"\n  [결과] True: {true_count}, False: {false_count}")
            print(f"  정확도: {true_ratio:.2%}")
            print(f"  성공 여부: {'✅ PASS' if is_success else '❌ FAIL'}")

            image_results_list.append({
                "image": img_name,
                "true_count": true_count,
                "false_count": false_count,
                "true_ratio": true_ratio,
                "is_success": is_success,
                "false_questions": false_questions_list,
                "total_questions": total_questions_count
            })

        except Exception as e:
            # Per-image boundary: report the failure and continue with the
            # next image, so one unreadable file does not end the whole run.
            print(f"{img_path} 처리 중 오류 발생: {e}")

print("\n\n" + "="*50)
print(f"VQA 처리 완료. 'image_results_list' 변수에 {len(image_results_list)}개 결과 저장됨.")
print("="*50)

Output hidden; open in https://colab.research.google.com to view.

In [57]:
# --- Print the final per-image summary and the overall statistics ---

if 'image_results_list' in locals() and image_results_list:
    banner = "=" * 50

    print("\n" + banner)
    print(f"    '{LANDMARK_NAME}' VQA 이미지별 최종 평가 결과")
    print(banner)

    # Accumulated with plain `+=` in iteration order (not sum()) so the
    # floating-point result stays bit-for-bit the same.
    total_accuracy = 0

    for result in image_results_list:
        verdict = '✅ PASS' if result['is_success'] else '❌ FAIL'

        print(f"\n이미지: {result['image']}")
        print(f"  - True  : {result['true_count']} / {result['total_questions']}")
        print(f"  - False : {result['false_count']}")
        print(f"  - 정확도: {result['true_ratio']:.2%}")
        print(f"  - 성공 여부: {verdict}")

        # List every question whose answer did not match the expected label.
        missed = result['false_questions']
        if missed:
            print(f"\n  ❌ False 질문 목록 ({len(missed)}개):")
            for i, fq in enumerate(missed, 1):
                print(f"    {i}. Q: {fq['question']}")
                print(f"       Expected: {fq['expected']}, Got: {fq['got']}")

        print("-" * 50)

        total_accuracy += result['true_ratio']

    # Overall statistics across all processed images.
    image_count = len(image_results_list)
    success_count = sum(1 for entry in image_results_list if entry['is_success'])
    avg_accuracy = total_accuracy / image_count
    success_rate = success_count / image_count

    print("\n" + banner)
    print("전체 통계")
    print(banner)
    print(f"평균 정확도: {avg_accuracy:.2%}")
    print(f"성공 이미지: {success_count} / {image_count} ({success_rate:.2%})")
    print(f"성공 임계값: {SUCCESS_THRESHOLD:.0%}")
else:
    # The results variable is missing or empty: nothing to summarise.
    print("❌ 오류: 'image_results_list' 변수를 찾을 수 없습니다.")
    print("이전 셀을 먼저 실행하여 VQA 처리를 완료해주세요.")


    '활판공방 인쇄기' VQA 이미지별 최종 평가 결과

이미지: 13518647321211.jpg
  - True  : 36 / 40
  - False : 4
  - 정확도: 90.00%
  - 성공 여부: ✅ PASS

  ❌ False 질문 목록 (4개):
    1. Q: Is the platform gray?
       Expected: yes, Got: no
    2. Q: Is the main body of the sculpture rectangular?
       Expected: yes, Got: no
    3. Q: Is the sculpture a plant or a tree?
       Expected: no, Got: tree
    4. Q: Is the sculpture made of wood?
       Expected: no, Got: yes
--------------------------------------------------

이미지: 20190831_161244_HDR.jpg
  - True  : 37 / 40
  - False : 3
  - 정확도: 92.50%
  - 성공 여부: ✅ PASS

  ❌ False 질문 목록 (3개):
    1. Q: Is the main body of the sculpture rectangular?
       Expected: yes, Got: no
    2. Q: Is the sculpture a plant or a tree?
       Expected: no, Got: tree
    3. Q: Is the sculpture made of wood?
       Expected: no, Got: yes
--------------------------------------------------

이미지: 20230804＿072534.jpg
  - True  : 37 / 40
  - False : 3
  - 정확도: 92.50%
  - 성공 여부: ✅ PASS



In [None]:
# --- (Optional) Save the results to a JSON file ---
import json

output_file = os.path.join(PROJECT_ROOT, "tests", f"blip_results_{LANDMARK_NAME}.json")

# open(..., 'w') does not create missing directories, so make sure the
# output folder exists before writing.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Derive the summary numbers from image_results_list itself, so the saved
# statistics always describe the saved results even when the summary cell was
# not re-run after the latest evaluation.
saved_image_count = len(image_results_list)
saved_success_count = sum(1 for entry in image_results_list if entry["is_success"])
saved_avg_accuracy = (
    sum(entry["true_ratio"] for entry in image_results_list) / saved_image_count
    if saved_image_count
    else 0.0  # no images processed: avoid dividing by zero
)

with open(output_file, 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps the Korean landmark name readable in the file.
    json.dump({
        "landmark": LANDMARK_NAME,
        "threshold": SUCCESS_THRESHOLD,
        "total_images": saved_image_count,
        "success_count": saved_success_count,
        "average_accuracy": saved_avg_accuracy,
        "results": image_results_list
    }, f, ensure_ascii=False, indent=2)

print(f"결과가 '{output_file}'에 저장되었습니다.")

결과가 '/content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github/tests/blip_results_네모탑.json'에 저장되었습니다.
