In [19]:
import torch
from transformers import (BlipProcessor,
                          BlipForImageTextRetrieval,
                          BlipForConditionalGeneration,
                          BlipForQuestionAnswering)
from PIL import Image
import requests
import numpy as np
import glob
import os
import io
import json
from tqdm.auto import tqdm

# GPU 사용 설정
device = "cuda" if torch.cuda.is_available() else "cpu"

# 모델 로드
model_name = "ybelkada/blip-vqa-base"
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForQuestionAnswering.from_pretrained(model_name).to(device)

print(f"BLIP 모델 로드 완료. (사용 장치: {device})")

BLIP 모델 로드 완료. (사용 장치: cuda)


In [37]:
# 로컬/서버에서 실행할 땐 아래 코드 주석처리 하세요
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [38]:
import os

# --- 1. 환경별 프로젝트 루트 경로 정의 ---
# (경로 1: Google Drive Mount Colab)
COLAB_DRIVE_ROOT = "/content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github"
# (경로 2: "서버" 또는 다른 Colab 환경)
SERVER_ROOT = "/content/group5_project"


# --- 2. 환경 감지 및 PROJECT_ROOT 설정 ---
if os.path.exists(COLAB_DRIVE_ROOT):
    # Google Drive Colab 환경
    PROJECT_ROOT = COLAB_DRIVE_ROOT
    print(f"환경 감지: Google Drive Colab")

elif os.path.exists(SERVER_ROOT):
    # "서버" (group5_project) 환경
    PROJECT_ROOT = SERVER_ROOT
    print(f"환경 감지: Server (group5_project)")

# --- 3. 최종 DATA_DIR 설정 ---
# (PROJECT_ROOT가 무엇으로 결정되었든, 그 하위에 data 폴더를 붙임)
DATA_DIR = os.path.join(PROJECT_ROOT, "data")


print(f"최종 DATA_DIR: {DATA_DIR}")

# --- 4. (예시) 나머지 파일 경로 설정 ---
LANDMARK_QA_FILE = os.path.join(DATA_DIR, "landmark_qa.json")
# print(f"JSON 파일 경로: {LANDMARK_QA_FILE}")

환경 감지: Google Drive Colab
최종 DATA_DIR: /content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github/data


In [44]:

with open(LANDMARK_QA_FILE, 'r', encoding='utf-8') as f:
    data = json.load(f)
    print(f"Landmark Q&A data loaded from '{LANDMARK_QA_FILE}'.")
    questions = data

Landmark Q&A data loaded from '/content/drive/Othercomputers/내 컴퓨터/데이콘 출판마을 프로젝트/github/data/landmark_qa.json'.


In [42]:
questions

{'피노키오': ['Is there a prominent human-shaped statue in the picture?',
  'If there is a statue, is it a character from a fairy tale?',
  'Does the statue have a particularly long nose?',
  'Is the statue wearing green-colored clothes?',
  "Are there flower patterns on the statue's clothes?",
  'Is the statue holding something?',
  'Is the object the statue is holding a book?',
  'Is the hat the statue is wearing a pointed cone shape?',
  "Are the statue's arms relatively thin and long?",
  'Is the statue holding up a finger with its right hand, like the number 1?'],
 '네모탑': ['Is this object shaped like a tower?',
  'Is this tower shaped like stacked square boxes?',
  'Is the height much longer than the width?',
  'Is this object made of wood?',
  'Is the color reddish-brown?',
  'Does the color look like rusty metal?',
  'Is the surface smooth?',
  'Does the sculpture only use dark colors?',
  'Does this sculpture have many layers?',
  'Is the number of layers exactly 5?',
  'Do the lay

In [43]:
questions_temp = {"피노키오":["Is there a prominent human-shaped statue in the picture?",
                    "If there is a statue, is it a character from a fairy tale?",
                    "Does the statue have a particularly long nose?",
                    "Is the statue wearing green-colored clothes?",
                    "Are there flower patterns on the statue's clothes?",
                    "Is the statue holding something?",
                    "Is the object the statue is holding a book?",
                    "Is the hat the statue is wearing a pointed cone shape?",
                    "Are the statue's arms relatively thin and long?",
                    "Is the statue holding up a finger with its right hand, like the number 1?"],
             "네모탑":["Is this object shaped like a tower?",
                    "Is this tower shaped like stacked square boxes?",
                    "Is the height much longer than the width?",
                    "Is this object made of wood?",
                    "Is the color reddish-brown?",
                    "Does the color look like rusty metal?",
                    "Is the surface smooth?",
                    "Does the sculpture only use dark colors?",
                    "Does this sculpture have many layers?",
                    "Is the number of layers exactly 5?",
                    "Do the layers get smaller as they go up?",
                    "Is the bottom part the widest?",
                    "Does each layer have a square opening on the front?",
                    "Does the height of this sculpture look more than twice a person's height?"],
             "지혜의숲 조각상":["Is this picture taken inside a building?",
                         "Does the background show a place with many books?",
                         "Do wooden bookshelves cover the walls?",
                         "Does the room have a high ceiling?",
                         "Is the floor made of wood?",
                         "Is the main object a human-shaped sculpture?",
                         "Is the sculpture sitting down?",
                         "Does the sculpture have its hands near its face?",
                         "Is the sculpture holding binoculars?",
                         "Is the sculpture looking straight ahead?",
                         "Does the sculpture wear a suit?",
                         "Is the suit covered in many bright colors?",
                         "Does the suit have patterns like flowers?",
                         "Does the sculpture's suit have a lot of green and yellow?",
                         "Is the sculpture's face also painted with patterns?",
                         "Is the sculpture sitting on a square base?",
                         "Does the base look like it is made of stacked small blocks?",
                         "Does the base have letters written on it?",
                         "Is a small container placed next to the sculpture?",
                         "Does that small container have patterns on it too?"]}

In [45]:
LANDMARK_NAME = "네모탑" # 탐색할 랜드마크 폴더 이름
landmark_dir = os.path.join(DATA_DIR, LANDMARK_NAME)

In [None]:
# --- 1. 이미지 파일 찾기 ---
image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.webp", "*.jfif"]
image_files = []
for ext in image_extensions:
    image_files.extend(glob.glob(os.path.join(landmark_dir, ext)))

if not image_files:
    print(f"경고: '{landmark_dir}' 폴더에서 이미지를 찾을 수 없습니다. 경로를 확인해주세요.")
else:
    print(f"'{LANDMARK_NAME}' 폴더에서 총 {len(image_files)}개의 이미지를 찾았습니다.")


# --- 2. VQA 실행 및 결과 수집 ---
all_answers_set = set()
image_results_list = [] #  이미지별 결과를 저장할 리스트

questions_list = questions_temp.get(LANDMARK_NAME, [])
total_questions_count = len(questions_list)

if total_questions_count == 0:
    print(f"경고: '{LANDMARK_NAME}'에 대한 질문 리스트가 비어있습니다.")
else:
    # tqdm을 사용하여 진행률 표시
    for img_path in tqdm(image_files, desc=f"'{LANDMARK_NAME}' 이미지 처리 중"):
        try:
            image = Image.open(img_path).convert("RGB")
            img_name = os.path.basename(img_path)

            print("\n" + "="*30)
            print(f"{img_name} 처리 중")
            print("="*30)

            # 이미지 표시
            display(image.resize((300, 300)))

            yes_count = 0
            no_count = 0

            for question in questions_list:
                inputs = processor(images=image, text=question, return_tensors="pt").to(device)

                out = model.generate(
                    pixel_values=pixel_values,
                    input_ids=inputs.input_ids,
                    attention_mask=inputs.attention_mask,
                    max_new_tokens=10
                )
                answer = processor.decode(out[0], skip_special_tokens=True).strip().lower()

                # 상세 로그 출력
                print(f"  Q: {question}")
                print(f"  A: {answer}")

                all_answers_set.add(answer)

                if answer == 'yes':
                    yes_count += 1
                elif answer == 'no':
                    no_count += 1

            # --- 결과 계산 및 저장 ---
            yes_ratio = (yes_count / total_questions_count) if total_questions_count > 0 else 0.0

            print(f"\n  [결과] YES: {yes_count}, NO: {no_count} (Ratio: {yes_ratio:.2%})")

            image_results_list.append({
                "image": img_name,
                "yes_count": yes_count,
                "no_count": no_count,
                "yes_ratio": yes_ratio,
                "total_questions": total_questions_count
            })

        except Exception as e:
            print(f"{img_path} 처리 중 오류 발생: {e}")

print("\n\n" + "="*50)
print(f"VQA 처리 완료. 'image_results_list' 변수에 {len(image_results_list)}개 결과 저장됨.")
print("="*50)

In [48]:
# --- 최종 요약 리스트 출력 ---

# 이전 셀에서 'image_results_list'가 생성되었는지 확인
if 'image_results_list' not in locals() or not image_results_list:
    print("❌ 오류: 'image_results_list' 변수를 찾을 수 없습니다.")
    print("이전 셀을 먼저 실행하여 VQA 처리를 완료해주세요.")
else:
    print("\n" + "="*50)
    print(f"    '{LANDMARK_NAME}' VQA 이미지별 최종 요약 리스트")
    print("="*50)

    # 'yes' 비율이 높은 순으로 정렬
    sorted_results = sorted(image_results_list, key=lambda x: x['yes_ratio'], reverse=True)

    for result in sorted_results:
        print(f" 이미지: {result['image']}")
        print(f"  - YES : {result['yes_count']} / {result['total_questions']}")
        print(f"  - NO  : {result['no_count']}")
        print(f"  - 비율: {result['yes_ratio']:.2%}")
        print("-" * 25)


    '네모탑' VQA 이미지별 최종 요약 리스트
 이미지: wrtFileImageView.jpg
  - YES : 13 / 14
  - NO  : 1
  - 비율: 92.86%
-------------------------
 이미지: images (3).jfif
  - YES : 13 / 14
  - NO  : 1
  - 비율: 92.86%
-------------------------
 이미지: 1685886446687.jpg
  - YES : 12 / 14
  - NO  : 2
  - 비율: 85.71%
-------------------------
 이미지: 20221002_112656.jpg
  - YES : 12 / 14
  - NO  : 2
  - 비율: 85.71%
-------------------------
 이미지: DSC09825.jfif
  - YES : 12 / 14
  - NO  : 2
  - 비율: 85.71%
-------------------------
 이미지: images (1).jfif
  - YES : 12 / 14
  - NO  : 2
  - 비율: 85.71%
-------------------------
 이미지: image_4919719771491473087251.jpg
  - YES : 11 / 14
  - NO  : 3
  - 비율: 78.57%
-------------------------
 이미지: DSC02125.jfif
  - YES : 11 / 14
  - NO  : 3
  - 비율: 78.57%
-------------------------
 이미지: IMG_1512.jfif
  - YES : 11 / 14
  - NO  : 3
  - 비율: 78.57%
-------------------------
 이미지: images.jfif
  - YES : 11 / 14
  - NO  : 3
  - 비율: 78.57%
-------------------------
 이미지: 다운로드 (2).jf