In [1]:
import os
import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from sentence_transformers import SentenceTransformer, util
from PIL import Image

# Configuration: run on the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint shared by the BLIP-2 processor and the captioning model
BLIP2_CHECKPOINT = "Salesforce/blip2-opt-2.7b"

# Load the BLIP-2 processor and model
processor = Blip2Processor.from_pretrained(BLIP2_CHECKPOINT)
blip_model = Blip2ForConditionalGeneration.from_pretrained(BLIP2_CHECKPOINT).to(device)

# Load the text-embedding model (Sentence Transformers)
text_model = SentenceTransformer('all-MiniLM-L6-v2').to(device)

# Frame directory and caption store.
# The directory can be overridden with the FRAME_DIR environment variable so the
# notebook is not tied to one machine's absolute path; the original location
# stays the default.
frame_dir = os.environ.get("FRAME_DIR", "/data/ephemeral/home/Frames")
# Filled later with (frame file name, caption, caption embedding) tuples
caption_store = []

# Image caption generation
def generate_caption(image_path):
    """Generate a caption for one image with the BLIP-2 model.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string, with special tokens removed and
        surrounding whitespace stripped.
    """
    # Open inside a context manager so the file handle is released immediately;
    # convert() returns a new image that no longer depends on the open file.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    inputs = processor(images=image, text="Describe the image", return_tensors="pt").to(device)
    # temperature / top_p were dropped: they only take effect when do_sample=True,
    # which this call never sets, so beam search ignored them (assuming the
    # checkpoint's default generation config leaves sampling off). Pass
    # do_sample=True together with them if sampling is actually wanted.
    outputs = blip_model.generate(
        **inputs,
        max_length=150,          # upper bound on the generated sequence length
        min_length=60,           # lower bound on the generated sequence length
        num_beams=20,            # beam search width
        repetition_penalty=1.2,  # discourage repeated tokens
    )
    caption = processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    return caption

# Generate and store a caption (plus its embedding) for every frame
print("이미지 캡션 생성 중...")

# Fail early with an explicit message instead of the bare os.listdir error
if not os.path.isdir(frame_dir):
    raise FileNotFoundError(f"Frame directory not found: {frame_dir!r}")

# Start from an empty store so re-running this cell does not append duplicates
caption_store.clear()
for frame_file in sorted(os.listdir(frame_dir)):
    frame_path = os.path.join(frame_dir, frame_file)
    # Only regular files are treated as frames; sub-directories
    # (e.g. a .ipynb_checkpoints folder) would make Image.open fail.
    if not os.path.isfile(frame_path):
        continue
    caption = generate_caption(frame_path)
    caption_embedding = text_model.encode(caption, convert_to_tensor=True)
    caption_store.append((frame_file, caption, caption_embedding))

print("모든 캡션 생성 완료.")

# Search function
def search_by_text(query, top_k=5):
    """Find the frame whose caption is most similar to a text query.

    Encodes the query with the text-embedding model, scores it against every
    caption embedding in ``caption_store`` by cosine similarity, prints the
    best matches and returns the path of the top one.

    Args:
        query: Text to search for.
        top_k: Number of best matches to print (default 5).

    Returns:
        Path (``frame_dir`` joined with the file name) of the most similar frame.

    Raises:
        IndexError: If ``caption_store`` is empty, i.e. no captions have been
            generated yet.
    """
    if not caption_store:
        raise IndexError("caption_store is empty; generate captions before searching")

    query_embedding = text_model.encode(query, convert_to_tensor=True)

    # Score every caption in one batched call instead of one call per caption:
    # stacking gives an (N, dim) matrix, and the similarity result is (1, N).
    caption_matrix = torch.stack([embedding for _, _, embedding in caption_store])
    scores = util.pytorch_cos_sim(query_embedding, caption_matrix)[0].tolist()

    # Sort by similarity, highest first
    similarities = sorted(
        (
            (frame_file, caption, score)
            for (frame_file, caption, _), score in zip(caption_store, scores)
        ),
        key=lambda entry: entry[2],
        reverse=True,
    )

    # Print the best matches
    print("검색 결과:")
    for frame_file, caption, similarity in similarities[:top_k]:
        print(f"[유사도: {similarity:.2f}] 이미지: {frame_file}, 캡션: {caption}")

    # Return the path of the single best match
    most_similar_frame = similarities[0][0]
    return os.path.join(frame_dir, most_similar_frame)

# Run a sample text query against the stored captions
query = "a man holding popcorn"
most_similar_image = search_by_text(query=query)

# Show the path of the best-matching frame
print(f"가장 유사한 이미지: {most_similar_image}")

  from .autonotebook import tqdm as notebook_tqdm
Downloading shards: 100%|██████████| 2/2 [01:11<00:00, 35.88s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  3.31it/s]


이미지 캡션 생성 중...


FileNotFoundError: [Errno 2] No such file or directory: '/data/ephemeral/home/Frames'

In [8]:
pip show numpy


Name: numpy
Version: 1.23.5
Summary: NumPy is the fundamental package for array computing with Python.
Home-page: https://www.numpy.org
Author: Travis E. Oliphant et al.
Author-email: 
License: BSD
Location: /opt/conda/lib/python3.10/site-packages
Requires: 
Required-by: mkl-fft, mkl-random, scikit-learn, scipy, torchelastic, torchvision, transformers
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Sanity check: reports whether PyTorch can use CUDA in this kernel
torch.cuda.is_available()

True