# GPT-4o Assistant: Text Description

In [None]:
#Install
!pip install openai==1.14.3 --quiet
!pip install -q openai>=1.0.0 transformers pandas openpyxl pillow bert_score

#Import Library
import os, glob, torch, pandas as pd
from PIL import Image, ImageFile
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from bert_score import score as bertscore
from openai import OpenAI, OpenAIError

#Setting
# Let PIL load image files that are truncated instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# be stored in the notebook. The placeholder is only a fallback and must be
# replaced with a valid key that has GPT-4o access.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY"))

def check_api_key_validity(api_key: str) -> bool:
    """Report whether ``api_key`` is accepted by the OpenAI API.

    A throwaway client is built with the key and asked to list the available
    models. The outcome is printed and also returned.

    Args:
        api_key: The key to probe.

    Returns:
        True when the model listing succeeds, False when an ``OpenAIError``
        is raised while creating the client or listing models.
    """
    try:
        # Client construction stays inside the try: it can itself raise
        # OpenAIError, which must be reported as an invalid key.
        probe_client = OpenAI(api_key=api_key)
        probe_client.models.list()
    except OpenAIError as err:
        print("API 키 오류 발생:")
        print(err)
        return False
    else:
        print("API 키가 유효합니다.")
        return True

#API Key
# Validate the key the shared `client` was actually built with, instead of a
# second hard-coded literal that can drift out of sync with it.
api_key = client.api_key
check_api_key_validity(api_key)

#Import BLIP-2
# The processor and the model are loaded from the same checkpoint. Weights are
# requested in float16 and device placement is delegated via device_map="auto".
_blip2_checkpoint = "Salesforce/blip2-flan-t5-xl"
_blip2_load_options = {"torch_dtype": torch.float16, "device_map": "auto"}

processor = Blip2Processor.from_pretrained(_blip2_checkpoint)
blip_model = Blip2ForConditionalGeneration.from_pretrained(
    _blip2_checkpoint, **_blip2_load_options
)

#Upload Data
# Spreadsheet holding the reference answers used for few-shot prompting.
df = pd.read_excel("/content2/fewshot_data.xlsx")

# Keep every column whose header starts with "answer" (case-insensitive).
answer_cols = list(
    filter(lambda header: str(header).lower().startswith("answer"), df.columns)
)

# Image files matched by the extension pattern, in sorted path order.
image_paths = glob.glob("/content2/*.[jpJP][pnNP]*[gG]")
image_paths.sort()

#Definition
def generate_caption(image_path):
    """Generate a caption for one image with the BLIP-2 model.

    Args:
        image_path: Path of the image file to describe.

    Returns:
        The first decoded sequence, with special tokens removed and
        surrounding whitespace stripped.
    """
    # Open through a context manager so the file handle is released promptly.
    # convert() returns a new image object, so it remains usable afterwards.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # Send the inputs to wherever the model was placed (device_map="auto")
    # rather than assuming a CUDA device is present.
    inputs = processor(
        images=image,
        text="Describe this image in detail.",
        return_tensors="pt",
    ).to(blip_model.device, torch.float16)

    output = blip_model.generate(**inputs)
    return processor.batch_decode(output, skip_special_tokens=True)[0].strip()

#Few-shot Prompting
def build_fewshot_prompt(example_qas, new_caption):
    """Assemble a question/answer few-shot prompt.

    Args:
        example_qas: Iterable of ``(question, answer)`` pairs. Each pair is
            rendered as a "질문: ... / 답변: ..." block followed by a blank line.
        new_caption: Text posed as the final question; its answer slot is
            left open for the model to complete.

    Returns:
        The rendered example blocks followed by the open final question.
    """
    solved_blocks = [
        f"질문: {question}\n답변: {answer}\n\n" for question, answer in example_qas
    ]
    open_block = f"질문: {new_caption}\n답변:"
    return "".join(solved_blocks) + open_block

#GPT-4o
def generate_gpt4o_response(prompt):
    """Send ``prompt`` to GPT-4o as a single user message and return the reply.

    Args:
        prompt: Text sent as the user message, after a fixed system message.

    Returns:
        The stripped text of the first choice. Any exception raised while
        calling the API or reading the reply is not propagated; it is
        reported in-band as the string ``"[ERROR] <message>"``.
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=chat_messages,
            temperature=0.7,
            max_tokens=512,
        )
        reply_text = completion.choices[0].message.content
        return reply_text.strip()
    except Exception as err:
        # Callers detect failures through the "[ERROR]" prefix.
        return f"[ERROR] {err}"

#Generate description
# Per-image results. All lists are appended together, once per successfully
# processed image, so they always stay index-aligned with each other.
processed_images, captions, prompts, responses, answer_lists = [], [], [], [], []

for idx, image_path in enumerate(image_paths):
    image_name = os.path.basename(image_path)
    try:
        caption = generate_caption(image_path)

        # Row `idx` of the sheet is paired with the idx-th image in sorted
        # order. Cells are stored as str because spreadsheet cells may hold
        # non-text values and the references are later scored as text.
        row_answers = []
        for col in answer_cols:
            cell = df.loc[idx, col]
            if pd.notna(cell) and str(cell).strip():
                row_answers.append(str(cell))

        if not row_answers:
            print(f"예시 없음 → {image_name} 생략")
            continue

        # NOTE(review): only the first three answers are considered and the
        # last of those is left out of the prompt, so a row with a single
        # answer produces a prompt without examples — confirm this is intended.
        fewshot_qas = [(caption, ans) for ans in row_answers[:3]]
        prompt = build_fewshot_prompt(fewshot_qas[:-1], caption)
        response = generate_gpt4o_response(prompt)
    except Exception as e:
        # Best-effort batch: report the failing image and move on.
        print(f"{image_name} 실패: {e}")
        continue

    processed_images.append(image_name)
    answer_lists.append(row_answers)
    prompts.append(prompt)
    captions.append(caption)
    responses.append(response)
    print(f"{image_name} 완료")

#BERTScore
best_scores = {
    "precision": [], "recall": [], "f1": [], "matched_answer": []
}

# Flatten every (response, reference) pair so bertscore() is invoked once for
# the whole run instead of once per image. spans[i] is the slice of the flat
# lists that belongs to result i, or None when that result cannot be scored.
flat_cands, flat_refs, spans = [], [], []
for refs, cand in zip(answer_lists, responses):
    # generate_gpt4o_response reports failures with an "[ERROR]" prefix.
    if not refs or cand.startswith("[ERROR]"):
        spans.append(None)
        continue
    start = len(flat_refs)
    flat_cands.extend([cand] * len(refs))
    flat_refs.extend(refs)
    spans.append((start, len(flat_refs)))

if flat_refs:
    P, R, F1 = bertscore(cands=flat_cands, refs=flat_refs, lang="ko", verbose=False)

for span in spans:
    if span is None:
        best_scores["precision"].append(0.0)
        best_scores["recall"].append(0.0)
        best_scores["f1"].append(0.0)
        best_scores["matched_answer"].append("[EMPTY]")
        continue
    start, end = span
    # Keep the reference with the highest F1 for this response.
    best = start + F1[start:end].argmax().item()
    best_scores["precision"].append(P[best].item())
    best_scores["recall"].append(R[best].item())
    best_scores["f1"].append(F1[best].item())
    best_scores["matched_answer"].append(flat_refs[best])

#Display as DataFrame
# Use the names recorded during processing: slicing image_paths by count would
# mislabel rows whenever an earlier image was skipped or failed.
results_df = pd.DataFrame({
    "image": processed_images,
    "caption": captions,
    "fewshot_prompt": prompts,
    "gpt4o_response": responses,
    "matched_answer": best_scores["matched_answer"],
    "bert_precision": best_scores["precision"],
    "bert_recall": best_scores["recall"],
    "bert_f1": best_scores["f1"]
})
results_df.to_excel("/content2/llm_result_gpt4o_fewshot_blip2.xlsx", index=False)
print("files.download: /content2/llm_result_gpt4o_fewshot_blip2.xlsx")
