# InternLM Assistant: Image Text Description

In [None]:
#Install
!pip install -q transformers accelerate pandas openpyxl pillow bert_score

In [None]:
#Import Library
import torch, os, glob
import pandas as pd
from PIL import Image
from transformers import (
    InstructBlipProcessor, InstructBlipForConditionalGeneration,
    AutoTokenizer, AutoModelForCausalLM
)
from bert_score import score as bertscore

In [None]:
# Hugging Face Hub model identifiers used by the rest of the notebook.
# llm_id: chat language model that answers from the generated caption.
llm_id = "internlm/internlm-chat-7b"
# blip_id: InstructBLIP vision-language model that produces the caption.
blip_id = "Salesforce/instructblip-vicuna-7b"

In [None]:
# Captioning stack: processor (image + text preprocessing) and the
# InstructBLIP model in half precision, placed automatically by accelerate.
blip_processor = InstructBlipProcessor.from_pretrained(blip_id)
blip_model = InstructBlipForConditionalGeneration.from_pretrained(
    blip_id,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Language-model stack. NOTE: trust_remote_code=True runs Python code shipped
# with the model repository, so only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(llm_id, trust_remote_code=True)
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_id,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
llm_model = llm_model.cuda()
# Switch to inference mode; as the last expression of the cell this also
# displays the module structure in the notebook output.
llm_model.eval()

In [None]:
# Load the few-shot reference answers and collect the images to describe.
# Columns answer_1 .. answer_6 hold the reference answers read by later cells.
answer_cols = [f"answer_{n}" for n in range(1, 7)]

excel_path = "/content/fewshot_data.xlsx"
df = pd.read_excel(excel_path)

# The glob accepts extensions that start with j/p, continue with p/n and end
# in g (any case), e.g. .jpg, .jpeg, .png. Sorting gives a stable order,
# because images are paired with spreadsheet rows by position later on.
image_paths = glob.glob("/content/*.[jpJP][pnNP]*[gG]")
image_paths.sort()

In [None]:
#Definition
def generate_caption(image_path):
    """Generate a caption for one image with the InstructBLIP model.

    Args:
        image_path: Path of the image file to describe.

    Returns:
        The decoded model output with the instruction prompt removed and
        surrounding whitespace stripped.
    """
    prompt = "You are a helpful assistant. Describe this image in detail."

    # Open through a context manager so the file handle is released right
    # away; convert()/resize() return new in-memory images that stay valid.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB").resize((384, 384))

    # Follow the model's own placement and dtype instead of assuming "cuda":
    # the model is loaded with device_map="auto" in float16. Only floating
    # point tensors are cast; integer token ids are just moved.
    inputs = blip_processor(images=image, text=prompt, return_tensors="pt").to(
        blip_model.device, blip_model.dtype
    )
    output = blip_model.generate(**inputs)
    caption = blip_processor.batch_decode(output, skip_special_tokens=True)[0].strip()
    # Some decodes echo the instruction; drop it so only the description remains.
    return caption.replace(prompt, "").strip()

def generate_llm_response(prompt, max_new_tokens=100, temperature=0.7, top_p=0.9):
    """Sample a continuation of ``prompt`` from the InternLM chat model.

    Args:
        prompt: Text fed to the language model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        Only the newly generated text (special tokens removed, whitespace
        stripped). The prompt itself is not included in the result.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )

    # A causal LM returns prompt tokens followed by the generated ones.
    # Decoding the whole sequence would echo the prompt into the "response"
    # and distort any downstream comparison against reference answers.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

In [None]:
# Generate a caption and an LLM response for every image, and collect the
# reference answers of the spreadsheet row at the same position.
captions, llm_responses, answer_lists = [], [], []

for idx, image_path in enumerate(image_paths):
    image_name = os.path.basename(image_path)
    try:
        caption = generate_caption(image_path)

        prompt = f"{caption}\nAnswer:"
        response = generate_llm_response(prompt)

        # Keep only reference answers that are present and non-blank.
        row_answers = []
        for col in answer_cols:
            value = df.loc[idx, col]
            if pd.notna(value) and str(value).strip():
                row_answers.append(value)
        if not row_answers:
            row_answers = ["[EMPTY]"]
    except Exception as e:
        # Best-effort batch: report the failure and record placeholders so
        # the remaining images are still processed.
        print(f"{image_name} 실패: {e}")
        caption, response, row_answers = "[ERROR]", f"[ERROR] {e}", ["[EMPTY]"]
    else:
        print(f"{image_name} 완료")

    # Append exactly once per image, after the outcome is known. Appending
    # inside the try block would leave a partial entry behind when a later
    # step fails, and the three lists would no longer line up.
    captions.append(caption)
    llm_responses.append(response)
    answer_lists.append(row_answers)

In [None]:
# BERTScore: for every image, compare the LLM response with each reference
# answer and keep the scores of the reference with the highest F1.
best_scores = {
    "precision": [], "recall": [], "f1": [], "matched_answer": []
}

# Flatten all (candidate, reference) pairs so the scoring function is called
# once for the whole dataset instead of once per image; every call sets up
# the scoring model again, which dominates the runtime.
flat_cands, flat_refs, groups = [], [], []
for refs, cand in zip(answer_lists, llm_responses):
    start = len(flat_refs)
    flat_refs.extend(refs)
    flat_cands.extend([cand] * len(refs))
    groups.append((start, len(flat_refs), refs))

if flat_refs:  # nothing to score when no image was processed
    P, R, F1 = bertscore(cands=flat_cands, refs=flat_refs, lang="ko", verbose=False)
    for start, stop, refs in groups:
        # Index of the best reference inside this image's slice.
        best_idx = F1[start:stop].argmax().item()
        best_scores["precision"].append(P[start + best_idx].item())
        best_scores["recall"].append(R[start + best_idx].item())
        best_scores["f1"].append(F1[start + best_idx].item())
        best_scores["matched_answer"].append(refs[best_idx])

In [None]:
# Assemble one row per image (file name, caption, LLM response, best matching
# reference answer and its BERTScore metrics) and write it to an Excel file.
results_df = pd.DataFrame({
    "image": list(map(os.path.basename, image_paths)),
    "caption": captions,
    "llm_response": llm_responses,
})
# Columns are added in the same order they appear in the exported sheet.
results_df["matched_answer"] = best_scores["matched_answer"]
results_df["bert_precision"] = best_scores["precision"]
results_df["bert_recall"] = best_scores["recall"]
results_df["bert_f1"] = best_scores["f1"]

results_df.to_excel("/content/llm_result_internlm_chat.xlsx", index=False)
# Only reports the output path; no download is triggered here.
print("files.download: /content/llm_result_internlm_chat.xlsx")