# EyesMol 모델 inference 코드 (0.3 epoch checkpoint 테스트 - 수정 예정)

In [13]:
import sys
import os
import torch
import json
from PIL import Image
import time
from pathlib import Path
from tqdm import tqdm
from qwen_vl_utils import process_vision_info

In [14]:
# Make the fine-tuning repository importable so that `src.utils` resolves.
# The membership check keeps this cell idempotent: re-running it does not
# append the same directory to sys.path again.
FINETUNE_REPO_DIR = '/workspace/EyesMolProject/Qwen2-VL-Finetune'
if FINETUNE_REPO_DIR not in sys.path:
    sys.path.append(FINETUNE_REPO_DIR)
from src.utils import load_pretrained_model, get_model_name_from_path, disable_torch_init
print("라이브러리 로드 완료!")

라이브러리 로드 완료!


In [15]:
# Merge the LoRA adapter weights into the base model and save the result.
# lora_path  : checkpoint directory passed to the script as --model-path
# save_path  : output directory passed as --save-model-path
# model_base : base model directory passed as --model-base
lora_path = '/workspace/checkpoint-18000'
# Reuses an already-existing folder.
# NOTE(review): 'chebkpoint_merge' looks like a typo of 'checkpoint_merge';
# left unchanged because it is a runtime path — confirm before renaming.
save_path = '/workspace/chebkpoint_merge'  # use the existing folder
model_base = '/workspace/Qwen2.5-VL-3B-Instruct'

# The `{name}` placeholders below are expanded by IPython's `!` shell escape
# from the Python variables above (this is IPython interpolation, not a Python
# f-string). The values are inserted unquoted, so paths must not contain spaces.
!python /workspace/EyesMolProject/Qwen2-VL-Finetune/src/merge_lora_weights.py --model-path {lora_path} --model-base {model_base} --save-model-path {save_path}

[2025-08-28 08:16:00,785] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Loading Qwen2-VL from base model...
Loading checkpoint shards: 100%|██████████████████| 2/2 [00:02<00:00,  1.19s/it]
Loading additional Qwen2-VL weights...
Loading LoRA weights...
Merging LoRA weights...
Model Loaded!!!


In [16]:
# --- Generation parameters -------------------------------------------------
# TEMPERATURE == 0 makes generate_response() turn sampling off.
REPETITION_PENALTY = 1.0
TEMPERATURE = 0
MAX_NEW_TOKENS = 512

# --- Model locations and device ---------------------------------------------
# Both paths reuse the variables defined in the LoRA-merge cell, so that cell
# must have been executed first.
MODEL_BASE = model_base
MODEL_PATH = save_path
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Echo the effective configuration.
print(f"모델 경로: {MODEL_PATH}")
print(f"베이스 모델: {MODEL_BASE}")
print(f"디바이스: {DEVICE}")

모델 경로: /workspace/chebkpoint_merge
베이스 모델: /workspace/Qwen2.5-VL-3B-Instruct
디바이스: cuda


In [17]:
# Load the merged checkpoint through the fine-tuning repository's helpers.
# disable_torch_init() is called before loading, as in the original cell.
disable_torch_init()

# The model name is derived from the checkpoint path by the repo helper.
model_name = get_model_name_from_path(MODEL_PATH)

processor, model = load_pretrained_model(
    # what to load
    model_path=MODEL_PATH,
    model_base=MODEL_BASE,
    model_name=model_name,
    # where to place it
    device=DEVICE,
    device_map=DEVICE,
    # precision / kernels: no quantization, flash attention requested
    load_8bit=False,
    load_4bit=False,
    use_flash_attn=True,
    use_fast=True,
)

print("모델 로드 완료!")

Loading model from /workspace/chebkpoint_merge as a standard model. Adapter files were not found, so it can't be merged


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

모델 로드 완료!


In [23]:
def generate_response(question, image_path=None):
    """Run one single-turn chat generation and measure how long it takes.

    A one-message user conversation is built from an optional image and an
    optional text question, passed through the notebook-level ``processor``
    and ``model``, and only the newly generated tokens are decoded.

    Args:
        question: Text prompt. Falsy values (``None``, ``""``) are left out
            of the message.
        image_path: Optional image reference placed in the ``"image"`` field
            of the message and resolved by ``process_vision_info``. Left out
            when falsy.

    Returns:
        A ``(response, inference_time)`` tuple: the decoded text of the first
        sequence in the batch (special tokens skipped) and the elapsed seconds
        covering preprocessing, generation and decoding.

    Raises:
        ValueError: If both ``question`` and ``image_path`` are empty, since
            there would be nothing to send to the model.
    """
    # Reject an empty request up front instead of generating from a user turn
    # that has no content at all.
    if not question and not image_path:
        raise ValueError("generate_response requires a question, an image_path, or both")

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Image first, then text — same ordering as the original message layout.
    user_content = []
    if image_path:
        user_content.append({"type": "image", "image": image_path})
    if question:
        user_content.append({"type": "text", "text": question})
    conversation = [{"role": "user", "content": user_content}]

    prompt = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(conversation)

    inputs = processor(
        text=[prompt],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt"
    ).to(DEVICE)

    do_sample = TEMPERATURE > 0
    generation_args = {
        "max_new_tokens": MAX_NEW_TOKENS,
        "do_sample": do_sample,
        "repetition_penalty": REPETITION_PENALTY,
        "eos_token_id": processor.tokenizer.eos_token_id,
    }
    # temperature is a sampling-only setting; forward it only when sampling is
    # enabled so that a value of 0 is never handed to generate().
    if do_sample:
        generation_args["temperature"] = TEMPERATURE

    with torch.no_grad():
        output_ids = model.generate(**inputs, **generation_args)

    # generate() returns prompt + continuation; slice off the prompt tokens.
    input_token_len = inputs['input_ids'].shape[1]
    response_ids = output_ids[:, input_token_len:]
    response = processor.tokenizer.decode(
        response_ids[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    inference_time = time.perf_counter() - start_time
    return response, inference_time

In [26]:
# Example request: one image plus a Korean prompt asking for a detailed
# description of the molecule.
question_with_image = "이 분자에 대해서 아주 자세하게 설명해줘"
image_path = "/workspace/8.png"

response, inference_time = generate_response(
    question=question_with_image,
    image_path=image_path,
)

# Show the model's answer and the measured latency in seconds.
print(f'모델 \n 응답 : {response}')
print(f'소요시간 (초) : {inference_time}')

모델 
 응답 : 이 분자체는 1,2,3,4,5,6-hexahydro-1,2,3-trimethyl-1H-inden-1-ol입니다. 이는 1,2,3,4,5,6-hexahydro-1H-inden-1-ol의 산화산물로, 1,2,3,4,5,6-hexahydro-1H-inden-1-ol의 산화산물로 간주됩니다.
소요시간 (초) : 6.679670333862305
