In [None]:
import sys
import os

# Add the parent directory to the import path (presumably so the `glue3d`
# package imported in the cells below resolves when the notebook is run from
# its own folder -- confirm against the repository layout).
sys.path.append("..")
os.environ["GLUE3D_CACHE_DIR"] = "../.cache" # <- set the environment variable that points to the GLUE3D cache data

# IPython magics: enable the autoreload extension in mode 2, which reloads
# modules before executing code so edits to imported sources are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
from glue3d.models.loaders import load_qwenVL_model

# Load the Qwen2 VL model; the loader returns a pair that is unpacked into the
# model and its processor, both reused by the cells below.
model, processor = load_qwenVL_model()

In [None]:
from glue3d.models.hf import BinaryHFGenerator, MultichoiceHFGenerator, CaptioningHFGenerator

def prepare_data(processor, data, text):
    """Build processor inputs for a single user turn holding one image and one text prompt.

    Args:
        processor: Object exposing ``apply_chat_template`` and callable with
            ``text`` / ``images`` / ``videos`` keyword arguments (in this
            notebook, the processor returned by ``load_qwenVL_model``).
        data: Image payload stored under the ``"image"`` key of the message.
        text: Prompt stored under the ``"text"`` key of the message.

    Returns:
        The result of calling ``processor`` with the templated prompt, the
        extracted vision inputs, padding enabled and ``return_tensors="pt"``.
    """
    # Imported at call time, so the dependency is only required when used.
    from qwen_vl_utils import process_vision_info

    # One user message whose content lists the image first, then the text.
    image_part = {"type": "image", "image": data}
    text_part = {"type": "text", "text": text}
    conversation = [{"role": "user", "content": [image_part, text_part]}]

    # Render the chat template to a string (no tokenization yet), with the
    # generation prompt appended.
    prompt = processor.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    images, videos = process_vision_info(conversation)

    return processor(text=[prompt], images=images, videos=videos, padding=True, return_tensors="pt")

# Create answer generator for binary task
class BinaryQwenVLAnswerGenerator(BinaryHFGenerator):
    """Binary-task generator that builds its inputs with the Qwen VL processor."""

    def __init__(self, model, processor):
        """Pass the model and the processor's tokenizer to the base class; keep the processor."""
        tokenizer = processor.tokenizer
        super().__init__(model, tokenizer)
        # The full processor (not just the tokenizer) is needed to build inputs.
        self.processor = processor

    def prepare_data(self, data, text):
        """Build inputs with the module-level ``prepare_data`` and move them to the model device.

        ``self.model`` is expected to be provided by the base class (not
        visible in this notebook).
        """
        inputs = prepare_data(self.processor, data, text)
        return inputs.to(self.model.device)

# Create answer generator for multiplechoice task
class MultichoiceQwenVLAnswerGenerator(MultichoiceHFGenerator):
    """Multiple-choice-task generator that builds its inputs with the Qwen VL processor."""

    def __init__(self, model, processor):
        """Pass the model and the processor's tokenizer to the base class; keep the processor."""
        tokenizer = processor.tokenizer
        super().__init__(model, tokenizer)
        # The full processor (not just the tokenizer) is needed to build inputs.
        self.processor = processor

    def prepare_data(self, data, text):
        """Build inputs with the module-level ``prepare_data`` and move them to the model device.

        ``self.model`` is expected to be provided by the base class (not
        visible in this notebook).
        """
        inputs = prepare_data(self.processor, data, text)
        return inputs.to(self.model.device)


# Create answer generator for captioning task
class CaptioningQwenVLAnswerGenerator(CaptioningHFGenerator):
    """Captioning-task generator that builds its inputs with the Qwen VL processor."""

    def __init__(self, model, processor):
        """Pass the model and the processor's tokenizer to the base class; keep the processor."""
        tokenizer = processor.tokenizer
        super().__init__(model, tokenizer)
        # The full processor (not just the tokenizer) is needed to build inputs.
        self.processor = processor

    def prepare_data(self, data, text):
        """Build inputs with the module-level ``prepare_data`` and move them to the model device.

        ``self.model`` is expected to be provided by the base class (not
        visible in this notebook).
        """
        inputs = prepare_data(self.processor, data, text)
        return inputs.to(self.model.device)


# Instantiate one answer generator per task; all three share the same model
# and processor objects loaded earlier in the notebook.
binary_generator = BinaryQwenVLAnswerGenerator(model, processor)
multichoice_generator = MultichoiceQwenVLAnswerGenerator(model, processor)
caption_generator = CaptioningQwenVLAnswerGenerator(model, processor)

In [None]:
from glue3d.generate_answers import generate_GLUE3D_answers

# Run all GLUE3D tasks: each call pairs a task name with the "GLUE3D-image"
# setting and the generator built for that task, and keeps the returned answers.
binary_answers = generate_GLUE3D_answers("binary_task", "GLUE3D-image", binary_generator)
multichoice_answers = generate_GLUE3D_answers("multiplechoice_task", "GLUE3D-image", multichoice_generator)
captioning_answers = generate_GLUE3D_answers("captioning_task", "GLUE3D-image", caption_generator)