In [None]:
from pathlib import Path
import sys
import os

# Make modules in the parent directory and in /workdir importable from this notebook.
sys.path.extend(["..", "/workdir"])

# Set the environment variable that tells GLUE3D where to cache its data.
os.environ.update({"GLUE3D_CACHE_DIR": "../.cache"})

%load_ext autoreload
%autoreload 2

In [None]:
from glue3d.models.loaders import load_pointllm_model

# Checkpoint identifier handed to the PointLLM loader; edit it here to evaluate a different checkpoint.
POINTLLM_CHECKPOINT = "RunsenXu/PointLLM_7B_v1.2"

model, tokenizer = load_pointllm_model(POINTLLM_CHECKPOINT)  # Load the PointLLM model and its tokenizer

In [None]:
import numpy as np
from glue3d.models.hf import BinaryHFGenerator, CaptioningHFGenerator


class BinaryPointLLMAnswerGenerator(BinaryHFGenerator):
    """Binary-task answer generator that formats its inputs for a PointLLM model.

    ``prepare_data`` wraps the question in the ``vicuna_v1_1`` conversation template,
    prefixes it with the point-cloud placeholder tokens described by the model's
    ``point_backbone_config`` and returns the tensors as a keyword-argument dict.
    """

    def __init__(self, model, tokenizer, **kwargs):
        """Cache the conversation template, stop string and point-backbone config.

        Args:
            model: Model object exposing ``get_model().point_backbone_config``
                (``device`` and ``dtype`` are read later in ``prepare_data``).
            tokenizer: Tokenizer used to encode the prompt.
            **kwargs: Forwarded unchanged to ``BinaryHFGenerator.__init__``.
        """
        super().__init__(model, tokenizer, **kwargs)

        # pointllm is imported here, i.e. only when a generator is instantiated.
        from pointllm.conversation import conv_templates, SeparatorStyle

        self.conv_template = conv_templates["vicuna_v1_1"].copy()
        # Stop string: `sep2` when the template uses the TWO separator style, `sep` otherwise.
        if self.conv_template.sep_style == SeparatorStyle.TWO:
            self.stop_string = self.conv_template.sep2
        else:
            self.stop_string = self.conv_template.sep
        self.point_backbone_config = self.model.get_model().point_backbone_config

    def prepare_data(self, data, text: str) -> dict:
        """Build the keyword arguments for one (point cloud, question) pair.

        Args:
            data: Point cloud as a NumPy array (it is converted with
                ``torch.from_numpy``). A leading batch axis of size 1 is added here,
                so a single, unbatched cloud is presumably expected -- TODO confirm.
            text: Question text placed after the point-cloud placeholder tokens.

        Returns:
            A dict with ``input_ids`` (tokenized prompt on the model's device) and
            ``point_clouds`` (tensor on the model's device/dtype with a leading
            batch axis), followed by every entry of ``self.kwargs``; on a key
            collision the ``self.kwargs`` value wins.
        """
        import torch

        point_backbone_config = self.point_backbone_config
        point_token_len = point_backbone_config["point_token_len"]
        point_patch_token = point_backbone_config["default_point_patch_token"]
        point_start_token = point_backbone_config["default_point_start_token"]
        point_end_token = point_backbone_config["default_point_end_token"]
        mm_use_point_start_end = point_backbone_config["mm_use_point_start_end"]

        # One placeholder token per point-token slot, optionally wrapped in start/end markers.
        point_tokens = point_patch_token * point_token_len
        if mm_use_point_start_end:
            point_tokens = point_start_token + point_tokens + point_end_token
        user_text = point_tokens + "\n" + text

        # Work on a copy so the shared template never accumulates messages across calls.
        conv = self.conv_template.copy()
        conv.append_message(conv.roles[0], user_text)
        conv.append_message(conv.roles[1], None)  # empty second-role turn for the model to complete
        prompt = conv.get_prompt()

        device, dtype = self.model.device, self.model.dtype
        input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids.to(device=device)

        # NOTE(review): the previous version built a KeywordsStoppingCriteria from
        # `self.stop_string` on every call but never passed it on (its dict entry was
        # commented out). That dead construction is removed; `self.stop_string` is
        # still available if a stopping criterion is wired in later.
        # `self.kwargs` is presumably set by BinaryHFGenerator.__init__ -- confirm.
        return {
            "input_ids": input_ids,
            "point_clouds": torch.from_numpy(data).to(device=device, dtype=dtype).unsqueeze(0),
            **self.kwargs,
        }

# Wrap the loaded model and tokenizer in the generator; no extra keyword arguments are passed.
answer_gen = BinaryPointLLMAnswerGenerator(model, tokenizer)

In [None]:
from glue3d.generate_answers import generate_GLUE3D_answers

# Run the answer generator through GLUE3D. The two strings are presumably the task name and the
# dataset name -- confirm against generate_GLUE3D_answers. The result is kept in `out_df`.
out_df = generate_GLUE3D_answers("binary_task", "GLUE3D-points-8K", answer_gen)

In [None]:
# Bare last expression: the notebook renders the generated answers as this cell's output.
out_df