In [None]:
from datasets import load_dataset

# Load the "val" split of the ChartQA dataset from the Hugging Face Hub.
val_ds = load_dataset("HuggingFaceM4/ChartQA", split="val")
# Bare expression so the notebook displays the dataset summary.
val_ds

In [None]:
def show_sample(ds, index: int = 0):
    """Print the query and label of one sample and return its image.

    Args:
        ds: Indexable collection whose items are mappings with the keys
            "query", "label" and "image".
        index: Position of the sample to show. Defaults to the first one.

    Returns:
        The value stored under the sample's "image" key, so that a notebook
        cell ending with this call displays it.
    """
    # Look the row up once. Each ``ds[index]`` access builds the whole row
    # again (for a Hugging Face dataset this presumably includes decoding the
    # image), so indexing three times repeated that work three times.
    sample = ds[index]
    print(sample["query"])
    print(sample["label"])
    return sample["image"]
# Preview the first validation sample (index defaults to 0); the returned
# image is the cell's displayed output.
show_sample(val_ds)

In [None]:
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

# The processor and the model are loaded from the same checkpoint id.
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
# Load the model on the available device(s).
# NOTE(review): torch_dtype="auto" does not force half-precision by itself;
# per the transformers docs it presumably picks the dtype recorded in the
# checkpoint - confirm if a specific precision is required.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
)
# Bare expression so the notebook displays the model architecture.
model

In [None]:
def chat(model, processor, conversation, image, verbose=True, max_new_tokens: int = 128):
    """Generate the model's reply to a single image + text conversation.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, max_new_tokens=...)``.
        processor: Object exposing ``apply_chat_template``, ``__call__`` (to
            build the model inputs) and ``batch_decode``.
        conversation: Chat messages passed to ``processor.apply_chat_template``.
        image: The image passed to the processor alongside the prompt.
        verbose: When true, print the rendered prompt and the shape of every
            input entry.
        max_new_tokens: Upper bound on generated tokens forwarded to
            ``model.generate``. Defaults to 128, the value that was
            previously hard-coded.

    Returns:
        The decoded reply for the (single) conversation, as a string.
    """
    # Preprocess the inputs
    text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    if verbose:
        print(text_prompt)
    inputs = processor(
        text=[text_prompt], images=[image], padding=True, return_tensors="pt"
    )
    inputs = inputs.to(model.device)
    if verbose:
        print([f"{key}: {value.shape}" for key, value in inputs.items()])

    # Inference: Generation of the output
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Each generated sequence is assumed to start with its prompt tokens, so
    # drop the first len(prompt) ids to keep only the newly generated part.
    # Distinct loop names avoid shadowing the outer ``output_ids``.
    completion_ids = [
        full_seq[len(prompt_seq):]
        for prompt_seq, full_seq in zip(inputs.input_ids, output_ids)
    ]
    output_text = processor.batch_decode(
        completion_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )
    # Only one conversation is processed per call, so return its single reply.
    return output_text[0]

In [None]:
def make_conversation(sample):
    """Attach a single-turn chat prompt to ``sample`` under "conversation".

    The prompt consists of one user message holding an image placeholder
    followed by the sample's "query" text. No assistant turn is added.
    The sample is updated in place and the same object is returned.
    """
    image_part = {"type": "image"}
    question_part = {"type": "text", "text": sample["query"]}
    user_turn = {"role": "user", "content": [image_part, question_part]}
    sample["conversation"] = [user_turn]
    return sample
# Apply make_conversation to every row (num_proc=16) so each sample gains a
# "conversation" entry. NOTE: this rebinds val_ds to the mapped dataset.
val_ds = val_ds.map(make_conversation, num_proc=16)
# Bare expression so the notebook displays the updated dataset summary.
val_ds

In [None]:
# Sample position reused by the following cell(s) for a single-example check.
index = 2
# Print the chosen sample's query/label and display its image.
show_sample(val_ds, index)

In [None]:
# Run the model on the sample chosen above. The row is fetched once and
# reused, instead of indexing val_ds twice and building the row (image
# included) a second time.
demo_sample = val_ds[index]
result = chat(model, processor, demo_sample["conversation"], demo_sample["image"])
print(result)

In [None]:
# Preview a subset made of the first five rows; the result is only displayed,
# val_ds itself is not reassigned.
val_ds.select(range(5))

In [None]:
def eval(model, processor, ds, result_path):
    """Run the model on every sample of ``ds`` and save the replies as JSON.

    NOTE: this function shadows Python's builtin ``eval`` within the notebook;
    the name is kept because other cells call it.

    Args:
        model: Model handed to ``chat``; ``model.eval()`` is called first.
        processor: Processor handed to ``chat``.
        ds: Dataset whose rows provide "conversation" and "image" entries and
            which supports ``add_column``, ``remove_columns`` and ``to_json``.
        result_path: Destination file for the exported results. Missing
            parent directories are created.

    Returns:
        The dataset with an added "results" column (one reply per row) and
        with the "image" column removed.
    """
    import os  # local import so this cell does not rely on another cell

    # Prepare the output directory *before* the inference loop. Previously a
    # missing directory was only discovered when opening the file at the very
    # end, after every generation had already been computed and then lost.
    os.makedirs(os.path.dirname(os.path.abspath(result_path)), exist_ok=True)

    model.eval()
    results = []
    for i, sample in enumerate(ds):
        output = chat(model, processor, sample["conversation"], sample["image"], verbose=False)
        results.append(output)
        print(f"Iteration {i}\n{output}\n\n{'#'*120}")

    ds = ds.add_column("results", results)
    # Drop the images before exporting so only the remaining columns are written.
    ds = ds.remove_columns("image")
    with open(result_path, "wb") as f:
        ds.to_json(f)
    return ds
# Smoke-test the evaluation loop on the first two validation rows and write
# the output to ./eval_result.jsonl.
eval_ds = eval(model,processor ,val_ds.select(range(2)), "./eval_result.jsonl")

In [None]:
import yaml
# Read the evaluation settings from ./config.yaml. The encoding is pinned to
# UTF-8 so that parsing does not depend on the platform's default locale
# encoding (which open() would otherwise use).
with open("./config.yaml", encoding="utf-8") as f:
    yaml_dict = yaml.safe_load(f)
# Bare expression so the notebook displays the parsed settings.
yaml_dict

In [None]:
from pydantic import BaseModel

class EvalConfig(BaseModel):
    """Typed container for the evaluation settings read from the YAML config.

    NOTE(review): the fields appear to mirror values that earlier cells
    hard-code (dataset id, split, model id, ...); confirm against config.yaml.
    """

    # --- data ---
    dataset: str  # presumably the dataset identifier - confirm
    split: str  # presumably the dataset split name - confirm

    # --- model / runtime ---
    model: str  # presumably the checkpoint identifier - confirm
    num_proc: int  # presumably the worker count for dataset mapping - confirm
    output_path: str  # presumably where evaluation results are written - confirm
    torch_dtype: str
    device_map: str

    # --- generation ---
    max_new_tokens: int

# Validate the parsed YAML mapping by building an EvalConfig from its keys.
eval_config = EvalConfig(**yaml_dict)
# Bare expression so the notebook displays the validated configuration.
eval_config