In [None]:
import os
import io
import json
import time
import pickle
import base64
import random
import numpy as np
from tqdm import tqdm
from pathlib import Path
from PIL import Image
from vllm import LLM, SamplingParams
import re

In [None]:
def seed_everything(seed):
    """Seed the Python and NumPy random generators and remember the seed.

    Side effects:
        * stores ``seed`` in the module-level ``SEED`` (read later when the
          engine is created);
        * seeds ``random`` and ``numpy.random``;
        * exports ``PYTHONHASHSEED`` in ``os.environ``.

    Args:
        seed: Seed value shared by all generators.
    """
    global SEED
    SEED = seed
    random.seed(seed)
    # NOTE: the interpreter reads PYTHONHASHSEED at start-up, so this export is
    # only picked up by processes launched after this call.
    os.environ.update(PYTHONHASHSEED=str(seed))
    np.random.seed(seed)


seed_everything(322)

In [None]:
# The "FINAL ANSWER" label is matched case-insensitively. The answer letters
# use an explicit ASCII class outside the case-insensitive group, because
# [A-Z] combined with re.IGNORECASE also accepts a few non-ASCII look-alikes
# (e.g. U+212A KELVIN SIGN). Markdown emphasis, backticks, brackets and
# whitespace placed between the colon and the letters are skipped.
_FINAL_ANSWER_RE = re.compile(
    r"(?i:FINAL\s*ANSWER)\s*:\s*[*_`<\[(\s]*([A-Za-z]+)"
)


def parse(text: str) -> str:
    """Extract the option letters announced after ``FINAL ANSWER:``.

    When the marker occurs several times (for instance a tentative answer
    that is revised later in the reasoning), the LAST occurrence wins, since
    that is the answer the model finally settled on.

    Args:
        text: Raw model completion.

    Returns:
        The letters following the last ``FINAL ANSWER:`` marker, upper-cased,
        or an empty string when ``text`` is empty or contains no marker.
    """
    if not text:
        return ""
    answers = _FINAL_ANSWER_RE.findall(text)
    return answers[-1].upper() if answers else ""

**Qwen 4B:**

1) Qwen/Qwen3-4B-Instruct-2507 (8 GB)
2) Qwen/Qwen3-4B-Thinking-2507 (8 GB)

**Qwen 8B:**

1) Qwen/Qwen3-8B (16 GB)

**For math:**

1) nvidia/OpenMath-Nemotron-7B (15 GB)

In [None]:
# Model identifier passed to LLM(model=...) in the next cell, e.g. one of the
# ids listed in the markdown cell above. Placeholder: fill in before running.
MODEL_PATH = ""

In [None]:
# Create the vLLM engine once; the inference cell reuses it through `llm`.
llm = LLM(
    model=MODEL_PATH,
    seed=SEED,  # value stored by seed_everything()
    max_model_len=4096,  # context length limit handed to the engine
    gpu_memory_utilization=0.9,
    tensor_parallel_size=1,
    trust_remote_code=True,
    # FOR VLM: uncomment so that image files under ./images may be referenced.
    # allowed_local_media_path=os.path.abspath('images'),
)

In [None]:
# Decoding settings shared by every request in the inference cell.
# NOTE(review): max_tokens=3600 leaves 4096 - 3600 = 496 tokens of the engine's
# max_model_len for the prompt — confirm that the longest question fits.
sampling_params = SamplingParams(
    max_tokens=3600,
    top_p=0.9,
    temperature=0.4,
)

In [None]:
# SECURITY: unpickling can execute arbitrary code, so only load an
# input.pickle that comes from a trusted source.
# The loaded object is iterated below as records exposing the keys
# "rid", "question" and "image".
with Path('input.pickle').open("rb") as pickle_stream:
    model_input = pickle.load(pickle_stream)

In [None]:
BATCH_SIZE = 8     # number of conversations handed to llm.chat per call
model_output = []  # filled by the inference cell with {"rid", "answer"} records

# Parallel lists: prompts[k] is the chat conversation built for record rids[k].
prompts, rids = [], []

os.makedirs("images", exist_ok=True)

for row in tqdm(model_input):
    rid, question = row["rid"], row["question"]

    # Decode the raw image bytes and store them as images/<rid>.png so that a
    # VLM run can reference the file from the message (see the FOR VLM line).
    image_path = f"images/{rid}.png"
    Image.open(io.BytesIO(row["image"])).save(image_path)

    prompt = f"""
Choose all the correct options:\n{question}\n\n
Write a detailed explanation and the correct answer as a sequence of letters you chose and format it as 
FINAL ANSWER: <Answer> (example: FINAL ANSWER: AD). Rely only on the information present in the picture.
Check every fact very carefully and NEVER invent anything new or not related to the picture.
"""

    # A conversation is a single user turn whose content is a list of parts.
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            # {"type": "image_url", "image_url": {"url": f"file://{os.path.abspath(image_path)}"}}, # FOR VLM
        ],
    }

    prompts.append([user_turn])
    rids.append(rid)

In [None]:
# Answer recorded when no "FINAL ANSWER: ..." marker can be parsed from the
# completion, so that every rid still gets an entry in the output.
DEFAULT_ANSWER = "B"

# Start from an empty list: without this reset, re-running the cell would
# append a second copy of every record to model_output.
model_output = []

for start in tqdm(range(0, len(prompts), BATCH_SIZE), desc="Inference"):
    # prompts and rids are parallel lists, so identical slices stay aligned.
    batch_prompts = prompts[start:start + BATCH_SIZE]
    batch_rids = rids[start:start + BATCH_SIZE]

    outputs = llm.chat(
        messages=batch_prompts,
        sampling_params=sampling_params,
    )

    for rid, output in zip(batch_rids, outputs):
        # Only the first completion of each request is inspected.
        answer_parsed = parse(output.outputs[0].text)
        model_output.append({"rid": rid, "answer": answer_parsed or DEFAULT_ANSWER})

In [None]:
# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 instead of depending on the platform's default encoding.
with open('output.json', 'w', encoding='utf-8') as output_file:
    json.dump(model_output, output_file, ensure_ascii=False)