# Model evaluation

## Goal

Has the model learned to draw?

## Imports

In [None]:
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest
from transformers import AutoTokenizer, AutoConfig

from arc25.training_tasks import *
from arc25.encoders import create_grid_encoder
from arc25.prompting import create_prompt_from_task, pretty_print_prompt

## Load model

In [None]:
# Base weights handed to vLLM. Exactly one assignment is active; the other
# candidate is kept commented out so it can be swapped in by hand.
# base_model_path = '/home/gbarbadillo/models/Qwen2.5-Coder-0.5B-Instruct'
base_model_path = '/mnt/hdd0/Kaggle/arc25/trainings/20250430_first_trainings/steps_6400/model-6400'

# LoRA adapter checkpoint. Same convention: uncomment the one to evaluate.
# lora_path = '/mnt/hdd0/Kaggle/arc25/trainings/20250430_first_trainings/steps_6400/checkpoint-6400'
# lora_path = '/mnt/hdd0/Kaggle/arc25/trainings/20250430_first_trainings/random_seed_5_no_dora/checkpoint-200'
lora_path = '/mnt/hdd0/Kaggle/arc25/trainings/20250430_first_trainings/random_seed_4_no_dora_rank16/checkpoint-50'

In [None]:
# Build the vLLM engine on top of the base weights, with LoRA support enabled
# so an adapter can be attached per request.
llm = LLM(
    # What to load
    model=base_model_path,
    dtype='auto',
    trust_remote_code=True,
    enable_lora=True,
    # Capacity limits
    max_model_len=10240,
    # One below the presumed default of 256; kept as a workaround for an
    # illegal memory error seen in earlier runs.
    max_num_seqs=255,
    # Execution
    tensor_parallel_size=1,  # number of GPUs to shard across; set to 2 to use 2 gpus
    enforce_eager=True,
    disable_log_stats=True,
)

In [None]:
# Handle describing the adapter stored at `lora_path`; it only takes effect
# when passed to a generate call.
lora_request = LoRARequest(
    lora_name='lora',
    lora_int_id=1,
    lora_path=lora_path,
)

In [None]:
# The tokenizer is read from the LoRA checkpoint directory, not from
# `base_model_path`.
tokenizer = AutoTokenizer.from_pretrained(lora_path)

## Inference

In [None]:
# Shared settings for every prompt built below.
task_generator = RandomDrawingTaskOnEmptyImg()  # source of sampled tasks
prompt_version = 'code-from-examples-v3'        # prompt template identifier
grid_encoder = create_grid_encoder(
    'GridShapeEncoder(RowNumberEncoder(MinimalGridEncoder()))'
)

In [None]:
# Decoding configuration reused by every generate call in this notebook.
sampling_params = SamplingParams(
    n=1,                        # one completion per prompt
    temperature=1.0,
    max_tokens=1024,            # cap on generated tokens
    logprobs=0,                 # presumably only the sampled token's logprob - confirm in vLLM docs
    skip_special_tokens=False,  # keep special tokens in the decoded text
)

In [None]:
# Sample one task and build its prompt with is_train_prompt=False.
task = task_generator.sample()
prompt = create_prompt_from_task(
    task,
    prompt_version=prompt_version,
    grid_encoder=grid_encoder,
    tokenizer=tokenizer,
    is_train_prompt=False,
)

In [None]:
# Display the current `prompt`, passing 'white' as the default color.
pretty_print_prompt(prompt, default_color='white')

In [None]:
# End-to-end check on a freshly sampled task.
task = task_generator.sample()

# First show the is_train_prompt=True rendering of the task for reference.
pretty_print_prompt(
    create_prompt_from_task(
        task,
        prompt_version=prompt_version,
        grid_encoder=grid_encoder,
        tokenizer=tokenizer,
        is_train_prompt=True,
    ),
    default_color='white',
)

# Then build the is_train_prompt=False rendering; this is what gets sent to
# the model.
prompt = create_prompt_from_task(
    task,
    prompt_version=prompt_version,
    grid_encoder=grid_encoder,
    tokenizer=tokenizer,
    is_train_prompt=False,
)

# NOTE(review): no lora_request is passed here, so the adapter described by
# `lora_request` is not applied in this call - confirm that is intended.
outputs = llm.generate([prompt], sampling_params, use_tqdm=True)
print(outputs[0].outputs[0].text)

In [None]:
# Alternative: generate for the same prompt with the LoRA adapter applied.
# outputs = llm.generate([prompt], sampling_params, use_tqdm=True, lora_request=lora_request)