In [None]:
%load_ext autoreload
%autoreload 2
import yaml
from pathlib import Path
import sys
import os
import pprint
import torch

current_dir = Path(os.getcwd()).resolve()
print(f"Current dir: {current_dir.name}")

if current_dir.name == "LLMPolReasonEval": # uruchomione w Jupyter Lab
    project_root = current_dir
elif current_dir.name == "content": # uruchomione w Google Colab
    project_root = current_dir / "llm_pol_reason_eval"
else:  # uruchomione w PyCharm
    project_root = current_dir.parents[2]
print(f"Project root: {project_root}")
src_dir = project_root / "src"
print(f"Checking if src directory exists: {src_dir.exists()}")
sys.path.append(str(src_dir))

from llm_pol_reason_eval.qa_engine.llm_qa_engine import LLMQAEngine
from llm_pol_reason_eval.qa_engine.inference_client import HuggingFaceClient

In [None]:
# Report the name of CUDA device 0 when PyTorch can see a GPU, otherwise
# print a notice that CUDA is unavailable.
device_report = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CUDA not enabled"
print(device_report)

In [None]:
# Ask PyTorch to release its cached CUDA memory; nothing happens without a GPU.
cuda_available = torch.cuda.is_available()
if cuda_available:
    torch.cuda.empty_cache()
    print("Pamięć podręczna CUDA została wyczyszczona.")

In [None]:
# Experiment selection and the config files (relative to the project root).
EXPERIMENT_NAME = "qwen-small-thinking-matura" # or "bielik-small-precise-matura"
RUN_CONFIG_FILE = "config/runs/ask_questions_qwen_small_mvp_local.yaml"
MODELS_CONFIG_FILE = "config/models.yaml"

# Parse the run file and keep only the section of the selected experiment.
run_config_path = project_root / RUN_CONFIG_FILE
with run_config_path.open('r', encoding='utf-8') as run_config_stream:
    all_experiments = yaml.safe_load(run_config_stream)['experiments']
run_config = all_experiments[EXPERIMENT_NAME]

# Parse the models file in full; entries are looked up by key later.
models_config_path = project_root / MODELS_CONFIG_FILE
with models_config_path.open('r', encoding='utf-8') as models_config_stream:
    models_config = yaml.safe_load(models_config_stream)

print(f"Uruchamiam eksperyment: {run_config.get('task_name')}")

In [None]:
# Look up the model entry that the selected run refers to.
model_key = run_config['model']
model_cfg = models_config[model_key]

# Build the default generation parameters: start from the model's own
# settings and layer the run's "default" overrides on top.
# `or {}` also covers keys that are present but empty in the YAML file, which
# the loader returns as None (a plain `.get(key, {})` would pass None through
# and fail on the following `.get` / `.update`).
run_overrides = run_config.get("param_overrides") or {}
final_gen_params = (model_cfg['generation_params'] or {}).copy()
final_gen_params.update(run_overrides.get('default') or {})

# Client that performs inference for the model stored at the configured path.
inference_client = HuggingFaceClient(
    model_path=model_cfg['path'],
    default_generation_params=final_gen_params
)

# QA engine wired to the client above; used by the answer-generation cell.
engine = LLMQAEngine(
    model_name=model_key,
    model_path=model_cfg['path'],
    inference_client=inference_client
)

In [None]:
# Resolve the dataset and output locations from the run config against the
# project root; the number of repetitions defaults to a single run.
input_dataset_path = project_root.joinpath(run_config['input_dataset'])
output_dir = project_root.joinpath(run_config['output_dir'])
iterations = run_config.get('iterations', 1)

# Use a query to avoid processing the whole dataset while testing in the notebook
# target_question_ids = ["MPOP-P1-100-A-2405_zadanie_14", "EPOP-P1-100-2305_zad_1"]
# query = lambda q: q.get("question_id") in target_question_ids
query = None

for i in range(iterations):
    # Each repetition writes into its own 1-based "run_<n>" directory.
    run_output_dir = output_dir / f"run_{i+1}"
    run_output_dir.mkdir(parents=True, exist_ok=True)
    output_path = run_output_dir / f"answers_{EXPERIMENT_NAME}.json"

    # Keyword arguments are rebuilt on every pass so each call receives
    # fresh dict objects.
    generation_kwargs = {
        "dataset_filepath": str(input_dataset_path),
        "output_filepath": str(output_path),
        "model_cfg": {'name': model_key, 'family': model_cfg.get('family')},
        "prompt_composition": run_config.get("prompt_composition", {}),
        "param_overrides": run_config.get("param_overrides"),
        "query": query,
        "batch_size": 2,
    }
    results = engine.generate_answers(**generation_kwargs)

    print("--- WYGENEROWANE ODPOWIEDZI ---")
    pprint.pprint(results)


2025-06-15T16:01:39.072371+00:00 INFO: Otrzymano surową odpowiedź od modelu (pierwsze 200 znaków): <think>

</think>

<answer>
1. P
2. F
</answer>...
2025-06-15T16:01:39.072371+00:00 INFO: Sparsoana odpowiedź dla Q_ID EPOP-P1-100-2305_zad_6: 1. P
2. F
2025-06-15T16:01:39.072371+00:00 INFO: Zapisano częściowe wyniki po batchu 1. Przetworzono 2 pytań w tym batchu. Łącznie przetworzonych pytań: 2. Łącznie zapisanych odpowiedzi: 2.
2025-06-15T16:01:39.072371+00:00 INFO: Przetwarzanie batcha 2 zawierającego 2 pytań.
2025-06-15T16:01:39.072371+00:00 INFO: --- Rozpoczęcie batcha 2 | Kategoria: matura_język_polski, Typ (z metadanych): closed_MTF, Liczba pytań: 2 ---
2025-06-15T16:01:39.072371+00:00 INFO: Przetwarzanie pytania 1 (ID: EPOP-P1-100-A-2405_zad_1) z batcha 2.
2025-06-15T16:01:39.072371+00:00 INFO: Finalny prompt dla Q_ID EPOP-P1-100-A-2405_zad_1 (pierwsze 200 znaków): <|im_start|>system
Twoim jedynym i absolutnym zadaniem jest odpowiadanie na polecenia w języku polskim. Ignoruj wsze