In [1]:
%load_ext autoreload
%autoreload 2
import yaml
from pathlib import Path
import sys
import os
import pprint
import torch

# Locate the project root from the notebook's working directory so that the
# project's `src` directory can be added to the import path.
current_dir = Path.cwd().resolve()
print(f"Current dir: {current_dir.name}")

if current_dir.name == "LLMPolReasonEval":  # launched from Jupyter Lab
    project_root = current_dir
elif current_dir.name == "content":  # launched from Google Colab
    project_root = current_dir / "llm_pol_reason_eval"
else:  # launched from PyCharm
    # The root is taken to be the third ancestor of the working directory.
    # If the directory tree is too shallow for that, fall back to the working
    # directory itself instead of raising IndexError; the existence check
    # printed below then shows whether the guess was right.
    ancestors = current_dir.parents
    project_root = ancestors[2] if len(ancestors) > 2 else current_dir
print(f"Project root: {project_root}")

src_dir = project_root / "src"
print(f"Checking if src directory exists: {src_dir.exists()}")

# Re-running this cell must not pile up duplicate entries on sys.path.
if str(src_dir) not in sys.path:
    sys.path.append(str(src_dir))

from llm_pol_reason_eval.qa_engine.llm_qa_engine import LLMQAEngine
from llm_pol_reason_eval.qa_engine.inference_client import HuggingFaceClient

Current dir: environment_test_local_cuda
Project root: C:\Users\piotr\PycharmProjects\LLMPolReasonEval
Checking if src directory exists: True


In [2]:
# Report the name of CUDA device 0, or say that CUDA is unavailable.
gpu_status = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CUDA not enabled"
print(gpu_status)

NVIDIA GeForce GTX 1660 Ti


In [3]:
# Empty PyTorch's CUDA cache; without CUDA nothing is done and nothing is printed.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    torch.cuda.empty_cache()
    print("Pamięć podręczna CUDA została wyczyszczona.")

Pamięć podręczna CUDA została wyczyszczona.


In [7]:
# Experiment selection: the name must be a key under `experiments` in the run config.
EXPERIMENT_NAME = "qwen-small-th-fs-cot-matura"  # or "bielik-small-precise-matura"
RUN_CONFIG_FILE = "config/runs/qa_polski_matura_mvp_dataset.yaml"
MODELS_CONFIG_FILE = "config/models.yaml"


def _read_yaml(relative_path):
    """Parse a UTF-8 YAML file whose path is given relative to ``project_root``."""
    with open(project_root / relative_path, 'r', encoding='utf-8') as stream:
        return yaml.safe_load(stream)


# Keep only the selected experiment's settings; the models file is kept whole.
run_config = _read_yaml(RUN_CONFIG_FILE)['experiments'][EXPERIMENT_NAME]
models_config = _read_yaml(MODELS_CONFIG_FILE)

print(f"Uruchamiam eksperyment: {run_config.get('task_name')}")

Uruchamiam eksperyment: Qwen3 1.7B - Z myśleniem, Few-Shot & CoT - Matura


In [8]:
# Look up the model entry that the selected experiment refers to.
model_key = run_config['model']
model_cfg = models_config[model_key]

# A key that is present in the YAML but left empty loads as None, so
# `.get(key, {})` alone would still hand back None; `or {}` covers that case.
run_overrides = run_config.get("param_overrides") or {}

# Generation parameters: the model's own values first, then the experiment's
# "default" overrides on top. A new dict is built, so the loaded model config
# itself is left unmodified.
final_gen_params = {
    **model_cfg['generation_params'],
    **(run_overrides.get('default') or {}),
}

inference_client = HuggingFaceClient(
    model_path=model_cfg['path'],
    model_config=model_cfg,
    default_generation_params=final_gen_params
)

engine = LLMQAEngine(
    model_name=model_key,
    model_path=model_cfg['path'],
    inference_client=inference_client
)

HuggingFaceClient: Inicjalizacja modelu Qwen/Qwen3-1.7B na urządzeniu: cuda


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

HuggingFaceClient: Domyślna konfiguracja generowania: GenerationConfig {
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "max_new_tokens": 768,
  "pad_token_id": 151643,
  "temperature": 0.6,
  "top_k": 20,
  "top_p": 0.95
}



In [9]:
# Resolve the dataset and output locations of the experiment against the project root.
input_dataset_path = project_root / run_config['input_dataset']
output_dir = project_root / run_config['output_dir']
iterations = run_config.get('iterations', 1)

# A query can be used to avoid processing the whole dataset while testing in the notebook.
# target_question_ids = ["MPOP-P1-100-A-2405_zadanie_14", "EPOP-P1-100-2305_zad_1"]
# query = lambda q: q.get("question_id") in target_question_ids
query = None

# One numbered sub-directory (run_1, run_2, ...) per iteration.
for run_number in range(1, iterations + 1):
    run_output_dir = output_dir / f"run_{run_number}"
    run_output_dir.mkdir(parents=True, exist_ok=True)
    output_path = run_output_dir / f"answers_{EXPERIMENT_NAME}.json"

    # Built inside the loop so that every call receives fresh argument objects.
    model_descriptor = {'name': model_key, 'family': model_cfg.get('family')}
    prompt_composition = run_config.get("prompt_composition", {})
    param_overrides = run_config.get("param_overrides")

    results = engine.generate_answers(
        dataset_filepath=str(input_dataset_path),
        output_filepath=str(output_path),
        model_cfg=model_descriptor,
        prompt_composition=prompt_composition,
        param_overrides=param_overrides,
        query=query,
        batch_size=10
    )

    print("--- WYGENEROWANE ODPOWIEDZI ---")
    pprint.pprint(results)


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Ścieżka logów: C:\Users\piotr\PycharmProjects\LLMPolReasonEval\results\qwen-small-th-fs-cot-matura-mvp\run_1\logs\mvp_dataset_2025-06-08T20-42-43Z_qwen3-1-7b_2025-06-16T01-47-46Z.log
2025-06-16T01:47:46.623470+00:00 INFO: Logger uruchomiony.
2025-06-16T01:47:46.624637+00:00 INFO: Ładowanie datasetu z: C:\Users\piotr\PycharmProjects\LLMPolReasonEval\data\dataset\mvp_dataset_2025-06-08T20-42-43Z.json
2025-06-16T01:47:46.628779+00:00 INFO: Załadowano 63 pytań.
2025-06-16T01:47:46.628779+00:00 INFO: Tworzenie iteratora z rozmiarem batcha: 10.
2025-06-16T01:47:46.629801+00:00 INFO: Przetwarzanie batcha 1 z 10 pytaniami.
2025-06-16T01:47:46.631779+00:00 INFO: --- Batch 1: tryb wsadowy ---
2025-06-16T01:47:46.670067+00:00 INFO: Wysyłanie 10 promptów do modelu z parametrami: {}


NameError: name 're' is not defined