In [16]:
%load_ext autoreload
%autoreload 2
import yaml
from pathlib import Path
import sys
import os
import pprint
import torch

def find_project_root(start: Path, root_name: str = "LLMPolReasonEval") -> Path:
    """Locate the project root for a notebook whose working directory is ``start``.

    Resolution order:
      1. the nearest directory among ``start`` and its ancestors whose name is
         ``root_name`` (Jupyter Lab started in the root, or an IDE run started
         from any sub-directory of it);
      2. ``start.parents[2]`` -- the original fixed-depth assumption, kept as a
         fallback for checkouts whose root directory has a different name;
      3. ``start`` itself when the path is too shallow for (2); a warning is
         printed so the wrong root is easy to spot.

    Args:
        start: Absolute directory to start the search from.
        root_name: Expected name of the project's root directory.

    Returns:
        The directory to treat as the project root.
    """
    for candidate in (start, *start.parents):
        if candidate.name == root_name:
            return candidate
    ancestors = start.parents
    if len(ancestors) > 2:
        return ancestors[2]
    print(f"Warning: no '{root_name}' directory found above {start}; using it as project root")
    return start


current_dir = Path.cwd().resolve()
project_root = find_project_root(current_dir)
print(f"Project root: {project_root}")

# Re-running this cell must not keep appending the same entry to sys.path.
src_dir = str(project_root / "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

from llm_pol_reason_eval.qa_engine.llm_qa_engine import LLMQAEngine
from llm_pol_reason_eval.qa_engine.inference_client import HuggingFaceClient

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Project root: C:\Users\piotr\PycharmProjects\LLMPolReasonEval


In [17]:
# Report the name of CUDA device 0, or say that CUDA is unavailable.
if not torch.cuda.is_available():
    print("CUDA not enabled")
else:
    print(torch.cuda.get_device_name(0))

NVIDIA GeForce GTX 1660 Ti


In [18]:
# Release unoccupied memory held by PyTorch's CUDA caching allocator.
# On machines without CUDA nothing is done and nothing is printed.
cuda_available = torch.cuda.is_available()
if cuda_available:
    torch.cuda.empty_cache()
    print("Pamięć podręczna CUDA została wyczyszczona.")

Pamięć podręczna CUDA została wyczyszczona.


In [19]:
EXPERIMENT_NAME = "qwen-thinking-matura"

# Configuration files, given relative to the project root.
RUN_CONFIG_FILE = "config/runs/answer_generation_run.yaml"
MODELS_CONFIG_FILE = "config/models.yaml"


def read_yaml_config(relative_path):
    """Parse the UTF-8 YAML file at ``project_root / relative_path``."""
    with open(project_root / relative_path, 'r', encoding='utf-8') as config_file:
        return yaml.safe_load(config_file)


# Only the section of the run file belonging to the selected experiment is kept.
run_config = read_yaml_config(RUN_CONFIG_FILE)['experiments'][EXPERIMENT_NAME]
models_config = read_yaml_config(MODELS_CONFIG_FILE)

task_name = run_config.get('task_name')
print(f"Uruchamiam eksperyment: {task_name}")

Uruchamiam eksperyment: Qwen3 1.7B - Odpowiedzi z myśleniem 'krok po kroku'


In [20]:
# Resolve the model selected by the run configuration.
model_key = run_config['model']
model_cfg = models_config[model_key]

# A YAML key that is present but left empty loads as None, so `.get(key, {})`
# alone does not guarantee a dict; normalise before using it.
run_overrides = run_config.get("param_overrides") or {}
# Debug aid: uncomment to limit max_new_tokens for a quick smoke test.
#run_overrides.setdefault("default", {})["max_new_tokens"] = 100

# Effective generation parameters: the model's own defaults, overridden by the
# run's 'default' section. dict(...) copies, so models_config is not mutated.
final_gen_params = dict(model_cfg['generation_params'] or {})
final_gen_params.update(run_overrides.get('default') or {})

# Initialise the inference client - this can take a while.
inference_client = HuggingFaceClient(
    model_path=model_cfg['path'],
    default_generation_params=final_gen_params
)

engine = LLMQAEngine(
    model_name=model_key,
    model_path=model_cfg['path'],
    inference_client=inference_client
)

HuggingFaceClient: Inicjalizacja modelu Qwen/Qwen3-1.7B na urządzeniu: cuda


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

HuggingFaceClient: Domyślna konfiguracja generowania: GenerationConfig {
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "max_new_tokens": 768,
  "pad_token_id": 151643,
  "temperature": 0.6,
  "top_k": 20,
  "top_p": 0.95
}

Inicjalizacja PromptManager z katalogiem szablonów: C:\Users\piotr\PycharmProjects\LLMPolReasonEval\src\llm_pol_reason_eval\prompts\templates


In [21]:
input_dataset_path = project_root / run_config['input_dataset']
print(f"Wczytuję dataset z: {input_dataset_path}")

# A filter is used so that notebook test runs do not process the whole dataset.
# Drop the `query` argument below to process the entire file.
target_question_ids = ["MPOP-P1-100-A-2405_zadanie_14", "EPOP-P1-100-2305_zad_1"]


def query(q):
    """Return True when the question's ``question_id`` is one of ``target_question_ids``."""
    return q.get("question_id") in target_question_ids


# `or {}` guards against a `param_overrides:` key that is present but empty in
# the YAML file (it loads as None).
# NOTE(review): per_type_params is not used in this cell; per-type overrides
# presumably reach the engine through `run_overrides` -- confirm.
per_type_params = (run_config.get("param_overrides") or {}).get('per_type')

results = engine.generate_answers(
    dataset_filepath=str(input_dataset_path),
    param_overrides=run_overrides,
    query=query
)
# Prompt templates for this model live under
# src/llm_pol_reason_eval/prompts/templates/qwen3-1-7b/ (e.g. system.jinja2).

Wczytuję dataset z: C:\Users\piotr\PycharmProjects\LLMPolReasonEval\data\dataset\mvp_dataset_2025-06-08T20-42-43Z.json

--- Przetwarzanie batcha | Typ: closed_MTF, Kategoria: matura_język_polski, Pytania: 1 ---
Sprawdzanie szablonu: C:\Users\piotr\PycharmProjects\LLMPolReasonEval\src\llm_pol_reason_eval\prompts\templates\qwen3-1-7b\system.jinja2 dla modelu: qwen3-1-7b
Znaleziono szablon: qwen3-1-7b\system.jinja2 dla modelu: qwen3-1-7b
Używanie szablonu systemowego: qwen3-1-7b/system.jinja2 dla modelu: qwen3-1-7b
Sprawdzanie szablonu: C:\Users\piotr\PycharmProjects\LLMPolReasonEval\src\llm_pol_reason_eval\prompts\templates\qwen3-1-7b\base_question_prompt.jinja2 dla modelu: qwen3-1-7b
Znaleziono szablon: qwen3-1-7b\base_question_prompt.jinja2 dla modelu: qwen3-1-7b
Używanie szablonu użytkownika: qwen3-1-7b/base_question_prompt.jinja2 dla modelu: qwen3-1-7b
Próba załadowania szablonu: qwen3-1-7b/system.jinja2
Loader szuka w katalogu: ['C:\\Users\\piotr\\PycharmProjects\\LLMPolReasonEval\\

In [22]:
print("--- WYGENEROWANE ODPOWIEDZI ---")
pprint.pprint(results)

# Save the results under the run's configured output directory.
output_dir = project_root / run_config['output_dir']
# save_results is not visible from this notebook, so do not rely on it to
# create missing directories (e.g. on a fresh checkout).
output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / f"answers_notebook_{EXPERIMENT_NAME}.json"
engine.save_results(output_filepath=str(output_path))

--- WYGENEROWANE ODPOWIEDZI ---
[{'answer_id': 'ans_3b6fba96-89d7-4b49-9bfb-f9007acbbe83',
  'answer_text': '<think>\n'
                 "Okay, let's tackle this question. The user wants me to "
                 'evaluate three statements about Józef Tischner\'s "Dialog" '
                 'based on the provided text. \n'
                 '\n'
                 'First, I need to recall the key points from the text. The '
                 'main ideas are about the importance of courage in starting a '
                 'dialogue, the necessity of mutual understanding, and the '
                 'role of empathy in a genuine dialogue.\n'
                 '\n'
                 'Statement 1: "Gotowość do dialogu wymaga odwagi." The text '
                 'mentions that to start a dialogue, you have to "wychylić '
                 'się, przekroczyć próg," which is a metaphor for courage. So '
                 'this should be true (P).\n'
                 '\n'
                 'Statement 2: "