In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so that imported
# modules are reloaded before each cell runs and edits to project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from pathlib import Path
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [None]:
# The kernel's working directory is used as the notebook's base directory.
project_root = Path.cwd().resolve()
print(f"Project root: {project_root}")

# The .env file is expected three directory levels above the working directory.
env_path = project_root.parents[2].joinpath('.env')
print(f"Env path: {env_path}")

# Kept as the last expression so load_dotenv's return value shows as cell output.
load_dotenv(env_path)

In [None]:
# Read the Hugging Face access token from the environment and fail fast when
# it is missing or empty, before any model download is attempted.
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None or hf_token == "":
    raise ValueError("Brak tokena Hugging Face. Dodaj HF_TOKEN do pliku .env.")

In [None]:
# Report the name of CUDA device 0 when PyTorch can see a GPU; otherwise say
# that CUDA is not available.
if not torch.cuda.is_available():
    print("CUDA not enabled")
else:
    print(torch.cuda.get_device_name(0))

In [None]:
# When CUDA is available, call torch.cuda.empty_cache() and print a
# confirmation message (in Polish). Nothing happens on CPU-only machines.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("Pamięć podręczna CUDA została wyczyszczona.")

In [None]:
%%time
model_name = "speakleash/Bielik-1.5B-v3.0-Instruct"
print(f"Pobieranie modelu {model_name}...")

# Choose the target device first; the weight precision below follows from it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer and model are both fetched with the Hugging Face token read earlier.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    # float16 on GPU, float32 on CPU.
    torch_dtype=torch.float32 if device.type == "cpu" else torch.float16,
)

# Move the loaded weights to the selected device and report the final placement.
model = model.to(device)
print(f"Model załadowany na urządzenie: {device} z typem danych: {model.dtype}")


In [None]:
%%time
# Generate a completion for a single prompt and print the decoded text.
prompt = "Jakie są główne cechy modelu Bielik 1.5B?"

# Tokenize into PyTorch tensors and move them to the same device as the model.
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Forward the tokenizer's attention mask together with the input ids instead of
# dropping it; .get() keeps the call valid (attention_mask=None) if the
# tokenizer did not produce one.
# max_length=100 caps prompt tokens plus generated tokens, not only the new ones.
# NOTE(review): the raw prompt is sent without a chat template; for an
# "Instruct" checkpoint, confirm whether tokenizer.apply_chat_template is expected.
outputs = model.generate(
    inputs["input_ids"],
    attention_mask=inputs.get("attention_mask"),
    max_length=100,
)

# outputs[0] is the only sequence in the batch; special tokens are stripped on decode.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Odpowiedź modelu:")
print(response)

In [None]:
%%time
# Generate a completion for a second prompt and print the decoded text.
prompt = "Kim jesteś?"

# Tokenize into PyTorch tensors and move them to the same device as the model.
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Forward the tokenizer's attention mask together with the input ids instead of
# dropping it; .get() keeps the call valid (attention_mask=None) if the
# tokenizer did not produce one.
# max_length=100 caps prompt tokens plus generated tokens, not only the new ones.
outputs = model.generate(
    inputs["input_ids"],
    attention_mask=inputs.get("attention_mask"),
    max_length=100,
)

# outputs[0] is the only sequence in the batch; special tokens are stripped on decode.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Odpowiedź modelu:")
print(response)

In [None]:
# Display the kernel's current working directory as the cell output.
# Note: this also binds the bare name `getcwd` in the notebook namespace.
from os import getcwd
getcwd()

In [None]:
import sys

# Make the project's `src` directory importable.
# NOTE(review): this is a machine-specific absolute path; it only works on the
# original author's workstation and should be derived from the project layout.
src_dir = r"C:\Users\piotr\PycharmProjects\LLMPolReasonEval\src"
# Guard the append so re-running this cell does not add a duplicate entry each time.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from llm_pol_reason_eval.question_processing.dataset_processor import DatasetProcessor

# NOTE(review): absolute path rooted at "/" (the current drive root on Windows);
# confirm it is not meant to be relative to the project directory.
json_file_path = r"/data/dataset_raw/matury/MPOP-P1-100-A-2405-gemini25pro-2025-06-03T22-51-00Z.json"

# Start from a fresh processor and load the JSON file into it.
dataset_processor = DatasetProcessor()
dataset_processor.add_data_from_json_file(json_file_path)

In [None]:
# Export everything held by the processor as one JSON string, with both
# sort-by-key flags (contexts and questions) switched on.
json_data_str = dataset_processor.get_all_data_as_json_string(
    sort_contexts_by_key=True,
    sort_questions_by_key=True,
)

In [None]:
import json
# Parse the exported JSON string back into Python objects.
data_dict = json.loads(json_data_str)
# Cell output: number of entries under "questions". If that key is absent,
# .get() returns None and len() raises TypeError.
len(data_dict.get("questions"))

In [None]:
# Display the entry at index 1 of the "questions" value as the cell output.
data_dict.get("questions")[1]

In [None]:
# From the question at index 1, take its text and the first of its context ids.
question_text = data_dict.get("questions")[1].get("question_text")
context_id = data_dict.get("questions")[1].get("context_ids")[0]
context_text = all(data_dict.get("contexts")
