In [None]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported modules live.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from pathlib import Path
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [None]:
# Resolve the current working directory, then look for the .env file three
# directory levels above it and load that file.
project_root = Path.cwd().resolve()
print("Project root: {}".format(project_root))

env_path = project_root.parents[2].joinpath(".env")
print("Env path: {}".format(env_path))

load_dotenv(env_path)

In [None]:
# Read the Hugging Face token from the environment; stop right away when it is
# missing or empty.
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None or hf_token == "":
    raise ValueError("Brak tokena Hugging Face. Dodaj HF_TOKEN do pliku .env.")

In [None]:
# When CUDA is available: print the name of device 0, then empty the CUDA cache.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(gpu_name)
    torch.cuda.empty_cache()
    print("Pamięć podręczna CUDA została wyczyszczona.")

In [None]:
%%time
model_name = "speakleash/Bielik-4.5B-v3.0-Instruct"
print(f"Pobieranie modelu {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Model załadowany na urządzenie: {device} z typem danych: {model.dtype}")

In [None]:
%%time
prompt = "Jakie są główne cechy modelu Bielik 4.5B?"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(inputs["input_ids"], max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Odpowiedź modelu:")
print(response)