In [None]:
# Enable IPython's autoreload extension in mode 2 (re-import edited modules
# automatically before each cell executes).
%load_ext autoreload
%autoreload 2

In [None]:
!pip install transformers torch accelerate bitsandbytes sentencepiece --quiet

In [None]:
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [None]:
# Report the first CUDA device visible to PyTorch, or note that CUDA is absent.
cuda_ready = torch.cuda.is_available()
device_report = torch.cuda.get_device_name(0) if cuda_ready else "CUDA not enabled"
print(device_report)

In [None]:
# Show the directory the kernel is running in.
working_directory = os.getcwd()
print(working_directory)

In [None]:
# Hugging Face Hub identifier of the checkpoint; the tokenizer and model
# loading cells below both read this name.
model_id = "speakleash/Bielik-11B-v2.3-Instruct"
# Alternative checkpoint (1.5B according to its name); uncomment to switch.
# model_id = "speakleash/Bielik-1.5B-v3.0-Instruct"

In [None]:
# Quantization settings passed to the model loader: 4-bit loading with the
# "nf4" quantization type, double quantization enabled, and bfloat16 as the
# compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
%%time
# Load the tokenizer for the checkpoint named by `model_id`; the enclosing
# cell is timed with %%time.
print(f"Ładowanie tokenizera dla modelu: {model_id}")
tokenizer = AutoTokenizer.from_pretrained(model_id)
print("Tokenizer załadowany.")

In [None]:
%%time
print(f"Ładowanie modelu: {model_id} z kwantyzacją...")
# Gather the loading options in one place so they are easy to review.
load_options = {
    # Quantization settings built in the `bnb_config` cell.
    "quantization_config": bnb_config,
    "device_map": "auto",
    # NOTE(review): trust_remote_code=True allows Python code shipped in the
    # model repository to be executed -- confirm this checkpoint needs it.
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(model_id, **load_options)
print("Model załadowany.")

In [None]:
# Fall back to the end-of-sequence token for padding when the tokenizer does
# not define a dedicated pad token.
if tokenizer.pad_token_id is None:
    print("Ustawianie pad_token_id na eos_token_id.")
    tokenizer.pad_token_id = tokenizer.eos_token_id
    # Copy the id from the tokenizer (not from model.config.eos_token_id, which
    # may differ or be a list) so model and tokenizer agree on a single pad id.
    model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
def generate_bielik_response(prompt_text: str, max_new_tokens: int = 256, temperature=0.7, top_k=50, top_p=0.95) -> str:
    """Generate a single-turn reply to ``prompt_text`` with the loaded model.

    Uses the notebook-level ``tokenizer`` and ``model`` objects. The prompt is
    wrapped as one user message via the tokenizer's chat template when one is
    defined; otherwise a hand-written ``<|im_start|>``/``<|im_end|>`` layout is
    used and a warning is printed. A status line with the sampling settings is
    printed before generation.

    Args:
        prompt_text: The user's message.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. ``None`` or a value ``<= 0`` turns
            sampling off (``do_sample=False``).
        top_k: Forwarded to ``generate`` only when sampling and ``> 0``.
        top_p: Forwarded to ``generate`` only when sampling and ``< 1.0``.

    Returns:
        The decoded continuation (prompt tokens removed, special tokens
        skipped), stripped of surrounding whitespace.
    """
    if tokenizer.chat_template:
        messages = [{"role": "user", "content": prompt_text}]
        formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # The rendered template text already contains the special tokens the
        # template defines; letting the tokenizer add its own again would
        # duplicate them (e.g. a second BOS on tokenizers that prepend one).
        add_special_tokens = False
    else:
        print("OSTRZEŻENIE: Tokenizer nie ma zdefiniowanego szablonu czatu. Używam formatu manualnego.")
        formatted_prompt = f"<|im_start|>user\n{prompt_text}<|im_end|>\n<|im_start|>assistant\n"
        # The manual layout contains no BOS/EOS, so keep the tokenizer default.
        add_special_tokens = True

    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=add_special_tokens,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]
    print(f"Generowanie odpowiedzi (temp={temperature}, top_k={top_k}, top_p={top_p})...")

    generate_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id
    }
    if temperature is not None and temperature > 0.0:
        # Sampling mode: pass the temperature and only the active filters.
        generate_kwargs["temperature"] = temperature
        generate_kwargs["do_sample"] = True
        if top_k is not None and top_k > 0:
            generate_kwargs["top_k"] = top_k
        if top_p is not None and top_p < 1.0:
            generate_kwargs["top_p"] = top_p
    else:
        # Sampling disabled: no temperature/top_k/top_p are passed.
        generate_kwargs["do_sample"] = False

    outputs = model.generate(**generate_kwargs)
    # The output sequence starts with the prompt tokens; decode only what follows.
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response_text.strip()

In [None]:
%%time
# First sample prompt, answered with the helper's default sampling settings.
user_question = "Jaka jest najwyższa góra w Polsce? Odpowiedz 1 słowem"
print(f"\nZadaję pytanie: {user_question}")
model_answer = generate_bielik_response(user_question)
# One print call emits the heading and the answer on consecutive lines.
print("\nOdpowiedź modelu Bielik:", model_answer, sep="\n")

In [None]:
%%time
# Second sample prompt; only the token budget is overridden, so the helper's
# default temperature applies.
user_question_2 = "Napisz krótki wiersz o wiośnie."
print(f"\nZadaję pytanie: {user_question_2}")
model_answer_2 = generate_bielik_response(user_question_2, max_new_tokens=200)
# One print call emits the heading and the answer on consecutive lines.
print("\nOdpowiedź modelu Bielik:", model_answer_2, sep="\n")

In [None]:
# Third sample prompt; temperature=0.0 makes the helper take its
# do_sample=False branch.
user_question_3 = "Podaj definicję fotosyntezy."
print(f"\nZadaję pytanie: {user_question_3}")
model_answer_3 = generate_bielik_response(user_question_3, temperature=0.0)
# One print call emits the heading and the answer on consecutive lines.
print("\nOdpowiedź modelu Bielik (greedy):", model_answer_3, sep="\n")

In [None]:
# Write the three question/answer pairs produced above to a UTF-8 text file.
output_dir = "/kaggle/working/"
# exist_ok=True makes directory creation idempotent and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)
output_file_path = os.path.join(output_dir, "bielik_sample_output.txt")
with open(output_file_path, "w", encoding="utf-8") as f:
    f.write(f"Pytanie 1: {user_question}\n")
    f.write(f"Odpowiedź 1 (temp=0.7): {model_answer}\n\n")
    f.write(f"Pytanie 2: {user_question_2}\n")
    # Answer 2 was generated with the helper's default temperature (0.7); the
    # label previously claimed 0.85, which did not match the actual call.
    f.write(f"Odpowiedź 2 (temp=0.7): {model_answer_2}\n\n")
    f.write(f"Pytanie 3: {user_question_3}\n")
    f.write(f"Odpowiedź 3 (greedy, temp=0.0): {model_answer_3}\n")
print(f"\nPrzykładowe wyniki zapisano w: {output_file_path}")
print("Działanie notatnika zakończone pomyślnie.")