# In [None]:
from model import Seq2SeqLLM, TransformersEncoderLLM, TransformerDecoderLLM
from transformers import AutoTokenizer
from torch.nn import functional as F
import torch
import gradio as gr

# -------------------------------
# 0️⃣ Device
# -------------------------------
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# -------------------------------
# 1️⃣ Load the tokenizer
# -------------------------------
tokenizer_name = "t5-small"  # or whichever tokenizer you use
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# Padding needs a pad token; reuse the EOS token when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -------------------------------
# 2️⃣ Build the model
# -------------------------------
# Hyper-parameters of the network.
vocab_size = tokenizer.vocab_size
embed_dim = 512
num_heads = 8
num_layers = 8      # matches the checkpoint
expansion = 6       # matches the checkpoint
max_len = 256
dp = 0.1
drop_path = 0.1
use_swiglu = False

# The encoder and the decoder receive exactly the same keyword arguments, so
# they are collected once and unpacked into both constructors.
stack_kwargs = dict(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_layers=num_layers,
    num_heads=num_heads,
    dp=dp,
    drop_path=drop_path,
    expansion=expansion,
    max_len=max_len,
    use_swiglu=use_swiglu,
)
encoder = TransformersEncoderLLM(**stack_kwargs)
decoder = TransformerDecoderLLM(**stack_kwargs)

# Wrap both halves in the seq2seq model and move it to the selected device.
model = Seq2SeqLLM(encoder, decoder).to(device)

# -------------------------------
# 3️⃣ Load the checkpoint (partial match allowed)
# -------------------------------
checkpoint_path = r"C:\Users\hdgn5\OneDrive\Masaüstü\PyTorch - Transformers  - LLM\LLM & Transformers\LLM Uygulamalar\final_model.pt"

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider passing weights_only=True if the installed PyTorch
# version supports it and the file holds a plain state dict.
state_dict = torch.load(checkpoint_path, map_location=device)

# strict=False tolerates key mismatches: parameters that are absent from the
# checkpoint keep their random initialisation. Report the mismatches instead of
# discarding them, otherwise a checkpoint that matches (almost) nothing would
# silently leave the model untrained.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys:
    print(
        f"[checkpoint] {len(load_result.missing_keys)} parameter(s) not found in the "
        f"checkpoint (left randomly initialised): {load_result.missing_keys}"
    )
if load_result.unexpected_keys:
    print(
        f"[checkpoint] {len(load_result.unexpected_keys)} checkpoint entr(y/ies) not used "
        f"by the model: {load_result.unexpected_keys}"
    )

# Inference mode.
model.eval()

# -------------------------------
# 4️⃣ Generation function
# -------------------------------
def generate_seq2seq(model, tokenizer, src_texts, max_len=32, device=device, top_k=50, top_p=0.95):
    """Sample one output text per source text using top-k / top-p decoding.

    Args:
        model: Callable invoked as
            ``model(enc_ids, dec_ids, src_mask=enc_mask, tgt_mask=None)``; its
            result is indexed as ``logits[:, -1, :]`` to obtain the scores of
            the next token.
        tokenizer: Tokenizer that is callable on ``src_texts`` and provides
            ``decode``, ``pad_token_id`` and (optionally) ``eos_token_id``.
        src_texts: Source text(s) handed to the tokenizer.
        max_len: Truncation length for the source texts and, at the same time,
            the maximum number of tokens to generate.
        device: Device the input tensors are moved to.
        top_k: Sample only among the ``top_k`` most probable tokens. ``None``
            or a non-positive value disables the restriction.
        top_p: Nucleus threshold: among the top-k candidates keep the smallest
            prefix whose probability mass reaches ``top_p``. ``None`` or a
            value outside ``(0, 1)`` disables the restriction.

    Returns:
        A list with one decoded string (special tokens removed) per source
        text.
    """
    # Nothing to generate for an empty batch.
    if isinstance(src_texts, (list, tuple)) and not src_texts:
        return []

    model.eval()
    batch_enc = tokenizer(
        src_texts, padding='longest', truncation=True, max_length=max_len, return_tensors='pt'
    )
    enc_ids = batch_enc['input_ids'].to(device)
    enc_mask = batch_enc['attention_mask'].to(device)

    batch_size = enc_ids.size(0)
    pad_id = tokenizer.pad_token_id
    eos_id = getattr(tokenizer, 'eos_token_id', None)

    # Every decoder sequence starts with the pad token.
    dec_input_ids = torch.full((batch_size, 1), pad_id, dtype=torch.long, device=device)
    # Marks the sequences that have already produced EOS.
    finished = torch.zeros(batch_size, dtype=torch.bool, device=device)

    with torch.no_grad():
        for _ in range(max_len):
            logits = model(enc_ids, dec_input_ids, src_mask=enc_mask, tgt_mask=None)
            probs = F.softmax(logits[:, -1, :], dim=-1)

            # Top-k: keep the k most probable tokens (torch.topk returns them
            # sorted by descending probability) and renormalise.
            vocab = probs.size(-1)
            k = vocab if top_k is None or top_k <= 0 else min(top_k, vocab)
            top_probs, top_indices = torch.topk(probs, k, dim=-1)
            top_probs = top_probs / top_probs.sum(dim=-1, keepdim=True)

            # Top-p: drop every candidate whose preceding candidates already
            # cover a mass of top_p. The most probable token is always kept
            # because the mass before it is zero.
            if top_p is not None and 0.0 < top_p < 1.0:
                mass_before = top_probs.cumsum(dim=-1) - top_probs
                top_probs = top_probs.masked_fill(mass_before >= top_p, 0.0)
                top_probs = top_probs / top_probs.sum(dim=-1, keepdim=True)

            choice = torch.multinomial(top_probs, num_samples=1)
            next_tokens = top_indices.gather(-1, choice)

            # Sequences that already ended only receive padding, so nothing
            # sampled after EOS leaks into the decoded text.
            next_tokens = next_tokens.masked_fill(finished.unsqueeze(1), pad_id)
            dec_input_ids = torch.cat([dec_input_ids, next_tokens], dim=1)

            if eos_id is not None:
                finished = finished | (next_tokens.squeeze(1) == eos_id)
                if bool(finished.all()):
                    break

    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in dec_input_ids]

def generate_answer(input_text):
    """Generate the answer for a single input text.

    Wraps ``input_text`` in a one-element batch, runs ``generate_seq2seq``
    with the module-level ``model`` and ``tokenizer``, and returns the only
    generated string.
    """
    answers = generate_seq2seq(model, tokenizer, [input_text])
    return answers[0]

# -------------------------------
# 5️⃣ Gradio GUI
# -------------------------------
# Two-line text box for the question.
question_box = gr.Textbox(lines=2, label="Soru")

# Text in, text out: the question goes to generate_answer and its return value
# is shown as plain text.
iface = gr.Interface(
    title="Law Chatbot",
    description="Kayıtlı model ile cevap üreten GUI",
    fn=generate_answer,
    inputs=question_box,
    outputs="text",
)

# Start the web UI.
iface.launch()
