# In [None]:
# Working version (the variant that actually runs)
from flask import Flask, Response, request, render_template_string
from transformers import TextStreamer, StoppingCriteria, BitsAndBytesConfig
from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import queue
import threading

# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# Quantization settings: 4-bit loading with the "nf4" quant type, double
# quantization enabled, and bfloat16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = torch.bfloat16,
    bnb_4bit_use_double_quant = True,
)

# Load the model and tokenizer.
# NOTE(review): "model yolu" (Turkish for "model path") is a placeholder and has
# to be replaced with a real checkpoint path or hub id before this can run.
# NOTE(review): load_in_4bit is passed together with an explicit
# quantization_config — confirm the installed Unsloth version accepts both.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "model yolu",
    max_seq_length = 2048,
    dtype = torch.bfloat16,
    load_in_4bit = True,
    quantization_config = quantization_config,
    device_map = "auto",
)

# Load the PEFT adapter on top of the base model, then switch to inference mode.
# NOTE(review): the adapter path is the same placeholder string as the base
# model path — confirm which directory each one is supposed to point to.
model = PeftModel.from_pretrained(model, "model yolu")
FastLanguageModel.for_inference(model)
model.eval()

# Thread management.
# current_stop_event: stop event of the most recently started generation
# (None until the first /generate request replaces it).
# stop_lock: guards reading and replacing current_stop_event across requests.
current_stop_event = None
stop_lock = threading.Lock()

class StopGenerationCriteria(StoppingCriteria):
    """Stopping criterion that mirrors the state of an external stop flag.

    The flag is any object exposing ``is_set()`` (a ``threading.Event`` in this
    file); once another thread sets it, the criterion reports that generation
    should stop.
    """

    def __init__(self, stop_event):
        """Store ``stop_event``, the flag polled on every call."""
        super().__init__()
        self.stop_event = stop_event

    def __call__(self, input_ids, scores, **kwargs):
        """Return whether the stop flag is set; the arguments are not inspected."""
        should_halt = self.stop_event.is_set()
        return should_halt

class WebStreamer(TextStreamer):
    """Text streamer that forwards finalized text into a queue.

    Each piece of text is put on the queue as it arrives; a ``None`` item is
    appended after the final piece so a consumer can detect the end.
    """

    def __init__(self, tokenizer, queue, **kwargs):
        """Set up the base streamer and remember the destination queue.

        Args:
            tokenizer: Passed through to ``TextStreamer``.
            queue: Object with a ``put`` method that receives the text pieces.
            **kwargs: Forwarded unchanged to ``TextStreamer``.
        """
        super().__init__(tokenizer, **kwargs)
        self.queue = queue

    def on_finalized_text(self, text: str, stream_end: bool = False):
        """Enqueue ``text`` and, when ``stream_end`` is true, a trailing ``None``."""
        sink = self.queue
        sink.put(text)
        if not stream_end:
            return
        # End-of-stream marker for whoever drains the queue.
        sink.put(None)

def format_alpaca_prompt(instruction):
    """Wrap ``instruction`` in the Turkish Alpaca-style prompt template.

    The result consists of a fixed preamble, a "### Talimat:" section holding
    the instruction, and an empty "### Yanıt:" section that ends with a newline.

    Args:
        instruction: The user's request; it is interpolated with f-string
            formatting, so non-string values are converted to text.

    Returns:
        The complete prompt string.
    """
    preamble = "Aşağıda bir görevi açıklayan bir talimat bulunmaktadır. İsteği uygun şekilde tamamlayan bir yanıt yazın."
    pieces = (
        preamble,
        "",
        "### Talimat:",
        f"{instruction}",
        "",
        "### Yanıt:",
        "",  # yields the trailing newline after the response header
    )
    return "\n".join(pieces)

@app.route('/')
def index():
    """Serve the single-page chat UI.

    The page posts nothing itself: for each prompt it opens an ``EventSource``
    on ``/generate?prompt=...`` and appends every received message to the
    answer bubble until a message whose data is ``DONE`` arrives.

    Two client-side details are deliberate:

    * The stream is closed in ``onerror``. An ``EventSource`` reconnects
      automatically after a failure, and because every GET of ``/generate``
      starts a fresh generation, a reconnect would restart the answer and
      append duplicate text.
    * Message bubbles use ``white-space: pre-wrap`` so line breaks (for example
      in a multi-line prompt typed into the textarea) are displayed.

    Returns:
        The rendered HTML page.
    """
    return render_template_string('''
<!DOCTYPE html>
<html lang="tr">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Alpaca Chat</title>
  <style>
    /* Stil tanımları aynı kalıyor */
    .container { max-width: 800px; margin: 0 auto; padding: 20px; height: 100vh; display: flex; flex-direction: column; }
    .chat-area { flex-grow: 1; overflow-y: auto; background: #fff; border-radius: 8px; padding: 20px; margin: 10px 0; }
    .message { max-width: 75%; padding: 10px; margin: 5px 0; border-radius: 8px; white-space: pre-wrap; }
    .user { background: #dcf8c6; margin-left: auto; }
    .ai { background: #f0f0f0; }
    .input-container { display: flex; gap: 10px; }
    textarea { flex-grow: 1; padding: 10px; border-radius: 8px; border: 1px solid #ddd; }
    button { background: #007bff; color: white; border: none; padding: 10px 20px; border-radius: 8px; cursor: pointer; }
  </style>
</head>
<body>
  <div class="container">
    <h1>Alpaca Chat</h1>
    <div id="chatArea" class="chat-area"></div>
    <div class="input-container">
      <textarea id="prompt" rows="3" placeholder="Alpaca modeli için sorunuzu girin..."></textarea>
      <button onclick="sendPrompt()">Gönder</button>
    </div>
  </div>

  <script>
    let currentEventSource = null;

    function sendPrompt() {
      const prompt = document.getElementById('prompt').value.trim();
      if (!prompt) return;

      const chatArea = document.getElementById('chatArea');

      // Kullanıcı mesajını ekle
      const userDiv = document.createElement('div');
      userDiv.className = 'message user';
      userDiv.textContent = prompt;
      chatArea.appendChild(userDiv);

      // AI mesaj konteyneri
      const aiDiv = document.createElement('div');
      aiDiv.className = 'message ai';
      chatArea.appendChild(aiDiv);

      // Önceki bağlantıyı kapat
      if(currentEventSource) currentEventSource.close();

      // Yeni istek başlat
      const source = new EventSource(`/generate?prompt=${encodeURIComponent(prompt)}`);
      currentEventSource = source;

      source.onmessage = (e) => {
        if(e.data === 'DONE') {
          source.close();
          aiDiv.innerHTML += '<div style="color: #666; font-size: 0.8em">▼ Cevap Tamamlandı</div>';
          return;
        }
        aiDiv.textContent += e.data;
        chatArea.scrollTop = chatArea.scrollHeight;
      };

      // Close on error: otherwise the browser reconnects on its own and the
      // server would start generating the same answer again.
      source.onerror = () => {
        source.close();
        aiDiv.innerHTML += '<div style="color: #c00; font-size: 0.8em">Bağlantı hatası</div>';
      };

      document.getElementById('prompt').value = '';
      chatArea.scrollTop = chatArea.scrollHeight;
    }
  </script>
</body>
</html>
''')

def _sse_data_event(payload):
    """Frame ``payload`` as one Server-Sent Events message.

    The event-stream format is line-oriented: each line of the payload must be
    sent in its own ``data:`` field, and the receiver joins the fields back
    together with newlines. Writing a multi-line payload after a single
    ``data:`` prefix would lose the line breaks and everything after the first
    one. Carriage returns are normalised to newlines first because the format
    treats them as line terminators too.
    """
    normalized = payload.replace("\r\n", "\n").replace("\r", "\n")
    fields = "".join(f"data: {line}\n" for line in normalized.split("\n"))
    return fields + "\n"  # the blank line terminates the message


@app.route('/generate')
def generate():
    """Stream a model answer for the ``prompt`` query parameter as SSE.

    Starting a new request signals the previously started generation to stop.
    The model runs in a background thread and pushes decoded text into a queue;
    the response generator drains that queue and emits one SSE message per
    chunk, followed by a final message whose data is ``DONE``.

    Known limitation: a chunk whose text is exactly ``DONE`` cannot be told
    apart from the terminator by the client.

    Returns:
        A streaming ``Response`` with mimetype ``text/event-stream``.
    """
    global current_stop_event

    # Create this request's event first and publish it under the lock, so the
    # local reference can never be another request's event.
    stop_event = threading.Event()
    with stop_lock:
        if current_stop_event is not None:
            current_stop_event.set()  # ask the previous generation to stop
        current_stop_event = stop_event

    prompt = request.args.get('prompt', '')
    response_queue = queue.Queue()

    # Convert to the Alpaca prompt format.
    formatted_prompt = format_alpaca_prompt(prompt)

    # Tokenizer settings; the tensors are moved to the CUDA device.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors = "pt",
        padding = True,
        truncation = True,
        max_length = 2048,
    ).to("cuda")

    # Streamer settings: skip the prompt and special tokens in the output.
    streamer = WebStreamer(
        tokenizer = tokenizer,
        queue = response_queue,
        skip_prompt = True,
        skip_special_tokens = True,
        clean_up_tokenization_spaces = True,
    )

    def generation_task():
        """Run generation; always leave an end marker on the queue."""
        try:
            model.generate(
                **inputs,
                streamer = streamer,
                max_new_tokens = 2048,
                temperature = 0.6,
                top_p = 0.9,
                top_k = 40,
                do_sample = True,
                repetition_penalty = 1.15,
                eos_token_id = tokenizer.eos_token_id,
                pad_token_id = tokenizer.pad_token_id,
                stopping_criteria = [StopGenerationCriteria(stop_event)],
            )
        finally:
            # The streamer also enqueues None at stream end; this one makes
            # sure the consumer is released when generate() raises instead.
            response_queue.put(None)

    threading.Thread(target=generation_task).start()

    def stream():
        """Yield SSE messages until the end marker, then the DONE message."""
        try:
            # iter() with a sentinel stops at the first None end marker.
            for chunk in iter(response_queue.get, None):
                yield _sse_data_event(chunk)
            yield "data: DONE\n\n"
        finally:
            # Also runs when the server closes this generator after the client
            # disconnects, so an abandoned request stops generating.
            stop_event.set()

    return Response(stream(), mimetype="text/event-stream")

# When run as a script, start Flask's built-in server on port 5000 with
# threaded request handling. host='0.0.0.0' listens on every network
# interface, so the endpoint is reachable from other machines.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, threaded=True)