<a href="https://colab.research.google.com/github/elemnurguner/data-ai-projects/blob/main/RAG_tabanl%C4%B1_bir_otomatik_yan%C4%B1tlama_sistemi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the retrieval, embedding, generation and UI packages used by this notebook.
!pip install -q langchain-community faiss-cpu sentence-transformers transformers gradio
!pip install -q unstructured pdfminer.six  # for PDF and file support

In [None]:
# Free ports left busy by earlier runs (7860/7861 are presumably the Gradio
# server ports - confirm) by force-killing whatever is listening on them.
!kill -9 $(lsof -t -i :7860)  # kill the process(es) listening on port 7860
!kill -9 $(lsof -t -i :7861)
!fuser -k 80/tcp

In [None]:
!pip install -q torch transformers langchain-community faiss-cpu sentence-transformers gradio unstructured pdfminer.six

import functools
import os

import torch
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr

# 1. Dokümanları Oluşturma
def initialize_documents():
    """Create the ``documents`` directory with a sample guide if it is missing.

    When ``documents`` already exists nothing is touched, so user-supplied
    files are never overwritten. Otherwise the directory is created and a
    small Turkish sample file, ``documents/teknik_rehber.txt``, is written
    in UTF-8.
    """
    if os.path.exists("documents"):
        return

    os.makedirs("documents")
    # Build the text from explicit lines: a triple-quoted literal written at
    # function indentation would leak the source indentation into the file.
    sample_text = "\n".join(
        (
            "Ürün X Özellikleri:",
            "- Minimum 4 GB RAM",
            "- Windows 11/10/8.1 desteği",
            "- 64-bit işletim sistemi gereksinimi",
            "- Güncellemeler her Perşembe 02:00'da",
        )
    )
    with open("documents/teknik_rehber.txt", "w", encoding="utf-8") as f:
        f.write(sample_text)

# 2. Vektör Veritabanı
def create_vector_database():
    """Index every ``.txt`` file under ``documents`` into a local FAISS store.

    The documents are loaded recursively, split with
    ``RecursiveCharacterTextSplitter(chunk_size=512)``, embedded with the
    multilingual MiniLM sentence-transformer, and the resulting index is
    saved to the ``faiss_index`` directory.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    raw_docs = DirectoryLoader("documents", glob="**/*.txt").load()
    chunks = RecursiveCharacterTextSplitter(chunk_size=512).split_documents(raw_docs)
    index = FAISS.from_documents(chunks, embedder)
    index.save_local("faiss_index")

# 3. Dil Modeli
def initialize_model():
    """Build the text2text-generation pipeline backed by ``google/mt5-base``.

    Returns:
        A transformers pipeline configured with ``max_length=200`` and
        ``num_beams=4``, placed on GPU 0 when CUDA is available and on the
        CPU (device ``-1``) otherwise.
    """
    checkpoint = "google/mt5-base"
    mt5_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    mt5_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # The pipeline uses -1 for CPU and a non-negative index for a CUDA device.
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1

    return pipeline(
        "text2text-generation",
        model=mt5_model,
        tokenizer=mt5_tokenizer,
        device=device_index,
        max_length=200,
        num_beams=4,
    )

# 4. Doğrulama Sistemi
def validate_response(answer: str) -> bool:
    """Return True when *answer* mentions at least one expected keyword.

    Matching is a case-insensitive substring test against the keywords
    "ram", "gb", "windows" and "güncelleme".
    """
    lowered = answer.lower()
    for keyword in ("ram", "gb", "windows", "güncelleme"):
        if keyword in lowered:
            return True
    return False

# 5. RAG Motoru
@functools.lru_cache(maxsize=1)
def _load_rag_resources():
    """Load the FAISS index and the text generator once per process.

    Loading the embedding model, the index and the generation model is by far
    the most expensive step, so the result is memoized instead of being
    rebuilt for every question. ``lru_cache`` does not cache exceptions, so a
    failed load is retried on the next call.

    Returns:
        A ``(vector_db, generator)`` tuple.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    # SECURITY: allow_dangerous_deserialization enables pickle-based loading.
    # Only acceptable because "faiss_index" is written locally by this
    # application; never point this at an index from an untrusted source.
    vector_db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    return vector_db, initialize_model()


def generate_answer(question: str) -> str:
    """Answer *question* from the indexed documents (RAG).

    The two most similar chunks are retrieved from the FAISS index, placed in
    a prompt together with the question, and passed to the generator. The
    generated text is returned only if ``validate_response`` accepts it.

    Args:
        question: The user's question as typed into the UI.

    Returns:
        The generated answer, "❗ Bilgi bulunamadı." when the question is
        empty or the answer fails validation, or a "⚠️ Hata: ..." message
        when anything raises. Errors are reported as text rather than raised
        because this function is the UI callback boundary.
    """
    # Nothing to retrieve for an empty/blank question; skip the heavy work.
    if not question or not question.strip():
        return "❗ Bilgi bulunamadı."

    try:
        vector_db, generator = _load_rag_resources()
        relevant_docs = vector_db.similarity_search(question, k=2)
        context = "\n".join(doc.page_content for doc in relevant_docs)

        # NOTE(review): temperature normally only matters when sampling is
        # enabled; with the pipeline's beam-search settings it is presumably
        # ignored - confirm before relying on it.
        response = generator(
            f"Bağlam: {context}\nSoru: {question}\nCevap:",
            temperature=0.3,
        )[0]["generated_text"]

        return response if validate_response(response) else "❗ Bilgi bulunamadı."
    except Exception as e:
        return f"⚠️ Hata: {str(e)}"

# 6. Arayüz
def create_interface():
    """Build the Gradio interface that exposes ``generate_answer``.

    Returns:
        A ``gr.Interface`` with one question textbox, one answer textbox and
        two example questions.
    """
    question_box = gr.Textbox(label="Sorunuz")
    answer_box = gr.Textbox(label="Cevap")
    sample_questions = [
        ["Ürün için minimum RAM gereksinimi nedir?"],
        ["Hangi Windows sürümleri desteklenir?"],
    ]
    return gr.Interface(
        fn=generate_answer,
        inputs=question_box,
        outputs=answer_box,
        examples=sample_questions,
    )

# 7. Başlatma
if __name__ == "__main__":
    # Make sure there is something to index before anything else runs.
    initialize_documents()

    # Build the vector index only when no saved index is present on disk.
    index_present = os.path.exists("faiss_index")
    if not index_present:
        create_vector_database()

    # Start the web UI; share=True requests a public Gradio link.
    demo = create_interface()
    demo.launch(share=True)