In [None]:
print("Hello rumble!")
# PDF chatbot – RAG + local LLM
# Run the cells in order (see README).

In [None]:
import os
import nltk
nltk.download("punkt", quiet=True)
nltk.download("punkt_tab", quiet=True)

from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate




In [None]:
# Directory of the local FAISS index. The indexing cell writes it with
# save_local() each time it runs; the RAG cell reads it back with load_local()
# when doc_search is missing from the kernel namespace.
FAISS_INDEX_PATH = "./faiss_index"

In [None]:
def load_pdf(data):
    """Load the PDF files of a directory into documents.

    Args:
        data: Path of the directory to scan; entries matching the glob
            ``*.pdf`` are loaded with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` yields for the matched files.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [None]:
# Load the PDFs from the "data/" folder (relative to the current working directory).
extr_data =load_pdf("data/")

In [None]:
#extr_data

In [None]:
def text_splitter(extr_data, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        extr_data: Documents to split (e.g. the result of ``load_pdf``).
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000, the value
            that used to be hard-coded.
        chunk_overlap: Overlap between neighbouring chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 200, the value
            that used to be hard-coded.

    Returns:
        The chunks returned by ``split_documents`` for ``extr_data``.
    """
    # Named `splitter` (not `text_splitter`) so the local variable does not
    # shadow this function's own name inside its body.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extr_data)

In [None]:
# Split the loaded documents and report how many chunks were produced.
text_chunks = text_splitter(extr_data)
print(len(text_chunks))

In [None]:
#text_chunks

In [None]:
def download_hugging_face_embeddings():
    """Create the embedding model used for indexing and for queries.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Instantiate the embedding model once; later cells reuse it to build the FAISS index.
embeddings = download_hugging_face_embeddings()

In [None]:
#embeddings

In [None]:
# Smoke test: embed one sample question. No later cell visible here consumes the
# result; it is only available for manual inspection.
query_results = embeddings.embed_query("What is the purpose of this document?")

In [None]:
#query_results

In [None]:
# Local vector store backed by FAISS (no cloud service, no API key, no migration).
# Fail early with an actionable message: building an index from zero chunks
# otherwise fails inside FAISS.from_documents with an opaque error.
if not text_chunks:
    raise ValueError(
        "No text chunks to index: check that the data/ folder contains readable PDF files."
    )
doc_search = FAISS.from_documents(text_chunks, embeddings)
# Persist the index so the RAG cell can reload it from disk after a kernel restart.
doc_search.save_local(FAISS_INDEX_PATH)

In [None]:
# Load the local LLM (Qwen2.5-Coder GGUF) – requires: pip install llama-cpp-python
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Streaming callback: the answer is printed while it is being generated
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# NOTE(review): n_ctx=1024 has to hold the whole prompt (template + 2 retrieved
# chunks of up to 1000 characters each + the question) plus up to max_tokens=512
# of output. That looks tight — confirm it fits for the documents in use, or
# raise n_ctx.
llm = LlamaCpp(
    model_path="llm_model/qwen2.5-coder-32b-instruct-q4_k_m.gguf",
    n_ctx=1024,
    max_tokens=512,
    temperature=0,
    repeat_penalty=1.2,
    verbose=False,
    callback_manager=callback_manager,
)

In [None]:
# RAG chain: retrieves from the indexed PDFs and answers with the LLM
from langchain.chains import RetrievalQA

# If doc_search is missing (e.g. after a kernel restart), reload the FAISS index
# from disk, recreating any missing prerequisite (index path, embedding model) first.
if "doc_search" not in globals():
    from langchain.vectorstores import FAISS

    if "FAISS_INDEX_PATH" not in globals():
        FAISS_INDEX_PATH = "./faiss_index"

    if "embeddings" not in globals():
        import nltk
        nltk.download("punkt", quiet=True)
        nltk.download("punkt_tab", quiet=True)
        from langchain.embeddings import HuggingFaceEmbeddings
        # Same model name as download_hugging_face_embeddings(), which built the index.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # NOTE(review): newer LangChain releases appear to require
    # allow_dangerous_deserialization=True here — confirm against the installed version.
    doc_search = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
    print("Vector-Store von Disk geladen.")

from langchain.prompts import PromptTemplate

# Prompt: asks for a fuller answer (several sentences / a short paragraph) drawn only from the supplied text
QA_PROMPT = PromptTemplate(
    template="""Answer in a short paragraph (4-8 sentences) using ONLY the text below. Be clear and complete. End with a period. Do not invent or repeat. If the answer is not in the text, say: Not in the document.

Text:
{context}

Question: {question}

Answer:""",
    input_variables=["context", "question"],
)

# Retrieve only 2 chunks per question: less context, less confusion for the model.
retriever = doc_search.as_retriever(search_kwargs=dict(k=2))

# Assemble the question-answering chain from the local LLM, the retriever and
# QA_PROMPT; the source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=QA_PROMPT),
    return_source_documents=True,
)