In [11]:
import os

import tiktoken
import fitz

from qdrant_client import QdrantClient
from dotenv import load_dotenv
from openai import OpenAI

In [12]:
def get_qdrant_client() -> QdrantClient:

    load_dotenv()

    # IP und Port aus Umgebungsvariablen lesen
    host = os.getenv("QDRANT_SERVER_IP", "localhost")
    port = int(os.getenv("QDRANT_PORT", 6333))

    try:
        client = QdrantClient(host=host, port=port)

        # Verbindung testen (z. B. durch Auflisten der Collections)
        _ = client.get_collections()

        print(f"Successfully connected to Qdrant at {host}:{port}!")
        return client

    except Exception as e:
        print(f"Error connecting to Qdrant at {host}:{port}: {e}")
        return None

In [13]:
def is_qdrant_alive(client) -> bool:
    """
    Checks if Qdrant is reachable by sending a simple request.
    """
    try:
        client.get_collections()
        return True
    except Exception as e:
        print(f"Connection check failed: {e}")
        return False



In [14]:
def load_pdf_and_chunk(filepath: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    # Existenz der Datei prüfen
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"The file was not found: {filepath}")

    # PDF einlesen
    doc = fitz.open(filepath)
    full_text = ""
    for page in doc:
        full_text += page.get_text("text") + "\n"  # "\n" trennt die Seiten
    doc.close()

    # Chunks erzeugen mit Überlappung
    chunks = []
    start = 0
    while start < len(full_text):
        end = min(start + chunk_size, len(full_text))
        chunks.append(full_text[start:end])
        start += chunk_size - overlap

    return chunks

In [15]:
def get_embedding(text: str, model: str = "text-embedding-3-large") -> list:
    """
    Erstellt ein Embedding für einen gegebenen Text über die OpenAI API.

    Args:
        text (str): Der zu embeddene Text
        model (str): Das OpenAI-Embedding-Modell (default: text-embedding-3-large)

    Returns:
        list: Embedding-Vektor als Liste von Floats
    """
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")

    if not api_key:
        raise ValueError("OPENAI_API_KEY wurde nicht in der .env-Datei gefunden.")

    client = OpenAI(api_key=api_key)

    try:
        response = client.embeddings.create(
            input=text,
            model=model
        )
        return response.data[0].embedding
    except Exception as e:
        print(f"Error while retrieving the embedding: {e}")
        return []

In [16]:
def embed_chunks(chunks: list[str]) -> list[list[float]]:
    return [get_embedding(chunk) for chunk in chunks]


In [17]:
from qdrant_client.models import PointStruct, VectorParams, Distance

def store_embeddings_in_qdrant(client, collection_name: str, chunks: list[str], embeddings: list[list[float]]):
    # Collection neu erstellen (löscht alte Inhalte!)
    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=len(embeddings[0]), distance=Distance.COSINE)
    )

    # Punkte erstellen
    points = [
        PointStruct(id=i, vector=vector, payload={"text": chunks[i]})
        for i, vector in enumerate(embeddings)
    ]

    # Hochladen
    client.upsert(collection_name=collection_name, points=points)

    print(f"{len(points)} Embeddings stored in Qdrant.")


In [18]:
def retrieve_similar_chunks(query: str, client: QdrantClient, collection_name: str, top_k: int = 5) -> list[str]:
    """
    Führt eine semantische Suche in Qdrant basierend auf einer Query durch.

    Args:
        query (str): Die Nutzerfrage
        client (QdrantClient): Verbundener Qdrant-Client
        collection_name (str): Name der zu durchsuchenden Collection
        top_k (int): Anzahl der zurückgegebenen ähnlichen Chunks

    Returns:
        list[str]: Liste der ähnlichsten Text-Chunks
    """
    query_vector = get_embedding(query)
    if not query_vector:
        return []

    search_result = client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=top_k
    )

    return [hit.payload["text"] for hit in search_result]


In [19]:
def answer_with_context(query: str, context_chunks: list[str], model: str = "gpt-4o") -> str:
    """
    Nutzt OpenAI GPT-4o, um eine Antwort auf eine Frage zu geben – basierend auf den gegebenen Kontext-Chunks.

    Args:
        query (str): Die Benutzerfrage
        context_chunks (list[str]): Liste von Texten aus Qdrant
        model (str): OpenAI-Modellname (standardmäßig GPT-4o)

    Returns:
        str: Generierte Antwort
    """
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY was not found in the .env file.")

    client = OpenAI(api_key=api_key)

    # Prompt zusammenbauen
    context = "\n\n".join(context_chunks)
    prompt = f"Answer the following question based on the context:\n\nContext:\n{context}\n\nQuestion: {query}"

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant for scientific questions."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.2
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error during answer generation: {e}")
        return "Error during answer generation."


In [20]:


def main():
    client = get_qdrant_client()

    if not is_qdrant_alive(client):
        print("❌ Qdrant is not reachable. Exiting.")
        return

    pdf_path = "/Users/i589466/Desktop/Datenbanken/Datenbanken/the-illusion-of-thinking (1).pdf"

    print("📄 Loading and processing PDF...")
    chunks = load_pdf_and_chunk(pdf_path, chunk_size=500, overlap=50)
    embeddings = embed_chunks(chunks)
    store_embeddings_in_qdrant(client, "db_Benny", chunks, embeddings)

    if is_qdrant_alive(client):
        query= "What distinguishes a Large Reasoning Model (LRM) from a standard Large Language Model (LLM) in terms of architecture and training objectives?"
        retrieved_chunks = retrieve_similar_chunks(query, client, "db_Benny", top_k=5)
        answer = answer_with_context(query, retrieved_chunks)
        print(f"\n🧠 Answer from GPT-4o:\n{answer}")

if __name__ == "__main__":
    main()

Successfully connected to Qdrant at 152.53.228.53:6333!
📄 Loading and processing PDF...


  client.recreate_collection(


183 Embeddings stored in Qdrant.


  search_result = client.search(



🧠 Answer from GPT-4o:
Large Reasoning Models (LRMs) are specialized variants of Large Language Models (LLMs) that are explicitly designed for reasoning tasks. The key distinctions between LRMs and standard LLMs in terms of architecture and training objectives include:

1. **Specialized "Thinking" Mechanisms**: LRMs incorporate mechanisms such as long Chain-of-Thought (CoT) processes with self-reflection. These mechanisms are intended to enhance the model's ability to perform complex reasoning tasks by simulating a more human-like thought process.

2. **Focus on Reasoning Tasks**: While standard LLMs are generally trained for a broad range of language understanding tasks, LRMs are specifically optimized for reasoning. This involves tailoring their architecture and training to better handle tasks that require logical deduction, problem-solving, and other forms of reasoning.

3. **Performance Regimes**: LRMs are designed to perform differently across tasks of varying complexity. They are