<a href="https://colab.research.google.com/github/dgaida/llm_client/blob/master/notebooks/RAGChatbot_groq_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Chatbot using Retrieval Augmented Generation
# Uses Groq Inference Platform to run LLM online

# TODO: benötigt groq API Key in GROQ_API_KEY und HuggingFace Token in HF_TOKEN (für Embedding Modell)
# TODO: erstelle Ordner pdfs und lade ein oder mehrere PDFs in diesen Ordner hoch

# IMPORTANT: run this cell first, then a message appears that you have to restart the session.
# in the notebook click on "restart session". Afterwards run all other cells.
# DO NOT RUN all cells immediately after opening the notebook! Run the first cell only. Then the rest.

# install packages
!apt-get -qq install -y poppler-utils tesseract-ocr libmagic1
!pip install "unstructured[all-docs]" python-magic llama-index==0.12.35 llama-index-vector-stores-chroma llama-index-embeddings-huggingface chromadb pdf2image pytesseract gradio


In [None]:
# install package that lets you use Groq or OpenAI
!git clone https://github.com/dgaida/llm_client.git
%cd llm_client
!pip install .
%cd ..

In [None]:
# --- Import required libraries (standard library, third-party, then local) ---
import sys

import gradio as gr
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llm_client import LLMClient, LLMClientAdapter

# The local helper module `utils` is presumably located in the notebooks/ folder
# of the cloned llm_client repository, so that folder must be importable first.
LLM_CLIENT_NOTEBOOKS_DIR = "/content/llm_client/notebooks"
if LLM_CLIENT_NOTEBOOKS_DIR not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(LLM_CLIENT_NOTEBOOKS_DIR)

import utils

# --- Step 1: Set up PDF file path ---
# Put your PDF files in a folder named "pdfs" in the current directory
PDF_DIR = "pdfs"

In [None]:
# --- Step 2: Load and parse the PDFs ---
# The helper reads the files found in PDF_DIR; judging by its name it relies on
# UnstructuredReader for parsing.
all_documents = utils.read_pdf_files_with_unstructured_reader(PDF_DIR)

print(f"Loaded {len(all_documents)} documents")

# Preview: first 300 characters of (at most) the first three documents
for doc in all_documents[:3]:
    snippet = doc.text[:300]
    print(snippet)

In [None]:
# Documents are cut into small chunks so that retrieval returns focused passages.
# The original author noted 512 / 50 as alternative values for size / overlap.
node_parser = SentenceSplitter(
    chunk_size=256,
    chunk_overlap=0,
)

In [None]:
# Embedding model used to map text into the vector space.
# Candidate models are ranked here: https://huggingface.co/spaces/mteb/leaderboard
# Alternative noted by the author: "sentence-transformers/all-MiniLM-L6-v2"
# NOTE(review): the E5 model card recommends "query: " / "passage: " input
# prefixes - confirm whether they need to be configured for this model.
embed_model = HuggingFaceEmbedding(model_name="intfloat/e5-small-v2")

In [None]:
# LLM client that can use either the OpenAI or the Groq API; Groq is selected here.
# (Presumably reads the key from GROQ_API_KEY, as the TODO in the first cell suggests.)
client = LLMClient(api_choice="groq")

In [None]:
# Create the adapter that wraps the LLM client; it is assigned to
# Settings.llm in a later cell so that LlamaIndex can use it.
adapter = LLMClientAdapter(client=client)

In [None]:
# Register the components globally: LlamaIndex reads this Settings object internally.
Settings.node_parser = node_parser  # chunking of documents
Settings.embed_model = embed_model  # text -> vector embedding
Settings.llm = adapter  # the author's "# None" note suggests None as an alternative value

In [None]:
# --- Step 4: Create vector store and index ---
# Builds the index from the loaded documents; judging by the helper's name the
# vectors are stored in ChromaDB.
index = utils.create_chromadb_vector_store_and_index(all_documents)

In [None]:
# --- Step 5: Create query engine ---
# Called without arguments, so the query engine uses the index's default configuration.
query_engine = index.as_query_engine()

In [None]:
def chat_with_pdf(query, history=None):
    """Answer a question about the loaded PDFs using the RAG query engine.

    The call is delegated to ``utils.safe_query_engine_call`` together with the
    notebook-level ``query_engine``. As described by the notebook author,
    llama_index handles the whole pipeline internally via the Settings object:

    1. the user query is embedded into the vector space,
    2. a semantic search (vector comparison) runs against the vector database,
    3. the text of the most similar vector is passed to the LLM as context,
       together with the query,
    4. the LLM response is returned as a string.

    Args:
        query: The user's question.
        history: Not used by this function; accepted so that the function can
            serve as the ``fn`` callback of ``gr.ChatInterface``.

    Returns:
        The result of ``utils.safe_query_engine_call`` for this query.
    """
    answer = utils.safe_query_engine_call(query_engine, query)
    return answer

In [None]:
# Quick check of the RAG pipeline before starting the UI.
# The question is German for "Which modules are there in the degree programme?"
test_answer = chat_with_pdf("Welche Module gibt es in dem Studiengang?")
print(test_answer)

In [None]:
# --- Step 7: Build Gradio Interface ---
# A simple web UI for asking questions; every message is answered by chat_with_pdf.
example_questions = ["What is this PDF about?", "Summarize the second section."]

chat_ui = gr.ChatInterface(
    fn=chat_with_pdf,
    examples=example_questions,
    title="PDF RAG Chatbot",
    description="Ask questions about the content of your PDF documents.",
    theme="default",
)

# Start the web UI
chat_ui.launch()