In [5]:
import os
from dotenv import load_dotenv

load_dotenv()

langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

In [6]:
# PDF LOADER

from langchain.document_loaders import PyPDFLoader

# Define paths for PDFs
pdf_files = [
    "abus_elektro-seilzug_programm.pdf",
    # "abus_hb-system_programm.pdf",
    # "abus_laufkran_programm.pdf"
]

loaders = [PyPDFLoader(pdf_file) for pdf_file in pdf_files]
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [7]:
# 1. Image Extraction Functions
# This section includes functions for extracting images from PDF files and encoding them to base64.

import os
import base64
import fitz  # PyMuPDF

# Define helper functions for image extraction and encoding
def extract_images_from_pdf(pdf_path, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    doc = fitz.open(pdf_path)
    images = []
    
    for page_num in range(len(doc)):
        page = doc.load_page(page_num)
        image_list = page.get_images(full=True)
        
        for img_index, img in enumerate(image_list):
            xref = img[0]
            base_image = doc.extract_image(xref)
            image_bytes = base_image["image"]
            image_name = f"image_page{page_num + 1}_{img_index}.png"
            image_path = os.path.join(output_folder, image_name)
            
            # Save the image
            with open(image_path, "wb") as img_file:
                img_file.write(image_bytes)
                images.append(image_path)

    return images

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [8]:
# 2. Process PDF Files
# This loop processes each PDF file, extracts text and tables, and images.

from unstructured.partition.pdf import partition_pdf
from nltk.tokenize import sent_tokenize
from unstructured.documents.elements import Table, CompositeElement

# Extract images from PDFs
image_elements = []

# Extract text and tables from PDFs
text_elements = []
table_elements = []

# Process each PDF file
for pdf_file in pdf_files:
    raw_elements = partition_pdf(
        filename=pdf_file,
        chunking_strategy="by_title",
        infer_table_structure=True,
        max_characters=1200,
        new_after_n_chars=1500,
        combine_text_under_n_chars=200,
        strategy="hi_res",
    )
    
    # Separate text and table elements
    for element in raw_elements:
        if isinstance(element, CompositeElement):
            text_elements.append(element)
        elif isinstance(element, Table):
            table_elements.append(element)

# Extract images
output_folder = "./extracted_images"
image_paths = extract_images_from_pdf(pdf_file, output_folder)
image_elements.extend([encode_image(image) for image in image_paths])

# Format text and tables for output
table_elements = [table.text for table in table_elements]
text_elements = [text.text for text in text_elements]

# Split text elements into sentences
split_texts = []
for text_element in text_elements:
    sentences = sent_tokenize(text_element)
    split_texts.extend(sentences)

print(f"\nNumber of table elements: {len(table_elements)}")
print(f"Number of text elements: {len(text_elements)}")
print(f"Number of image elements: {len(image_elements)}")

  from .autonotebook import tqdm as notebook_tqdm



Number of table elements: 5
Number of text elements: 63
Number of image elements: 113


In [9]:
# 3. Text Cleaning
# This function cleans the extracted text to remove noise and irrelevant characters.

import re

# Clean text chunks by removing standalone numbers, isolated letters, and meaningless characters
def clean_text_chunks(text_chunks):
    cleaned_chunks = []
    
    for chunk in text_chunks:
        # 1. Concatenate isolated alphabet characters, e.g., "A B U S" -> "ABUS"
        chunk = re.sub(r'\b([A-Za-zäöüÄÖÜß])(?:\s+([A-Za-zäöüÄÖÜß]))+\b', lambda x: ''.join(x.group(0).split()), chunk)
        
        # 2. Remove spaces within URLs and email addresses
        chunk = re.sub(r'(www\.\s*|https?://\s*|@\s*|\.de\s*|\.com\s*)', lambda x: x.group(0).replace(" ", ""), chunk)
        
        # 3. Normalize whitespace and strip any leading/trailing spaces
        chunk = re.sub(r'\s+', ' ', chunk).strip()
        
        # 4. Append only meaningful text chunks (avoid short or empty chunks)
        if len(chunk) > 2 or re.search(r'[A-Za-zäöüÄÖÜß]{2,}', chunk):
            cleaned_chunks.append(chunk)
    
    return cleaned_chunks

# Apply the cleaning function to remove unimportant text chunks
cleaned_texts = clean_text_chunks(split_texts)

for idx, table_content in enumerate(table_elements):
    print(f"\nTable {idx + 1} content:\n{table_content}")

for idx, chunk in enumerate(cleaned_texts):
    print(f"\nText Chunk {idx + 1} content:\n{chunk}")


Table 1 content:
Modell Seileinscherung Tragf. (t) Hakenweg (m) C (mm) H (mm) GM 5000 4/2 20,0 12 20 30 37 413 985 8/2 40,0 6 10 15 18,5 635 1060 GM 6000 4/2 25,0 12 20 30 37 419 1035 8/2 50,0 6 10 15 18,5 643 1105 GM 7000 4/2 40,0 16 30 45 - 668 1220 6/2 63,0 10,6 20 30 36 897 1218 8/2 80,0 8 15 22,5 27,5 915 1275 10/2 100,0 12 18 22 - 960 1265 12/2 120,0 15 18 - - 1400 1200

Table 2 content:
GM 800.2000 L-20 1.6000. 4. €E 100. 20 [_ Fahrgeschwindigkeit (m/min) Laufrad-Durchmesser [mm] Bauart M ° dellgeneration Hakenweg (mm) Angezogene Strange Laststrange riebwerksgruppe Seilgeschwindigkeit (m/min] Motorausfilhrung (kg) ragfahigkeit Modellgréfe (GM 800) ° Baureihe Bisherige Bezeichnung: GM 820 L6-204.41.06.3.E 9EILEINSCHERUNG DER Neue Bezeichnung (36 - 43 Zeichen): GM 800.2000L-204.41.6000.4.E 100.20

Table 3 content:
Bauart 4/1 - E, D, U Ko “ Bauart 6/1 -D Bauart 8/2 - D Bauart 4/2 -Z Bauart 8/2 - Z Pp oy Bauart 10/2 -Z Bauart 12/2 - Z AP?

Table 4 content:
durchschnittliche Hubgesc

In [10]:
# 4. IMAGE CAPTIONING + TEXT AROUND

import os
import glob
import fitz  # PyMuPDF
import re
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from sentence_transformers import SentenceTransformer

# Load BLIP processor and model from Hugging Face
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Directory containing the images
image_folder = r"C:\Coding\VSCode\chatbot\extracted_images"
pdf_path = r"C:\Coding\VSCode\chatbot\abus_elektro-seilzug_programm.pdf"

# Get a list of all image file paths in the specified directory
image_paths = glob.glob(os.path.join(image_folder, "*.png"))

# Function to extract nearby text for each image based on its position in the PDF
def extract_nearby_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    page_texts = {}
    
    for page_num in range(doc.page_count):
        page = doc[page_num]
        page_texts[page_num + 1] = []
        
        # Extract all text and their bounding boxes on the page
        for block in page.get_text("dict")["blocks"]:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        # Store text and its position
                        page_texts[page_num + 1].append({
                            "text": span["text"],
                            "bbox": span["bbox"]
                        })
    
    doc.close()
    return page_texts

# Extract the text positions and contents from the PDF document
page_text_data = extract_nearby_text_from_pdf(pdf_path)

# Function to find nearby text around an image based on its page and approximate position
def get_nearby_text(image_path):
    # Extract page number from image file name (e.g., "image_page13_1.png" -> page 13)
    match = re.search(r'image_page(\d+)_\d+', os.path.basename(image_path))
    if match:
        page_num = int(match.group(1))
        text_entries = page_text_data.get(page_num, [])
        
        nearby_texts = []
        for entry in text_entries:
            # Simple heuristic: add the text if it’s within a certain range
            if entry["bbox"][1] < 400: 
                nearby_texts.append(entry["text"])

        # Join nearby texts to provide additional context
        return " ".join(nearby_texts)
    
    return ""

# Function to generate a caption for an image using the BLIP model
def generate_caption(image_path):
    try:
        # Open the image and convert to RGB mode to ensure compatibility
        with Image.open(image_path).convert("RGB") as image:
            # Prepare the inputs for unconditional captioning
            inputs = processor(images=image, return_tensors="pt")
            # Generate a caption with a specified maximum length
            outputs = model.generate(**inputs, max_new_tokens=50)
            caption = processor.decode(outputs[0], skip_special_tokens=True)
        return caption
    
    except Exception as e:
        # Handle any errors that arise during image processing
        print(f"Error processing image at {image_path}: {e}")
        return None

In [11]:
# 5. Storing Image_Text Embedding in FAISS

import faiss

image_text_embeddings = []
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Loop over each image path, generate captions, combine with nearby text, and embed
for path in image_paths:
    nearby_text = get_nearby_text(path)
    caption = generate_caption(path)
    
    if caption:
        print(f"\n{path}")
        print(f"Caption: {caption}")
        print(f"Context: {nearby_text}")

        # Embed the combined caption and nearby text
        combined_text = f"{caption}. Context: {nearby_text}" if nearby_text else caption
        embedding = embedding_model.encode(combined_text)
        image_text_embeddings.append(embedding)
    else:
        print(f"Skipping {os.path.basename(path)} due to processing error.")

# Convert embeddings to numpy array for FAISS indexing
image_text_embeddings = np.array(image_text_embeddings).astype('float32')

# Initialize FAISS index
dimension = image_text_embeddings.shape[1]
image_text_faiss_index = faiss.IndexFlatL2(dimension)

# Add embeddings to FAISS index
image_text_faiss_index.add(image_text_embeddings)

# Verify the number of embeddings stored in FAISS
print(f"Number of embeddings stored in FAISS: {image_text_faiss_index.ntotal}")




C:\Coding\VSCode\chatbot\extracted_images\image_page10_0.png
Caption: a man holding a tablet in a factory
Context: ABUS Krane mit ABUControl-Steuerung  können über Fahrprofile auf Ihre Gegeben- heiten angepasst werden. Krane mit zwei  festen Geschwindigkeiten fahren z.B. völlig  anders als frequenzumrichtergesteuerte Kra- ne.  Fahr- und Hubprofile  ermöglichen die  Anpassung der Krane an Ihre aktuelle bzw.  gewünschte Situation. Damit entfällt bei vor- handenen Kranen der Umstellungsaufwand  Ihrer Kranfahrer und sie spüren eher die  gestiegene Produktivität durch die zusätzliche  Krananlage. ABUCONTROL: HEBT KRANE AUF EIN NEUES LEVEL A B U C o n t r o l    |   A B U S  E l e k t r o - S e i l z ü g e Betriebsdaten, Einstellungen, Service-Informationen. Mittels  der modernen Oberfläche KranOS behalten Sie den Kran im  Auge. Kabellos von jedem Laptop oder Tablet mit Browser.  Und die jährlich wiederkehrende Prüfung geht noch ein biss- chen flotter. ABUControl setzt auf bewährte, am frei

The channel dimension is ambiguous. Got image shape (1, 196, 3). Assuming channels are the first dimension.



C:\Coding\VSCode\chatbot\extracted_images\image_page13_1.png
Caption: the ins ins ins ins ins ins ins ins ins ins ins ins ins ins ins ins ins ins ins ins
Context: Laufkrane Laufkrane Hängebahnsysteme Schwenkkrane Elektro-Kettenzüge Elektro-Seilzüge Leichtportalkrane Programmübersicht Flexibles Baukastenprinzip   für individuelle Lösungen Hängebahnsysteme KRANE AUF   EINEM HOHEN  LEVEL  ABUS Halbportalkrane EHPK sind Meister, wenn es darum geht, mit geringen Investitionen   eine möglichst große Fläche zur erreichen und zu versorgen. Dabei ist die statische Belastung   der Hallenkonstruktion gering. Gabelstapler und andere Flurfahrzeuge in der Halle   werden nicht beeinträchtigt. Bewegungsfreiheit wird so zum Produktionsfaktor. Freie Bahn für Produktivität ABUS Halbportalkran EHPK Elektro-Kettenzüge Schwenkkrane Komplettlösungen aus dem Hause ABUS Zu wissen, was am Haken hängt, das ist für viele Kranbetreiber wichtig: Waren abwiegen und   an Kunden berechnen, LKWs beladen, Produktionsan

NameError: name 'np' is not defined

In [None]:
# 6. Storing Text Embedding

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Initialize the sentence transformer for text embeddings
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

text_embeddings = []
for chunk in cleaned_texts:
    embedding = embedding_model.encode(chunk)  # Use sentence-transformers to embed text
    text_embeddings.append(embedding)

# Convert to numpy array for FAISS indexing
text_embeddings = np.array(text_embeddings).astype('float32')

dimension = text_embeddings.shape[1]  # Get the dimension of the embeddings
text_faiss_index = faiss.IndexFlatL2(dimension)

# Add the text embeddings to the FAISS index
text_faiss_index.add(text_embeddings)

# Verify the number of embeddings stored
print(f"Number of embeddings stored in FAISS: {text_faiss_index.ntotal}")

Number of embeddings stored in FAISS: 259


In [None]:
# 7. Combine Embbedings

import numpy as np
import faiss

# Combine image_text_embeddings and text_embeddings into a single array
combined_embeddings = np.vstack((image_text_embeddings, text_embeddings)).astype('float32')

# Initialize a single FAISS index with the combined dimension
dimension = combined_embeddings.shape[1]
combined_faiss_index = faiss.IndexFlatL2(dimension)

# Add combined embeddings to the FAISS index
combined_faiss_index.add(combined_embeddings)
print(f"Total combined embeddings in FAISS: {combined_faiss_index.ntotal}")

Total combined embeddings in FAISS: 371


In [None]:
# 8. Querying FAISS
# This code allows querying the FAISS index to retrieve the nearest neighbors for similar text entries.

query_text = "ABULiner"
query_embedding = embedding_model.encode(query_text).astype('float32')

# Reshape query_embedding to (1, embedding_dim) to match FAISS's expected input
query_embedding = query_embedding.reshape(1, -1)  # (1, n) where n is the embedding dimension

# Perform the search in FAISS
k = 5  # Number of nearest neighbors to retrieve
distances, indices = combined_faiss_index.search(query_embedding, k)

# Output the results
print(f"Top {k} closest embeddings for query '{query_text}':\n")
for idx, (distance, index) in enumerate(zip(distances[0], indices[0])):
    # Ensure the index is valid and within bounds of cleaned_texts
    if index < len(combined_text):
        print(f"Result {idx + 1}: Text Chunk - {combined_text} (Distance: {distance})")
    else:
        print(f"Result {idx + 1}: Invalid index {index} (Distance: {distance})")

Top 5 closest embeddings for query 'Krane':

Result 1: Text Chunk - a white and red alarm clock with a red light. Context: ZUSATZAUSRÜSTUNGEN ABUS Steuereinheit LIS – das intelligente  Herzstück der ABUS Seilzüge   ABUS Seilzüge sind mit einer intelli­genten Hubwerkssteuerung  LIS ausgestattet. Sie ist in zwei Varianten LIS-SE oder LIS-SV  verfügbar. Beide beinhalten eine Überlast­sicherung, die das  Hubwerk sicher vor Überlastung schützt. Während bei der  LIS-SV die Lasterfassung durch einen Dehnungsmeßstreifen-  Signalgeber erfolgt, wird bei der LIS-SE bei laufendem  Motor eine dreiphasige Strom- und Spannungsmessung  vorgenommen. Aus diesen beiden Größen berechnet LIS-SE  die aktuelle Hakenlast. Zur Visualisierung der ausgewerteten  Hakenlast kann optional an beiden Geräten eine Lastanzeige  angeschlossen werden.  Zur  Ermittlung  der  verbrauchten  Nutzungsdauer  sind alle LIS-Steuereinheiten serienmäßig mit einem  Betriebsstundenzähler  ausgestattet.  Für  eine  exakte  Z u s a t 

In [None]:
# 9. Print extracted information
print(f"\nNumber of table elements: {len(table_elements)}")
print(f"Number of text elements: {len(text_elements)}")
print(f"Number of image elements: {len(image_elements)}")

for idx, table_content in enumerate(table_elements):
    print(f"\nTable {idx + 1} content:\n{table_content}")

for idx, chunk in enumerate(cleaned_texts):
    print(f"\nText Chunk {idx + 1} content:\n{chunk}")


Number of table elements: 5
Number of text elements: 63
Number of image elements: 113

Table 1 content:
Modell Seileinscherung Tragf. (t) Hakenweg (m) C (mm) H (mm) GM 5000 4/2 20,0 12 20 30 37 413 985 8/2 40,0 6 10 15 18,5 635 1060 GM 6000 4/2 25,0 12 20 30 37 419 1035 8/2 50,0 6 10 15 18,5 643 1105 GM 7000 4/2 40,0 16 30 45 - 668 1220 6/2 63,0 10,6 20 30 36 897 1218 8/2 80,0 8 15 22,5 27,5 915 1275 10/2 100,0 12 18 22 - 960 1265 12/2 120,0 15 18 - - 1400 1200

Table 2 content:
GM 800.2000 L-20 1.6000. 4. €E 100. 20 [_ Fahrgeschwindigkeit (m/min) Laufrad-Durchmesser [mm] Bauart M ° dellgeneration Hakenweg (mm) Angezogene Strange Laststrange riebwerksgruppe Seilgeschwindigkeit (m/min] Motorausfilhrung (kg) ragfahigkeit Modellgréfe (GM 800) ° Baureihe Bisherige Bezeichnung: GM 820 L6-204.41.06.3.E 9EILEINSCHERUNG DER Neue Bezeichnung (36 - 43 Zeichen): GM 800.2000L-204.41.6000.4.E 100.20

Table 3 content:
Bauart 4/1 - E, D, U Ko “ Bauart 6/1 -D Bauart 8/2 - D Bauart 4/2 -Z Bauart 8/2 -

In [None]:
# 10. Function to Retrieve and Generate Response

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch

# Load a pre-trained language model and tokenizer from Hugging Face
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

# Initialize the summarization pipeline to handle long inputs from retrieved texts
# NO NEED(?)
summarization_pipe = pipeline("summarization", model=model, tokenizer=tokenizer)

def retrieve_and_generate(query_text, top_k=5):
    # Generate the query embedding
    query_embedding = embedding_model.encode(query_text).astype('float32')
    
    # Retrieve the top-k most similar embeddings from FAISS
    distances, indices = text_faiss_index.search(np.array([query_embedding]), top_k)
    
    # Gather retrieved text chunks based on indices
    retrieved_texts = [split_texts[idx] for idx in indices[0]]
    context = " ".join(retrieved_texts)  # Combine retrieved texts into a single context

    # Generate a summary or answer based on the context
    response = summarization_pipe(context, max_length=75, min_length=20, do_sample=False)
    return response[0]['summary_text']

In [None]:
# 11. Example Query and Response

query_text = "What are ABULiner?"
response = retrieve_and_generate(query_text)
print(f"\nGenerated Answer:\n{response}")


Generated Answer:
ABUS hat sich spezialisiert auf flurfreie Hebe- und Fördertechnik. Die Steuerung der Seilzüge erfolgt von der Kransteuerung aus wahlweise einzeln.


In [None]:
# SUMMARIZER, JGN DIPAKE DULU

# import logging

# from sentence_transformers import SentenceTransformer
# from transformers import pipeline
# from langchain.embeddings import OpenAIEmbeddings
# from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer

# # Set logging level to ERROR for transformers
# logging.getLogger("transformers").setLevel(logging.ERROR)

# # Initialize the summarization pipeline
# summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# # Define a context prompt
# context_prompt = "Summarize the technical information on crane operating functions, data and safety from the ABUS Kransysteme GmbH PDF:"

# # Define max length parameters
# max_length = 130 
# max_new_tokens = 50

# # Print the content of the split text chunks and summarize
# summarized_texts = []
# for idx, chunk in enumerate(split_texts):
#     print(f"\nText Chunk {idx + 1} content:\n{chunk}")

#     # Dynamically set max_length based on input length
#     input_length = len(chunk.split())
#     max_length = min(130, input_length // 2)
    
#     # Prepend context prompt to the chunk
#     full_chunk = context_prompt + " " + chunk

#     # Summarize the chunk with context prompt
#     summary = summarizer(full_chunk, max_length=max_length, min_length=30, max_new_tokens=max_new_tokens, do_sample=False)
#     summarized_text = summary[0]['summary_text']
#     summarized_texts.append(summarized_text)
#     print(f"Summary {idx + 1} content:\n{summarized_text}")

# Now generate embeddings for the summarized texts
# embedding_model = OpenAIEmbeddings()
# embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# summarized_embeddings = [embedding_model.embed(text) for text in summarized_texts]

# Print the embeddings (for verification)
# for idx, embedding in enumerate(summarized_embeddings):
#     print(f"\nEmbedding for Summary {idx + 1}:\n{embedding}")

In [None]:
# Semantic Routing
# from sklearn.cluster import KMeans


# def semantic_routing(query_text):
#     if "image" in query_text.lower():
#         return "image"
#     elif "table" in query_text.lower():
#         return "table"
#     return "text"

# # CRAG Re-ranking
# def crag_rerank(query_embedding, retrieved_texts, retrieved_embeddings, k=5):
#     kmeans = KMeans(n_clusters=min(k, len(retrieved_embeddings)))
#     kmeans.fit(retrieved_embeddings)
#     cluster_labels = kmeans.labels_
#     cluster_centers = kmeans.cluster_centers_

#     # Calculate distances from query to each cluster center
#     distances = [np.linalg.norm(query_embedding - center) for center in cluster_centers]
#     sorted_indices = np.argsort(distances)
    
#     # Rank and select closest cluster and aggregate texts
#     aggregated_texts = []
#     for idx in sorted_indices[:k]:
#         cluster_texts = [retrieved_texts[i] for i in range(len(retrieved_texts)) if cluster_labels[i] == idx]
#         aggregated_texts.append(" ".join(cluster_texts))
#     return aggregated_texts

# # Retrieve and Generate with Semantic Routing and CRAG Re-ranking
# def retrieve_and_generate(query_text, top_k=5):
#     # Semantic Routing
#     query_type = semantic_routing(query_text)
#     query_embedding = embedding_model.encode(query_text).astype('float32')

#     if query_type == "text":
#         distances, indices = faiss_index.search(np.array([query_embedding]), top_k)
#         retrieved_texts = [cleaned_texts[i] for i in indices[0]]
#         retrieved_embeddings = [text_embeddings[i] for i in indices[0]]

#         # CRAG re-ranking
#         ranked_texts = crag_rerank(query_embedding, retrieved_texts, retrieved_embeddings, k=top_k)
#         context = " ".join(ranked_texts)

#     elif query_type == "table":
#         # Clean table contents if needed
#         cleaned_tables = clean_text_chunks([table.text for table in table_elements])
#         context = " ".join(cleaned_tables)

#     elif query_type == "image":
#         # Provide descriptions or labels for images
#         context = "Here are the relevant images: " + ", ".join(["Image " + str(i) for i in range(len(image_elements))])

#     # Limit context size to prevent memory overflow
#     if len(context) > 512:
#         context = context[:512]  # Truncate context to 512 characters

#     return context

# # Example query
# query_text = "What are ABULiner?"

# # Call the function
# context = retrieve_and_generate(query_text)

# # Print the output
# print("Generated Context:", context)