In [1]:
# import os
# from dotenv import load_dotenv

# load_dotenv()

# langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
# GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

# os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
# os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

In [2]:
import json
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Input locations: the directory that image paths are resolved against, and
# the JSON content list that is read below.
images_base_dir = r"C:/Coding/VSCode/chatbot/output/abus_elektro-seilzug_programm/auto"
json_file_path = r"C:/Coding/VSCode/chatbot/output/abus_elektro-seilzug_programm/auto/abus_elektro-seilzug_programm_content_list.json"

# Read the whole file as UTF-8 text and parse it into Python objects.
with open(json_file_path, mode="r", encoding="utf-8") as content_file:
    content_list = json.loads(content_file.read())

# Build three parallel lists, one slot per indexed entry:
#   texts[i]        - string that gets embedded (body text or image caption)
#   page_indexes[i] - the entry's "page_idx" value (None when absent)
#   image_paths[i]  - normalized image path for image entries, None for text
texts = []
page_indexes = []
image_paths = []

for item in content_list:
    item_type = item.get("type")
    page_idx = item.get("page_idx")

    if item_type == "text":
        # Tolerate a missing or null "text" field instead of raising.
        text = (item.get("text") or "").strip()
        if not text:
            continue
        texts.append(text)
        page_indexes.append(page_idx)
        image_paths.append(None)
    elif item_type == "image":
        img_path = item.get("img_path")
        if not img_path:
            # Without a file reference the entry cannot be reported as an
            # image hit, so it is left out of the index.
            continue
        # `or []` also covers an explicit null caption list.
        caption = " ".join(item.get("img_caption") or []).strip() or "No caption available"
        texts.append(caption)
        page_indexes.append(page_idx)
        # Resolve the image path against the base directory, then normalize it.
        image_paths.append(os.path.normpath(os.path.join(images_base_dir, img_path)))

# search() indexes all three lists with the same position, so they must stay aligned.
assert len(texts) == len(page_indexes) == len(image_paths)

# Initialize embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Fail with a clear message rather than an IndexError on `embeddings.shape[1]`
# when the content list produced nothing to index.
if not texts:
    raise ValueError("No text or image entries found to index.")

# Compute embeddings. FAISS operates on float32, C-contiguous matrices, so the
# layout is pinned here (this is a no-op when the array already qualifies).
embeddings = np.ascontiguousarray(
    model.encode(texts, convert_to_numpy=True), dtype="float32"
)

# Create and populate a flat (exhaustive) L2 index; row i of `embeddings`
# corresponds to texts[i].
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def search(query, top_k=5):
    """Print the indexed entries closest to ``query``.

    The query is embedded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``. Every hit is printed with its page index, its
    text (or image path and caption for image entries) and its distance.

    Args:
        query: Text to search for.
        top_k: Number of nearest neighbours requested from the index.

    Returns:
        None. All results are written to stdout.
    """
    # Encode as a one-element batch so the result has one row per query, and
    # cast to float32 before handing it to the index.
    query_embedding = model.encode([query]).astype("float32")

    # Search in the FAISS index
    distances, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with label -1 when fewer than top_k vectors are
    # available. Checking only `idx < len(texts)` would let -1 through, and
    # Python's negative indexing would then report the last corpus entry as a
    # hit, so the lower bound is checked as well.
    hits = [
        (position, idx)
        for position, idx in enumerate(indices[0])
        if 0 <= idx < len(texts)
    ]

    # Display the results
    print(f"\nTop {top_k} results for the query: '{query}'\n")
    if not hits:
        print("No relevant context found for the query.")
        return

    for position, idx in hits:
        page_idx = page_indexes[idx]
        text = texts[idx]
        image_path = image_paths[idx]
        print(f"Result {position + 1}:")
        print(f"Page Index: {page_idx}")
        if image_path:
            # Image entries carry their caption in `texts`.
            print(f"Image Path: {image_path}")
            print(f"Caption: {text}")
        else:
            print(f"Text: {text}")
        print(f"Distance: {distances[0][position]:.4f}")
        print("-" * 50)

# Run two sample lookups against the index, in this order.
for example_query in (
    "Schnellsteckverbindung",
    "ABUCONTROL: HEBT KRANE AUF EIN NEUES LEVEL",
):
    search(example_query)


Top 5 results for the query: 'Schnellsteckverbindung'

Result 1:
Page Index: 3
Text: Schnellsteckverbindung
Distance: 0.0000
--------------------------------------------------
Result 2:
Page Index: 3
Text: Durch die bereits vorinstallierten ABUS Schnellsteckverbindungen werden Montage- und Wartungsarbeiten auf ein Minimum beschränkt. Mit wenigen Handgriffen lassen sich elektrische Verbindungen herstellen oder lösen. Ein weiteres Plus: Verwechslungen beim Anschließen sind unmöglich.
Distance: 0.9597
--------------------------------------------------
Result 3:
Page Index: 3
Text: Sicherheitsbremse
Distance: 1.0576
--------------------------------------------------
Result 4:
Page Index: 4
Text: Schrägverzahnte Präzisions-Flachgetriebe in Leichtmetallgehäusen mit einsatzgehärteten Verzahnungen, hochwertiger Oberflächenbehandlung und Lebensdauer- Ölschmierung sorgen für hohe Sicherheit und leisen Lauf bei einem Minimum an Wartungsaufwand.
Distance: 1.0991
----------------------------------