## Indexing

In [1]:
import chromadb

# Connection to the Chroma server; change host/port to match your instance.
client = chromadb.HttpClient(host="localhost", port=8000)
# Alternative client that does not talk to the HTTP server above:
# client = chromadb.PersistentClient()

# Name of the collection the cells below write to and read from.
collection_name = "test"

In [2]:
from uuid import uuid4
import os
from langchain_chroma import Chroma

from LawEmbeddings import LawEmbeddings
from LawDocumentLoader import LawDocumentLoader

def index_law(law, collection):
    """Embed the documents of one law and store them in a Chroma collection.

    Reads ``data/{law}.txt`` through ``LawDocumentLoader`` and adds every
    document it yields to ``collection``, each under a freshly generated
    UUID4 id. The Chroma connection is the notebook-level ``client``.

    Args:
        law: File stem of the law text below ``data/`` (e.g. ``'bgb'``).
        collection: Name of the Chroma collection to write to.

    Raises:
        FileNotFoundError: If ``data/{law}.txt`` is not an existing file.

    Note:
        Ids are random, so running this again for the same law presumably
        adds the documents a second time instead of updating them.
    """
    filename = f"data/{law}.txt"
    if not os.path.isfile(filename):
        # Name the actual path so the reader knows which file to generate.
        raise FileNotFoundError(
            f"The file '{filename}' does not exist. "
            "Please make sure to run the converter first."
        )

    lawembeddings = LawEmbeddings()
    vectorstore = Chroma(
        client=client,
        collection_name=collection,
        embedding_function=lawembeddings,
    )

    loader = LawDocumentLoader(filename)
    # Documents are streamed and added one at a time, so the full law is
    # never held in memory at once.
    for doc in loader.lazy_load():
        vectorstore.add_documents(documents=[doc], ids=[str(uuid4())])

In [3]:
# Index the 'bgb' law text (read from data/bgb.txt) into the configured collection.
index_law(law='bgb', collection=collection_name)

### Lookup indexed documents

In [7]:
from LawEmbeddings import LawEmbeddings

def check_indexed_documents(collection):
    """Print how many entries a Chroma collection holds and each entry's metadata.

    Args:
        collection: Name of the Chroma collection to inspect. The connection
            is the notebook-level ``client``.
    """
    vectorstore = Chroma(
        client=client,
        collection_name=collection,
        embedding_function=LawEmbeddings(),
    )

    # Go through the public ``get`` API rather than the private
    # ``_collection`` attribute, and request only the metadata, since that
    # is all that gets printed below.
    metadatas = vectorstore.get(include=["metadatas"])["metadatas"]
    print(len(metadatas))

    for position, metadata in enumerate(metadatas, start=1):
        print(f"Dokument {position}: {metadata}")

# List the metadata of everything stored in the configured collection.
check_indexed_documents(collection=collection_name)

222
Dokument 1: {'law': 'BGB', 'paragraph': '1', 'title': 'Beginn der Rechtsfähigkeit'}
Dokument 2: {'law': 'BGB', 'paragraph': '2', 'title': 'Eintritt der Volljährigkeit'}
Dokument 3: {'law': 'BGB', 'paragraph': '7', 'title': 'Wohnsitz; Begründung und Aufhebung'}
Dokument 4: {'law': 'BGB', 'paragraph': '8', 'title': 'Wohnsitz nicht voll Geschäftsfähiger'}
Dokument 5: {'law': 'BGB', 'paragraph': '9', 'title': 'Wohnsitz eines Soldaten'}
Dokument 6: {'law': 'BGB', 'paragraph': '11', 'title': 'Wohnsitz des Kindes'}
Dokument 7: {'law': 'BGB', 'paragraph': '12', 'title': 'Namensrecht'}
Dokument 8: {'law': 'BGB', 'paragraph': '13', 'title': 'Verbraucher'}
Dokument 9: {'law': 'BGB', 'paragraph': '14', 'title': 'Unternehmer'}
Dokument 10: {'law': 'BGB', 'paragraph': '21', 'title': 'Nicht wirtschaftlicher Verein'}
Dokument 11: {'law': 'BGB', 'paragraph': '22', 'title': 'Wirtschaftlicher Verein'}
Dokument 12: {'law': 'BGB', 'paragraph': '23', 'title': '(weggefallen)'}
Dokument 13: {'law': 'BGB',