In [13]:
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector
from index_with_ids import index_with_ids
from custom_sql_record_manager import CustomSQLRecordManager
from database import COLLECTION_NAME, CONNECTION_STRING

# Demo: index two documents through `index_with_ids` with the custom record
# manager, then index the identical batch again.

# Metadata key naming each document's origin. It is defined once because the
# same key must be used in the document metadata and as `source_id_key`.
SOURCE_ID_KEY = "source"

documents = [
    Document(page_content="Document 111 content", metadata={SOURCE_ID_KEY: "source_11"}),
    Document(page_content="Document 311 content", metadata={SOURCE_ID_KEY: "source_31"}),
]

# The record-manager namespace reuses the vector store's collection name.
namespace = COLLECTION_NAME
# NOTE(review): no create_schema() call here -- presumably the tracking table
# already exists or CustomSQLRecordManager creates it itself; confirm.
record_manager = CustomSQLRecordManager(namespace, db_url=CONNECTION_STRING)

embeddings = OpenAIEmbeddings()
vectorstore = PGVector(
    embeddings=embeddings,
    collection_name=COLLECTION_NAME,
    connection=CONNECTION_STRING,
    use_jsonb=True,
)

# First pass, incremental cleanup. In the recorded run this reports two inserts
# and two deletions; presumably the deleted keys are older records for the same
# sources left behind by a previous run against this namespace -- confirm.
indexing_result = index_with_ids(
    documents,
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key=SOURCE_ID_KEY,
)
print("Initial Indexing result:", indexing_result)

# Second pass, same batch with full cleanup. In the recorded run both documents
# are reported as skipped and nothing is deleted.
indexing_result = index_with_ids(
    documents,
    record_manager,
    vectorstore,
    cleanup="full",
    source_id_key=SOURCE_ID_KEY,
)
print("Second Indexing result:", indexing_result)

# Disabled follow-up experiment: re-index a different batch for the same two
# sources with full cleanup. Uncomment to run it.
#
# documents = [
#     # Document(page_content="Document 11 content", metadata={"source": "source_11"}),
#     Document(page_content="Document 32 content", metadata={"source": "source_31"}),
#     Document(page_content="Document 22 content", metadata={"source": "source_11"}),
#     Document(page_content="Document 44 content", metadata={"source": "source_31"}),
# ]
#
# indexing_result = index_with_ids(documents, record_manager, vectorstore, cleanup="full", source_id_key="source")
# print("After Deletion Indexing result:", indexing_result)


Initial Indexing result: {'status': 'success', 'ids': [{'key': '3dade02b-1f2e-5b0b-b835-de895ee38d06', 'operation': 'INS'}, {'key': '7da5d2fd-1437-57de-b3e8-309f4a053eac', 'operation': 'INS'}, {'key': 'da8ec9e5-3c8d-5f58-ae34-1c8b584a6688', 'operation': 'DEL'}, {'key': 'a4801c03-4797-5a1b-b3f1-4cbba079a63f', 'operation': 'DEL'}], 'results': [{'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 2}]}
Second Indexing result: {'status': 'success', 'ids': [{'key': '3dade02b-1f2e-5b0b-b835-de895ee38d06', 'operation': 'SKIP'}, {'key': '7da5d2fd-1437-57de-b3e8-309f4a053eac', 'operation': 'SKIP'}], 'results': [{'num_added': 0, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 0}]}


In [12]:
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector
from index_with_ids import index_with_ids
from custom_sql_record_manager import CustomSQLRecordManager
from database import COLLECTION_NAME, CONNECTION_STRING
from langchain.indexes import SQLRecordManager, index

# Baseline demo: the same two-pass indexing flow using LangChain's stock
# `index` function and `SQLRecordManager`.

# Metadata key naming each document's origin. It is defined once because the
# same key must be used in the document metadata and as `source_id_key`.
SOURCE_ID_KEY = "source"

documents = [
    Document(page_content="Document 11 content", metadata={SOURCE_ID_KEY: "source_11"}),
    Document(page_content="Document 31 content", metadata={SOURCE_ID_KEY: "source_31"}),
]

# The record-manager namespace reuses the vector store's collection name.
namespace = COLLECTION_NAME
record_manager = SQLRecordManager(namespace, db_url=CONNECTION_STRING)
# Make sure the record-manager table exists so the cell also works against a
# fresh database; this does nothing when the table is already there.
record_manager.create_schema()

embeddings = OpenAIEmbeddings()
vectorstore = PGVector(
    embeddings=embeddings,
    collection_name=COLLECTION_NAME,
    connection=CONNECTION_STRING,
    use_jsonb=True,
)

# First pass, incremental cleanup. In the recorded run this reports two added
# and two deleted; presumably the deletions are older records for the same
# sources left behind by a previous run against this namespace -- confirm.
indexing_result = index(
    documents,
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key=SOURCE_ID_KEY,
)
print("Initial Indexing result:", indexing_result)

# Second pass, same batch with full cleanup. In the recorded run both documents
# are reported as skipped and nothing is deleted.
indexing_result = index(
    documents,
    record_manager,
    vectorstore,
    cleanup="full",
    source_id_key=SOURCE_ID_KEY,
)
print("Second Indexing result:", indexing_result)

# Disabled follow-up experiment: re-index a different batch for the same two
# sources with full cleanup. Uncomment to run it.
#
# documents = [
#     # Document(page_content="Document 11 content", metadata={"source": "source_11"}),
#     Document(page_content="Document 31 content", metadata={"source": "source_31"}),
#     Document(page_content="Document 22 content", metadata={"source": "source_11"}),
#     Document(page_content="Document 44 content", metadata={"source": "source_31"}),
# ]
#
# indexing_result = index(documents, record_manager, vectorstore, cleanup="full", source_id_key="source")
# print("After Deletion Indexing result:", indexing_result)


Initial Indexing result: {'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 2}
Second Indexing result: {'num_added': 0, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 0}
