#### Connecting to PostgreSQL

In [None]:
from pathlib import Path

In [None]:
from os import getenv
from dotenv import load_dotenv, find_dotenv

In [None]:
from urllib.parse import quote

In [None]:
# Load variables from a local .env file (if one is found) into the environment.
load_dotenv()

# Connection settings for the local PostgreSQL instance.
database_user = "postgres"
database_password = getenv('POSTGRES_PASSWORD')
if database_password is None:
    # Fail here with an actionable message instead of an opaque TypeError
    # later, when the password is percent-encoded into the connection URI.
    raise RuntimeError(
        "POSTGRES_PASSWORD is not set; define it in the environment or in a .env file."
    )
database_host = "localhost"  # getenv('POSTGRES_HOST')
database_port = 5434
database_name =  "postgres" # getenv('POSTGRES_DB')

In [None]:
# Percent-encode the credentials with safe='' so that every reserved character
# (including '/', which quote() leaves untouched by default) is escaped and
# cannot be mistaken for a URI delimiter.
postgres_uri = (
    f'postgresql://{quote(database_user, safe="")}:{quote(database_password, safe="")}'
    f'@{database_host}:{database_port}/{database_name}'
)

In [None]:
from  psycopg import  connect


In [None]:
# Open the PostgreSQL session from the URI built above, in autocommit mode.
database_connection = connect(postgres_uri, autocommit=True)

In [None]:
from src.rag.components.shared.databases.postgres import PostgresVectorDBClient

In [None]:
# Wrap the open connection in the project's vector-DB client.
# NOTE(review): the namespace appears to prefix table names (the recorded
# outputs mention "my_documents_nodes") — confirm against the client.
database_client = PostgresVectorDBClient(connection=database_connection, namespace='my_documents')

## Need to create the document table
## Create the chunk table
### Write and work on the insertion script.

In [None]:
from src.rag.schemas.document import Document, Node

In [None]:
from psycopg import Connection, sql
from psycopg.pq import TransactionStatus

In [None]:
# Create the parent `documents` table from the Document model's SQL schema.
# if_not_exists=True is passed, presumably so the cell can be re-run safely.
database_client.create_table(name='documents', schema=Document.to_sql_schema(), if_not_exists=True)

In [None]:
# Embedding dimension handed to Node.to_sql_schema when the nodes table is created.
embedding_size = 1024

In [None]:
# Inspect the table name the client resolves for "documents".
# NOTE(review): `_full_table_name` is underscore-private — exploratory use only.
database_client._full_table_name("documents")

In [None]:
# Create the `nodes` table; its schema is built for `embedding_size`-dimensional embeddings.
# NOTE(review): table_prefix repeats the client's 'my_documents' namespace — keep the two in sync.
database_client.create_table(
    name='nodes',
    schema=Node.to_sql_schema(
        embedding_dimension=embedding_size,
        table_prefix="my_documents",
    ),
    if_not_exists=True,
)

In [None]:
from src.rag.components.shared.io import IOManager

In [117]:
# Directory, resolved against the current working directory, that the IOManager reads from and writes to.
document_with_embedding_path = Path.cwd() / "datasets" / "parsed_documents_with_embeddings"

In [118]:
# The same directory is used as both the input and the output location.
io_manager = IOManager(
    input_document_path=document_with_embedding_path,
    output_path=document_with_embedding_path,
)

In [119]:
# Load a small slice of nodes (start_index=11, end_index=20) to try the insertion path on.
# NOTE(review): whether end_index is inclusive depends on IOManager — confirm.
test_nodes = io_manager.load_nodes_document(start_index=11, end_index=20)

In [120]:
# Peek at the document attached to the first loaded node, dumped as a dict.
# NOTE(review): the saved output exposes a real local file path — clear outputs before sharing.
test_nodes[0].document.model_dump()

{'doc_id': 'd106a8d1-c062-4a70-abde-54cff8a2bb74',
 'file_path': '/Users/esp.py/Documents/Visa-Documents/Murhabazi_Buzina_certificate_registration.pdf',
 'filename': 'Murhabazi_Buzina_certificate_registration.pdf',
 'num_pages': 1,
 'coordinate_system': 'BOTTOM-LEFT',
 'table_parsing_kwargs': None,
 'last_modified_date': datetime.datetime(2021, 12, 17, 15, 36, 18),
 'last_accessed_date': datetime.datetime(2023, 6, 20, 14, 10, 0, 893355),
 'creation_date': datetime.datetime(2023, 6, 20, 13, 11, 54, 752321),
 'file_size': 550464,
 'object': 'ingest.document',
 'doc_metadata': None}

In [None]:
# Turn every loaded node into its SQL insert payload ("my_documents" is presumably the table prefix).
test_nodes_sql = [node.to_sql_insert("my_documents") for node in test_nodes]

In [None]:
from uuid import uuid4

In [None]:
def get_connection():
    """Placeholder for a database connection factory.

    No behaviour has been defined for this helper yet; the explicit body keeps
    the cell syntactically valid until the implementation is written.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("get_connection() has not been implemented yet")

In [None]:
# NOTE(review): `documents` is not assigned in any visible cell — this relies on hidden kernel state; confirm its origin.
len(documents)

In [None]:
from typing import Dict, List

In [None]:
# Look up the "documents" row by doc_id, creating it if absent — going by the method name; confirm against the client.
# NOTE(review): `document_dict` is not assigned in any visible cell (possibly test_nodes[0].document.model_dump()) — verify.
document = database_client.find_by_id_or_create("documents", document_dict, id_field="doc_id")

In [None]:
# Display the value returned by find_by_id_or_create.
document

For insertion: 

- This guide recommends:
    - removing indexes
    - removing foreign keys, then inserting the data and adding them back once the data is inserted.
    https://www.postgresql.org/docs/current/populate.html

data

In [None]:
# Insert all prepared node payloads in one call, requesting the node_id column back.
data_insert = database_client.bulk_insert(table_name="nodes", data=test_nodes_sql, returning=["node_id"])

In [None]:
# Display what bulk_insert returned for the requested "node_id" column.
data_insert

In [None]:
# NOTE(review): `columns` is not assigned in any visible cell — this relies on hidden kernel state; confirm its origin.
len(columns)

In [104]:
# Link nodes.document_id to documents.doc_id.
# NOTE(review): the recorded run failed with ForeignKeyViolation because a node's document_id
# had no matching row in my_documents_documents — insert the parent documents first.
database_client.add_foreign_key_to_table(
    table_name="nodes",
    column_name="document_id",
    foreign_table="documents",
    foreign_column="doc_id",
    if_not_exists=False,
)

ForeignKeyViolation: insert or update on table "my_documents_nodes" violates foreign key constraint "my_documents_nodes_document_id_fk"
DETAIL:  Key (document_id)=(ed5c8675-eed5-4fec-a7d8-9bd831e6fc82) is not present in table "my_documents_documents".

In [None]:
# Build the index on nodes.embedding; "Using vchordrq" is forwarded as the index configuration.
database_client.create_index(
    table_name="nodes", column_name="embedding",
    index_config="Using vchordrq", if_not_exists=True,
)

the index query is:  CREATE INDEX IF NOT EXISTS "my_documents_nodes_embedding_index" ON "my_documents_nodes"  Using vchordrq ("embedding" vector_l2_ops)


In [110]:
# Empty both tables within the client's transaction context.
# Child rows (nodes) are removed before their parent documents so the delete
# cannot trip a nodes.document_id -> documents.doc_id foreign key.
with database_client._transaction() as cursor:
    cursor.execute("delete from my_documents_nodes")
    cursor.execute("delete from my_documents_documents")