# Setup Neo4j Database for Processor Example

Run this notebook to initialize the database for the processor application. Set the parameters below to match the values you set up in the .env file in the /config folder (see README.MD).

In [17]:
# Install the Neo4j Python driver used below to connect to the database.
%pip install neo4j
# Install passlib and bcrypt, used below to hash the default user's password.
%pip install passlib bcrypt



Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [18]:
from neo4j import GraphDatabase
from neo4j.exceptions import Neo4jError
from passlib.context import CryptContext
from uuid import uuid4

In [19]:
# Configuration Parameters
# Adjust these to match the values in the .env file in the /config folder.

# -- Neo4j connection --
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "password"  # Change this to your Neo4j password

# -- Default user created by initialize_db() --
DEFAULT_USER_UUID = "00000000-0000-0000-0000-000000000000"  # Default user UUID
DEFAULT_USER_USERNAME = "test"  # Default user's username
DEFAULT_USER_NAME = "test"  # Default user's name
DEFAULT_USER_EMAIL = "test@user.com"  # Default user email
DEFAULT_USER_PASSWORD = "password"  # Plain-text default password; it is bcrypt-hashed before being stored

# -- Vector indexes --
EMBEDDING_DIMENSION = 1536  # Example embedding dimension, adjust as needed


In [21]:
# Neo4j driver shared by the cells below; built from the connection
# parameters defined in the configuration cell.
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USERNAME, NEO4J_PASSWORD),
)

# passlib context used to produce the bcrypt hash of the default user's password.
pwd_context = CryptContext(
    schemes=["bcrypt"],
    deprecated="auto",
)


In [22]:
def _run_init_step(session, messages, query, parameters=None, *,
                   exists_code, exists_message, error_label):
    """Run one initialization statement and record any Neo4j error as a message.

    Args:
        session: Open Neo4j session used to run the statement.
        messages: List that collects warning/error strings; appended to in place.
        query: Cypher statement to execute.
        parameters: Optional dict of query parameters.
        exists_code: Neo4j error code that means "this already exists".
        exists_message: Message appended when ``exists_code`` is returned.
        error_label: Prefix of the message appended for any other Neo4j error.
    """
    try:
        # consume() makes the driver fetch the full outcome of this statement
        # now, so a failure is reported for this step rather than surfacing
        # while a later statement runs (or being dropped at session close).
        session.run(query, parameters).consume()
    except Neo4jError as e:
        if e.code == exists_code:
            messages.append(exists_message)
        else:
            messages.append(f"{error_label}: {str(e)}")


def initialize_db():
    """Create the constraints, indexes and default user for the processor app.

    Every step is attempted independently: a Neo4j error in one step is
    recorded as a message and the remaining steps still run. Errors that are
    not ``Neo4jError`` (for example a failure while hashing the password)
    propagate to the caller.

    Returns:
        dict: ``{"status": "Database initialized successfully"}`` when no step
        reported anything, otherwise
        ``{"status": "Database initialized with warnings", "messages": [...]}``
        where ``messages`` lists "already exists" notices and error texts.
    """
    already_exists = "Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists"
    messages = []

    with driver.session() as session:
        # Create UUID constraint for User
        _run_init_step(
            session, messages,
            "CREATE CONSTRAINT unique_user_uuid FOR (u:User) REQUIRE u.uuid IS UNIQUE",
            exists_code=already_exists,
            exists_message="UUID constraint for User already exists.",
            error_label="Error creating UUID constraint for User",
        )

        # Create Email constraint for User
        _run_init_step(
            session, messages,
            "CREATE CONSTRAINT unique_user_email FOR (u:User) REQUIRE u.email IS UNIQUE",
            exists_code=already_exists,
            exists_message="Email constraint for User already exists.",
            error_label="Error creating Email constraint for User",
        )

        # Create default user; only the bcrypt hash of the password is stored.
        hashed_password = pwd_context.hash(DEFAULT_USER_PASSWORD)
        _run_init_step(
            session, messages,
            "CREATE (u:User {uuid: $uuid, username: $username, email: $email, name: $name, password: $password, disabled: $disabled, datecreated: datetime()})",
            {"uuid": str(DEFAULT_USER_UUID), "username": DEFAULT_USER_USERNAME, "email": DEFAULT_USER_EMAIL, "name": DEFAULT_USER_NAME, "password": hashed_password, "disabled": False},
            # A uniqueness violation here means the uuid/email is already taken.
            exists_code="Neo.ClientError.Schema.ConstraintValidationFailed",
            exists_message="Default user already exists.",
            error_label="Error creating default user",
        )

        # Create document uuid constraint
        # (errors are collected like every other step instead of being raised)
        _run_init_step(
            session, messages,
            "CREATE CONSTRAINT document_unique_uuid FOR (u:Document) REQUIRE u.uuid IS UNIQUE",
            exists_code=already_exists,
            exists_message="UUID constraint for Document already exists.",
            error_label="Error creating UUID constraint for Document",
        )

        # Create index for name on Document
        _run_init_step(
            session, messages,
            "CREATE INDEX document_name FOR (n:Document) ON (n.name)",
            exists_code=already_exists,
            exists_message="Name index for Document already exists.",
            error_label="Error creating name index for Document",
        )

        # Create full text index for Document
        _run_init_step(
            session, messages,
            "CREATE FULLTEXT INDEX titlesAndDescriptions FOR (n:Document) ON EACH [n.name, n.summary, n.text]",
            exists_code=already_exists,
            exists_message="Full text index for Document already exists.",
            error_label="Error creating full text index for Document",
        )

        # Create page uuid constraint
        _run_init_step(
            session, messages,
            "CREATE CONSTRAINT page_unique_uuid FOR (u:Page) REQUIRE u.uuid IS UNIQUE",
            exists_code=already_exists,
            exists_message="UUID constraint for Page already exists.",
            error_label="Error creating UUID constraint for Page",
        )

        # Create full text index for Page
        _run_init_step(
            session, messages,
            "CREATE FULLTEXT INDEX pageNameAndText FOR (n:Page) ON EACH [n.name, n.summary, n.text]",
            exists_code=already_exists,
            exists_message="Full text index for Page already exists.",
            error_label="Error creating full text index for Page",
        )

        # Create child uuid constraint
        _run_init_step(
            session, messages,
            "CREATE CONSTRAINT child_unique_uuid FOR (u:Child) REQUIRE u.uuid IS UNIQUE",
            exists_code=already_exists,
            exists_message="UUID constraint for Child already exists.",
            error_label="Error creating UUID constraint for Child",
        )

        # Create full text index for Child
        _run_init_step(
            session, messages,
            "CREATE FULLTEXT INDEX childNameAndText FOR (n:Child) ON EACH [n.name, n.summary, n.text]",
            exists_code=already_exists,
            exists_message="Full text index for Child already exists.",
            error_label="Error creating full text index for Child",
        )

        # Create vector indexes
        vector_params = {"dimension": EMBEDDING_DIMENSION}

        # Create vector index for child
        _run_init_step(
            session, messages,
            "CALL db.index.vector.createNodeIndex('parent_document', 'Child', 'embedding', $dimension, 'cosine')",
            vector_params,
            exists_code=already_exists,
            exists_message="Vector index for parent_document already exists.",
            error_label="Error creating vector index for parent_document",
        )

        # Create vector index for pages
        _run_init_step(
            session, messages,
            "CALL db.index.vector.createNodeIndex('typical_rag', "
            "'Page', 'embedding', $dimension, 'cosine')",
            vector_params,
            exists_code=already_exists,
            exists_message="Vector index for typical_rag already exists.",
            error_label="Error creating typical_rag",
        )

        # Create vector index for questions
        _run_init_step(
            session, messages,
            "CALL db.index.vector.createNodeIndex('hypothetical_questions', "
            "'Question', 'embedding', $dimension, 'cosine')",
            vector_params,
            exists_code=already_exists,
            exists_message="Vector index for hypothetical_questions already exists.",
            error_label="Error creating vector index for hypothetical_questions",
        )

        # Create vector index for summaries
        _run_init_step(
            session, messages,
            "CALL db.index.vector.createNodeIndex('summary', "
            "'Summary', 'embedding', $dimension, 'cosine')",
            vector_params,
            exists_code=already_exists,
            exists_message="Vector index for summary exists.",
            error_label="Error creating vector index for summary",
        )

        # TODO: Add more initialization logic, like creating other indexes or constraints.

    if messages:
        return {"status": "Database initialized with warnings", "messages": messages}
    else:
        return {"status": "Database initialized successfully"}


In [23]:
# Run the database initialization once and keep the returned status dict.
initialize_db_result = initialize_db()
# Bare last expression so the notebook displays the result as the cell output.
initialize_db_result

{'status': 'Database initialized successfully'}