# Populating the Neo4j Graph Database

In [8]:
import os
from langchain_community.graphs import Neo4jGraph

from dotenv import load_dotenv

# Pull settings from a local .env file (if present) into the process environment.
load_dotenv()

# Any variable that is not set comes back from os.getenv as None.
neo4j_url, neo4j_user, neo4j_password = (
    os.getenv(variable)
    for variable in ("NEO4J_CONNECTION_URL", "NEO4J_USER", "NEO4J_PASSWORD")
)

# https://api.python.langchain.com/en/latest/graphs/langchain_community.graphs.neo4j_graph.Neo4jGraph.html
graph = Neo4jGraph(
    url=neo4j_url,
    username=neo4j_user,
    password=neo4j_password,
)

def wipe_graph(graph=graph):
    """Delete every node in ``graph`` together with its relationships.

    Args:
        graph: Object exposing ``query(query=...)``. Defaults to the
            module-level ``graph`` as it was bound when this function
            was defined.
    """
    delete_all_nodes = "MATCH (n) DETACH DELETE n"
    graph.query(query=delete_all_nodes)


#### Reading database and table metadata.

In [105]:
import json

# Load the database/table description that the node-creation cells iterate over.
with open("metadata.json", encoding='utf-8') as file:
    raw_metadata = file.read()
metadata = json.loads(raw_metadata)

# Start from an empty graph before anything is (re)created.
wipe_graph()

#### Creating entities in the Neo4j graph database

In [106]:
# One uniqueness constraint per node label (Table, Column, Database).
# IF NOT EXISTS lets each statement run again without failing on an existing constraint.
uniqueness_constraints = (
    """
CREATE CONSTRAINT table_uniqueness_rule IF NOT EXISTS 
    FOR (tb:Table) REQUIRE tb.table_full_name IS UNIQUE
""",
    """
CREATE CONSTRAINT column_uniqueness_rule IF NOT EXISTS 
    FOR (col:Column) REQUIRE col.column_name IS UNIQUE
""",
    """
CREATE CONSTRAINT database_uniqueness_rule IF NOT EXISTS 
    FOR (db:Database) REQUIRE db.database_name IS UNIQUE
""",
)

for query_string in uniqueness_constraints:
    graph.query(query=query_string)

[]

In [107]:
# The Cypher text is identical for every table, so it is defined once;
# only the parameters change from one table to the next.
query_string = """
    CREATE (tb:Table {
        table_name: $table_name,
        database_name: $database_name,
        table_full_name: $table_full_name,
        table_description: $table_description,
        table_columns: $table_columns,
        table_primary_key: $table_primary_key
        }
    )
    """

for table in metadata['table_metadata']:
    table_name = table['table_name']
    database_name = table['database_name']

    params = dict(
        table_name=table_name,
        database_name=database_name,
        # "<database>.<table>" is the value the Table uniqueness constraint is declared on.
        table_full_name='.'.join((database_name, table_name)),
        table_description=table['table_description'],
        table_columns=[column['column_name'] for column in table['table_columns']],
        table_primary_key=table['table_primary_key'],
    )
    graph.query(query=query_string, params=params)

    print(f"Entity {table_name} created successfully...")

Entity fact_orders created successfully...
Entity dim_order_items created successfully...
Entity dim_sellers created successfully...
Entity dim_products created successfully...
Entity dim_order_reviews created successfully...
Entity dim_order_payments created successfully...
Entity dim_geolocation created successfully...
Entity dim_customers created successfully...


In [108]:
# Create (or refresh) one Database node per entry in the metadata, storing the
# names of the tables that belong to it in `database_tables`.
#
# The table list is computed here and written with a plain SET, so:
#   * a database only lists tables whose own `database_name` matches it,
#   * running the cell again overwrites the list instead of appending the
#     same names a second time,
#   * one query per database is sent instead of one per (database, table) pair.
query_string = """
MERGE (db:Database {database_name: $database_name})
SET db.database_description = $database_description,
    db.database_tables = $database_tables
"""

for database in metadata['database_metadata']:
    params = {
        "database_name": database['database_name'],
        "database_description": database['database_description'],
        "database_tables": [
            table['table_name']
            for table in metadata['table_metadata']
            if table['database_name'] == database['database_name']
        ],
    }

    graph.query(query=query_string, params=params)

In [109]:
# Link every Database node to the Table nodes it contains.
#
# * The database name must match as well as the table name, so a table is
#   never attached to a different database that happens to list the same
#   table name.
# * MERGE (rather than CREATE) keeps a single CONTAINS relationship per pair
#   even when this cell is executed more than once.
query_string = """
    MATCH (tb:Table)
    MATCH (db:Database)
    WHERE tb.database_name = db.database_name
      AND tb.table_name IN db.database_tables
    MERGE (db)-[:CONTAINS]->(tb)
"""

graph.query(query=query_string)

[]

In [110]:
# Column nodes are keyed by column name alone: the first table that mentions a
# column creates the node, every later table only appends itself to `located_at`.
# The statement is the same for every column, so it is defined once.
query_string = """
        MATCH (tb:Table) WHERE tb.table_name = $table_name 
        MERGE (col:Column {column_name: $column_name})
        ON CREATE SET
            col.column_description = $column_description,
            col.data_type = $data_type,
            col.located_at = [$table_name],
            col.is_primary_key_at = [],
            col.is_foreign_key_at = []
        ON MATCH SET
            col.located_at = col.located_at + [tb.table_name]
        """

column_fields = ("column_name", "column_description", "data_type")

for table in metadata['table_metadata']:
    for column in table['table_columns']:
        params = {"table_name": table['table_name']}
        params.update((field, column[field]) for field in column_fields)

        graph.query(query=query_string, params=params)

In [111]:
# Connect every Table node to the Column nodes that list it in `located_at`.
# MERGE (rather than CREATE) keeps a single HAS_COLUMN relationship per
# table/column pair even when this cell is executed more than once, matching
# the way the key relationships are written.
query_string = """
    MATCH (tb:Table)
    MATCH (col:Column)
    WHERE tb.table_name IN col.located_at
    MERGE (tb)-[:HAS_COLUMN]->(col)
"""

graph.query(query=query_string)

[]

In [112]:
# Point each column at every table that names it in `table_primary_key`.
primary_key_links_query = """ 
    MATCH (tb:Table)
    MATCH (col:Column) WHERE col.column_name in tb.table_primary_key
    MERGE (col)-[pk:IS_PRIMARY_KEY]->(tb)
"""

graph.query(primary_key_links_query)

[]

In [68]:
# For every column that is a primary key somewhere, record the tables in which
# it appears WITHOUT being a primary key: those are its foreign-key locations.
#
# All primary-key tables of a column are collected first and removed from
# `located_at` in a single SET. Setting the property once per (column, table)
# pair would overwrite the previous result, leaving the column's other
# primary-key tables in `is_foreign_key_at`.
query_string = """
MATCH (col:Column)-[:IS_PRIMARY_KEY]->(tb:Table)
WITH col, collect(DISTINCT tb.table_name) AS pk_tables
SET col.is_foreign_key_at = apoc.coll.subtract(col.located_at, pk_tables)
"""

graph.query(query=query_string)

[]

In [59]:
# Point each column at every table listed in its `is_foreign_key_at` property.
foreign_key_links_query = """ 
    MATCH (tb:Table)
    MATCH (col:Column) 
    WHERE tb.table_name IN col.is_foreign_key_at
    MERGE (col)-[pk:IS_FOREIGN_KEY]->(tb)
"""

graph.query(foreign_key_links_query)

[]

In [None]:
# Record on every primary-key column the names of the tables it is a key of.
#
# The names are gathered with collect(DISTINCT ...) and written in one SET, so
# executing the cell again produces the same list instead of appending the
# same table names a second time, and a column whose property is missing is
# handled as well. Columns that are not a primary key anywhere are not matched
# and keep their current value.
query_string = """
    MATCH (tb:Table)
    MATCH (col:Column) WHERE col.column_name IN tb.table_primary_key
    WITH col, collect(DISTINCT tb.table_name) AS pk_tables
    SET col.is_primary_key_at = pk_tables
"""

graph.query(query=query_string)

In [None]:
# For columns named in some table's primary key, store in `is_foreign_key_at`
# the APOC "disjunction" of `located_at` and `is_primary_key_at`
# (per the APOC docs: the disjunct set of the two lists).
foreign_key_tables_query = """ 
    MATCH (tb:Table)
    MATCH (col:Column) WHERE col.column_name in tb.table_primary_key
    SET col.is_foreign_key_at = apoc.coll.disjunction(col.located_at, col.is_primary_key_at)
"""

graph.query(foreign_key_tables_query)

In [None]:
# Materialise `is_foreign_key_at` as relationships: one IS_FOREIGN_KEY edge
# from the column to each table named in that list.
foreign_key_edges_query = """ 
    MATCH (tb:Table)
    MATCH (col:Column) WHERE tb.table_name in col.is_foreign_key_at
    MERGE (col)-[:IS_FOREIGN_KEY]->(tb)
"""

graph.query(foreign_key_edges_query)

### Creating Vector Indexes

In [53]:
# One vector index per node label, each on that label's `text_embeddings`
# property, configured for 1536 dimensions and cosine similarity.
# IF NOT EXISTS lets each statement run again without failing on an existing index.
vector_index_statements = (
    """
         CREATE VECTOR INDEX `database_embeddings` IF NOT EXISTS
          FOR (db:Database) ON (db.text_embeddings) 
          OPTIONS { indexConfig: {
            `vector.dimensions`: 1536,
            `vector.similarity_function`: 'cosine'    
         }}
""",
    """
         CREATE VECTOR INDEX `table_embeddings` IF NOT EXISTS
          FOR (tb:Table) ON (tb.text_embeddings) 
          OPTIONS { indexConfig: {
            `vector.dimensions`: 1536,
            `vector.similarity_function`: 'cosine'    
         }}
""",
    """
         CREATE VECTOR INDEX `columns_embeddings` IF NOT EXISTS
          FOR (col:Column) ON (col.text_embeddings) 
          OPTIONS { indexConfig: {
            `vector.dimensions`: 1536,
            `vector.similarity_function`: 'cosine'    
         }}
""",
)

for statement in vector_index_statements:
    graph.query(statement)

[]