In [1]:
import json
import os
import time
from urllib.parse import quote_plus

import google.auth
from dotenv import dotenv_values
from google.cloud import bigquery
from neo4j import GraphDatabase

from main import NodeImportBenchmark, NodeQueryBenchmark
from base.operations import NoSQLKnowledgeGraph
from databases.firestore_kg import FirestoreKG
from databases.n4j import AuraKG
from databases.mdb import MongoKG
from datamodel.data_model import NodeData, EdgeData, CommunityData

  from .autonotebook import tqdm as notebook_tqdm


#### Setting env and global variables

In [2]:
# Parse the .env file one directory above the notebook into a dict-like object.
secrets = dotenv_values("../.env")

# Build Google credentials from the service-account file named in the .env;
# the second element of the returned pair is discarded.
credentials, _ = google.auth.load_credentials_from_file(
    secrets["GCP_CREDENTIAL_FILE"]
)

## Firestore vs. MongoDB vs. AuraDB (Neo4j) Knowledge Graph latency comparison

#### Define Knowledge Graph DB Interface Options

In [3]:
# Firestore-backed knowledge graph; all identifiers are read from the .env file.
fskg = FirestoreKG(gcp_credential_file=secrets["GCP_CREDENTIAL_FILE"],
                   gcp_project_id=str(secrets["GCP_PROJECT_ID"]),
                   firestore_db_id=str(secrets["WIKIDATA_FS_DB"]),
                   node_collection_id=str(secrets["NODE_COLL_ID"]),
                   edges_collection_id=str(secrets["EDGES_COLL_ID"]),
                   community_collection_id=str(
                       secrets["COMM_COLL_ID"])
                   )

# Neo4j AuraDB-backed knowledge graph.
aura_kg = AuraKG(uri=str(secrets["NEO4J_URI"]),
                 auth=(str(secrets["NEO4J_USERNAME"]),
                       str(secrets["NEO4J_PASSWORD"]))
                 )

# MongoDB connection string.
# The user name and password are percent-encoded before being placed in the
# URI so that reserved characters (e.g. "@", ":", "/", "%") cannot corrupt it.
# The .env file is therefore expected to hold the raw, unencoded values.
mdb_username = str(secrets["MDB_USERNAME"])
mdb_password = str(secrets["MDB_PASSWORD"])
mdb_cluster = str(secrets["MDB_CLUSTER"])
# NOTE(review): the cluster host is hard-coded while MDB_CLUSTER only feeds
# the appName option — confirm this is intended.
mdb_uri = f"mongodb+srv://{quote_plus(mdb_username)}:{quote_plus(mdb_password)}@cluster0.pjx3w.mongodb.net/?retryWrites=true&w=majority&appName={mdb_cluster}"

# MongoDB-backed knowledge graph; it reuses the node/edge/community collection
# ids that the Firestore option is configured with.
mkg = MongoKG(
    mdb_uri=mdb_uri,
    mdb_db_id=str(secrets["MDB_DB_ID"]),
    node_coll_id=str(secrets["NODE_COLL_ID"]),
    edges_coll_id=str(secrets["EDGES_COLL_ID"]),
    community_collection_id=str(secrets["COMM_COLL_ID"])
)

Pinged your deployment. You successfully connected to MongoDB!


### Importing Nodes Comparison

#### Fetch graph data from BigQuery

In [4]:
# Split `import_lim` rows evenly across Cloud Run tasks. When the two
# environment variables are unset, the defaults describe a single task with
# index 0, which then receives all `import_lim` rows.
import_lim = 100
task_count = int(os.getenv('CLOUD_RUN_TASK_COUNT', '1'))
task_index = int(os.getenv('CLOUD_RUN_TASK_INDEX', '0'))
rows_per_task = import_lim // task_count
# 302300 is a fixed starting row; each task's slice begins
# `task_index * rows_per_task` rows after it.
offset = 302300 + rows_per_task * task_index

print(f'$$$$ Task Index {task_index}, Task Count {task_count}, Offset {offset}')

# Fetch Node data from BigQuery
client = bigquery.Client(
    project=str(secrets["GCP_PROJECT_ID"]),
    credentials=credentials,
)

# NOTE(review): LIMIT/OFFSET is used without ORDER BY, so the slice each task
# receives may not be stable between runs — confirm this is acceptable.
query_job = client.query(
    "SELECT * FROM poerschmann-sem-search.wikidata_kg.entity_doc_alias_joined "
    f"LIMIT {rows_per_task} OFFSET {offset}"
)

$$$$ Task Index 0, Task Count 1, Offset 302300


#### Run Node Import Benchmark

In [5]:
# Run the "Node Import" benchmark against all three knowledge-graph backends,
# feeding each the rows returned by the BigQuery job.
add_nodes_testing = NodeImportBenchmark(
    benchmark_name="Node Import",
    import_lim=import_lim,
    options_dict={
        "Firestore": fskg,
        "Mongo": mkg,
        "Aura": aura_kg,
    },
)
add_nodes_testing(records=query_job)

$$$$ Starting Benchmark Node Import with options: ['Firestore', 'Mongo', 'Aura'] $$$$
Firestore time for 100 Node Import: 3.320274591445923
Mongo time for 100 Node Import: 2.5162739753723145
Aura time for 100 Node Import: 1.850555419921875
hEllO wOrlD!


### Querying Nodes Comparison

In [7]:
# Run the "Node Query" benchmark against the same three backends, using the
# same BigQuery rows as the source of records.
query_nodes_testing = NodeQueryBenchmark(
    benchmark_name="Node Query",
    import_lim=import_lim,
    options_dict={
        "Firestore": fskg,
        "Mongo": mkg,
        "Aura": aura_kg,
    },
)
query_nodes_testing(records=query_job)

$$$$ Starting Benchmark Node Query with options: ['Firestore', 'Mongo', 'Aura'] $$$$
Firestore time for 100 Node Query: 0.9357259273529053
Mongo time for 100 Node Query: 0.45427942276000977
Aura time for 100 Node Query: 6.676049470901489
hEllO wOrlD!


### Querying deeply nested structures comparison

Challenge 1: Finding friends of friends of "Q901" (2nd degree directed)

In [8]:
# Second-hop traversal in Firestore: for every entry in Q901's `edges_from`,
# fetch that node and keep its own `edges_from` list.
f0f_list = []

node_data = fskg.get_node(node_uid='Q901')

for e in node_data.edges_from:
    neigh_node = fskg.get_node(node_uid=e)
    f0f_list.append(neigh_node.edges_from)

# Total number of second-hop entries (duplicates are counted). Summing the
# lengths gives the same number as len(sum(f0f_list, [])) without repeatedly
# copying the growing concatenated list.
sum(len(second_hop) for second_hop in f0f_list)

113

In [10]:
# Disabled MongoDB variant of the Firestore friends-of-friends traversal:
# the same steps, but calling `mkg.get_node` instead of `fskg.get_node`.
# Because every statement is commented out, this cell issues no MongoDB calls.
# f0f_list = []

# node_data = mkg.get_node(node_uid='Q901')

# for e in node_data.edges_from:
#     neigh_node = mkg.get_node(node_uid=e)
#     f0f_list.append(neigh_node.edges_from)

# len(sum(f0f_list, []))

# Displays whatever `f0f_list` currently holds in the kernel; with the block
# above disabled, the name must already have been defined by another cell.
f0f_list

In [11]:
# Two-hop traversal from Q901 in AuraDB over any relationship type, using the
# driver held by `aura_kg`.
# NOTE(review): `-[]-` matches relationships in either direction, while the
# challenge text says "directed" — confirm which is intended.
records, summary, keys = aura_kg.driver.execute_query(
    "MATCH (n)-[]-()-[]-(result) "
    "WHERE n.node_uid = 'Q901' "
    "RETURN result"
)

len(records)

53

In [10]:
# Same two-hop traversal, restricted to relationships of type DIRECTED.
# NOTE(review): the pattern has no arrow heads, so it still matches both
# directions — confirm against the "directed" wording of the challenge.
records, summary, keys = aura_kg.driver.execute_query(
    "MATCH (n)-[:DIRECTED]-()-[:DIRECTED]-(result)\n"
    "            WHERE n.node_uid = 'Q901'\n"
    "            RETURN result"
)

len(records)

53

In [11]:
# Two-hop traversal written as a fixed-length path: `*2` asks for exactly two
# DIRECTED relationships between `n` and `result`.
# NOTE(review): no arrow head is given, so direction is not constrained.
records, summary, keys = aura_kg.driver.execute_query(
    "MATCH (n)-[:DIRECTED*2]-(result)\n"
    "            WHERE n.node_uid = 'Q901'\n"
    "            RETURN result"
)

len(records)

53

Challenge 2: Finding friends of friends of friends of "Q901" (3rd degree undirected)

In [26]:
# Third-hop traversal in Firestore, following both `edges_from` and
# `edges_to` at every level. One `get_node` call is made per visited entry,
# so nodes reachable by several paths are fetched and counted repeatedly.
f0fof_list = []

node_data = fskg.get_node(node_uid='Q901')

for e in node_data.edges_from + node_data.edges_to:
    neigh_node = fskg.get_node(node_uid=e)
    for e2 in neigh_node.edges_from + neigh_node.edges_to:
        neigh_node2 = fskg.get_node(node_uid=e2)
        f0fof_list.append(neigh_node2.edges_from)
        f0fof_list.append(neigh_node2.edges_to)

# Total number of third-hop entries (duplicates are counted). Summing the
# lengths gives the same number as len(sum(f0fof_list, [])) without the
# quadratic cost of concatenating tens of thousands of entries list by list.
sum(len(third_hop) for third_hop in f0fof_list)

51947

In [27]:
# Open a dedicated driver for this query; the `with` block closes it on exit.
with GraphDatabase.driver(uri=aura_kg.uri, auth=aura_kg.auth) as driver:
    driver.verify_connectivity()

    # Three-hop traversal over any relationship type, in either direction.
    # node_uid is passed as a query parameter ($node_uid) rather than being
    # embedded in the Cypher text.
    records, summary, keys = driver.execute_query(
        "MATCH (n)-[]-()-[]-()-[]-(result) WHERE n.node_uid = $node_uid RETURN result",
        node_uid="Q901",
    )

len(records)

10078

### Running Community Identification Comparison

TBD: the community identification comparison has not been implemented yet.