# Simple queries and visuals

Take the data from LanceDB and form up some dataframes to load up KuzuDB with.  Then do a few simple visualizations to see relations.

Next steps:
- https://docs.kuzudb.com/extensions/vector/
- https://docs.kuzudb.com/extensions/full-text-search/


In [3]:
import kuzu
from yfiles_jupyter_graphs_for_kuzu import KuzuGraphWidget
import lancedb
import pandas as pd
import hashlib
from sentence_transformers import SentenceTransformer


In [4]:
model = SentenceTransformer("all-MiniLM-L6-v2")

  return torch._C._cuda_getDeviceCount() > 0


In [5]:
# Open a new in-memory database
db = kuzu.Database("./graphdb")
conn = kuzu.Connection(db)
conn.execute("INSTALL vector; LOAD vector;")

[<kuzu.query_result.QueryResult at 0x717db3cb9010>,
 <kuzu.query_result.QueryResult at 0x717db3e9b250>]

### Vector Query

In [6]:
query_vector = model.encode("convolutional neural network").tolist()
result = conn.execute(
    """
    CALL QUERY_VECTOR_INDEX(
        'CLAIMS',
        'text_vec_index',
        $query_vector,
        5
    )
    RETURN node.id, node.filename, node.text ORDER BY distance;
    """,
    {"query_vector": query_vector})

In [7]:
r = result.get_as_df()

In [8]:
r

Unnamed: 0,node.id,node.filename,node.text
0,2202.05901v2.pdf_conclusions_0,2202.05901v2.pdf,Convolutional neural networks can reliably cha...
1,2202.05901v2.pdf_Cardinal_0,2202.05901v2.pdf,This study demonstrates a convolutional neural...
2,2202.05901v2.pdf_futureDirections_0,2202.05901v2.pdf,"Implementing a single, physics-based loss func..."
3,2202.05901v2.pdf_Supporting_0,2202.05901v2.pdf,Focusing on the convolutional neural network's...
4,2202.05901v2.pdf_futureDirections_1,2202.05901v2.pdf,Exploring physics-based turbulence enhancement...


In [9]:
result = conn.execute(
    """
    CALL QUERY_VECTOR_INDEX(
        'CLAIMS',
        'text_vec_index',
        $query_vector,
        5
    )
    WITH node AS n, distance
    MATCH (n)-[b]->(c)
    RETURN n.id, b, c.filename, c.text
    """,
    {"query_vector": query_vector})

    # ORDER BY distance LIMIT 3;


In [10]:
r = result.get_as_df()

In [11]:
r

Unnamed: 0,node.id,b,c.filename,c.text
0,2202.05901v2.pdf_Cardinal_0,"{'_src': {'offset': 0, 'table': 1}, '_dst': {'...",2202.05901v2.pdf,convolutional neural network
1,2202.05901v2.pdf_Supporting_0,"{'_src': {'offset': 3, 'table': 1}, '_dst': {'...",2202.05901v2.pdf,convolutional neural network
2,2202.05901v2.pdf_conclusions_0,"{'_src': {'offset': 21, 'table': 1}, '_dst': {...",2202.05901v2.pdf,Transformer model
3,2202.05901v2.pdf_futureDirections_0,"{'_src': {'offset': 25, 'table': 1}, '_dst': {...",2202.05901v2.pdf,The CNNs
4,2202.05901v2.pdf_futureDirections_1,"{'_src': {'offset': 26, 'table': 1}, '_dst': {...",2202.05901v2.pdf,The CNNs


### Visualize

In [12]:
# Create a widget instance using the existing connection
g = KuzuGraphWidget(conn)

def get_node_color(node):
    node_type = node["properties"]["type"]
    if node_type == "source":
        return "blue"  # Or any color you prefer for source
    elif node_type == "claim":
        return "green" # Or any color you prefer for claim
    elif node_type == "entities":
        return "purple" # Or any color you prefer for entity
    else:
        return "gray"  # Default color for other types

g.add_node_configuration(
    "Claim",  # You might want to change this if the configuration is not just for "Person" nodes
    color=lambda node: get_node_color(node),  # type: ignore
    text=lambda node: {  # type: ignore
        "text": node["properties"]["type"],
        "position": "south",
    }
)
# set up configuration for the graph
# Custom configuration for nodes
# g.add_node_configuration(
#     "Claim",
#     color="red",   # type: ignore
#      text= lambda node : {   # type: ignore
#          "text": node["properties"]["type"],
#          "position": "south",
#     }
# )

# Display the entire graph

In [None]:
query_vector = model.encode("convolutional neural network").tolist()
g.show_cypher(
    """
    CALL QUERY_VECTOR_INDEX(
        'CLAIMS',
        'text_vec_index',
        $query_vector,
        5
    )
    WITH node AS n, distance
   MATCH (n)-[b]->(c)
    RETURN *
    ORDER BY distance LIMIT 3;
    """,
    {"query_vector": query_vector})



In [13]:
g.show_cypher("MATCH (a)-[b]->(c) RETURN *")


GraphWidget(layout=Layout(height='800px', width='100%'))

In [9]:
# g.show_cypher("MATCH (a)-[]->(intermediate_node {type: 'claim'})-[]->(c) RETURN *")
# g.show_cypher("MATCH conn_path = (a)-[]->(intermediate_node {type: 'claim'})-[]->(c) RETURN a, c, conn_path")
# g.show_cypher("MATCH conn_path = (a)-[]->(intermediate_node {type: 'claim'})-[]->(c) RETURN a, c, conn_path")
# MATCH p = (s {type: 'source'})-[*]-(e {type: 'entities'})
# RETURN p

In [10]:
# g.show_cypher("MATCH p = (start_node {type: 'source'})-[]->(intermediate_node)-[]->(end_node {type: 'entities'}) RETURN p")
g.show_cypher("MATCH p = (start_node {type: 'source'})-[]->(intermediate_node)-[]->(end_node {type: 'entities'}) RETURN start_node, end_node, p")

RuntimeError: Binder exception: Cannot find property type for start_node.