In [1]:
# Prints a fixed greeting; presumably a quick check that the kernel is alive.
# NOTE(review): looks like a leftover scratch cell - confirm whether it is still needed.
print("hello")

hello


In [2]:
# Basics of Kuzu graph databases
# Example dataframe: one row per employee. Its four columns
# (id_number, department, color, role) become the four node types used below.


import pandas as pd

id_number = [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009]

department = ['hr', 'hr', 'it', 'media', 'design', 'design', 'dev', 'dev', 'dev', 'dev']

color = ['red', 'green', 'yellow', 'orange', 'red', 'green', 'yellow', 'orange', 'yellow', 'red']

role = ['intern', 'junior', 'senior', 'manager', 'senior', 'manager', 'junior', 'senior', 'senior', 'manager']


# Column name -> column values. Deliberately NOT called `dict`: binding that
# name would shadow the built-in `dict` type for every later cell in the kernel.
employee_columns = {
    'id_number': id_number,
    'department': department,
    'color': color,
    'role': role,
}


#-----------------------------------

our_data = pd.DataFrame(employee_columns)

# Bare last expression so the notebook renders the frame.
our_data

Unnamed: 0,id_number,department,color,role
0,1000,hr,red,intern
1,1001,hr,green,junior
2,1002,it,yellow,senior
3,1003,media,orange,manager
4,1004,design,red,senior
5,1005,design,green,manager
6,1006,dev,yellow,junior
7,1007,dev,orange,senior
8,1008,dev,yellow,senior
9,1009,dev,red,manager


In [3]:
# Step 1
# Initialize the Kuzu database and open a connection for executing queries


import kuzu
import os
import shutil


# Location of the on-disk database (db), relative to the notebook.
db_path = os.path.join(".", "db", "graph_db")

# Start every run from an empty database so "Restart & Run All" is reproducible.
# The previous database may be a directory or a single file depending on the
# installed Kuzu version, so handle both instead of assuming a directory.
if os.path.isdir(db_path):
    shutil.rmtree(db_path)
elif os.path.exists(db_path):
    os.remove(db_path)

# NOTE(review): single-file Kuzu versions appear to keep a write-ahead log next
# to the database as "<db_path>.wal" - confirm against the installed version.
wal_path = db_path + ".wal"
if os.path.isfile(wal_path):
    os.remove(wal_path)

# Only the parent folder is created here; Kuzu creates the database itself.
# Pre-creating `db_path` as a directory breaks versions that expect a file path.
os.makedirs(os.path.dirname(db_path), exist_ok=True)

db = kuzu.Database(db_path)
conn = kuzu.Connection(db)



In [4]:
# Step 2
# Create the node tables.
#
# Together these statements form the database schema: they describe where
# loaded content will go. Each one follows Kuzu's syntax and is run with
# conn.execute:
#   CREATE NODE TABLE TableName(property DATA_TYPE, PRIMARY KEY(property))
#
# Every node table must have at least one property, and one property is the
# primary key, which keeps each node unique. The 'Employee' node could carry
# more properties such as full name, age, salary, etc.

employee_table_ddl = "CREATE NODE TABLE Employee(id INT64, PRIMARY KEY(id))"
department_table_ddl = "CREATE NODE TABLE Department(name STRING, PRIMARY KEY(name))"
color_table_ddl = "CREATE NODE TABLE Color(name STRING, PRIMARY KEY(name))"
role_table_ddl = "CREATE NODE TABLE Role(name STRING, PRIMARY KEY(name))"

conn.execute(employee_table_ddl)
conn.execute(department_table_ddl)
conn.execute(color_table_ddl)
conn.execute(role_table_ddl)

<kuzu.query_result.QueryResult at 0x1d772233950>

In [5]:
# Step 3
# Define the relationship tables.
#
# Intuitively, 'Employee' connects to every other node type
# (FROM Employee TO the others):
#   WORKS_IN      - an employee works in a department
#   HAS_ROLE      - an employee has a role in the company
#   LEAVING_COLOR - an employee has a turnover risk (color)

works_in_ddl = "CREATE REL TABLE WORKS_IN(FROM Employee TO Department)"
has_role_ddl = "CREATE REL TABLE HAS_ROLE(FROM Employee TO Role)"
leaving_color_ddl = "CREATE REL TABLE LEAVING_COLOR(FROM Employee TO Color)"

conn.execute(works_in_ddl)
conn.execute(has_role_ddl)
conn.execute(leaving_color_ddl)


<kuzu.query_result.QueryResult at 0x1d772223790>

In [6]:
# Steps 2 and 3 together define the schema (node tables and relationship tables) that the Kuzu database will follow

In [7]:
# Step 4
# Load data using MERGE
# Without this step the database has a schema but no content.
#
# Values are passed as query parameters ($name, $id, ...) rather than being
# formatted into the Cypher text, so a value containing a quote cannot break
# (or alter) the query. Loop variables are named so they do not overwrite the
# `department`, `color` and `role` lists that the dataframe was built from.

# One node per distinct department / color / role value.
for dept_name in our_data['department'].unique():
    conn.execute("MERGE (d:Department {name: $name})", parameters={"name": dept_name})
for color_name in our_data['color'].unique():
    conn.execute("MERGE (c:Color {name: $name})", parameters={"name": color_name})
for role_name in our_data['role'].unique():
    conn.execute("MERGE (r:Role {name: $name})", parameters={"name": role_name})

# One Employee node per row, linked to its department, role and color.
# MERGE matches an existing node/relationship or creates it, so the lookup
# nodes created above are reused rather than duplicated.
for employee in our_data.itertuples(index=False):
    conn.execute(
        """
        MERGE (e:Employee {id: $id})
        MERGE (d:Department {name: $department})
        MERGE (c:Color {name: $color})
        MERGE (r:Role {name: $role})
        MERGE (e)-[:WORKS_IN]->(d)
        MERGE (e)-[:HAS_ROLE]->(r)
        MERGE (e)-[:LEAVING_COLOR]->(c)
        """,
        parameters={
            # Cast to a plain int so the parameter is not a numpy scalar.
            "id": int(employee.id_number),
            "department": employee.department,
            "color": employee.color,
            "role": employee.role,
        },
    )

In [8]:
# Step 5
# Query Interconnections and Visualize with yFiles
from yfiles_jupyter_graphs import GraphWidget

# Find interconnections: pairs of employees who share a role but work in
# different departments. `e1.id < e2.id` keeps each pair only once.
results = conn.execute("""
    MATCH (e1:Employee)-[:HAS_ROLE]->(r:Role)<-[:HAS_ROLE]-(e2:Employee),
          (e1)-[:WORKS_IN]->(d1:Department),
          (e2)-[:WORKS_IN]->(d2:Department)
    WHERE d1.name <> d2.name AND e1.id < e2.id
    RETURN e1.id, e2.id, r.name, d1.name, d2.name
    LIMIT 10
""")

# Output column names, in the same order as the RETURN clause above.
result_columns = ("Employee1", "Employee2", "Shared_Role", "Dept1", "Dept2")

interconnections = []
while results.has_next():
    record = results.get_next()
    interconnections.append(dict(zip(result_columns, record)))

interconnections_df = pd.DataFrame(interconnections)
# Print nothing at all when no pairs were found.
if not interconnections_df.empty:
    print(interconnections_df.to_string(index=False))

# Pull every node and relationship out of the database for the visualization
def _query_to_frame(cypher):
    """Run `cypher` on `conn` and return the result as a pandas DataFrame (via Arrow)."""
    return conn.execute(cypher).get_as_arrow().to_pandas()


# Nodes: one frame per node table, holding just the primary key.
employee_nodes = _query_to_frame("MATCH (e:Employee) RETURN e.id")
department_nodes = _query_to_frame("MATCH (d:Department) RETURN d.name")
color_nodes = _query_to_frame("MATCH (c:Color) RETURN c.name")
role_nodes = _query_to_frame("MATCH (r:Role) RETURN r.name")

# Relationships: one frame per relationship table, as (employee id, target name).
works_in_rels = _query_to_frame("MATCH (e:Employee)-[:WORKS_IN]->(d:Department) RETURN e.id, d.name")
has_role_rels = _query_to_frame("MATCH (e:Employee)-[:HAS_ROLE]->(r:Role) RETURN e.id, r.name")
leaving_color_rels = _query_to_frame("MATCH (e:Employee)-[:LEAVING_COLOR]->(c:Color) RETURN e.id, c.name")

# Build the yFiles node list.
# `nodes` holds one dict per node; `node_ids` maps a "<Type>:<key>" string to
# the numeric id given to that node, so edges can look their endpoints up.
nodes = []
node_ids = {}
index = 0  # next free numeric node id

# Employee nodes, keyed and labelled by employee id
for employee_id in employee_nodes['e.id']:
    node_ids[f"Employee:{employee_id}"] = index
    employee_props = {"label": f"Employee {employee_id}", "type": "Employee"}
    nodes.append({"id": index, "properties": employee_props})
    index += 1

# Department nodes
for department_name in department_nodes['d.name']:
    node_ids[f"Department:{department_name}"] = index
    department_props = {"label": department_name, "type": "Department"}
    nodes.append({"id": index, "properties": department_props})
    index += 1

# Color nodes
for color_name in color_nodes['c.name']:
    node_ids[f"Color:{color_name}"] = index
    color_props = {"label": color_name, "type": "Color"}
    nodes.append({"id": index, "properties": color_props})
    index += 1

# Role nodes
for role_name in role_nodes['r.name']:
    node_ids[f"Role:{role_name}"] = index
    role_props = {"label": role_name, "type": "Role"}
    nodes.append({"id": index, "properties": role_props})
    index += 1

# Build the yFiles edge list.
# Every edge runs from an employee to a department, role or color; endpoints
# are the numeric ids recorded in `node_ids`.
edges = []

for employee_id, department_name in zip(works_in_rels['e.id'], works_in_rels['d.name']):
    edges.append({
        "start": node_ids[f"Employee:{employee_id}"],
        "end": node_ids[f"Department:{department_name}"],
        "properties": {"label": "WORKS_IN"},
    })

for employee_id, role_name in zip(has_role_rels['e.id'], has_role_rels['r.name']):
    edges.append({
        "start": node_ids[f"Employee:{employee_id}"],
        "end": node_ids[f"Role:{role_name}"],
        "properties": {"label": "HAS_ROLE"},
    })

for employee_id, color_name in zip(leaving_color_rels['e.id'], leaving_color_rels['c.name']):
    edges.append({
        "start": node_ids[f"Employee:{employee_id}"],
        "end": node_ids[f"Color:{color_name}"],
        "properties": {"label": "LEAVING_COLOR"},
    })

# Create and display the graph
w = GraphWidget()
w.nodes = nodes
w.edges = edges

# Appearance per node type, keyed by the "type" property stored on every node.
node_styles = {
    "Employee": {"color": "#3498db", "shape": "ellipse"},
    "Department": {"color": "#2ecc71", "shape": "rectangle"},
    "Color": {"color": "#e74c3c", "shape": "triangle"},
    "Role": {"color": "#f1c40f", "shape": "hexagon"}
}


def _style_for(node):
    """Return the {"color", "shape"} style for a node dict, chosen by its "type" property."""
    return node_styles[node["properties"]["type"]]


# The widget takes styling through mapping functions. Assigning a plain
# `w.node_styles` attribute only sets an unused Python attribute, so the
# colors and shapes would never reach the rendered graph.
if hasattr(w, "set_node_styles_mapping"):
    w.set_node_styles_mapping(_style_for)
else:
    # Fallback for widget versions without the styles mapping: color only.
    w.set_node_color_mapping(lambda node: _style_for(node)["color"])

w.set_graph_layout("organic")
# NOTE(review): the original comment suggested w.save_to_html("graph.html")
# outside Jupyter - confirm that method exists before relying on it.
w.show()

 Employee1  Employee2 Shared_Role  Dept1  Dept2
      1003       1005     manager  media design
      1002       1004      senior     it design
      1003       1009     manager  media    dev
      1005       1009     manager design    dev
      1002       1008      senior     it    dev
      1004       1008      senior design    dev
      1002       1007      senior     it    dev
      1004       1007      senior design    dev
      1001       1006      junior     hr    dev


GraphWidget(layout=Layout(height='730px', width='100%'))