In [27]:
# Workaround so the local package can be imported from this notebook:
# put the parent directory (relative to the current working directory)
# at the end of the module search path.
import sys

sys.path.extend([r'./../'])

In [29]:
%load_ext autoreload
%autoreload 2
import neo4j
import pandas as pd

from data2neo.relational_modules.pandas import PandasDataFrameIterator
from data2neo import IteratorIterator
from data2neo import Converter
from data2neo.utils import load_file
from data2neo import register_subgraph_preprocessor

import data2neo.common_modules.types # For FLOAT, INT, etc. wrappers
# This is required because the pandas dataframe iterator will convert all values 
# to int64 which is not supported by neo4j

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
# Configure logging: send INFO-and-above records of the "data2neo" logger
# to the console with a timestamp / thread / level / file prefix.
import logging

#logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("data2neo")
logger.setLevel(logging.INFO)
log_formatter = logging.Formatter("%(asctime)s [%(threadName)s]::[%(levelname)s]::%(filename)s: %(message)s")

# Loggers live for the whole kernel session, so simply calling addHandler()
# every time this cell is executed stacks another StreamHandler on the same
# logger and each message is then printed once per handler (duplicated lines).
# Tag our handler with a name and remove any copy left by a previous run of
# this cell before attaching a fresh one, which makes the cell idempotent.
CONSOLE_HANDLER_NAME = "data2neo-notebook-console"
for stale_handler in [h for h in logger.handlers if h.get_name() == CONSOLE_HANDLER_NAME]:
    logger.removeHandler(stale_handler)

console_handler = logging.StreamHandler()
console_handler.set_name(CONSOLE_HANDLER_NAME)
console_handler.setFormatter(log_formatter)
logger.addHandler(console_handler)

In [33]:
# Conversion schema that is later handed to the Converter.
# Each ENTITY block carries the name of one of the tables loaded further down
# ("orders", "suppliers", "products", "employees", "categories") and lists the
# NODEs and RELATIONSHIPs declared for that table.
# IF_HAS_BOSS, used in the "employees" entity, is the subgraph preprocessor
# defined in the cell that follows this one; it wraps the REPORTS_TO relationship.
# NOTE(review): attributes prefixed with "+" look like the identifying (merge)
# keys of a node and "-" like ordinary properties — confirm against the
# data2neo schema documentation.
# NOTE(review): inside ENTITY("orders") and ENTITY("products") some IDs are read
# through a different entity prefix (e.g. products.ProductID, suppliers.SupplierID)
# — confirm that this is intended.
schema = """
ENTITY("orders"):
  NODE("Order") order:
    + orderID = INT(orders.OrderID)
    - shipName = orders.ShipName
  NODE("Product") product:
    + productID = INT(products.ProductID)
  NODE("Employee") employee:
    + employeeID = INT(employees.EmployeeID)
  
  RELATIONSHIP(order, "CONTAINS", product):
    - unitPrice = FLOAT(orders.UnitPrice)
    - quantity = FLOAT(orders.Quantity)

  RELATIONSHIP(employee, "SOLD", order):


ENTITY("suppliers"):
  NODE("Supplier") supplier:
    + supplierID = INT(suppliers.SupplierID)
    - companyName = suppliers.CompanyName


ENTITY("products"):
  NODE("Product") product:
    + productID = INT(products.ProductID)
    - productName = products.ProductName
    - unitPrice = FLOAT(products.UnitPrice)

  NODE("Supplier") supplier:
    + supplierID = INT(suppliers.SupplierID)
  
  NODE("Category") category:
    + categoryID = INT(categories.CategoryID)

  RELATIONSHIP(supplier, "SUPPLIES", product):
  
  RELATIONSHIP(product, "PART_OF", category):


ENTITY("employees"):
  NODE("Employee") employee:
    + employeeID = INT(employees.EmployeeID)
    - firstName = employees.FirstName
    - lastName = employees.LastName
    - title = employees.Title

  IF_HAS_BOSS(RELATIONSHIP(employee, "REPORTS_TO", MATCH("Employee", employeeID = INT(employees.ReportsTo)))):


ENTITY("categories"):
  NODE("Category") category:
    + categoryID = INT(categories.CategoryID)
    - categoryName = categories.CategoryName
    - description = categories.Description
"""

In [35]:
@register_subgraph_preprocessor
def IF_HAS_BOSS(resource):
    """Pass the resource through only when it has a ``ReportsTo`` value.

    Args:
        resource: Mapping-like object that is indexed with ``"ReportsTo"``.

    Returns:
        ``None`` when ``resource["ReportsTo"]`` is missing according to
        ``pd.isna``; otherwise the very same ``resource`` object, unmodified.
        (Presumably data2neo skips the wrapped subgraph on ``None`` — confirm
        against the preprocessor documentation.)
    """
    has_boss = not pd.isna(resource["ReportsTo"])
    return resource if has_boss else None



In [37]:
# Connection settings for the target Neo4j instance.
# Values are taken from the environment (NEO4J_URI / NEO4J_USER /
# NEO4J_PASSWORD) so real credentials do not have to be written into the
# notebook; the fallbacks are the original local-development defaults.
import os

uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
auth = neo4j.basic_auth(
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "password"),  # CHANGE TO YOUR CREDENTIALS
)

In [39]:
# Delete all nodes and relationships so the conversion starts from an empty graph.
# WARNING: this wipes the whole database that `uri` points to.
# The driver is used as a context manager so its connections are released once
# the cleanup is done (`driver` stays bound afterwards, but is closed).
# `driver` is a classmethod, so GraphDatabase does not need to be instantiated.
with neo4j.GraphDatabase.driver(uri, auth=auth) as driver:
    with driver.session() as session:
        # consume() waits for the query to finish and surfaces any server error
        # here instead of leaving the result pending when the session closes.
        session.run("MATCH (n) DETACH DELETE n").consume()

In [41]:
# Download each Northwind CSV table and wrap it in a PandasDataFrameIterator
# named after the table. The resulting list is combined into a single
# IteratorIterator in the next cell.
files = ["categories", "employees", "orders", "products", "suppliers"]
iterators = []
for file in files:
    csv_url = (
        "https://raw.githubusercontent.com/neo4j-documentation/developer-resources"
        f"/gh-pages/data/northwind/{file}.csv"
    )
    df = pd.read_csv(csv_url)
    iterators.append(PandasDataFrameIterator(df, file))

In [43]:
# Combine the per-table iterators into the single iterator given to the Converter.
iterator = IteratorIterator(iterators)

In [45]:
# Build the converter from the schema, the combined iterator and the Neo4j
# connection settings.
# NOTE(review): num_workers=1 together with serialize=True presumably selects
# single-worker, serial processing (the run log prints "Starting serial
# processing") — confirm against the data2neo Converter documentation.
converter = Converter(schema, iterator, uri, auth, num_workers=1, serialize=True)

In [47]:
# Run the conversion; tqdm's notebook widget is passed in to display progress.
from tqdm.notebook import tqdm
converter(progress_bar=tqdm)

  0%|          | 0/4556 [00:00<?, ?it/s]

2025-10-16 21:47:02,372 [MainThread]::[INFO]::converter.py: Starting serial processing.
2025-10-16 21:47:02,372 [MainThread]::[INFO]::converter.py: Starting serial processing.
2025-10-16 21:47:02,372 [MainThread]::[INFO]::converter.py: Starting creation of nodes.
2025-10-16 21:47:02,372 [MainThread]::[INFO]::converter.py: Starting creation of nodes.
2025-10-16 21:47:03,354 [MainThread]::[INFO]::converter.py: Starting creation of relations.
2025-10-16 21:47:03,354 [MainThread]::[INFO]::converter.py: Starting creation of relations.
2025-10-16 21:47:04,102 [MainThread]::[INFO]::converter.py: Processed in total 7695 nodes and 4472 relationships (this run took 1s)
2025-10-16 21:47:04,102 [MainThread]::[INFO]::converter.py: Processed in total 7695 nodes and 4472 relationships (this run took 1s)
