In [None]:
# Hack to make the module importable: expose the parent directory (resolved
# relative to the current working directory) on the import path.
import sys

_PARENT_DIR = r'./../'
# Guard the append so re-running this cell does not keep growing sys.path.
if _PARENT_DIR not in sys.path:
    sys.path.append(_PARENT_DIR)

In [None]:
%load_ext autoreload
%autoreload 2
import neo4j
import pandas as pd

from rel2graph.relational_modules.pandas import PandasDataFrameIterator
from rel2graph import IteratorIterator
from rel2graph import Converter
from rel2graph.utils import load_file
from rel2graph import register_subgraph_preprocessor

import rel2graph.common_modules.types # For FLOAT, INT, etc. wrappers
# The import above is required because the pandas dataframe iterator yields
# numeric values as int64, a type that neo4j does not support.

In [None]:
# Configure logging for the "rel2graph" logger: INFO level, console output.
import logging

# To configure the root logger as well, call e.g.
# logging.basicConfig(level=logging.WARNING) before this block.

# Name used to recognise the handler installed by this cell on later re-runs.
LOG_HANDLER_NAME = "rel2graph-notebook-console"

logger = logging.getLogger("rel2graph")
logger.setLevel(logging.INFO)
log_formatter = logging.Formatter("%(asctime)s [%(threadName)s]::[%(levelname)s]::%(filename)s: %(message)s")

# Reuse the handler from a previous run of this cell instead of stacking a new
# one each time; stacked handlers would print every log record multiple times.
console_handler = next(
    (handler for handler in logger.handlers if handler.get_name() == LOG_HANDLER_NAME),
    None,
)
if console_handler is None:
    console_handler = logging.StreamHandler()
    console_handler.set_name(LOG_HANDLER_NAME)
    logger.addHandler(console_handler)
console_handler.setFormatter(log_formatter)

In [None]:
# Conversion schema: one ENTITY block per input table (the names match the
# iterator names "orders", "suppliers", "products", "employees", "categories").
# Each block declares the nodes and relationships produced from one row.
# Every dynamic attribute is written as <entity>.<column> using the name of the
# ENTITY block it appears in, because a row only carries the columns of its own
# table (e.g. the product id of an order row comes from orders.ProductID).
# IF_HAS_BOSS refers to the subgraph preprocessor defined in this notebook.
# NOTE(review): the exact meaning of the "+" / "-" attribute markers and of
# MATCH(...) is defined by the rel2graph schema syntax -- see its documentation.
schema = """
ENTITY("orders"):
  NODE("Order") order:
    + orderID = INT(orders.OrderID)
    - shipName = orders.ShipName
  NODE("Product") product:
    + productID = INT(orders.ProductID)
  NODE("Employee") employee:
    + employeeID = INT(orders.EmployeeID)
  
  RELATIONSHIP(order, "CONTAINS", product):
    - unitPrice = FLOAT(orders.UnitPrice)
    - quantity = FLOAT(orders.Quantity)

  RELATIONSHIP(employee, "SOLD", order):


ENTITY("suppliers"):
  NODE("Supplier") supplier:
    + supplierID = INT(suppliers.SupplierID)
    - companyName = suppliers.CompanyName


ENTITY("products"):
  NODE("Product") product:
    + productID = INT(products.ProductID)
    - productName = products.ProductName
    - unitPrice = FLOAT(products.UnitPrice)

  NODE("Supplier") supplier:
    + supplierID = INT(products.SupplierID)
  
  NODE("Category") category:
    + categoryID = INT(products.CategoryID)

  RELATIONSHIP(supplier, "SUPPLIES", product):
  
  RELATIONSHIP(product, "PART_OF", category):


ENTITY("employees"):
  NODE("Employee") employee:
    + employeeID = INT(employees.EmployeeID)
    - firstName = employees.FirstName
    - lastName = employees.LastName
    - title = employees.Title

  IF_HAS_BOSS(RELATIONSHIP(employee, "REPORTS_TO", MATCH("Employee", employeeID = INT(employees.ReportsTo)))):


ENTITY("categories"):
  NODE("Category") category:
    + categoryID = INT(categories.CategoryID)
    - categoryName = categories.CategoryName
    - description = categories.Description
"""

In [None]:
@register_subgraph_preprocessor
def IF_HAS_BOSS(resource):
    """Let a resource through only when its "ReportsTo" field is set.

    Returns None when ``resource["ReportsTo"]`` is missing according to
    ``pd.isna``; otherwise returns ``resource`` unchanged.
    """
    has_boss = not pd.isna(resource["ReportsTo"])
    return resource if has_boss else None

In [None]:
# Neo4j connection settings. Values are taken from the environment variables
# NEO4J_URI, NEO4J_USER and NEO4J_PASSWORD when set, so real credentials do not
# have to be written into the notebook; the fallbacks are the local defaults.
import os

uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
auth = neo4j.basic_auth(
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "password"),  # set NEO4J_PASSWORD to your own
)

In [None]:
# Delete all nodes and relationships so the conversion starts from an empty
# database. WARNING: this wipes everything in the database reachable via `uri`.
# The driver is used as a context manager so its connections are released once
# the cleanup is done, and the result is consumed so that a failing query
# raises here rather than passing silently.
with neo4j.GraphDatabase.driver(uri, auth=auth) as driver:
    with driver.session() as session:
        session.run("MATCH (n) DETACH DELETE n").consume()

In [None]:
# Download each Northwind CSV into a DataFrame and wrap it in a
# PandasDataFrameIterator labelled with the file's base name.
NORTHWIND_BASE_URL = "https://raw.githubusercontent.com/neo4j-documentation/developer-resources/gh-pages/data/northwind"
files = ["categories", "employees", "orders", "products", "suppliers"]
iterators = [
    PandasDataFrameIterator(pd.read_csv(f"{NORTHWIND_BASE_URL}/{table}.csv"), table)
    for table in files
]

In [None]:
# Combine the per-table iterators into the single iterator object that is
# handed to the Converter.
iterator = IteratorIterator(iterators)

In [None]:
# Set up the converter from the schema, the combined iterator and the Neo4j
# connection settings.
# NOTE(review): num_workers=1 together with serialize=True presumably keeps the
# conversion single-worker and in input order -- confirm against the rel2graph
# documentation before changing either value.
converter = Converter(
    schema,
    iterator,
    uri,
    auth,
    num_workers=1,
    serialize=True,
)

In [None]:
# Run the conversion with a progress bar. tqdm.auto selects the notebook widget
# bar when it is available and falls back to the plain console bar otherwise,
# so this cell also works outside a widget-enabled Jupyter front-end.
from tqdm.auto import tqdm
converter(progress_bar=tqdm)