# Optional: Load Sample Data

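This notebook shows how to load sample CSV data into Neo4j. It is optional and only needed if you want to experiment with the sample data. The connection settings are read from the `NEO4J_URI`, `NEO4J_USERNAME`, and `NEO4J_PASSWORD` environment variables, which can be provided in a `.env` file.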

In [None]:
from neo4j import GraphDatabase
from dotenv import load_dotenv
import os
import pandas as pd

# Connect to Neo4j
# Connection settings are read from environment variables; load_dotenv() runs
# first so that values kept in a .env file are available to os.getenv().
load_dotenv()
uri = os.getenv('NEO4J_URI')
auth = (os.getenv('NEO4J_USERNAME'), os.getenv('NEO4J_PASSWORD'))

# os.getenv() returns None for an unset variable. Stop here with an explicit
# message rather than handing a missing URI to the driver.
if not uri:
    raise RuntimeError(
        "NEO4J_URI is not set. Define it in the environment or in a .env file "
        "before running this notebook."
    )

# Shared driver instance used by every cell below.
driver = GraphDatabase.driver(uri, auth=auth)

## Clear Existing Data

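First, let's clear any existing data from the database. **Warning:** this deletes every node and relationship in the database you are connected to, so only run it against a database you can safely wipe: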

In [None]:
def clear_database() -> None:
    """Delete every node and relationship in the connected database.

    Runs a single ``MATCH (n) DETACH DELETE n`` statement through the
    module-level ``driver`` and prints a confirmation once it has completed.
    """
    with driver.session() as session:
        result = session.run("MATCH (n) DETACH DELETE n")
        # Results are fetched lazily. Consuming the result here ensures the
        # statement has finished, and that any server-side error is raised,
        # before the confirmation is printed.
        result.consume()
        print("Cleared database")

clear_database()

## Load Customer Data

Now let's load the customer data from customers.csv:

In [None]:
def load_customers() -> None:
    """Create one ``Customer`` node per row of ``../data/customers.csv``.

    The query reads the ``customer_id``, ``name``, ``email`` and ``join_date``
    fields of each row; ``join_date`` is converted with Cypher's ``date()``.
    Nodes are written with ``CREATE``, so running this twice without clearing
    the database produces duplicate nodes.
    """
    # Read CSV file (path is relative to the current working directory)
    customers_df = pd.read_csv('../data/customers.csv')

    with driver.session() as session:
        # Create Customer nodes; all rows are sent as one query parameter.
        result = session.run("""
        UNWIND $customers AS customer
        CREATE (c:Customer {
            id: customer.customer_id,
            name: customer.name,
            email: customer.email,
            join_date: date(customer.join_date)
        })
        """, customers=customers_df.to_dict('records'))
        # Results are fetched lazily. Consume the result so that a failure
        # while the statement executes is raised before success is reported.
        result.consume()
        print("Loaded customers")

load_customers()

## Load Product Data

Next, let's load the product data and create category relationships:

In [None]:
def load_products() -> None:
    """Create ``Product`` nodes from ``../data/products.csv`` and link categories.

    For each row the query merges a ``Category`` node keyed on the row's
    ``category`` value (so each distinct category exists once), creates a
    ``Product`` node from ``product_id``, ``name`` and ``price`` (converted
    with ``toFloat``), and connects them with an ``IN_CATEGORY`` relationship.
    """
    # Read CSV file (path is relative to the current working directory)
    products_df = pd.read_csv('../data/products.csv')

    with driver.session() as session:
        # Create Product and Category nodes
        result = session.run("""
        UNWIND $products AS product
        MERGE (cat:Category {name: product.category})
        CREATE (p:Product {
            id: product.product_id,
            name: product.name,
            price: toFloat(product.price)
        })
        CREATE (p)-[:IN_CATEGORY]->(cat)
        """, products=products_df.to_dict('records'))
        # Results are fetched lazily. Consume the result so that a failure
        # while the statement executes is raised before success is reported.
        result.consume()
        print("Loaded products")

load_products()

## Load Order Data

Finally, let's load the order data and create relationships to customers and products:

In [None]:
def load_orders() -> None:
    """Create ``Order`` nodes from ``../data/orders.csv`` and link them.

    For each row the query looks up the ``Customer`` and ``Product`` whose
    ``id`` equals the row's ``customer_id`` / ``product_id``, creates an
    ``Order`` node (``order_date`` converted with ``date()``, ``quantity``
    with ``toInteger()``), and adds ``PLACED_ORDER`` and ``CONTAINS``
    relationships. Because the lookups use ``MATCH``, a row whose customer or
    product is not found creates nothing.
    """
    # Read CSV file (path is relative to the current working directory)
    orders_df = pd.read_csv('../data/orders.csv')

    with driver.session() as session:
        # Create Order nodes and relationships
        result = session.run("""
        UNWIND $orders AS order
        MATCH (c:Customer {id: order.customer_id})
        MATCH (p:Product {id: order.product_id})
        CREATE (o:Order {
            id: order.order_id,
            date: date(order.order_date),
            quantity: toInteger(order.quantity)
        })
        CREATE (c)-[:PLACED_ORDER]->(o)
        CREATE (o)-[:CONTAINS]->(p)
        """, orders=orders_df.to_dict('records'))
        # Results are fetched lazily. Consume the result so that a failure
        # while the statement executes is raised before success is reported.
        result.consume()
        print("Loaded orders")

load_orders()

## Verify Data Loading

Let's verify that our data was loaded correctly:

In [None]:
def count_nodes():
    """Print the number of nodes grouped by each node's first label."""
    label_count_query = """
        MATCH (n)
        RETURN labels(n)[0] as label, count(*) as count
        """
    with driver.session() as db_session:
        # One output line per returned group.
        for row in db_session.run(label_count_query):
            label, total = row['label'], row['count']
            print(f"{label}: {total} nodes")

print("Node counts:")
count_nodes()

In [None]:
def verify_relationships():
    """Print up to five sample orders and the product count per category."""
    sample_orders_query = """
        MATCH (c:Customer)-[:PLACED_ORDER]->(o:Order)-[:CONTAINS]->(p:Product)
        RETURN c.name as customer, p.name as product, o.date as date
        LIMIT 5
        """
    category_counts_query = """
        MATCH (p:Product)-[:IN_CATEGORY]->(c:Category)
        RETURN c.name as category, count(p) as product_count
        """
    with driver.session() as db_session:
        # Customer -> Order -> Product paths
        order_rows = db_session.run(sample_orders_query)
        print("\nSample orders:")
        for row in order_rows:
            buyer, item, when = row['customer'], row['product'], row['date']
            print(f"{buyer} ordered {item} on {when}")

        # Product -> Category membership
        category_rows = db_session.run(category_counts_query)
        print("\nProducts by category:")
        for row in category_rows:
            category, total = row['category'], row['product_count']
            print(f"{category}: {total} products")

verify_relationships()

## Next Steps

Now that we have sample data loaded, you can proceed with the other notebooks to learn about:
1. Knowledge Graph concepts
2. Document processing
3. GraphRAG patterns
4. Memory Graph