# Database initialization

In [10]:
import os

from neo4j import GraphDatabase

# Connection settings. Each value can be overridden through an environment
# variable so the notebook can target another server without being edited.
# The fallbacks are the values this notebook has always used.
URI = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(review): the fallback password is still committed with the notebook;
# set NEO4J_PASSWORD (and drop the fallback) for any shared server.
PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4jproject123")

# (database name, dataset index) pairs; the index selects the dataset_<idx>*.csv files.
DBS = [
    ("db1", 1),
    ("db2", 2),
    ("db3", 3),
]

In [11]:
def init_db(db_name, dataset_idx):
    """Wipe database ``db_name`` and reload it from the ``dataset_<dataset_idx>`` CSV files.

    Steps, in order:

    1. Delete every node and relationship.
    2. Create uniqueness constraints on ``Customer.id``, ``Terminal.id`` and
       ``Transaction.id``.
    3. Load ``Customer``, ``Terminal`` and ``Transaction`` nodes.
    4. Create ``USE`` (Customer -> Terminal), ``PERFORM`` (Customer ->
       Transaction) and ``ON`` (Transaction -> Terminal) relationships.

    Args:
        db_name: Name of the database every query is run against.
        dataset_idx: Value interpolated into the CSV file names read through
            ``LOAD CSV`` (``dataset_<idx>_customers.csv``,
            ``dataset_<idx>_terminals.csv`` and ``dataset_<idx>.csv``).

    Raises:
        Any exception raised by the neo4j driver (connection or query
        failure) propagates to the caller; the driver is closed first.
    """
    # The context manager closes the driver even when a step fails, so a
    # failed run does not leak its connection pool.
    with GraphDatabase.driver(URI, auth=(USER, PASSWORD)) as driver:

        def run_query(query, params=None):
            """Run one auto-commit query on ``db_name`` and wait for it to finish."""
            # session.run() is used on purpose: the loads below rely on
            # CALL { ... } IN TRANSACTIONS, which needs an implicit
            # (auto-commit) transaction.
            with driver.session(database=db_name) as session:
                # consume() blocks until the server has finished the query and
                # raises if it failed; relying on session close to drain the
                # result can lose such errors.
                session.run(query, params or {}).consume()

        print(f"--- Initializing database: {db_name} (using dataset_{dataset_idx}) ---")

        # Clear DB
        run_query("MATCH (n) DETACH DELETE n")
        print("Cleared existing nodes and relationships")

        # Constraints (IF NOT EXISTS keeps re-runs from failing)
        constraints = [
            "CREATE CONSTRAINT customer_id IF NOT EXISTS FOR (c:Customer) REQUIRE c.id IS UNIQUE",
            "CREATE CONSTRAINT terminal_id IF NOT EXISTS FOR (t:Terminal) REQUIRE t.id IS UNIQUE",
            "CREATE CONSTRAINT transaction_id IF NOT EXISTS FOR (tr:Transaction) REQUIRE tr.id IS UNIQUE",
        ]
        for c in constraints:
            run_query(c)
        print("Constraints created")

        # Load Customers
        q = f"""
        LOAD CSV WITH HEADERS FROM 'file:///dataset_{dataset_idx}_customers.csv' AS row
        CALL {{
            WITH row
            MERGE (c:Customer {{id: row.CUSTOMER_ID}})
            SET c.lat = toFloat(row.x_customer_id),
                c.lon = toFloat(row.y_customer_id),
                c.amountMean = toFloat(row.mean_amount),
                c.amountStd = toFloat(row.std_amount),
                c.meanDailyTransactions = toFloat(row.mean_nb_tx_per_day)
        }} IN TRANSACTIONS OF 500 ROWS
        """
        run_query(q)
        print("Customers loaded")

        # Load Terminals
        q = f"""
        LOAD CSV WITH HEADERS FROM 'file:///dataset_{dataset_idx}_terminals.csv' AS row
        CALL {{
            WITH row
            MERGE (t:Terminal {{id: row.TERMINAL_ID}})
            SET t.lat = toFloat(row.x_terminal_id),
                t.lon = toFloat(row.y_terminal_id)
        }} IN TRANSACTIONS OF 500 ROWS
        """
        run_query(q)
        print("Terminals loaded")

        # Load Transactions (CREATE, not MERGE: the database was cleared above)
        q = f"""
        LOAD CSV WITH HEADERS FROM 'file:///dataset_{dataset_idx}.csv' AS row
        CALL {{
            WITH row
            CREATE (tr:Transaction {{
                id: row.TRANSACTION_ID,
                dateTime: datetime(replace(row.TX_DATETIME, ' ', 'T')),
                amount: toFloat(row.TX_AMOUNT),
                isFraud: row.TX_FRAUD = '1'
            }})
        }} IN TRANSACTIONS OF 1000 ROWS
        """
        run_query(q)
        print("Transactions loaded")

        # USE relationships (Customer -> Terminal).
        # available_terminals is stripped of its square brackets and split on
        # commas; blank entries are skipped.
        q = f"""
        LOAD CSV WITH HEADERS FROM 'file:///dataset_{dataset_idx}_customers.csv' AS row
        CALL {{
            WITH row
            WITH row, replace(replace(row.available_terminals, '[', ''), ']', '') AS terms_str
            UNWIND split(terms_str, ',') AS t_id
            WITH row, trim(t_id) AS t_id_clean
            WHERE t_id_clean <> ''
            MATCH (c:Customer {{id: row.CUSTOMER_ID}})
            MATCH (t:Terminal {{id: t_id_clean}})
            MERGE (c)-[:USE]->(t)
        }} IN TRANSACTIONS OF 1000 ROWS
        """
        run_query(q)
        print("USE relationships created")

        # PERFORM relationships (Customer -> Transaction)
        q = f"""
        LOAD CSV WITH HEADERS FROM 'file:///dataset_{dataset_idx}.csv' AS row
        CALL {{
            WITH row
            MATCH (c:Customer {{id: row.CUSTOMER_ID}})
            MATCH (tr:Transaction {{id: row.TRANSACTION_ID}})
            MERGE (c)-[:PERFORM]->(tr)
        }} IN TRANSACTIONS OF 2000 ROWS
        """
        run_query(q)
        print("PERFORM relationships created")

        # ON relationships (Transaction -> Terminal)
        q = f"""
        LOAD CSV WITH HEADERS FROM 'file:///dataset_{dataset_idx}.csv' AS row
        CALL {{
            WITH row
            MATCH (tr:Transaction {{id: row.TRANSACTION_ID}})
            MATCH (t:Terminal {{id: row.TERMINAL_ID}})
            MERGE (tr)-[:ON]->(t)
        }} IN TRANSACTIONS OF 2000 ROWS
        """
        run_query(q)
        print("ON relationships created")

    print(f"--- Finished initializing {db_name} ---\n")


In [12]:
# Rebuild db1 from the dataset_1 CSV files.
# NOTE(review): the saved output of this cell is a ServiceUnavailable error,
# so db1 was not initialized in the recorded run.
init_db(db_name="db1", dataset_idx=1)

--- Initializing database: db1 (using dataset_1) ---


ServiceUnavailable: Couldn't connect to 127.0.0.1:7687 (resolved to ('127.0.0.1:7687',)):
Failed to read four byte Bolt handshake response from server ResolvedIPv4Address(('127.0.0.1', 7687)) (deadline Deadline(timeout=60.0))

In [5]:
# Rebuild db2 from the dataset_2 CSV files.
init_db(db_name="db2", dataset_idx=2)

--- Initializing database: db2 (using dataset_2) ---
Cleared existing nodes and relationships
Constraints created




Customers loaded
Terminals loaded




Transactions loaded




USE relationships created




PERFORM relationships created




ON relationships created
--- Finished initializing db2 ---



In [6]:
# Rebuild db3 from the dataset_3 CSV files.
init_db(db_name="db3", dataset_idx=3)

--- Initializing database: db3 (using dataset_3) ---
Cleared existing nodes and relationships
Constraints created




Customers loaded
Terminals loaded




Transactions loaded
USE relationships created




PERFORM relationships created




ON relationships created
--- Finished initializing db3 ---

