# Database initialization

In [1]:
import os

from neo4j import GraphDatabase

# Connection settings. Environment variables take precedence so that real
# credentials do not have to be committed with the notebook; the literals are
# only local-development fallbacks.
URI = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4jproject123")

# (database name, dataset index) pairs, one per database to initialize.
DBS = [
    ("db1", 1),
    ("db2", 2),
    ("db3", 3),
]

In [2]:
def init_db(db_name, dataset_idx):
    """Wipe database ``db_name`` and rebuild its graph from the dataset CSV files.

    Steps, in order:
      1. delete every node and relationship in the database;
      2. create uniqueness constraints on ``Customer.id``, ``Terminal.id`` and
         ``Transaction.id``;
      3. load ``Customer``, ``Terminal`` and ``Transaction`` nodes;
      4. create ``USE`` (Customer -> Terminal), ``PERFORM``
         (Customer -> Transaction) and ``ON`` (Transaction -> Terminal)
         relationships.

    The CSV files are referenced as ``file:///dataset_<idx>_customers.csv``,
    ``file:///dataset_<idx>_terminals.csv`` and ``file:///dataset_<idx>.csv``;
    these URLs are resolved by the Neo4j server, not by this notebook.

    Args:
        db_name: Name of the Neo4j database to (re)initialize.
        dataset_idx: Integer index substituted into the CSV file names.

    Raises:
        Any driver or server error raised by a query propagates to the caller;
        the driver is closed in every case.
    """
    driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD))

    def run_query(query, params=None):
        # `database` is a session-level option, so the target database is
        # selected here. Each call uses its own session and an auto-commit
        # transaction, which `CALL { ... } IN TRANSACTIONS` requires.
        with driver.session(database=db_name) as session:
            # Results are lazy: consume() waits for the statement to finish so
            # that a failure in a later batch is raised here instead of
            # possibly going unnoticed when the session is closed.
            session.run(query, params or {}).consume()

    # try/finally so the driver's connections are released even on failure.
    try:
        print(f"--- Initializing database: {db_name} (using dataset_{dataset_idx}) ---")

        # Clear DB. Deleting in batches keeps each transaction small, so
        # re-initializing an already populated database does not have to hold
        # the whole graph in a single transaction.
        run_query(
            "MATCH (n)\n"
            "CALL {\n"
            "    WITH n\n"
            "    DETACH DELETE n\n"
            "} IN TRANSACTIONS OF 10000 ROWS"
        )
        print("Cleared existing nodes and relationships")

        # Constraints (these also back the id lookups done by MERGE / MATCH below)
        constraints = [
            "CREATE CONSTRAINT customer_id IF NOT EXISTS FOR (c:Customer) REQUIRE c.id IS UNIQUE",
            "CREATE CONSTRAINT terminal_id IF NOT EXISTS FOR (t:Terminal) REQUIRE t.id IS UNIQUE",
            "CREATE CONSTRAINT transaction_id IF NOT EXISTS FOR (tr:Transaction) REQUIRE tr.id IS UNIQUE",
        ]
        for c in constraints:
            run_query(c)
        print("Constraints created")

        # Load Customers
        q = (
            "LOAD CSV WITH HEADERS FROM 'file:///dataset_%d_customers.csv' AS row\n"
            "CALL {\n"
            "    WITH row\n"
            "    MERGE (c:Customer {id: row.CUSTOMER_ID})\n"
            "    SET c.lat = toFloat(row.x_customer_id),\n"
            "        c.lon = toFloat(row.y_customer_id),\n"
            "        c.amountMean = toFloat(row.mean_amount),\n"
            "        c.amountStd = toFloat(row.std_amount),\n"
            "        c.meanDailyTransactions = toFloat(row.mean_nb_tx_per_day)\n"
            "} IN TRANSACTIONS OF 500 ROWS"
        ) % dataset_idx
        run_query(q)
        print("Customers loaded")

        # Load Terminals
        q = (
            "LOAD CSV WITH HEADERS FROM 'file:///dataset_%d_terminals.csv' AS row\n"
            "CALL {\n"
            "    WITH row\n"
            "    MERGE (t:Terminal {id: row.TERMINAL_ID})\n"
            "    SET t.lat = toFloat(row.x_terminal_id),\n"
            "        t.lon = toFloat(row.y_terminal_id)\n"
            "} IN TRANSACTIONS OF 500 ROWS"
        ) % dataset_idx
        run_query(q)
        print("Terminals loaded")

        # Load Transactions (CREATE, not MERGE: the database was just cleared).
        # NOTE(review): dateTime is stored as the raw CSV string, not a temporal value.
        q = (
            "LOAD CSV WITH HEADERS FROM 'file:///dataset_%d.csv' AS row\n"
            "CALL {\n"
            "    WITH row\n"
            "    CREATE (tr:Transaction {\n"
            "        id: row.TRANSACTION_ID,\n"
            "        dateTime: row.TX_DATETIME,\n"
            "        amount: toFloat(row.TX_AMOUNT),\n"
            "        isFraud: row.TX_FRAUD = \"1\"\n"
            "    })\n"
            "} IN TRANSACTIONS OF 1000 ROWS"
        ) % dataset_idx
        run_query(q)
        print("Transactions loaded")

        # USE relationships (Customer -> Terminal).
        # available_terminals is parsed as a bracketed, comma-separated list:
        # brackets are stripped, the rest is split on ',' and blanks are skipped.
        q = (
            "LOAD CSV WITH HEADERS FROM 'file:///dataset_%d_customers.csv' AS row\n"
            "CALL {\n"
            "  WITH row\n"
            "  WITH row, replace(replace(row.available_terminals, '[', ''), ']', '') AS terms_str\n"
            "  UNWIND split(terms_str, ',') AS t_id\n"
            "  WITH row, trim(t_id) AS t_id_clean\n"
            "  WHERE t_id_clean <> ''\n"
            "  MATCH (c:Customer {id: row.CUSTOMER_ID})\n"
            "  MATCH (t:Terminal {id: t_id_clean})\n"
            "  MERGE (c)-[:USE]->(t)\n"
            "} IN TRANSACTIONS OF 1000 ROWS"
        ) % dataset_idx
        run_query(q)
        print("USE relationships created")

        # PERFORM relationships (Customer -> Transaction)
        q = (
            "LOAD CSV WITH HEADERS FROM 'file:///dataset_%d.csv' AS row\n"
            "CALL {\n"
            "  WITH row\n"
            "  MATCH (c:Customer {id: row.CUSTOMER_ID})\n"
            "  MATCH (tr:Transaction {id: row.TRANSACTION_ID})\n"
            "  MERGE (c)-[:PERFORM]->(tr)\n"
            "} IN TRANSACTIONS OF 2000 ROWS"
        ) % dataset_idx
        run_query(q)
        print("PERFORM relationships created")

        # ON relationships (Transaction -> Terminal)
        q = (
            "LOAD CSV WITH HEADERS FROM 'file:///dataset_%d.csv' AS row\n"
            "CALL {\n"
            "  WITH row\n"
            "  MATCH (tr:Transaction {id: row.TRANSACTION_ID})\n"
            "  MATCH (t:Terminal {id: row.TERMINAL_ID})\n"
            "  MERGE (tr)-[:ON]->(t)\n"
            "} IN TRANSACTIONS OF 2000 ROWS"
        ) % dataset_idx
        run_query(q)
        print("ON relationships created")
    finally:
        driver.close()

    print(f"--- Finished initializing {db_name} ---\n")

In [3]:
# Rebuild db1 from the dataset_1 CSV files.
init_db(db_name="db1", dataset_idx=1)

--- Initializing database: db1 (using dataset_1) ---
Cleared existing nodes and relationships
Constraints created




Customers loaded
Terminals loaded




Transactions loaded




USE relationships created




PERFORM relationships created




ON relationships created
--- Finished initializing db1 ---



In [5]:
# Rebuild db2 from the dataset_2 CSV files.
init_db(db_name="db2", dataset_idx=2)

--- Initializing database: db2 (using dataset_2) ---
Cleared existing nodes and relationships
Constraints created




Customers loaded
Terminals loaded




Transactions loaded




USE relationships created




PERFORM relationships created




ON relationships created
--- Finished initializing db2 ---



In [6]:
# Rebuild db3 from the dataset_3 CSV files.
init_db(db_name="db3", dataset_idx=3)

--- Initializing database: db3 (using dataset_3) ---
Cleared existing nodes and relationships
Constraints created




Customers loaded
Terminals loaded




Transactions loaded
USE relationships created




PERFORM relationships created




ON relationships created
--- Finished initializing db3 ---

