# Database initialization

In [40]:
import os
from getpass import getpass

from neo4j import GraphDatabase

# Connection settings. Each one can be overridden through an environment
# variable, so the notebook can target another instance without being edited.
URI = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
# The password is deliberately not stored in the notebook: it is taken from
# NEO4J_PASSWORD, or asked for interactively when that variable is unset/empty.
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
DB = os.environ.get("NEO4J_DATABASE", "test3")

# One driver object is shared by every cell below and closed in the last cell.
# NOTE(review): `database` is normally a per-session setting; passing it here
# relies on the driver forwarding it as the default for sessions it opens.
# Confirm against the installed driver version.
driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD), database=DB)

In [41]:
def run_query(query, params=None):
    """Run one Cypher statement and wait until it has fully executed.

    The statement is sent with ``session.run``, i.e. outside an explicit
    transaction, which is what the ``CALL ... IN TRANSACTIONS`` statements
    used in this notebook require.

    Args:
        query: Cypher text to execute.
        params: Optional mapping of query parameters. ``None`` (or any other
            falsy value) is sent as an empty mapping.

    Returns:
        The result object returned by ``session.run``. It has already been
        consumed, so its records are no longer available to the caller.

    Raises:
        Any exception the driver raises while running or consuming the
        statement.
    """
    with driver.session() as session:
        result = session.run(query, params or {})
        # Drain the result while the session is still open. Leaving this to
        # the implicit cleanup at the end of the `with` block means the
        # statement may not have visibly completed (or failed) from the
        # caller's point of view.
        result.consume()
        return result

In [42]:
# Wipe the graph so the load below starts from an empty database.
# Nodes are deleted (together with their relationships) in batches, each batch
# committed in its own transaction, so clearing a large graph does not have to
# fit into a single transaction. Constraints are left in place.
query = """
MATCH (n)
CALL (n) {
    DETACH DELETE n
} IN TRANSACTIONS OF 10000 ROWS
"""

run_query(query)
print("Database cleared.")


Database cleared.


In [43]:
# Constraints
# One uniqueness constraint on `id` per node label. IF NOT EXISTS makes the
# statements safe to run again on a database that already has them.
constraint_specs = [
    # (constraint name, node variable, label)
    ("customer_id", "c", "Customer"),
    ("terminal_id", "t", "Terminal"),
    ("transaction_id", "tr", "Transaction"),
]
constraints = [
    f"CREATE CONSTRAINT {name} IF NOT EXISTS FOR ({var}:{label}) REQUIRE {var}.id IS UNIQUE"
    for name, var, label in constraint_specs
]

# Each statement is sent on its own.
for statement in constraints:
    run_query(statement)

print("Constraints created.")


Constraints created.


# DB POPULATION

## Load using LOAD CSV

In [44]:
# Load Customers
# Streams dataset_1_customers.csv with LOAD CSV and upserts one Customer node
# per row, keyed on CUSTOMER_ID. Because the node is MERGEd and its properties
# are then SET, re-running the cell updates existing customers instead of
# duplicating them. Work is committed in batches of 500 rows.
# The numeric columns go through toFloat(); the id is stored without any
# conversion, and the relationship cells match customers on that same value.
# NOTE(review): the x_/y_ coordinate columns are stored as lat/lon — confirm
# this axis mapping is intended.
# NOTE(review): the file:/// URL is presumably resolved against the server's
# import directory — confirm the CSVs are placed there.
query = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1_customers.csv' AS row
CALL (row) {
    WITH row
    MERGE (c:Customer {id: row.CUSTOMER_ID})
    SET c.lat = toFloat(row.x_customer_id),
        c.lon = toFloat(row.y_customer_id),
        c.amountMean = toFloat(row.mean_amount),
        c.amountStd = toFloat(row.std_amount),
        c.meanDailyTransactions = toFloat(row.mean_nb_tx_per_day)
} IN TRANSACTIONS OF 500 ROWS
"""

run_query(query)
print("Customers loaded.")

Customers loaded.


In [45]:
# Load Terminals
# Streams dataset_1_terminals.csv with LOAD CSV and upserts one Terminal node
# per row, keyed on TERMINAL_ID (MERGE + SET, so the cell can be re-run without
# creating duplicates). Work is committed in batches of 500 rows.
# Coordinates are converted with toFloat(); the id is stored without conversion.
# NOTE(review): the x_/y_ coordinate columns are stored as lat/lon — confirm
# this axis mapping is intended.
query = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1_terminals.csv' AS row
CALL (row) {
    WITH row
    MERGE (t:Terminal {id: row.TERMINAL_ID})
    SET t.lat = toFloat(row.x_terminal_id),
        t.lon = toFloat(row.y_terminal_id)
} IN TRANSACTIONS OF 500 ROWS
"""

run_query(query)
print("Terminals loaded.")

Terminals loaded.


In [46]:
# Load Transactions
# Streams dataset_1.csv with LOAD CSV and upserts one Transaction node per row,
# keyed on TRANSACTION_ID. MERGE + SET is used (as for customers and terminals)
# so that re-running the cell updates existing nodes instead of failing on the
# uniqueness constraint for Transaction.id. Work is committed in batches of
# 1000 rows.
# amount is converted with toFloat(); isFraud is true only when TX_FRAUD is the
# string "1"; dateTime is stored exactly as it appears in the CSV.
query = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1.csv' AS row
CALL (row) {
    WITH row
    MERGE (tr:Transaction {id: row.TRANSACTION_ID})
    SET tr.dateTime = row.TX_DATETIME,
        tr.amount = toFloat(row.TX_AMOUNT),
        tr.isFraud = (row.TX_FRAUD = "1")
} IN TRANSACTIONS OF 1000 ROWS
"""

run_query(query)
print("Transactions loaded.")

Transactions loaded.


## Relationships

In [56]:
# Bulk USE relationships (Customer -> Terminal)
# Using string split approach (no APOC required)
# For every customer row, the available_terminals column is turned into
# individual terminal ids: '[' and ']' are stripped, the remainder is split on
# commas, each piece is trimmed, and empty pieces are dropped (so an empty list
# yields no relationships).
# NOTE(review): this assumes available_terminals looks like "[1, 2, 3]" with no
# quotes around the ids — confirm against the CSV.
# Both end nodes are looked up with MATCH, so ids with no matching Customer or
# Terminal node produce nothing; the nodes must have been loaded beforehand.
# MERGE keeps the cell re-runnable without duplicating relationships.
# Work is committed in batches of 1000 customer rows.
query = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1_customers.csv' AS row
CALL (row) {
  WITH row
  WITH row, replace(replace(row.available_terminals, '[', ''), ']', '') AS terms_str
  UNWIND split(terms_str, ',') AS t_id
  WITH row, trim(t_id) AS t_id_clean
  WHERE t_id_clean <> ''
  MATCH (c:Customer {id: row.CUSTOMER_ID})
  MATCH (t:Terminal {id: t_id_clean})
  MERGE (c)-[:USE]->(t)
} IN TRANSACTIONS OF 1000 ROWS
"""

run_query(query)
print("USE relationships created in bulk.")

USE relationships created in bulk.


In [51]:
# Bulk PERFORM relationships (Customer -> Transaction)
# Streams dataset_1.csv once; each row links the customer named by CUSTOMER_ID
# to the transaction named by TRANSACTION_ID.
# Both nodes are looked up with MATCH, so rows whose Customer or Transaction
# node does not exist create nothing; the nodes must have been loaded first.
# MERGE keeps the cell re-runnable without duplicating relationships.
# Work is committed in batches of 2000 rows.
query = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1.csv' AS row
CALL (row) {
  WITH row
  MATCH (c:Customer {id: row.CUSTOMER_ID})
  MATCH (tr:Transaction {id: row.TRANSACTION_ID})
  MERGE (c)-[:PERFORM]->(tr)
} IN TRANSACTIONS OF 2000 ROWS
"""

run_query(query)
print("PERFORM relationships created in bulk.")

PERFORM relationships created in bulk.


In [53]:
# Bulk ON relationships (Transaction -> Terminal)
# Streams dataset_1.csv once; each row links the transaction named by
# TRANSACTION_ID to the terminal named by TERMINAL_ID. Note the direction:
# the relationship points from the Transaction to the Terminal.
# Both nodes are looked up with MATCH, so rows whose Transaction or Terminal
# node does not exist create nothing; the nodes must have been loaded first.
# MERGE keeps the cell re-runnable without duplicating relationships.
# Work is committed in batches of 2000 rows.
query = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1.csv' AS row
CALL (row) {
  WITH row
  MATCH (tr:Transaction {id: row.TRANSACTION_ID})
  MATCH (t:Terminal {id: row.TERMINAL_ID})
  MERGE (tr)-[:ON]->(t)
} IN TRANSACTIONS OF 2000 ROWS
"""

run_query(query)
print("ON relationships created in bulk.")

ON relationships created in bulk.


# Close the connection

In [None]:
# Close the shared driver now that all loading cells have run.
# After this, run_query() can no longer be used until the driver is recreated.
driver.close()
print("Connection closed.")
