# Database initialization

In [None]:
import os

from neo4j import GraphDatabase

# Connection settings. Each value can be overridden through an environment
# variable so that credentials do not have to be edited into (or committed
# with) the notebook. The fallbacks are the original local-development values.
URI = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4jproject123")
DB = os.environ.get("NEO4J_DATABASE", "test2")

# Single driver object shared by every cell below; it is closed in the last cell.
# NOTE(review): `database` is forwarded as a driver-level keyword, presumably
# acting as the default database for sessions opened from this driver - confirm
# against the installed driver version.
driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD), database=DB)

In [16]:
def run_query(query, params=None):
    """Run a Cypher statement in its own session and return its records.

    Args:
        query: Cypher text to execute.
        params: Optional mapping of query parameters; ``None`` (or any falsy
            value) is sent as an empty dict.

    Returns:
        A list holding every record produced by the statement (empty for
        statements that return nothing).
    """
    with driver.session() as session:
        result = session.run(query, params or {})
        # Materialise the records while the session is still open: the result
        # object is tied to the session, so handing it back after the `with`
        # block has closed the session leaves the caller with nothing readable.
        return list(result)

In [None]:
# Start from an empty graph: delete every node together with its relationships.
clear_graph_cypher = """
MATCH (n) DETACH DELETE n
"""
run_query(clear_graph_cypher)

print("Database cleared.")


Database cleared.


In [18]:
# Constraints: the `id` property must be unique for each of the three node labels.
# `IF NOT EXISTS` makes every statement safe to run again.
constraints = [
    "CREATE CONSTRAINT customer_id IF NOT EXISTS FOR (c:Customer) REQUIRE c.id IS UNIQUE",
    "CREATE CONSTRAINT terminal_id IF NOT EXISTS FOR (t:Terminal) REQUIRE t.id IS UNIQUE",
    "CREATE CONSTRAINT transaction_id IF NOT EXISTS FOR (tr:Transaction) REQUIRE tr.id IS UNIQUE"
]

# One statement per call, in list order.
for constraint_statement in constraints:
    run_query(constraint_statement)

print("Constraints created.")


Constraints created.


# DB POPULATION

## Load using LOAD CSV

In [23]:
# Load Customers
# One Customer node per CSV row, merged on `id`; the numeric columns are cast
# with toFloat, and rows are committed in batches of 500.
# The file:/// URL is resolved by the Neo4j server, not by this notebook
# (presumably from its import directory - verify the server configuration).
load_customers_cypher = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1_customers.csv' AS row
CALL (row) {
    WITH row
    MERGE (c:Customer {id: row.CUSTOMER_ID})
    SET c.lat = toFloat(row.x_customer_id),
        c.lon = toFloat(row.y_customer_id),
        c.amountMean = toFloat(row.mean_amount),
        c.amountStd = toFloat(row.std_amount),
        c.meanDailyTransactions = toFloat(row.mean_nb_tx_per_day)
} IN TRANSACTIONS OF 500 ROWS
"""
run_query(load_customers_cypher)

print("Customers loaded.")

Customers loaded.


In [24]:
# Load Terminals
# One Terminal node per CSV row, merged on `id`, with its two coordinates cast
# to floats; rows are committed in batches of 500.
load_terminals_cypher = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1_terminals.csv' AS row
CALL (row) {
    WITH row
    MERGE (t:Terminal {id: row.TERMINAL_ID})
    SET t.lat = toFloat(row.x_terminal_id),
        t.lon = toFloat(row.y_terminal_id)
} IN TRANSACTIONS OF 500 ROWS
"""
run_query(load_terminals_cypher)

print("Terminals loaded.")

Terminals loaded.


In [None]:
# Load Transactions
# One Transaction node is created (not merged) per CSV row, committed in
# batches of 1000. `dateTime` is stored exactly as read from the CSV, `amount`
# is cast to a float, and `isFraud` is true only when TX_FRAUD equals "1".
load_transactions_cypher = """
LOAD CSV WITH HEADERS FROM 'file:///dataset_1.csv' AS row
CALL (row) {
    WITH row
    CREATE (tr:Transaction {
        id: row.TRANSACTION_ID,
        dateTime: row.TX_DATETIME,
        amount: toFloat(row.TX_AMOUNT),
        isFraud: row.TX_FRAUD = "1"
    })
} IN TRANSACTIONS OF 1000 ROWS
"""
run_query(load_transactions_cypher)

print("Transactions loaded.")

<neo4j._sync.work.result.Result at 0x124e5c290>

## Relationships

In [None]:
# TODO: it looks like the relationships are not being created

In [None]:
import pandas as pd

# Read the three CSV files into DataFrames (customers, terminals, transactions,
# in that order); the relationship cells below iterate over these frames.
customers, terminals, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/dataset_1_customers.csv",
        "../data/dataset_1_terminals.csv",
        "../data/dataset_1.csv",
    )
)

In [20]:
#Relationships
def link_customer_terminals(customer_id, terminal_list):
    """Create a ``USE`` relationship from one customer to each listed terminal.

    The LOAD CSV cells store every node ``id`` exactly as read from the CSV,
    i.e. as a string. Ids coming from the pandas frames are presumably parsed
    as integers, and an integer never equals a string in Cypher, so the MATCH
    clauses would find nothing and no relationship would be created. Both
    parameters are therefore converted to strings before the query is sent.

    Args:
        customer_id: Customer identifier (any value whose ``str()`` equals the
            id text in the CSV).
        terminal_list: Iterable of terminal identifiers, same convention.
    """
    query = """
    MATCH (c:Customer {id: $customer_id})
    UNWIND $terminal_list AS t_id
    MATCH (t:Terminal {id: t_id})
    MERGE (c)-[:USE]->(t)
    """
    params = {
        "customer_id": str(customer_id),
        "terminal_list": [str(terminal_id) for terminal_id in terminal_list],
    }
    run_query(query, params)


def link_customer_transaction(customer_id, transaction_id):
    """Create a ``PERFORM`` relationship from a customer to a transaction.

    Node ids are stored as strings by the LOAD CSV cells, while callers
    presumably pass the integers pandas parsed from the same files. Both ids
    are converted to strings so that the MATCH clauses can find the nodes.

    Args:
        customer_id: Customer identifier (any value whose ``str()`` equals the
            id text in the CSV).
        transaction_id: Transaction identifier, same convention.
    """
    query = """
    MATCH (c:Customer {id: $customer_id})
    MATCH (tr:Transaction {id: $transaction_id})
    MERGE (c)-[:PERFORM]->(tr)
    """
    params = {
        "customer_id": str(customer_id),
        "transaction_id": str(transaction_id),
    }
    run_query(query, params)

def link_terminal_transaction(terminal_id, transaction_id):
    """Create an ``ON`` relationship from a terminal to a transaction.

    Node ids are stored as strings by the LOAD CSV cells, while callers
    presumably pass the integers pandas parsed from the same files. Both ids
    are converted to strings so that the MATCH clauses can find the nodes.

    Args:
        terminal_id: Terminal identifier (any value whose ``str()`` equals the
            id text in the CSV).
        transaction_id: Transaction identifier, same convention.
    """
    query = """
    MATCH (t:Terminal {id: $terminal_id})
    MATCH (tr:Transaction {id: $transaction_id})
    MERGE (t)-[:ON]->(tr)
    """
    params = {
        "terminal_id": str(terminal_id),
        "transaction_id": str(transaction_id),
    }
    run_query(query, params)


In [27]:
import ast

# Turn the textual list literal in `available_terminals` into a real Python object.
# Values that are no longer strings are passed through untouched, so running
# this cell a second time is a no-op instead of raising ValueError when
# literal_eval is handed an already-parsed list.
customers["available_terminals"] = customers["available_terminals"].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [30]:
# Connect each customer to the terminals listed in its `available_terminals` entry
# (one query per customer row).
for _row_index, customer_row in customers.iterrows():
    link_customer_terminals(
        customer_id=customer_row["CUSTOMER_ID"],
        terminal_list=customer_row["available_terminals"],
    )


In [None]:
# Attach every transaction to the customer recorded on its row
# (one query per transaction row).
for _row_index, transaction_row in transactions.iterrows():
    link_customer_transaction(
        customer_id=transaction_row["CUSTOMER_ID"],
        transaction_id=transaction_row["TRANSACTION_ID"],
    )


In [31]:
# Attach every transaction to the terminal recorded on its row
# (one query per transaction row).
for _row_index, transaction_row in transactions.iterrows():
    link_terminal_transaction(
        terminal_id=transaction_row["TERMINAL_ID"],
        transaction_id=transaction_row["TRANSACTION_ID"],
    )

# Close the connection

In [None]:
# Close the shared driver created in the first code cell; cells that call
# run_query need a new driver object before they can be run again.
driver.close()
print("Connection closed.")
