In [1]:
import pandas as pd

# Load the transaction log CSV into a DataFrame; index_col=False keeps the
# first CSV column as regular data instead of using it as the row index.
df = pd.read_csv(
    '/home/ntak-mac/blobfuse_mnt/datasets/PS_20174392719_1491204439457_log.csv',
    index_col=False,
)

In [2]:
# Preview the first rows to check the column layout and value formats.
df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [3]:
# List the column names available in the loaded frame.
df.columns

Index(['step', 'type', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
       'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud',
       'isFlaggedFraud'],
      dtype='object')

In [7]:
# Number of rows per transaction type.
df['type'].value_counts()

type
CASH_OUT    2237500
PAYMENT     2151495
CASH_IN     1399284
TRANSFER     532909
DEBIT         41432
Name: count, dtype: int64

In [8]:
# Keep only the rows labelled as fraud (isFraud == 1) and list which
# transaction types occur among them. Only the last expression of a cell is
# displayed, so the former mid-cell `.head()` call had no visible effect and
# has been removed.
fraudulent_transactions = df[df['isFraud'] == 1]
fraudulent_transactions['type'].unique()

array(['TRANSFER', 'CASH_OUT'], dtype=object)

In [9]:
# Drop every transaction whose destination account name starts with 'M'
# (presumably merchant accounts — TODO confirm), then preview the result.
non_merch_transactions = df.loc[~df['nameDest'].str.startswith('M')]
non_merch_transactions.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
9,1,DEBIT,5337.77,C712410124,41720.0,36382.23,C195600860,41898.0,40348.79,0,0
10,1,DEBIT,9644.94,C1900366749,4465.0,0.0,C997608398,10845.0,157982.12,0,0
15,1,CASH_OUT,229133.94,C905080434,15325.0,0.0,C476402209,5083.0,51513.44,0,0


In [10]:
# Restrict the non-merchant transactions to the TRANSFER and CASH_OUT types.
# Despite the variable name, no isFraud filter is applied here: fraudulent and
# legitimate rows of those two types are both kept.
non_merch_fraud_trans = non_merch_transactions[
    non_merch_transactions['type'].isin(['TRANSFER', 'CASH_OUT'])
]

In [12]:
# Count the non-null `step` values in the TRANSFER/CASH_OUT subset
# (equals the row count only if `step` has no missing values — TODO confirm).
non_merch_fraud_trans['step'].count()

2770409

In [13]:
# Count the fraud-labelled rows (isFraud == 1) among the non-merchant transactions.
non_merch_transactions.loc[non_merch_transactions['isFraud'] == 1, 'step'].count()

8213

In [14]:
# Export the TRANSFER/CASH_OUT non-merchant subset as a CSV with a header row,
# for the Neo4j LOAD CSV step later in this notebook.
# NOTE(review): the file is written under /home/dsolomos/, while the comment below
# says Neo4j's import directory is mounted from /home/ntak-mac/blobfuse_mnt/neo4j/import/ —
# confirm the CSV ends up where 'file:///non_merch_fraud_trans.csv' resolves.
# NOTE(review): `index` is not passed, so the DataFrame index is presumably written
# as an extra leading column — confirm that is wanted.
non_merch_fraud_trans.to_csv('/home/dsolomos/non_merch_fraud_trans.csv', header=True) 
# '/home/ntak-mac/blobfuse_mnt/neo4j/import/' is mounted to Neo4j imports directory in the /home/ntak-mac/docker-neo4j/docker-compose.yml

In [15]:
# !wget https://github.com/neo4j/apoc/releases/download/5.18.0/apoc-5.18.0-core.jar # download the APOC JAR file that matches the docker-compose Neo4j version and put jar in /home/ntak-mac/blobfuse_mnt/neo4j/plugins/ mounted in $NEO4J_HOME/plugins
# !docker-compose up -d
# !sudo docker ps
# !sudo docker logs 2876919f6055
# !docker exec -it 2876919f6055 ls /plugins

In [17]:
from neo4j import GraphDatabase
from creds import username,password
import logging

ModuleNotFoundError: No module named 'creds'

In [None]:
from neo4j import GraphDatabase
from creds import username,password
import logging

# Connection settings for the Neo4j instance. `username` and `password` are
# used exactly as imported from the local `creds` module.
uri = "bolt://172.174.47.18:7999"

driver = GraphDatabase.driver(uri, auth=(username, password))

# Route INFO-and-above messages through the root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Function to run a Cypher query and capture logs/errors
def run_cypher_query(query):
    """Run a single Cypher statement in its own session and log the outcome.

    Every row returned by the statement is read and logged *before* the
    result is consumed. This matters for procedures such as
    ``apoc.periodic.iterate``, which report failed batches and error messages
    in their output row instead of raising an error; consuming the result
    first would silently discard that information. Intended for statements
    that return only a handful of rows (DDL, bulk-load procedures).

    Args:
        query: Cypher text to execute.

    Returns:
        The driver's result object on success. It has already been consumed
        and its session is closed, so it only serves as a "succeeded" marker.
        ``None`` if any exception was raised while running the statement.
    """
    try:
        with driver.session() as session:
            result = session.run(query)
            # Materialise the rows first: consume() drops any unread records.
            rows = [record.data() for record in result]
            summary = result.consume()
            for row in rows:
                logger.info(f"Query output: {row}")
            # Log the update counters rather than the bare summary object,
            # whose default repr carries no useful information.
            logger.info(f"Query executed successfully: {summary.counters}")
            return result
    except Exception as e:
        # Best effort by design: report the failure and signal it with None.
        logger.error(f"Error occurred: {str(e)}")
        return None

# Cypher statement that declares a uniqueness constraint named `customer_name`
# on the `name` property of `Customer` nodes. `IF NOT EXISTS` makes the
# statement a no-op when the constraint is already present.
create_constraint_query = """
CREATE CONSTRAINT customer_name IF NOT EXISTS 
FOR (c:Customer) REQUIRE c.name IS UNIQUE;
"""

# Cypher statement that bulk-loads the exported CSV through
# apoc.periodic.iterate: the first inner statement streams the CSV rows, the
# second is applied to them in batches of 10000.
#
# For each row it merges the origin and destination Customer nodes (keyed by
# name), refreshes their balance properties, and merges a TRANSFER
# relationship carrying the amount, the flagged-fraud marker and the type.
#
# - isFlaggedFraud: CSV values arrive as the strings '0'/'1'. toBoolean() only
#   parses 'true'/'false' strings and returns null otherwise, which would leave
#   the property unset on every relationship; compare against '1' instead.
# - parallel: false — batches MERGE onto shared Customer nodes, so running
#   them concurrently makes them compete for the same node locks and can
#   deadlock, failing batches that are only reported in the procedure output.
# - NOTE(review): MERGE on the relationship keeps one TRANSFER per ordered
#   customer pair, so repeated transactions between the same two accounts
#   overwrite each other — confirm this is intended.
load_data_query = """
CALL apoc.periodic.iterate(
  "
  LOAD CSV WITH HEADERS FROM 'file:///non_merch_fraud_trans.csv' AS row
  RETURN row
  ",
  "
  MERGE (s:Customer {name: row.nameOrig})
  SET s.oldbalanceOrg = toFloat(row.oldbalanceOrg),
      s.newbalanceOrig = toFloat(row.newbalanceOrig)

  MERGE (r:Customer {name: row.nameDest})
  SET r.oldbalanceDest = toFloat(row.oldbalanceDest),
      r.newbalanceDest = toFloat(row.newbalanceDest)

  MERGE (s)-[t:TRANSFER]->(r)
  SET t.amount = toFloat(row.amount),
      t.isFlaggedFraud = (row.isFlaggedFraud = '1'),
      t.type = row.type
  ",
  {batchSize: 10000, parallel: false}
)
"""

# Run both statements, always releasing the driver afterwards, and report the
# real outcome. run_cypher_query logs and swallows exceptions, returning None
# on failure, so the return values must be checked before claiming success.
try:
    # Step 1: Run the query to create the constraint
    logger.info("Creating constraint...")
    constraint_result = run_cypher_query(create_constraint_query)

    # Step 2: Run the query to load data from the CSV
    logger.info("Loading data from CSV...")
    load_result = run_cypher_query(load_data_query)
finally:
    # Close the driver even if the cell is interrupted mid-load
    driver.close()

if constraint_result is None or load_result is None:
    print("Constraint creation or data loading failed; see the logged errors above.")
else:
    print("Constraint creation and data loading complete.")