In [4]:
# Stop a Spark session left over from a previous run of this notebook.
# On a fresh kernel the name `spark` does not exist yet, so the call is guarded;
# otherwise "Restart Kernel -> Run All" fails with a NameError in the first cell.
if "spark" in globals():
    spark.stop()

# General Imports and Spark Config

In [41]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

In [6]:
# Spark settings for this notebook, applied one key at a time to a fresh SparkConf.
config = (
    pyspark.SparkConf()
    # Executor / driver sizing
    .set('spark.executor.memory', '16g')
    .set('spark.executor.cores', '4')
    .set('spark.cores.max', '4')
    .set('spark.driver.memory', '64g')
    .set('spark.executor.instances', '1')
    # Worker cleanup settings
    .set('spark.worker.cleanup.enabled', 'true')
    .set('spark.worker.cleanup.interval', '60')
    .set('spark.worker.cleanup.appDataTtl', '60')
    # MongoDB connector package used by the write cells (format "mongodb")
    .set('spark.jars.packages', 'org.mongodb.spark:mongo-spark-connector:10.0.2')
)

Important: Neo4j has naming conventions: node labels should use camel case beginning with an uppercase letter (e.g. `Account`), and relationship types should be written in all uppercase with underscores between words (e.g. `APPLICATION_CALL`).

Notes from Neo4j regarding the Spark Connector
We recommend individual property fields to be returned, rather than returning graph entity (node, relationship, and path) types. This best maps to Spark’s type system and yields the best results. So instead of writing:

MATCH (p:Person) RETURN p

write the following:

MATCH (p:Person) RETURN id(p) AS id, p.name AS name.

If your query returns a graph entity, use the labels or relationship modes instead.


# Create Spark Session

In [7]:
# Create (or reuse) the Spark session for this notebook, using the settings
# collected in `config` and the standalone master given below.
spark = (
    SparkSession.builder
    .config(conf=config)
    .appName("PatternsInNetwork")
    .master("spark://172.23.149.212:7077")
    .getOrCreate()
)

182337 [Thread-4] WARN  org.apache.spark.util.Utils  - Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
189331 [Thread-4] WARN  org.apache.spark.ExecutorAllocationManager  - Dynamic allocation without a shuffle service is an experimental feature.


# Pattern Detection

## Pattern 1: Large Payments in less than a Day

The main goal of this pattern is to detect accounts that sent a payment transaction larger than 100'000 Algos to an account which then sent an equal or larger amount further in less than a day (the query requires the second payment to exceed the same threshold). A day is represented by the block number difference: as a block usually takes 5 seconds to create, 17280 blocks are created daily. The returned results can be grouped and counted to get an overview of which addresses appear in several of these payment chains.

In [46]:
# Pattern 1 query: chains of two PAYMENT relationships a1 -> a2 -> a3 where both
# amounts exceed the threshold and the second payment happens fewer than 17280
# blocks (the one-day window) after the first.
# The "LIMIT 10" keeps this a sample run; remove it to query the full data.
query = """
MATCH (a1:Account)-[r1:PAYMENT]->(a2:Account)-[r2:PAYMENT]->(a3:Account) 
WHERE a1.account <> a2.account AND r1.amount > 100000000 and r2.amount > 100000000 and r1.blockNumber > 0 and r2.blockNumber > 0 and 0 < r2.blockNumber - r1.blockNumber < 17280 
WITH a1.account AS senderAccount LIMIT 10 
RETURN senderAccount
"""

# Read the query result from Neo4j through the Spark connector.
dfPattern1 = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://172.23.149.212:7687")
    .option("query", query)
    .load()
)

Write results to MongoDB

In [47]:
# Show the raw matches, then aggregate them into one row per sender account,
# sorted by the number of matches (descending).
#
# The aggregated frame is stored back under the name `dfPattern1` because the
# MongoDB write cell persists `dfPattern1`. The guard makes this cell safe to
# re-run: without it, a second run would group the already aggregated frame
# again and silently reset every count to 1.
if "count" not in dfPattern1.columns:
    dfPattern1.show()
    dfPattern1 = (
        dfPattern1
        .groupBy("senderAccount")
        .count()
        .sort(col("count").desc())
    )
dfPattern1.show()

+--------------------+
|       senderAccount|
+--------------------+
|hcm0TFElFZuDv652p...|
|peoH04okZMbRCx1zR...|
|HrMr/5WVzj3n690c4...|
|aiJ6oRQbnowaYL4HB...|
|vlbsPa3gU3e6aQVEB...|
|8inyrMMFCQLZVsCmF...|
|1AN5tmjXGWsGACxSY...|
|/0QK7CnRG355cTp/z...|
|aiJ6oRQbnowaYL4HB...|
|hgZAE/fY2rV909K1j...|
+--------------------+

+--------------------+-----+
|       senderAccount|count|
+--------------------+-----+
|aiJ6oRQbnowaYL4HB...|    2|
|hcm0TFElFZuDv652p...|    1|
|HrMr/5WVzj3n690c4...|    1|
|peoH04okZMbRCx1zR...|    1|
|/0QK7CnRG355cTp/z...|    1|
|vlbsPa3gU3e6aQVEB...|    1|
|8inyrMMFCQLZVsCmF...|    1|
|1AN5tmjXGWsGACxSY...|    1|
|hgZAE/fY2rV909K1j...|    1|
+--------------------+-----+



In [48]:
# Persist the Pattern 1 result to MongoDB; save mode "overwrite" is used.
(
    dfPattern1.write
    .format("mongodb")
    .option('spark.mongodb.connection.uri', 'mongodb://172.23.149.212:27017')
    .mode("overwrite")
    .option('spark.mongodb.database', 'algorand_gold')
    .option('spark.mongodb.collection', 'Patterns_LargePaymentTransactionAccounts_6')
    .option("forceDeleteTempCheckpointLocation", "true")
    .save()
)

174937188 [Thread-4] WARN  org.apache.spark.sql.util.CaseInsensitiveStringMap  - Converting duplicated key forcedeletetempcheckpointlocation into CaseInsensitiveStringMap.


                                                                                

## Pattern 2: Accounts that have created NFTs

The goal of this query is to detect accounts that have created more than 10 NFTs in one day.

In [None]:
# TODO: Pattern 2 is not implemented yet; this cell contains no executable code.
# NOTE(review): the commented-out query below matches PAYMENT relationships
# (the same shape as the Pattern 1 query), not NFT creations. It is kept only
# as a placeholder.
# match (a1:Account)-[r1:PAYMENT]->(a2:Account)-[r2:PAYMENT]->(a3:Account) where a1.account <> a2.account and r1.amount > 100000000 and r2.amount > 100000000 and r2.blockNumber - r1.blockNumber < 17280 return a1.account as senderAccount limit 1

## Pattern 3: SmartContract calls where other people have called the same SC shortly after

The goal of this pattern is to detect smart contracts (SCs) together with the accounts that called the same SC within a short amount of time of each other (less than one day, measured as a block number difference below 17280).

In [None]:
# Pattern 3: two different accounts calling the same application fewer than
# 17280 blocks apart.

# Sample variant: identical to the full query but limited to 10 rows.
# It is NOT passed to the reader below; swap it in for a quick test run.
query1 = """
MATCH (a1:Account)-[r1:APPLICATION_CALL]->(app:Application)<-[r2:APPLICATION_CALL]-(a2:Account) 
WHERE a1.account <> a2.account AND r1.blockNumber > 0 AND r2.blockNumber > 0 AND abs(r2.blockNumber - r1.blockNumber) < 17280 
WITH a1.account AS account, app.application AS application LIMIT 10 
RETURN DISTINCT application, account
"""

# Full-data variant (no LIMIT); this is the query that is executed.
query = """
MATCH (a1:Account)-[r1:APPLICATION_CALL]->(app:Application)<-[r2:APPLICATION_CALL]-(a2:Account) 
WHERE a1.account <> a2.account AND r1.blockNumber > 0 AND r2.blockNumber > 0 AND abs(r2.blockNumber - r1.blockNumber) < 17280 
WITH a1.account AS account, app.application AS application
RETURN DISTINCT application, account
"""

# Read the query result from Neo4j through the Spark connector.
dfPattern3 = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://172.23.149.212:7687")
    .option("query", query)
    .load()
)

In [None]:
# Preview the result with show()'s defaults written out: up to 20 rows,
# long cell values truncated.
dfPattern3.show(n=20, truncate=True)

Write the results into MongoDB

In [22]:
# Persist the Pattern 3 result to MongoDB; save mode "overwrite" is used.
(
    dfPattern3.write
    .format("mongodb")
    .option('spark.mongodb.connection.uri', 'mongodb://172.23.149.212:27017')
    .mode("overwrite")
    .option('spark.mongodb.database', 'algorand_gold')
    .option('spark.mongodb.collection', 'Patterns_ScCallsFromDifferentAcc_6')
    .option("forceDeleteTempCheckpointLocation", "true")
    .save()
)

83778629 [Thread-4] WARN  org.apache.spark.sql.util.CaseInsensitiveStringMap  - Converting duplicated key forcedeletetempcheckpointlocation into CaseInsensitiveStringMap.


                                                                                

## Pattern 4: Accounts that had a lot of transfers between each other

The goal is to find pairs of accounts where one account sent more than 100 payment transactions to the other, each with an amount smaller than 100 Algos.

In [49]:
# Pattern 4: sender/receiver pairs with more than 100 small PAYMENT
# relationships from the sender to the receiver, ordered by that count.

# Sample variant: same query with "LIMIT 10" in the WITH clause.
# It is NOT passed to the reader below; swap it in for a quick test run.
query1 = """
MATCH (a1:Account)-[r:PAYMENT]->(a2:Account) 
WHERE r.amount < 100000 AND a1.account <> a2.account 
WITH count(r) AS rel_count, a1.account AS senderAccount, a2.account AS receiverAccount LIMIT 10 
WHERE rel_count > 100 
RETURN senderAccount, receiverAccount, rel_count
ORDER BY rel_count DESC
"""

# Full-data variant (no LIMIT); this is the query that is executed.
query = """
MATCH (a1:Account)-[r:PAYMENT]->(a2:Account) 
WHERE r.amount < 100000 AND a1.account <> a2.account 
WITH count(r) AS rel_count, a1.account AS senderAccount, a2.account AS receiverAccount
WHERE rel_count > 100 
RETURN senderAccount, receiverAccount, rel_count
ORDER BY rel_count DESC
"""

# Read the query result from Neo4j through the Spark connector.
dfPattern4 = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://172.23.149.212:7687")
    .option("query", query)
    .load()
)

In [50]:
# Preview the result with show()'s defaults written out: up to 20 rows,
# long cell values truncated.
dfPattern4.show(n=20, truncate=True)

[Stage 32:>                                                         (0 + 1) / 1]

+--------------------+--------------------+---------+
|       senderAccount|     receiverAccount|rel_count|
+--------------------+--------------------+---------+
|7OUGoX3hg950O7LF5...|oQY98gUjLRNgQVqyg...|    19306|
|7OUGoX3hg950O7LF5...|iRhe80TuSql+RpNPP...|    19306|
|7OUGoX3hg950O7LF5...|xQbYMccFxcDsIGRYy...|    19306|
|7OUGoX3hg950O7LF5...|1YbO3KX7m2KBrydeF...|    19305|
|7OUGoX3hg950O7LF5...|aCYVTqsazIyWyYgpo...|    16755|
|7OUGoX3hg950O7LF5...|hTapIPF+i+W+MImEO...|    16755|
|7OUGoX3hg950O7LF5...|x8IMpA860wzXTCVAG...|    16754|
|7OUGoX3hg950O7LF5...|jA0BZe0eV6eA+fid3...|    16754|
|7OUGoX3hg950O7LF5...|+oR7zi4ucUJZCVWHc...|    16754|
|7OUGoX3hg950O7LF5...|xjQhuSoCIUsugjmaV...|    16753|
+--------------------+--------------------+---------+



                                                                                

Save the results in MongoDB

In [51]:
# Persist the Pattern 4 result to MongoDB; save mode "overwrite" is used.
(
    dfPattern4.write
    .format("mongodb")
    .option('spark.mongodb.connection.uri', 'mongodb://172.23.149.212:27017')
    .mode("overwrite")
    .option('spark.mongodb.database', 'algorand_gold')
    .option('spark.mongodb.collection', 'Patterns_AccountsWithManyPaymentTransactions_6')
    .option("forceDeleteTempCheckpointLocation", "true")
    .save()
)

175044676 [Thread-4] WARN  org.apache.spark.sql.util.CaseInsensitiveStringMap  - Converting duplicated key forcedeletetempcheckpointlocation into CaseInsensitiveStringMap.


                                                                                

# Patterns that make use of GraphAlgorithms

Create the graph projection that is needed for the algorithms.

In [56]:
# Project the in-memory GDS graph "paymentGraph": Account nodes connected by
# PAYMENT relationships, carrying the blockNumber and amount properties.
# The centrality queries further down stream from this projection by name.
query = """
 CALL gds.graph.project(
  "paymentGraph",
  "Account",                         
  {
    PAYMENT: {properties: ["blockNumber", "amount"]}
  }           
)
 YIELD
  graphName AS graph, nodeProjection, nodeCount AS nodes, relationshipProjection, relationshipCount AS rels
 RETURN graph, nodeProjection, nodes, relationshipProjection, rels
"""

# NOTE(review): no Spark action (show/collect/write) is called on this
# DataFrame anywhere in the notebook; the projection is presumably created when
# the connector runs the query during load() - confirm.
dfPaymentGraphProjection = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://172.23.149.212:7687")
    .option("query", query)
    .option("partitions", "1")
    .load()
)

## Pattern 5: Degree Centrality in Payment Senders

The goal of this pattern is to compute the degree centrality of payment senders. We search for the 50 nodes with the highest degrees to detect the most important accounts.

In [57]:
# Pattern 5 query: stream degree scores from the "paymentGraph" projection,
# resolve each node id to its account property and keep the 50 highest degrees.
query = """
CALL gds.degree.stream('paymentGraph')
YIELD nodeId, score
WITH gds.util.asNode(nodeId).account AS account, score AS degree
ORDER BY degree DESC limit 50
RETURN account, degree
"""

# Read the query result from Neo4j through the Spark connector (one partition).
dfPattern5 = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://172.23.149.212:7687")
    .option("query", query)
    .option("partitions", "1")
    .load()
)

In [58]:
# Preview the result with show()'s defaults written out: up to 20 of the
# 50 rows, long cell values truncated.
dfPattern5.show(n=20, truncate=True)

[Stage 34:>                                                         (0 + 1) / 1]

+--------------------+---------+
|             account|   degree|
+--------------------+---------+
|fFAiP2bRer9k3bTSg...|6504460.0|
|k3ZZQqtbbSRtH4CZA...|5071150.0|
|9orEqXoI5YtIu44xk...|3515002.0|
|7OUGoX3hg950O7LF5...|1642024.0|
|6ryd57z3fIj/GPds1...|1602807.0|
|k//OpTHinxgS6cnqg...| 644163.0|
|UGfOsbi2Q/pWY9x3v...| 576457.0|
|fFAuGRCaSj1LKCQMD...| 479382.0|
|PSeTViEF0j5/99D/o...| 473470.0|
|peoH04okZMbRCx1zR...| 328767.0|
|Z7nV8rzyDgyjW0uMK...| 285710.0|
|Xalg5fUMtcmMI8/0u...| 273877.0|
|M9xnoAs0RQqazEtnV...| 264448.0|
|iNQ6/FV11ndp1wpFJ...| 227239.0|
|bbz5NtJ77OUOH7GBO...| 196938.0|
|Q0jCBw2XUaEy/En2H...| 149829.0|
|FFc9/RB3H94ir+XH+...| 108081.0|
|1SC4OajDfSHywS88T...|  96464.0|
|3uZHp1X66koXtE8vl...|  96277.0|
|wS+i/DTz5ouQcFB5L...|  92565.0|
+--------------------+---------+
only showing top 20 rows



                                                                                

In [40]:
# Persist the Pattern 5 result to MongoDB; save mode "overwrite" is used.
(
    dfPattern5.write
    .format("mongodb")
    .option('spark.mongodb.connection.uri', 'mongodb://172.23.149.212:27017')
    .mode("overwrite")
    .option('spark.mongodb.database', 'algorand_gold')
    .option('spark.mongodb.collection', 'Patterns_DegreeCentrality_Top50')
    .option("forceDeleteTempCheckpointLocation", "true")
    .save()
)

174331967 [Thread-4] WARN  org.apache.spark.sql.util.CaseInsensitiveStringMap  - Converting duplicated key forcedeletetempcheckpointlocation into CaseInsensitiveStringMap.


                                                                                

## Pattern 6: Eigenvector Centrality in Payment Senders

The goal of this pattern is to compute the eigenvector centrality of payment senders and to list the 10 accounts with the highest scores.

In [59]:
# Pattern 6 query: stream eigenvector scores from the "paymentGraph" projection,
# resolve each node id to its account property and keep the 10 highest scores.
query = """
CALL gds.eigenvector.stream('paymentGraph')
YIELD nodeId, score
WITH gds.util.asNode(nodeId).account AS account, score as eigenVectorScore
ORDER BY eigenVectorScore DESC limit 10
RETURN account, eigenVectorScore
"""

# Read the query result from Neo4j through the Spark connector (one partition).
dfPattern6 = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://172.23.149.212:7687")
    .option("query", query)
    .option("partitions", "1")
    .load()
)

In [60]:
# Preview the result with show()'s defaults written out: up to 20 rows,
# long cell values truncated.
dfPattern6.show(n=20, truncate=True)

[Stage 35:>                                                         (0 + 1) / 1]

+--------------------+--------------------+
|             account|    eigenVectorScore|
+--------------------+--------------------+
|fFAiP2bRer9k3bTSg...|  0.9999999999999882|
|t4wIVEExOPn4WuJ80...|1.537406877245335E-7|
|fFAuGRCaSj1LKCQMD...|8.762774427687245...|
|PSeTViEF0j5/99D/o...|7.935811578806059...|
|Y/Nb1zwuKQ1WeXjPw...|4.727240903047691...|
|+/BT4moSzMB+a6+vC...|4.72723981289393E-14|
|DxXEziPamwtB3jgY7...|2.363619906446965...|
|gYR87zBKZh28v3Nze...|2.363619906446965...|
|1SC4OajDfSHywS88T...|2.422718774106649...|
|E6qBx6zBP9/dgxFcK...|4.918362989740098...|
+--------------------+--------------------+



                                                                                

In [39]:
# Persist the Pattern 6 result to MongoDB; save mode "overwrite" is used.
(
    dfPattern6.write
    .format("mongodb")
    .option('spark.mongodb.connection.uri', 'mongodb://172.23.149.212:27017')
    .mode("overwrite")
    .option('spark.mongodb.database', 'algorand_gold')
    .option('spark.mongodb.collection', 'Patterns_EigenvectorCentrality_Top10')
    .option("forceDeleteTempCheckpointLocation", "true")
    .save()
)

174307838 [Thread-4] WARN  org.apache.spark.sql.util.CaseInsensitiveStringMap  - Converting duplicated key forcedeletetempcheckpointlocation into CaseInsensitiveStringMap.


                                                                                

# Stopping Spark Context

Stopping context and removing the graph projection

In [55]:
# Drop the "paymentGraph" projection now that the centrality patterns are done.
query = """
CALL gds.graph.drop('paymentGraph') 
YIELD graphName 
RETURN graphName
"""

# NOTE(review): the result is assigned to the same variable that held the
# projection result, and no Spark action is called on it; the drop presumably
# takes effect when the connector runs the query during load() - confirm.
dfPaymentGraphProjection = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://172.23.149.212:7687")
    .option("query", query)
    .load()
)

In [None]:
# Shut down the Spark session created in the "Create Spark Session" section.
spark.stop()