In [19]:
from neo4j import GraphDatabase
import pandas as pd

In [2]:
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "abadeus1998"))

In [11]:
def cypher_query_read_executor(query):
    with driver.session(database="frauddetect") as session:
        return session.execute_read(lambda tx: [record.data() for record in tx.run(query)])
    
    
def cypher_query_write_executor(query, parameters=None):
    with driver.session(database="frauddetect") as session:
        return session.execute_write(lambda tx: tx.run(query, parameters or {}))


In [23]:
def get_relationships():
    query = """MATCH (n)-[r]->(m) RETURN DISTINCT labels(n), type(r), labels(m)"""
    results = cypher_query_read_executor(query)
    return results

In [3]:
def get_top_clients(tx):
    query = """
    MATCH (c:Client)-[:POSSEDE]->(:Compte)-[:REALISE]->(t:Transaction)
    RETURN c.name AS client, COUNT(t) AS nb_transacs ORDER BY nb_transacs DESC LIMIT 10
    """
    return list(tx.run(query))


In [24]:
relations = get_relationships()
relations_df = pd.DataFrame(relations)
# for row in relations:
#     print(row)
relations_df

Unnamed: 0,labels(n),type(r),labels(m)
0,[Client],PERFORMED,"[CashIn, Transaction]"
1,"[CashIn, Transaction]",TO,[Merchant]
2,"[Client, Mule]",PERFORMED,"[Transfer, Transaction]"
3,"[Transfer, Transaction]",TO,"[Client, Mule]"
4,[Client],PERFORMED,"[Payment, Transaction]"
5,"[Payment, Transaction]",TO,[Merchant]
6,[Client],PERFORMED,"[Transfer, Transaction]"
7,[Client],PERFORMED,"[Debit, Transaction]"
8,"[Debit, Transaction]",TO,[Bank]
9,[Client],PERFORMED,"[CashOut, Transaction]"


#### 1. Identifier les entités

In [22]:
def get_entities():
    query = """MATCH (n) RETURN labels(n) AS type, count(*) AS total ORDER BY total DESC;"""
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

entities = get_entities()
entities

Unnamed: 0,type,total
0,"[CashIn, Transaction]",149037
1,"[CashOut, Transaction]",76023
2,"[Payment, Transaction]",74577
3,"[Transfer, Transaction]",19460
4,"[Debit, Transaction]",4392
5,[SSN],2238
6,[Phone],2234
7,[Email],2229
8,[Client],2000
9,"[Client, Mule]",433


### 2. Recherche de clients actifs avec de nombreuses transactions

In [25]:
def get_transactions_list():
    query = """
    MATCH (c:Client)-[:PERFORMED]->(t:Transaction)
    RETURN c.name AS client, labels(t) AS types, count(t) AS nb_transactions
    ORDER BY nb_transactions DESC;
    """
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

transactions_list = get_transactions_list()
transactions_list

Unnamed: 0,client,types,nb_transactions
0,Daniel Hendrix,"[CashIn, Transaction]",955
1,Isabella Grant,"[CashIn, Transaction]",743
2,Aubree David,"[CashIn, Transaction]",602
3,Isabella Grant,"[CashOut, Transaction]",550
4,Evelyn Craig,"[CashIn, Transaction]",532
...,...,...,...
9324,Stella Beck,"[Debit, Transaction]",1
9325,Ellie Russell,"[Debit, Transaction]",1
9326,Bella Little,"[Debit, Transaction]",1
9327,Levi Bolton,"[Debit, Transaction]",1


### 3. Chaînes de transactions (chemins entre clients)


In [31]:
def get_transactions_strings():
    query="""MATCH path = (c1:Client)-[:PERFORMED]->(t1:Transfer)-[:TO]-> (c2:Client)-[:PERFORMED]->(t2:Transfer)-[:TO]->(c3:Client)
    RETURN path"""
    query_1 = """
    MATCH path = (c:Client)-[:PERFORMED]->(:Transfer)-[:TO*1..3]->(other:Client)
    RETURN path"""
    results = cypher_query_read_executor(query_1)
    results_df = pd.DataFrame.from_dict(results)
    return results_df

transactions_strings = get_transactions_strings()
transactions_strings

Unnamed: 0,path
0,"[{'name': 'Dylan Hunt', 'id': '478095878033882..."
1,"[{'name': 'Serenity Jacobson', 'id': '46223573..."
2,"[{'name': 'Brooklyn Harper', 'id': '4805783504..."
3,"[{'name': 'Claire Witt', 'id': '46549092099387..."
4,"[{'name': 'Oliver Daniel', 'id': '486800889836..."
...,...
19455,"[{'name': 'Michael Herman', 'id': '49958758099..."
19456,"[{'name': 'Ella Bush', 'id': '4025060919204955..."
19457,"[{'name': 'Elijah Byers', 'id': '4708996503243..."
19458,"[{'name': 'Adrian Carney', 'id': '492824416937..."


### 4. Clients communs à plusieurs marchands

In [35]:
def common_clients_to_merchants():
    query="""
    MATCH (c:Client)-[:PERFORMED]->(t)-[:TO]->  (m:Merchant)
    WITH c, COLLECT(DISTINCT m) as merchants
    WHERE SIZE(merchants) > 1
    RETURN c.name as client, SIZE(merchants) as nb_merchants
    ORDER BY nb_merchants DESC"""
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

merch_clients = common_clients_to_merchants()
merch_clients

Unnamed: 0,client,nb_merchants
0,Daniel Hendrix,346
1,Isabella Grant,344
2,Aubree David,337
3,Andrea Sweet,330
4,Michael Cooper,330
...,...,...
1992,Adrian Hensley,2
1993,Audrey Atkinson,2
1994,Violet Marks,2
1995,Christopher Bradley,2


In [48]:
def create_client_graph():
    query = """
    CALL gds.graph.project.cypher(
    'clientGraph1',
    'MATCH (c:Client) RETURN id(c) AS id',
    '
    MATCH (c1:Client)-[:PERFORMED]->(t:Transaction)<-[:TO]-(c2:Client)
    WHERE c1 <> c2
    RETURN id(c1) AS source, id(c2) AS target
    '
    )
    """
    graph_result = cypher_query_read_executor(query)
    return graph_result

client_graph = create_client_graph()



### Partie 5 : Détection de communautés (Louvain)

In [49]:
def detect_communities():
    query = """
    CALL gds.louvain.stream('clientGraph1')
    YIELD nodeId, communityId
    RETURN gds.util.asNode(nodeId).name AS client, communityId
    ORDER BY communityId DESC
    """
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

communities = detect_communities()
communities

Unnamed: 0,client,communityId
0,Adrian Jacobson,2432
1,Katherine Jacobson,2431
2,Nathaniel Myers,2430
3,Xavier Welch,2429
4,Elijah Warren,2428
...,...,...
2428,Lauren Mack,4
2429,Landon Adams,3
2430,Faith Dotson,2
2431,Dominic Boyer,1


In [50]:
def write_communities():
    query = """
    CALL gds.louvain.write('clientGraph1', { writeProperty: 'community' })
    YIELD communityCount, modularity
    """
    results = cypher_query_write_executor(query)
    # results_df = pd.DataFrame.from_records(results)
    return results

com_writing = write_communities()
com_writing

<neo4j._sync.work.result.Result at 0x1f894017500>

In [51]:
def show_communities():
    query = """
    MATCH (c:Client)
    RETURN c.name, c.community
    """
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

client_communities = show_communities()
client_communities

Unnamed: 0,c.name,c.community
0,Bentley Peck,0
1,Dominic Boyer,1
2,Faith Dotson,2
3,Landon Adams,3
4,Lauren Mack,4
...,...,...
2428,Elijah Warren,2428
2429,Xavier Welch,2429
2430,Nathaniel Myers,2430
2431,Katherine Jacobson,2431


### page rank

In [52]:
def detect_suspect_nodes_with_pagerank():
    query = """
    CALL gds.pageRank.stream('clientGraph1')
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name AS client, score
    ORDER BY score DESC
    """
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

suspect_nodes_w_pagerank = detect_suspect_nodes_with_pagerank()
suspect_nodes_w_pagerank

Unnamed: 0,client,score
0,Bentley Peck,0.15
1,Dominic Boyer,0.15
2,Faith Dotson,0.15
3,Landon Adams,0.15
4,Lauren Mack,0.15
...,...,...
2428,Samuel Ellis,0.15
2429,Alexa Rhodes,0.15
2430,Ariana Charles,0.15
2431,Zoey Mendez,0.15


In [53]:
def detect_suspect_nodes_with_betweenness():
    query = """
    CALL gds.betweenness.stream('clientGraph1')
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name AS client, score
    ORDER BY score DESC
    """
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

suspect_nodes_w_betweenness = detect_suspect_nodes_with_betweenness()
suspect_nodes_w_betweenness

Unnamed: 0,client,score
0,Bentley Peck,0.0
1,Dominic Boyer,0.0
2,Faith Dotson,0.0
3,Landon Adams,0.0
4,Lauren Mack,0.0
...,...,...
2428,Samuel Ellis,0.0
2429,Alexa Rhodes,0.0
2430,Ariana Charles,0.0
2431,Zoey Mendez,0.0


### 7-Similarity comparison

In [54]:
def similarity_comparison():
    query = """
    CALL gds.nodeSimilarity.stream('clientGraph1')
    YIELD node1, node2, similarity
    RETURN
    gds.util.asNode(node1).name AS client1,
    gds.util.asNode(node2).name AS client2,
    similarity
    ORDER BY similarity DESC
    """
    results = cypher_query_read_executor(query)
    results_df = pd.DataFrame.from_records(results)
    return results_df

similarities = similarity_comparison()
similarities