# Step 4

In [7]:
# Install the neo4j package
%pip install neo4j

from neo4j import GraphDatabase

# Setting up database connection
uri = "bolt://localhost:7689"
username = "neo4j"
password = "password"
driver = GraphDatabase.driver(uri, auth=(username, password))

print("Database Linked")

Note: you may need to restart the kernel to use updated packages.
Database Linked


# Step 5
#### Create the SHARED_PII relationship between clients

In [8]:
# Function to create SHARED_PII relationship
def shared_pii(driver):
    with driver.session() as session:
        session.run("""
            MATCH ( c:Client )-[ :HAS_EMAIL |:HAS_PHONE |:HAS_SSN ] -> (n) <-
            [ :HAS_EMAIL |:HAS_PHONE |:HAS_SSN ]- ( d:Client )
            WHERE id(c) < id(d)
            WITH c, d, count(*) AS cnt
            MERGE (c) - [ :SHARED_PII { count: cnt } ] -> (d)
        """)

shared_pii(driver)



# Step 6

In [9]:
# Creating in-memory projection of the graph
def create_projection(driver):
    with driver.session() as session:
        session.run("""
        CALL gds.graph.project(
            'clientClusters',
            'Client',
            'SHARED_PII'
        )
        """)

create_projection(driver)

# Step 7

In [14]:
# WCC function
def run_wcc(driver):
    with driver.session() as session:
        result = session.run("""
        CALL gds.wcc.stream('clientClusters')
        YIELD nodeId, componentId
        RETURN gds.util.asNode(nodeId).id AS clientId,
        componentId AS clusterId
        """)
        for record in result:
            print(record)

run_wcc(driver)

<Record clientId='4997933060327094' clusterId=0>
<Record clientId='4776276949898423' clusterId=1>
<Record clientId='4858607188760216' clusterId=2>
<Record clientId='4287186486553145' clusterId=3>
<Record clientId='4661202154682409' clusterId=4>
<Record clientId='4649268238636650' clusterId=5>
<Record clientId='4426707672690219' clusterId=6>
<Record clientId='4922246870240518' clusterId=7>
<Record clientId='4415848797892554' clusterId=8>
<Record clientId='4548497513788330' clusterId=9>
<Record clientId='4950634022082174' clusterId=10>
<Record clientId='4860903977910377' clusterId=11>
<Record clientId='4234798486577769' clusterId=12>
<Record clientId='4175792657809755' clusterId=13>
<Record clientId='4361287590543243' clusterId=14>
<Record clientId='4833833649287561' clusterId=15>
<Record clientId='4495151043368906' clusterId=16>
<Record clientId='4028666746330768' clusterId=17>
<Record clientId='4664274185403862' clusterId=18>
<Record clientId='4699974121949998' clusterId=19>
<Record cl

# Step 8

In [15]:
# Function for marking clients based on clusters
def mark_clients(driver):
    with driver.session() as session:
        # Get the cluster sizes
        cluster_sizes = session.run("""
        CALL gds.wcc.stream('clientClusters')
        YIELD nodeId, componentId
        RETURN componentId, count(*) AS size
        """)
        
        # Mark clients based on cluster size
        for record in cluster_sizes:
            component_id = record['componentId']
            size = record['size']
            if size >= 2:
                session.run("""
                MATCH (c:Client)-[:SHARED_PII]->(c2:Client)
                WHERE id(c) = $component_id
                SET c.possiblyFraud = true
                """, component_id=component_id)
            else:
                session.run("""
                MATCH (c:Client)
                WHERE id(c) = $component_id
                SET c.possiblyFraud = false
                """, component_id=component_id)

mark_clients(driver)



# Step 9

In [19]:
# Function for bipartite graph with projection
def create_bipartite_graph(driver):
    with driver.session() as session:
        # Finding clients
        clients = session.run("""
        MATCH (c:Client)
        WHERE c.secondPartyFraudRing IS NOT NULL
        RETURN collect(c) AS clients
        """).single()['clients']
        
        # Finding PII nodes
        piis = session.run("""
        MATCH (n)
        WHERE n:Email OR n:Phone OR n:SSN
        RETURN collect(n) AS piis
        """).single()['piis']
        
        # Combining clients & PII nodes
        nodes = clients + piis
        
        # Extracting node IDs and labels
        node_data = [{"id": node.id, "labels": list(node.labels)} for node in nodes]
        
        # Finding client-PII relationships
        relationships = session.run("""
        MATCH (c:Client)-[:HAS_EMAIL|:HAS_PHONE|:HAS_SSN]->(p)
        WHERE c.secondPartyFraudRing IS NOT NULL
        RETURN collect({source: id(c), target: id(p)}) AS relationships
        """).single()['relationships']
        
        # Creating graph with projection
        result = session.run("""
        CALL gds.graph.project.cypher(
            'similarity',
            "UNWIND $nodes AS n RETURN n.id AS id, n.labels AS labels",
            "UNWIND $relationships AS r RETURN r.source AS source, r.target AS target, 'HAS_PII' AS type",
            { parameters: { nodes: $node_data, relationships: $relationships } }
        )
        YIELD graphName, nodeCount, relationshipCount
        RETURN graphName, nodeCount, relationshipCount
        """, node_data=node_data, relationships=relationships)
        
        for record in result:
            print(record)

create_bipartite_graph(driver)

  node_data = [{"id": node.id, "labels": list(node.labels)} for node in nodes]


<Record graphName='similarity' nodeCount=6701 relationshipCount=0>


# Step 10

In [20]:
def compute_similarity(driver):
    with driver.session() as session:
        result = session.run("""
        CALL gds.nodeSimilarity.mutate('similarity', {
            mutateProperty: 'jaccardScore',
            mutateRelationshipType: 'SIMILAR_TO',
            topK: 15
        })
        YIELD nodesCompared, relationshipsWritten
        RETURN nodesCompared, relationshipsWritten
        """)
        
        for record in result:
            print(record)

compute_similarity(driver)

<Record nodesCompared=0 relationshipsWritten=0>


# Step 11

In [24]:
def write_similarity_relationships(driver):
    with driver.session() as session:
        result = session.run("""
        CALL gds.graph.writeRelationship('similarity', 'SIMILAR_TO', 'jaccardScore')
        YIELD relationshipsWritten
        RETURN relationshipsWritten
        """)
        
        for record in result:
            print(record)

write_similarity_relationships(driver)



<Record relationshipsWritten=0>


# Step 12

In [None]:
def compute_node_degree(driver):
    with driver.session() as session:
        result = session.run("""
        CALL gds.degree.write('similarity', {
            nodeLabels: ['Client'],
            relationshipTypes: ['SIMILAR_TO'],
            relationshipWeightProperty: 'jaccardScore',
            writeProperty: 'secondPartyFraudScore'
        })
        YIELD nodePropertiesWritten, nodesWritten
        RETURN nodePropertiesWritten, nodesWritten
        """)
        
        for record in result:
            print(record)

compute_node_degree(driver)

CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Type mismatch: expected String but was Map (line 2, column 31 (offset: 31))
"        CALL gds.degree.write({"
                               ^}