# Step 4

In [None]:
# Install required libraries
!pip install neo4j

# Import required libraries
from neo4j import GraphDatabase

# Set up database connection
# Connection settings are taken from the environment when present so that real
# credentials never need to be saved inside the notebook. The original
# placeholder values are kept as defaults, so behaviour is unchanged when the
# variables are not set.
import os

uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "your password here")
driver = GraphDatabase.driver(uri, auth=(username, password))

# Define a function to project a graph
def project_graph(tx):
    """Project the 'fraud-detection' graph and return the procedure's record.

    Calls ``gds.graph.project`` with the node label 'Person' and the
    relationship type 'KNOWS'.

    Args:
        tx: Object exposing ``run(query)``; the notebook passes this function
            to the session's transaction runner, which supplies it.

    Returns:
        Whatever ``single()`` yields for the result of the projection call.
    """
    projection_query = """
    CALL gds.graph.project(
        'fraud-detection',
        'Person',
        'KNOWS'
    )
    """
    projection_result = tx.run(projection_query)
    return projection_result.single()

# Test database connection and project the graph
with driver.session() as session:
    # `Session.write_transaction` is deprecated in the 5.x Python driver in
    # favour of `Session.execute_write` and may be absent in newer releases.
    # Prefer the new name and fall back to the old one so the cell works with
    # whichever driver version `pip install neo4j` happened to install.
    run_in_write_tx = (
        getattr(session, "execute_write", None) or session.write_transaction
    )
    run_in_write_tx(project_graph)

print("Graph projected successfully.")

# Step 5
#### Create the SHARED_PII relationship between clients

In [5]:
# Define a function to create the SHARED_PII relationship
def create_shared_pii_relationship(driver):
    """Link every ordered pair of distinct ``Client`` nodes with ``SHARED_PII``.

    The statement uses ``MERGE`` so that re-running the cell does not pile up
    duplicate relationships between the same two clients.

    NOTE(review): the query connects *all* pairs of clients; it does not check
    that the two clients actually have any identifier in common. Confirm that
    this is the intended definition of SHARED_PII for this data set.

    Args:
        driver: Neo4j driver used to open a session for the write.

    Raises:
        Any exception the driver raises while executing the statement.
    """
    with driver.session() as session:
        result = session.run("""
        MATCH (c1:Client), (c2:Client)
        WHERE c1 <> c2
        MERGE (c1)-[:SHARED_PII]->(c2)
        """)
        # Exhaust the result before reporting success: until it is consumed,
        # server-side failures may not have been raised yet, and the success
        # message below would be printed for a statement that did not run.
        result.consume()
        print("SHARED_PII relationships created.")

# Call the function to create the SHARED_PII relationship
create_shared_pii_relationship(driver)

# Print every record returned by the db.schema.visualization() procedure
with driver.session() as schema_session:
    schema_records = schema_session.run("CALL db.schema.visualization()")
    for schema_record in schema_records:
        print(schema_record)

SHARED_PII relationships created.
<Record nodes=[] relationships=[]>


# Step 6

In [None]:
# Create an in-memory projection of the graph
def create_in_memory_projection(driver):
    """(Re)create the 'clientClusters' projection of Client/SHARED_PII.

    Any projection already stored under the same name is dropped first, so the
    cell can be re-run after the underlying relationships change without
    failing on a name clash.

    Args:
        driver: Neo4j driver used to open a session.

    Raises:
        Any exception the driver raises while executing the statements.
    """
    with driver.session() as session:
        # Second argument is failIfMissing: with `false` the drop is a no-op
        # on the first run, when no projection exists yet.
        session.run("CALL gds.graph.drop('clientClusters', false)").consume()
        # consume() forces the statement to finish inside the session so that
        # a failed projection raises here instead of going unnoticed.
        session.run("""
        CALL gds.graph.project(
            'clientClusters',
            'Client',
            'SHARED_PII'
        )
        """).consume()
        print("In-memory projection 'clientClusters' created.")

# Call the function to create the in-memory projection
create_in_memory_projection(driver)

# Step 7

In [None]:
# Define a function to run the WCC algorithm
def run_wcc_algorithm(driver):
    """Stream WCC over the 'clientClusters' projection and print each row.

    Every printed record carries ``clientName`` (the ``name`` property of the
    node behind the streamed id) and ``componentId``; rows are ordered by
    component id, then by client name.

    Args:
        driver: Neo4j driver used to open a session for the query.
    """
    wcc_query = """
        CALL gds.wcc.stream('clientClusters')
        YIELD nodeId, componentId
        RETURN gds.util.asNode(nodeId).name AS clientName, componentId
        ORDER BY componentId, clientName
        """
    with driver.session() as db_session:
        for row in db_session.run(wcc_query):
            print(row)

# Run WCC and print the component assigned to each client
run_wcc_algorithm(driver)

# Step 8

In [None]:
# Define a function to mark clients based on cluster size
def mark_clients_based_on_cluster_size(driver, min_cluster_size=2):
    """Set ``possiblyFraud`` on every client according to its WCC cluster size.

    WCC is streamed over the 'clientClusters' projection, the streamed node
    ids are grouped by component, and each member node gets
    ``possiblyFraud = true`` when its component has at least
    ``min_cluster_size`` members and ``false`` otherwise.

    Everything happens in one Cypher statement. The stream procedure does not
    store ``componentId`` on the nodes, so membership has to come from the
    streamed ``nodeId`` values; ``gds.util.asNode`` expects such an id, not a
    node. Grouping the ids also ensures that every member of a cluster is
    flagged, not only those that have an outgoing SHARED_PII relationship.

    Args:
        driver: Neo4j driver used to open a session.
        min_cluster_size: Smallest component size that is flagged as possible
            fraud. Defaults to 2, the threshold used originally.

    Raises:
        Any exception the driver raises while executing the statement.
    """
    query = """
    CALL gds.wcc.stream('clientClusters')
    YIELD nodeId, componentId
    WITH componentId, collect(nodeId) AS memberIds
    WITH memberIds, size(memberIds) >= $min_cluster_size AS suspicious
    UNWIND memberIds AS memberId
    WITH gds.util.asNode(memberId) AS c, suspicious
    SET c.possiblyFraud = suspicious
    """
    with driver.session() as session:
        # consume() makes sure the write has completed (and any error has been
        # raised) before success is reported.
        session.run(query, min_cluster_size=min_cluster_size).consume()
    print("Clients marked based on cluster size.")

# Call the function to mark clients based on cluster size
mark_clients_based_on_cluster_size(driver)

# Step 9