In [3]:
import pandas as pd
import numpy as np
import networkx as nx
import plotly.graph_objects as go
from pyvis.network import Network

In [4]:
# Load the transaction records from the JSON file (path is relative to the
# working directory) into a DataFrame.
df = pd.read_json('./data/transactions.json')

In [5]:
# Preview the first rows to sanity-check the columns that were loaded.
df.head()

Unnamed: 0,sanctioned_wallet,transacted_with
0,bc1qcp6fr7gtyukympl6unr7uv78h3vprycwj455zx,"[bc1qptrvdk8uv8shd734tyhyzatrvmlq9l0v6r0dc2, b..."
1,0xE950DC316b836e4EeFb8308bf32Bf7C72a1358FF,"[bc1qptrvdk8uv8shd734tyhyzatrvmlq9l0v6r0dc2, b..."
2,1Ge8JodC2HiBiEuT7D3MoH6Fak6XrcT9Kf,"[bc1qptrvdk8uv8shd734tyhyzatrvmlq9l0v6r0dc2, b..."
3,1FjubFHV4mpYjBmvjsEhZssyiiA4TNmnm2,"[bc1qptrvdk8uv8shd734tyhyzatrvmlq9l0v6r0dc2, b..."
4,3H3rh85qPaGLy2w6618yZNaH7i8asHv46B,"[bc1qptrvdk8uv8shd734tyhyzatrvmlq9l0v6r0dc2, b..."


In [6]:
def visualize_network_from_dataframe(df: pd.DataFrame, source_col: str, targets_col: str,
                                     title: str = "Network Visualization",
                                     shorten_node_names: bool = False,
                                     label_length: int = 5):
    """
    Builds a directed network graph from a Pandas DataFrame and renders it with Plotly.

    Every row contributes one edge per entry in its targets list, pointing from the
    row's source node to that target. Nodes that appear in ``source_col`` are drawn
    red (even if they also appear as a target); all other nodes are drawn blue.
    Marker size grows linearly with node degree (in-degree + out-degree).

    Args:
        df (pd.DataFrame): The input DataFrame containing the network data.
        source_col (str): The name of the column containing the source nodes.
        targets_col (str): The name of the column containing the list of target nodes.
            Rows whose value in this column is not a ``list`` are ignored.
        title (str, optional): The title of the network plot. Defaults to "Network Visualization".
        shorten_node_names (bool, optional): If True, the text label drawn next to each node is
            truncated to its first ``label_length`` characters for readability in dense graphs.
            The hover text always shows the full node name. Defaults to False.
        label_length (int, optional): Number of leading characters kept when
            ``shorten_node_names`` is True. Defaults to 5.

    Returns:
        plotly.graph_objects.Figure | None: The assembled figure (not displayed; the caller
        decides whether to show or export it), or None when no edges could be built.

    Raises:
        ValueError: If ``shorten_node_names`` is True and ``label_length`` is less than 1.
    """
    if shorten_node_names and label_length < 1:
        raise ValueError("label_length must be a positive integer")

    # Create a directed graph object and add one edge per (source, target) pair.
    G = nx.DiGraph()
    for source, targets in zip(df[source_col], df[targets_col]):
        # Only genuine lists are expanded; anything else (e.g. a missing value)
        # contributes no edges.
        if isinstance(targets, list):
            G.add_edges_from((source, target) for target in targets)

    # Nothing to draw: report it and bail out before touching Plotly.
    if G.number_of_nodes() == 0:
        print("The graph is empty. No nodes or edges were added.")
        return None

    # Fix the node order once so colors, sizes, positions and labels stay aligned.
    nodes = list(G.nodes())

    # A node is colored as a source if it appears in the source column at all,
    # including nodes that are also targets of other rows.
    source_nodes = set(df[source_col].unique())
    node_colors = ['red' if node in source_nodes else 'blue' for node in nodes]

    # Scale marker size linearly with degree. Every node entered the graph through
    # an edge, so each degree is at least 1 and max_degree can never be zero.
    degrees = dict(G.degree())
    max_degree = max(degrees.values())
    min_size = 10
    max_size = 40
    node_sizes = [min_size + (max_size - min_size) * (degrees[node] / max_degree) for node in nodes]

    # Use a spring layout with a fixed seed so the picture is reproducible.
    pos = nx.spring_layout(G, seed=42)

    # Plotly draws all edges as one polyline; a None entry breaks the line
    # between consecutive segments.
    edge_x = []
    edge_y = []
    for start, end in G.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    node_x = [pos[node][0] for node in nodes]
    node_y = [pos[node][1] for node in nodes]

    # Visible labels may be truncated; str() keeps this safe for non-string node ids.
    if shorten_node_names:
        node_labels = [str(node)[:label_length] for node in nodes]
    else:
        node_labels = nodes

    # Create edge trace
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines')

    # Create node trace; hovertext always carries the untruncated node name.
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        hoverinfo='text',
        text=node_labels,
        hovertext=nodes,
        textposition='top center',
        textfont_size=10,
        marker=dict(
            showscale=False,
            color=node_colors,
            size=node_sizes,
            line_width=2))

    # Create the figure
    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        title=title,
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        annotations=[
                            dict(
                                text="Color Key: <span style='color:red;'>Sanctioned Wallet</span>, <span style='color:blue;'>Not Sanctioned</span>",
                                showarrow=False,
                                xref="paper", yref="paper",
                                x=0.05, y=0.98,
                                font=dict(size=12)
                            )
                        ],
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                    )

    # Hand the figure back to the caller rather than displaying it here.
    return fig

In [7]:
# Count the distinct sanctioned wallets and embed that number in the figure title.
sanctioned_count = len(pd.unique(df['sanctioned_wallet']))
title = f"Network of Transactions for {sanctioned_count} US-Sanctioned Crypto Wallets"

In [8]:
# Build the Plotly figure for the sanctioned-wallet transaction network.
fig = visualize_network_from_dataframe(
    df,
    source_col='sanctioned_wallet',
    targets_col='transacted_with',
    title=title,
    shorten_node_names=True,
)

In [9]:
# Export the Plotly figure to an HTML file in the current working directory.
fig.write_html('./graph.html')

In [None]:
def pyvis_from_dataframe(df: pd.DataFrame, source_col: str, targets_col: str,
                                     title: str = "Network Visualization",
                                     shorten_node_names: bool = False,
                                     filename: str = "network_visualization.html",
                                     label_length: int = 5):
    """
    Builds a directed network graph from a Pandas DataFrame and renders it with pyvis.network.

    Every row contributes one edge per entry in its targets list, pointing from the
    row's source node to that target. Nodes that appear in ``source_col`` are drawn
    red (even if they also appear as a target); all other nodes are drawn blue.
    Node size grows linearly with node degree (in-degree + out-degree). The
    visualization is written to an HTML file.

    Args:
        df (pd.DataFrame): The input DataFrame containing the network data.
        source_col (str): The name of the column containing the source nodes.
        targets_col (str): The name of the column containing the list of target nodes.
            Rows whose value in this column is not a ``list`` are ignored.
        title (str, optional): The title of the network plot. Defaults to "Network Visualization".
        shorten_node_names (bool, optional): If True, node labels are truncated to their first
            ``label_length`` characters for readability in dense graphs. The hover text always
            shows the full node name. Defaults to False.
        filename (str, optional): Path of the HTML file to write.
            Defaults to "network_visualization.html".
        label_length (int, optional): Number of leading characters kept when
            ``shorten_node_names`` is True. Defaults to 5.

    Returns:
        None. The result is the HTML file on disk; a confirmation line is printed.

    Raises:
        ValueError: If ``shorten_node_names`` is True and ``label_length`` is less than 1.
    """
    if shorten_node_names and label_length < 1:
        raise ValueError("label_length must be a positive integer")

    # Create a directed graph object using NetworkX as a bridge; it de-duplicates
    # edges and gives us node degrees for sizing.
    G = nx.DiGraph()
    for source, targets in zip(df[source_col], df[targets_col]):
        # Only genuine lists are expanded; anything else (e.g. a missing value)
        # contributes no edges.
        if isinstance(targets, list):
            G.add_edges_from((source, target) for target in targets)

    # Nothing to draw: report it and bail out before creating any file.
    if G.number_of_nodes() == 0:
        print("The graph is empty. No nodes or edges were added.")
        return

    # Create a pyvis Network object.
    # directed=True makes pyvis draw arrowheads and keep A->B and B->A as two
    # separate edges; in undirected mode the second of a reciprocal pair is dropped.
    # cdn_resources='in_line' is used to create a single HTML file without external dependencies.
    net = Network(height="750px", width="100%", directed=True, bgcolor="#222222",
                  font_color="white", cdn_resources='in_line')

    # A node is colored as a source if it appears in the source column at all,
    # including nodes that are also targets of other rows.
    source_nodes = set(df[source_col].unique())

    # Every node entered the graph through an edge, so each degree is at least 1
    # and max_degree can never be zero.
    degrees = dict(G.degree())
    max_degree = max(degrees.values())

    # Size range used when scaling nodes by degree.
    min_size = 10
    max_size = 40

    # Add nodes to the pyvis network
    for node in G.nodes():
        color = 'red' if node in source_nodes else 'blue'

        # Scale the node size linearly with its degree.
        size = min_size + (max_size - min_size) * (degrees[node] / max_degree)

        # Hover text carries the full node name and its degree.
        # NOTE(review): some vis-network releases render string tooltips as plain
        # text, in which case "<br>" would appear literally - confirm against the
        # installed pyvis version.
        hover_text = f"Name: {node}<br>Degree: {degrees.get(node, 0)}"

        # Visible label may be truncated; str() keeps this safe for non-string node ids.
        label = str(node)[:label_length] if shorten_node_names else node

        # Add the node to the pyvis network with its properties
        net.add_node(node, label=label, title=hover_text, color=color, size=size)

    # Add edges to the pyvis network
    for start, end in G.edges():
        net.add_edge(start, end)

    # Configure the visualization options (physics, layout, etc.)
    net.set_options("""
    var options = {
      "nodes": {
        "font": {
          "size": 12
        }
      },
      "physics": {
        "enabled": false,
        "barnesHut": {
          "avoidOverlap": 0.5,
          "centralGravity": 0.1,
          "springLength": 100,
          "springConstant": 0.05
        }
      },
      "interaction": {
        "hover": true,
        "tooltipDelay": 100
      }
    }
    """)

    # Set the heading shown in the generated HTML page
    net.heading = title

    # Save the visualization to a standalone HTML file
    net.show(filename, notebook=False)
    print(f"Network visualization saved to '{filename}'")

In [35]:
# Render the same transaction network as an interactive pyvis HTML page.
pyvis_from_dataframe(
    df,
    source_col='sanctioned_wallet',
    targets_col='transacted_with',
    title=title,
    shorten_node_names=True,
)

network_visualization.html
Network visualization saved to 'network_visualization.html'
