In [1]:
import pickle

# Load the previously pickled graph object from disk into `graph`
# (presumably a NetworkX graph, since it is later passed to
# extract_node_metrics -- confirm against the code that wrote the pickle).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open('../data/jp_morgan/pickled/graph_aml_final.pickle', 'rb') as f:
    graph = pickle.load(f)

In [2]:
import networkx as nx
import pandas as pd
from typing import Dict, Any

def extract_node_metrics(G: nx.Graph) -> pd.DataFrame:
    """
    Extract various node-level metrics from a NetworkX graph.

    Connectivity is tested exactly once, before any metric is computed, so
    the traversal is not repeated and an unsupported graph type fails fast
    instead of after the expensive centrality computations.

    Parameters:
    -----------
    G : nx.Graph
        Input NetworkX graph. It must be undirected: ``nx.is_connected`` is
        only defined for undirected graphs and raises
        ``nx.NetworkXNotImplemented`` for directed ones.

    Returns:
    --------
    pd.DataFrame
        DataFrame containing node metrics with nodes as index.

        Always present: ``degree``, ``degree_centrality``,
        ``betweenness_centrality``, ``closeness_centrality``,
        ``eigenvector_centrality``, ``clustering_coefficient``,
        ``pagerank`` and ``eccentricity`` (all-NaN when the graph is not
        connected).

        Present only when the graph is connected: ``load_centrality`` and
        ``avg_neighbor_degree``.

        A graph with no nodes yields an empty DataFrame carrying the
        always-present columns.
    """
    # Connectivity and eigenvector centrality are both undefined on the null
    # graph and NetworkX raises NetworkXPointlessConcept for it; return an
    # empty table instead of crashing.
    if G.number_of_nodes() == 0:
        return pd.DataFrame(
            columns=[
                'degree',
                'degree_centrality',
                'betweenness_centrality',
                'closeness_centrality',
                'eigenvector_centrality',
                'clustering_coefficient',
                'pagerank',
                'eccentricity',
            ],
            dtype=float,
        )

    # Single connectivity check, reused by every connectivity-gated metric.
    connected = nx.is_connected(G)

    # Dictionary to store all metrics; insertion order fixes the column order.
    metrics: Dict[str, Dict[Any, float]] = {}

    # Basic centrality measures
    metrics['degree'] = dict(G.degree())
    metrics['degree_centrality'] = nx.degree_centrality(G)
    metrics['betweenness_centrality'] = nx.betweenness_centrality(G)
    metrics['closeness_centrality'] = nx.closeness_centrality(G)
    # max_iter is raised above the library default to give the power
    # iteration more room to converge.
    metrics['eigenvector_centrality'] = nx.eigenvector_centrality(G, max_iter=1000)

    # Local clustering coefficient
    metrics['clustering_coefficient'] = nx.clustering(G)

    # PageRank
    metrics['pagerank'] = nx.pagerank(G)

    # Load centrality is only reported for connected graphs.
    if connected:
        metrics['load_centrality'] = nx.load_centrality(G)

    # Eccentricity needs finite distances between all node pairs; the empty
    # dict keeps the column in the frame, filled with NaN.
    metrics['eccentricity'] = nx.eccentricity(G) if connected else {}

    # Convert to DataFrame (outer keys -> columns, node ids -> index)
    df_metrics = pd.DataFrame(metrics)

    # Average neighbor degree is appended last, again only when connected.
    if connected:
        avg_neighbor_degree = nx.average_neighbor_degree(G)
        df_metrics['avg_neighbor_degree'] = pd.Series(avg_neighbor_degree)

    return df_metrics

def analyze_network(G: nx.Graph) -> Dict[str, Any]:
    """
    Calculate global network metrics.

    Connectivity is tested once and the result reused for every metric that
    depends on it.

    Parameters:
    -----------
    G : nx.Graph
        Input NetworkX graph. It must be undirected: ``nx.is_connected`` is
        only defined for undirected graphs and raises
        ``nx.NetworkXNotImplemented`` for directed ones.

    Returns:
    --------
    Dict[str, Any]
        Dictionary containing global network metrics, with keys
        ``num_nodes``, ``num_edges``, ``density``, ``average_clustering``,
        ``is_connected``, ``average_shortest_path_length``, ``diameter``,
        ``transitivity``, ``number_connected_components`` and
        ``graph_assortativity``.

        ``average_shortest_path_length`` and ``diameter`` are ``None`` when
        the graph is not connected. ``graph_assortativity`` is ``None`` when
        it cannot be computed. For a graph with no nodes,
        ``average_clustering`` and ``is_connected`` are ``None`` as well,
        because both are undefined there.
    """
    num_nodes = G.number_of_nodes()
    has_nodes = num_nodes > 0

    # nx.is_connected raises NetworkXPointlessConcept on the null graph, so
    # connectivity is reported as None (undefined) in that case. Evaluated
    # once here rather than once per dependent metric.
    connected = nx.is_connected(G) if has_nodes else None

    global_metrics = {
        'num_nodes': num_nodes,
        'num_edges': G.number_of_edges(),
        'density': nx.density(G),
        # The average over zero nodes is undefined.
        'average_clustering': nx.average_clustering(G) if has_nodes else None,
        'is_connected': connected,
        # Path-length based metrics require every node pair to be reachable.
        'average_shortest_path_length': nx.average_shortest_path_length(G) if connected else None,
        'diameter': nx.diameter(G) if connected else None,
        'transitivity': nx.transitivity(G),
        'number_connected_components': nx.number_connected_components(G),
    }

    # Assortativity is best-effort: degenerate graphs can make the
    # computation fail. Catch Exception rather than using a bare `except`
    # so KeyboardInterrupt and SystemExit still propagate.
    try:
        global_metrics['graph_assortativity'] = nx.degree_assortativity_coefficient(G)
    except Exception:
        global_metrics['graph_assortativity'] = None

    return global_metrics

# Build the per-node metric table (one row per node) for the graph that was
# loaded from the pickle file earlier in this notebook.
node_metrics = extract_node_metrics(graph)