In [None]:
import pandas as pd
import networkx as nx
import numpy as np
import json
from collections import defaultdict
import matplotlib.pyplot as plt

# Step 1: Read data and calculate dependency weights
def calculate_dependency_weights(df):
    """
    Calculate dependency weights w_ij = t_ij / Σ_j t_ij
    where t_ij is the commuting flow from i to j.

    Parameters:
    - df: DataFrame with at least the columns 'ORIGIN_TTWA' and 'TOTAL_FLOW'
      (one row per origin-destination pair).

    Returns:
    - The same DataFrame object, with a new 'dependency_weight' column holding
      each row's share of its origin's total outflow. The column is written
      onto ``df`` itself, so the caller's frame is modified in place.

    Notes:
    - Origins whose total outflow is 0 produce NaN weights (0 / 0).
    """
    # transform('sum') broadcasts every origin's total outflow back onto its own
    # rows, so the division is a single vectorised operation instead of one
    # Python-level call per OD record.
    origin_totals = df.groupby('ORIGIN_TTWA')['TOTAL_FLOW'].transform('sum')

    df['dependency_weight'] = df['TOTAL_FLOW'] / origin_totals

    return df

# Read the origin-destination flow table and attach the dependency weights.
# NOTE: calculate_dependency_weights writes the 'dependency_weight' column onto
# the frame it receives and returns that same frame, so `df` and
# `df_with_weights` refer to the same object after this cell.
df = pd.read_csv('ttwa_od_matrix_cross_city_only.csv')
df_with_weights = calculate_dependency_weights(df)

# Quick sanity summary: row count, number of distinct origins, weight range.
print(f"Data overview: {len(df)} OD records, {df['ORIGIN_TTWA'].nunique()} origin cities")
print(f"Dependency weight range: {df_with_weights['dependency_weight'].min():.6f} - {df_with_weights['dependency_weight'].max():.6f}")

In [None]:
# Step 2: Build directed commuting network
def build_commuting_network(df_weights):
    """
    Build the directed commuting network from the weighted OD table.

    Parameters:
    - df_weights: DataFrame with the columns 'ORIGIN_TTWA', 'DEST_TTWA',
      'dependency_weight' and 'TOTAL_FLOW' (one row per directed OD pair).

    Returns:
    - nx.DiGraph with one edge per (origin, destination) row. Each edge carries
      'weight' (the dependency weight) and 'flow' (the raw commuting flow).
      If an OD pair appears more than once, the last row's attributes win.
    """
    G = nx.DiGraph()

    # Iterate column-wise instead of with iterrows(): iterrows() builds a Series
    # per row (slow) and upcasts a fully numeric frame to float, which would
    # turn integer node ids into floats.
    edge_columns = zip(
        df_weights['ORIGIN_TTWA'],
        df_weights['DEST_TTWA'],
        df_weights['dependency_weight'],
        df_weights['TOTAL_FLOW'],
    )
    G.add_edges_from(
        (origin, destination, {'weight': weight, 'flow': flow})
        for origin, destination, weight, flow in edge_columns
    )

    return G

# Build the directed graph once; the percolation and mapping cells below all
# read this notebook-level `G`.
G = build_commuting_network(df_with_weights)
print(f"Network size: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

In [None]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Assume G already exists

def perform_percolation_analysis(G):
    """
    Sweep the dependency-weight threshold over the commuting network G.

    Every distinct edge 'weight' found in G is used as a threshold tau (largest
    first); 0 is appended as a final threshold when it is not already present.
    For each tau, only edges with weight >= tau are kept (all nodes are kept)
    and the weakly connected components of that pruned graph are recorded.

    Returns a DataFrame with one row per threshold and the columns
    'threshold', 'num_clusters', 'clusters' (list of node sets),
    'giant_component_size' and 'giant_component_size_ratio'.
    """
    distinct_weights = set(nx.get_edge_attributes(G, 'weight').values())
    taus = sorted(distinct_weights, reverse=True)
    if 0 not in taus:
        taus.append(0)

    print(f"Starting percolation analysis, will use {len(taus)} unique thresholds...")

    node_count = G.number_of_nodes()

    def snapshot(tau):
        """Cluster structure of G restricted to edges with weight >= tau."""
        # Start from every node so isolated nodes count as singleton clusters.
        pruned = nx.DiGraph()
        pruned.add_nodes_from(G.nodes())
        pruned.add_edges_from(
            (u, v, attrs)
            for u, v, attrs in G.edges(data=True)
            if attrs['weight'] >= tau
        )

        components = list(nx.weakly_connected_components(pruned))
        largest = max(map(len, components), default=0)

        return {
            'threshold': tau,
            'num_clusters': len(components),
            'clusters': components,
            'giant_component_size': largest,
            'giant_component_size_ratio': largest / node_count if node_count > 0 else 0,
        }

    return pd.DataFrame([snapshot(tau) for tau in taus])

# Run the threshold sweep on the notebook-level graph
df_percolation = perform_percolation_analysis(G)

# Identify critical points.
# Rows are ordered from the largest threshold to the smallest, so diff() gives
# the growth of the giant component at each step (0 for the first row).
df_percolation = df_percolation.sort_values('threshold', ascending=False).reset_index(drop=True)
df_percolation['size_increase'] = df_percolation['giant_component_size'].diff().fillna(0)

# Only steps where the giant component actually grew are candidates.
non_zero_increases = df_percolation[df_percolation['size_increase'] > 0]['size_increase']

if not non_zero_increases.empty:
    # Jump threshold: the 95th percentile of the non-zero growth steps
    jump_threshold = non_zero_increases.quantile(0.95) 
    
    # Keep only the rows whose growth reaches the jump threshold (a DataFrame)
    critical_transitions_df = df_percolation[df_percolation['size_increase'] >= jump_threshold]
    
    # Convert the DataFrame to a list of per-row dictionaries
    # (the format noted as required by subsequent code)
    filtered_critical_points = critical_transitions_df.to_dict('records')

    print(f"\nSuccessfully created 'filtered_critical_points' variable containing {len(filtered_critical_points)} critical points.")
    # (Optional) Print preview
    # print("Preview:", filtered_critical_points[:3])

else:
    # NOTE: critical_transitions_df is not defined on this path; the plotting
    # cells check for its existence before using it.
    print("\nNo growth found in giant component size.")
    filtered_critical_points = [] # If no critical points, create empty list

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Requires df_percolation from the percolation-analysis cell above.
# Re-sort in descending threshold order so that consecutive rows correspond to
# progressively lower thresholds (this repeats the sort done right after the analysis).
df_percolation = df_percolation.sort_values('threshold', ascending=False).reset_index(drop=True)

# Change in giant component size contributed by each threshold step.
# diff() is current row minus previous row; with thresholds decreasing, a
# positive value means the giant component grew. The first row has no
# predecessor and is filled with 0.
df_percolation['size_increase'] = df_percolation['giant_component_size'].diff().fillna(0)

print("Percolation analysis results with size increments:")
print(df_percolation[['threshold', 'giant_component_size', 'size_increase']].head(10))

In [None]:
# From all non-zero growth steps, determine a "mutation" (jump) threshold.
# A step counts as a mutation when its growth is at or above the 95th
# percentile of the non-zero growth steps.
non_zero_increases = df_percolation[df_percolation['size_increase'] > 0]['size_increase']

if not non_zero_increases.empty:
    # Jump threshold = 95th percentile of the non-zero size increases
    jump_threshold = non_zero_increases.quantile(0.95) 
    print(f"\nDefining \"mutation\" size threshold as: {jump_threshold:.2f} ")

    # Keep only the rows that qualify as "mutations"
    critical_transitions_df = df_percolation[df_percolation['size_increase'] >= jump_threshold]

    # Extract the distinct critical thresholds, largest first
    critical_thresholds = sorted(critical_transitions_df['threshold'].unique(), reverse=True)

    print("\nIdentified Critical Thresholds include:")
    print(critical_thresholds)
else:
    # No step increased the giant component, so there is nothing to flag.
    print("\nNo growth found in giant component size.")
    critical_thresholds = []

In [None]:
# Percolation curve with the critical transitions overlaid (basic version)
fig, ax = plt.subplots(figsize=(12, 7))

# Giant-component ratio as a function of the threshold
ax.plot(
    df_percolation['threshold'], df_percolation['giant_component_size_ratio'],
    label='Giant Component Evolution',
    color='gray', linestyle='-', alpha=0.7,
)

# The overlay is only possible when an earlier cell produced a non-empty
# critical_transitions_df
if 'critical_transitions_df' in locals():
    if not critical_transitions_df.empty:
        ax.plot(
            critical_transitions_df['threshold'],
            critical_transitions_df['giant_component_size_ratio'],
            'ro',  # red circle markers, no connecting line
            label='Critical Transitions',
            markersize=6,
        )

ax.set(
    xlabel='Threshold (τ)',
    ylabel='Giant Cluster Size (Ratio of Total Nodes)',
    title='Percolation Analysis with Critical Transitions Highlighted',
)
ax.invert_xaxis()  # large thresholds on the left, small on the right
ax.legend()
plt.show()

In [None]:
# Percolation curve with the critical transitions overlaid (grid disabled)
fig, ax = plt.subplots(figsize=(12, 7))

# Giant-component ratio as a function of the threshold
ax.plot(
    df_percolation['threshold'], df_percolation['giant_component_size_ratio'],
    label='Giant Component Evolution',
    alpha=0.7, linestyle='-', color='gray',
)

# Skip the overlay when critical_transitions_df is missing or empty
if not ('critical_transitions_df' not in locals() or critical_transitions_df.empty):
    ax.plot(
        critical_transitions_df['threshold'],
        critical_transitions_df['giant_component_size_ratio'],
        'ro',  # red circle markers, no connecting line
        label='Critical Transitions',
        markersize=6,
    )

ax.set(
    xlabel='Threshold (τ)',
    ylabel='Giant Cluster Size (Ratio of Total Nodes)',
    title='Percolation Analysis with Critical Transitions Highlighted',
)
ax.invert_xaxis()  # large thresholds on the left, small on the right
# ax.set_xlim(1, 0)
ax.grid(False)
ax.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Percolation curve with critical transitions (polished step-plot version)
fig, ax = plt.subplots(figsize=(12, 8))  # a little taller to leave room for labels

# 1. Step curve of the giant-component ratio
ax.plot(
    df_percolation['threshold'], df_percolation['giant_component_size_ratio'],
    drawstyle='steps-post',  # the ratio only changes at discrete thresholds
    label='Giant Component Evolution',
    color='darkgray', linestyle='-', linewidth=1.5,
)

# 2. Critical transitions, drawn only if an earlier cell produced a non-empty frame
if 'critical_transitions_df' in locals():
    if not critical_transitions_df.empty:
        ax.plot(
            critical_transitions_df['threshold'],
            critical_transitions_df['giant_component_size_ratio'],
            linestyle='none',         # markers only
            marker='o', markersize=8,
            color='crimson',
            markeredgecolor='white',  # white rim makes the markers stand out
            label='Critical Transitions',
        )

# 3. Title and axis labels
ax.set_title('Percolation Analysis with Critical Transitions Highlighted', fontsize=16, color='black')
ax.set_xlabel('Threshold (τ)', fontsize=14, color='black')
ax.set_ylabel('Giant Cluster Size (Ratio of Total Nodes)', fontsize=14, color='black')

# 4. Axis ranges: the x-axis runs from 1 down to 0, the y-axis gets headroom above 1
ax.invert_xaxis()
ax.set_xlim(1, 0)
ax.set_ylim(0, 1.05)

# 5. Spines and ticks: drop the top/right frame lines, keep the rest black
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_color('black')
ax.spines['left'].set_color('black')
ax.tick_params(axis='both', colors='black', length=6)  # length = tick mark length

# 6. Borderless legend, no grid, then render
ax.grid(False)
ax.legend(fontsize=12, frameon=False)
plt.tight_layout()  # keep labels from overlapping or being clipped
plt.show()

——————————————

In [None]:
# Complete percolation analysis code - one-stop solution

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from scipy import ndimage
from scipy.signal import find_peaks
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

def _log_spaced_samples(min_weight, max_weight, count):
    """Log-spaced samples over [min_weight, max_weight] with exact end points."""
    samples = np.logspace(np.log10(min_weight), np.log10(max_weight), max(int(count), 2))
    # 10 ** log10(w) can differ from w by one ULP; pin the end points so the
    # weakest and strongest edges are guaranteed to be sampled exactly.
    samples[0], samples[-1] = min_weight, max_weight
    return samples


def _sample_percolation_thresholds(min_weight, max_weight, reference_thresholds, num_samples):
    """Return the distinct candidate thresholds, sorted from large to small."""
    if reference_thresholds:
        print(f"   Intelligent sampling based on {len(reference_thresholds)} reference thresholds")
        # Half of the budget goes to a coarse logarithmic sweep ...
        pieces = [_log_spaced_samples(min_weight, max_weight, num_samples // 2)]
        # ... plus 30 linear samples within ±15% of every in-range reference value.
        for ref_thresh in reference_thresholds:
            if min_weight <= ref_thresh <= max_weight:
                local_min = max(min_weight, ref_thresh * 0.85)
                local_max = min(max_weight, ref_thresh * 1.15)
                pieces.append(np.linspace(local_min, local_max, 30))
        candidates = np.concatenate(pieces)
    else:
        candidates = _log_spaced_samples(min_weight, max_weight, num_samples)

    candidates = candidates[(candidates >= min_weight) & (candidates <= max_weight)]
    return np.unique(candidates)[::-1]


def _giant_component_curve(weighted_edges, thresholds, total_nodes):
    """Largest-component fraction at every threshold (thresholds must be descending).

    Direction is ignored, matching connected components of an undirected graph.
    Edges are activated once, in descending weight order, and merged with a
    union-find structure, so the whole sweep is a single pass over the edges.
    """
    ordered = sorted(weighted_edges, key=lambda edge: edge[2], reverse=True)
    parent = {}
    component_size = {}

    def find(node):
        root = node
        while parent[root] != root:
            root = parent[root]
        while parent[node] != root:  # path compression
            parent[node], node = root, parent[node]
        return root

    largest = 0
    cursor = 0
    fractions = []
    for i, threshold in enumerate(thresholds):
        if i % 200 == 0:
            print(f"   Progress: {i}/{len(thresholds)} ({i/len(thresholds)*100:.1f}%)")

        # Activate every edge whose weight has become >= the current threshold.
        while cursor < len(ordered) and ordered[cursor][2] >= threshold:
            u, v, _ = ordered[cursor]
            for node in (u, v):
                if node not in parent:
                    parent[node] = node
                    component_size[node] = 1
            root_u, root_v = find(u), find(v)
            if root_u != root_v:
                if component_size[root_u] < component_size[root_v]:
                    root_u, root_v = root_v, root_u
                parent[root_v] = root_u
                component_size[root_u] += component_size[root_v]
            largest = max(largest, component_size[root_u])
            cursor += 1

        # No active edge yet -> no component at all, reported as 0.0.
        fractions.append(largest / total_nodes if cursor else 0.0)

    return np.array(fractions)


def _detect_critical_points(thresholds, giant_sizes, reference_thresholds, max_points=10):
    """Collect, deduplicate and rank critical thresholds; also return the slope array."""
    critical_thresholds = []

    # Method 1: Reference threshold matching
    if reference_thresholds:
        print("   Method 1: Reference threshold matching...")
        for ref_thresh in reference_thresholds:
            if ref_thresh <= 0:
                continue  # a relative error is undefined for non-positive references
            idx = np.argmin(np.abs(thresholds - ref_thresh))
            relative_error = np.abs(thresholds[idx] - ref_thresh) / ref_thresh

            if relative_error < 0.15:  # Relative error less than 15%
                critical_thresholds.append({
                    'threshold': thresholds[idx],
                    'giant_size': giant_sizes[idx],
                    'type': 'reference_match',
                    'reference': ref_thresh,
                    'error': relative_error
                })

    # Method 2: steepest points of the smoothed curve
    print("   Method 2: Mathematical mutation detection...")

    try:
        smoothed_sizes = ndimage.gaussian_filter1d(giant_sizes, sigma=3)
    except (ValueError, RuntimeError):
        # Smoothing is best-effort; fall back to the raw curve.
        smoothed_sizes = giant_sizes

    # Slope of the smoothed curve with respect to log10(threshold)
    dx = np.diff(np.log10(thresholds))
    dy = np.diff(smoothed_sizes)
    with np.errstate(divide='ignore', invalid='ignore'):
        first_derivative = dy / dx
    # Thresholds one ULP apart can share the same log10 value; neutralise them.
    first_derivative = np.where(np.isfinite(first_derivative), first_derivative, 0.0)

    if len(first_derivative) > 10:
        # Thresholds are descending, so growth of the giant component shows up
        # as a negative slope; peaks of the negated slope are the steepest rises.
        peak_positions, peak_props = find_peaks(-first_derivative, height=0.01)
        # Rank by peak height so the three *most significant* peaks are kept.
        strongest_first = np.argsort(peak_props['peak_heights'])[::-1][:3]
        for peak_idx in peak_positions[strongest_first]:
            critical_thresholds.append({
                'threshold': thresholds[peak_idx],
                'giant_size': giant_sizes[peak_idx],
                'type': 'slope_peak',
                'strength': np.abs(first_derivative[peak_idx])
            })

    # Method 3: thresholds where the giant component is closest to round fractions
    print("   Method 3: Specific fraction point detection...")
    target_fractions = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
    for frac in target_fractions:
        idx = np.argmin(np.abs(giant_sizes - frac))
        if np.abs(giant_sizes[idx] - frac) < 0.05:  # Error tolerance
            critical_thresholds.append({
                'threshold': thresholds[idx],
                'giant_size': giant_sizes[idx],
                'type': f'fraction_{frac}',
                'target_fraction': frac
            })

    print("   Organizing and deduplicating...")

    # One point per threshold (rounded to 5 decimals); a later point replaces an
    # earlier one only if it has a smaller 'error' (points without one count as 1.0).
    threshold_groups = {}
    for point in critical_thresholds:
        thresh_key = round(point['threshold'], 5)
        current = threshold_groups.get(thresh_key)
        if current is None or point.get('error', 1.0) < current.get('error', 1.0):
            threshold_groups[thresh_key] = point

    reference_matches = [p for p in threshold_groups.values() if p['type'] == 'reference_match']
    other_points = [p for p in threshold_groups.values() if p['type'] != 'reference_match']

    # All reference matches are kept; remaining slots go to the strongest other
    # points. Clamp at 0: a negative slice bound would admit almost every extra
    # point once there are more than max_points reference matches.
    other_points.sort(key=lambda p: p.get('strength', 0), reverse=True)
    free_slots = max(0, max_points - len(reference_matches))
    final_critical_points = reference_matches + other_points[:free_slots]

    final_critical_points.sort(key=lambda p: p['threshold'], reverse=True)
    return final_critical_points, first_derivative


def complete_percolation_analysis_pipeline(G, reference_thresholds=None, num_samples=1000):
    """
    Complete percolation analysis pipeline

    Samples thresholds over the positive edge-weight range, computes the
    largest connected component (edge direction ignored) at each threshold,
    and flags critical thresholds with three detectors: reference matching,
    steepest slope, and round giant-component fractions.

    Parameters:
    - G: NetworkX directed graph; edges without a 'weight' attribute count as 1.0
    - reference_thresholds: reference threshold list (for validation and for
      denser sampling around those values)
    - num_samples: number of sampling points (at least 2 are always used)

    Returns:
    - analysis_results: dictionary with
        'thresholds'       descending array of sampled thresholds
        'giant_sizes'      largest-component size / total nodes, per threshold
        'critical_points'  list of dicts (at least 'threshold', 'giant_size', 'type')
        'first_derivative' d(smoothed giant size) / d(log10 threshold)
        'network_info'     node count, edge count and sampled weight range

    Raises:
    - ValueError: if G has no edge with a positive weight (thresholds are
      sampled on a logarithmic scale, which needs a positive range).

    Notes:
    - Edges with zero, negative or NaN weight lie below every sampled threshold
      and therefore never become active.
    """

    print("Starting complete percolation analysis pipeline...")
    print(f"   Network info: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

    # Step 1: Data preparation and threshold generation
    print("\nStep 1: Data preparation...")

    weighted_edges = [(u, v, data.get('weight', 1.0)) for u, v, data in G.edges(data=True)]
    # `w > 0` is False for NaN as well, so unusable weights are dropped here.
    active_edges = [edge for edge in weighted_edges if edge[2] > 0]
    if not active_edges:
        raise ValueError(
            "complete_percolation_analysis_pipeline requires at least one edge "
            "with a positive weight"
        )

    min_weight = float(min(weight for _, _, weight in active_edges))
    max_weight = float(max(weight for _, _, weight in active_edges))

    print(f"   Weight range: {min_weight:.6f} - {max_weight:.6f}")

    thresholds = _sample_percolation_thresholds(
        min_weight, max_weight, reference_thresholds, num_samples
    )

    print(f"   Generated {len(thresholds)} sampling points")

    # Step 2: Percolation analysis calculation
    print("\nStep 2: Percolation analysis calculation...")

    giant_sizes = _giant_component_curve(active_edges, thresholds, G.number_of_nodes())
    print("Percolation analysis calculation completed")

    # Step 3: Critical threshold detection
    print("\nStep 3: Critical threshold detection...")

    final_critical_points, first_derivative = _detect_critical_points(
        thresholds, giant_sizes, reference_thresholds
    )

    print(f"Detected {len(final_critical_points)} critical thresholds")

    # Return analysis results
    analysis_results = {
        'thresholds': thresholds,
        'giant_sizes': giant_sizes,
        'critical_points': final_critical_points,
        'first_derivative': first_derivative,
        'network_info': {
            'nodes': G.number_of_nodes(),
            'edges': G.number_of_edges(),
            'weight_range': [min_weight, max_weight]
        }
    }

    return analysis_results

# Run complete analysis

# Reference thresholds used both to densify the sampling and to validate the
# detected critical points (hard-coded values; their provenance is not recorded
# in this notebook — TODO confirm source).
reference_thresholds = [0.274173, 0.243051, 0.217115, 0.175618, 
                       0.149683, 0.134121, 0.113373, 0.051128]

print("Reference thresholds:")
for i, thresh in enumerate(reference_thresholds):
    print(f"   {i+1}. τ = {thresh:.6f}")

# Execute complete analysis on the notebook-level graph G
analysis_results = complete_percolation_analysis_pipeline(
    G, 
    reference_thresholds=reference_thresholds, 
    num_samples=1200
)

print(f"\nAnalysis completed!")
print(f"   Sampling points: {len(analysis_results['thresholds'])}")
print(f"   Critical thresholds: {len(analysis_results['critical_points'])}")

# Display detected critical thresholds; reference matches also show the
# reference value and the relative error as a percentage
print(f"\nDetected critical thresholds:")
for i, point in enumerate(analysis_results['critical_points']):
    if point['type'] == 'reference_match':
        error = point['error'] * 100
        print(f"   {i+1}. τ = {point['threshold']:.6f} (REF: {point['reference']:.6f}, error: {error:.1f}%)")
    else:
        print(f"   {i+1}. τ = {point['threshold']:.6f} ({point['type']})")

# Save key variables under notebook-level names for the visualization cell
final_analysis_thresholds = analysis_results['thresholds']
final_analysis_giant_sizes = analysis_results['giant_sizes']
final_analysis_critical_points = analysis_results['critical_points']

In [None]:
# Enhanced percolation curve visualization - final version

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as patches
from matplotlib.colors import LinearSegmentedColormap

def plot_enhanced_percolation_curve_final(thresholds, giant_sizes, critical_points, 
                                         save_path=None, title_suffix="", graph=None):
    """
    Create enhanced percolation curve visualization - final optimized version

    Parameters:
    - thresholds: sequence of threshold values (x-axis, drawn on a log scale)
    - giant_sizes: giant component size fraction for each threshold (y-axis)
    - critical_points: list of dicts with at least 'threshold', 'giant_size'
      and 'type'; entries of type 'reference_match' are expected to also carry
      'reference' and may carry 'error'
    - save_path: if given, the figure is written there at 300 dpi
    - title_suffix: optional text appended to the base title
    - graph: graph whose node/edge counts are shown in the info box. When
      omitted, a notebook-level variable named `G` is used if one exists
      (the original behaviour); if neither is available the info box only
      lists the number of critical points.

    Returns:
    - None. The figure is shown (and optionally saved) and a text summary of
      the critical points is printed.
    """

    # Configuration settings (note: resets the global matplotlib style)
    plt.style.use('default')
    fig, ax = plt.subplots(1, 1, figsize=(14, 10))

    # Define professional color scheme
    colors = {
        'main_line': '#2E86AB',      # Professional blue
        'critical': '#A23B72',       # Burgundy
        'reference': '#F18F01',      # Orange
        'background': '#F8F9FA',     # Light gray
        'grid': '#E9ECEF',          # Lighter gray
        'text': '#212529'           # Dark gray
    }

    # Set background color
    fig.patch.set_facecolor('white')
    ax.set_facecolor(colors['background'])

    x_data = thresholds
    y_data = giant_sizes

    # Main percolation curve
    ax.plot(x_data, y_data, 
            color=colors['main_line'], 
            linewidth=3.5, 
            alpha=0.9,
            label='Giant Component Size',
            zorder=3)

    # Light fill below the curve
    ax.fill_between(x_data, 0, y_data, 
                    color=colors['main_line'], 
                    alpha=0.15, 
                    zorder=1)

    # Split critical points by type: reference matches get numbered markers
    reference_points = [p for p in critical_points if p['type'] == 'reference_match']
    other_points = [p for p in critical_points if p['type'] != 'reference_match']

    # Plot reference matching points
    if reference_points:
        ref_x = [p['threshold'] for p in reference_points]
        ref_y = [p['giant_size'] for p in reference_points]

        ax.scatter(ref_x, ref_y, 
                   c=colors['reference'], 
                   s=200, 
                   alpha=0.9,
                   edgecolors='white',
                   linewidth=2,
                   marker='o',
                   label=f'Reference Match ({len(ref_x)})',
                   zorder=5)

        # Add number annotations for reference points
        for i, (x, y) in enumerate(zip(ref_x, ref_y)):
            # Background circle for better readability
            circle = patches.Circle((x, y), 
                                    radius=max(x_data) * 0.015, 
                                    facecolor='white', 
                                    edgecolor=colors['reference'],
                                    linewidth=1.5,
                                    alpha=0.9,
                                    zorder=4)
            ax.add_patch(circle)

            # Number annotation, centred on the marker
            ax.annotate(f'{i+1}', 
                        xy=(x, y), 
                        xytext=(0, 0), 
                        textcoords='offset points',
                        ha='center', va='center',
                        fontsize=10,
                        fontweight='bold',
                        color=colors['text'],
                        zorder=6)

    # Plot other critical points  
    if other_points:
        other_x = [p['threshold'] for p in other_points]
        other_y = [p['giant_size'] for p in other_points]

        ax.scatter(other_x, other_y, 
                   c=colors['critical'], 
                   s=120, 
                   alpha=0.8,
                   edgecolors='white',
                   linewidth=1.5,
                   marker='^',
                   label=f'Other Critical Points ({len(other_x)})',
                   zorder=4)

    # Enhanced grid settings
    ax.grid(True, alpha=0.4, color=colors['grid'], linestyle='-', linewidth=0.8)
    ax.set_axisbelow(True)

    # Axis settings
    ax.set_xscale('log')
    ax.set_xlabel('Percolation Threshold τ', fontsize=14, fontweight='bold', color=colors['text'])
    ax.set_ylabel('Giant Component Size (Fraction)', fontsize=14, fontweight='bold', color=colors['text'])

    # Set axis ranges: pad the x range by 20% on both sides, fixed y range
    x_min, x_max = min(x_data), max(x_data)
    ax.set_xlim(x_min * 0.8, x_max * 1.2)
    ax.set_ylim(-0.05, 1.05)

    # Enhanced title
    base_title = "Network Percolation Analysis"
    if title_suffix:
        title = f"{base_title} - {title_suffix}"
    else:
        title = base_title

    ax.set_title(title, 
                 fontsize=16, 
                 fontweight='bold', 
                 color=colors['text'],
                 pad=20)

    # Professional legend
    legend = ax.legend(loc='upper right', 
                       frameon=True, 
                       fancybox=True, 
                       shadow=True,
                       fontsize=11,
                       framealpha=0.95)
    legend.get_frame().set_facecolor('white')
    legend.get_frame().set_edgecolor(colors['grid'])

    # Information text box. The graph is taken from the `graph` argument; for
    # backward compatibility a notebook-level `G` is used when it is omitted.
    if graph is None:
        graph = globals().get('G')

    info_lines = ["Network Information:"]
    if graph is not None:
        # Node count = distinct end points of the graph's edges
        endpoint_nodes = {node for edge in graph.edges() for node in edge[:2]}
        info_lines.append(f"    Nodes: {len(endpoint_nodes)}")
        info_lines.append(f"    Edges: {graph.number_of_edges()}")
    info_lines.append(f"    Critical Points: {len(critical_points)}")
    info_text = "\n".join(info_lines)

    # Create text box with professional styling
    props = dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.9, edgecolor=colors['grid'])
    ax.text(0.02, 0.98, info_text, transform=ax.transAxes, fontsize=10,
            verticalalignment='top', bbox=props, color=colors['text'])

    # Fine-tune tick parameters
    ax.tick_params(axis='both', which='major', labelsize=11, colors=colors['text'])
    ax.tick_params(axis='both', which='minor', labelsize=9, colors=colors['text'])

    # Set spine colors
    for spine in ax.spines.values():
        spine.set_color(colors['grid'])
        spine.set_linewidth(1.2)

    # Tight layout
    plt.tight_layout()

    # Save if path specified
    if save_path:
        plt.savefig(save_path, 
                    dpi=300, 
                    bbox_inches='tight', 
                    facecolor='white',
                    edgecolor='none')
        print(f"Chart saved: {save_path}")

    plt.show()

    # Output critical points summary
    print("\nCritical Points Summary:")
    print("=" * 60)
    for i, point in enumerate(critical_points):
        print(f"{i+1:2d}. τ = {point['threshold']:.6f}, "
              f"Size = {point['giant_size']:.3f}, "
              f"Type: {point['type']}")

        if point['type'] == 'reference_match':
            error = point.get('error', 0) * 100
            print(f"     Reference: {point['reference']:.6f}, Error: {error:.1f}%")
    print("=" * 60)

# Generate enhanced visualization.
# The output file name carries a timestamp, so every run writes a new PNG.
# NOTE: `datetime` comes from the import in the analysis-pipeline cell above;
# that cell must have been executed first.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_filename = f"final_percolation_analysis_{timestamp}.png"

# Inputs are the notebook-level results saved by the analysis cell.
plot_enhanced_percolation_curve_final(
    final_analysis_thresholds,
    final_analysis_giant_sizes,
    final_analysis_critical_points,
    save_path=save_filename,
    title_suffix="TTWA Commuting Network"
)

print(f"\nVisualization completed!")
print(f"File saved as: {save_filename}")

In [None]:
# Interactive network visualization function

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import networkx as nx
import numpy as np
import pandas as pd

def create_interactive_network_map(G, threshold, coordinates_df, save_path=None):
    """
    Create interactive network map using Plotly

    Keeps the edges of G whose 'weight' is >= threshold (edges without a
    weight count as 0), treats them as undirected, and draws the resulting
    network on an OpenStreetMap background. Nodes of the largest connected
    component are red, all other nodes blue; marker size grows with degree.

    Parameters:
    - G: NetworkX graph
    - threshold: percolation threshold value
    - coordinates_df: DataFrame containing node coordinates (columns: 'node', 'lat', 'lon');
      if a node appears more than once, its first row is used
    - save_path: path to save HTML file

    Returns:
    - The plotly Figure, or None when no edge reaches the threshold or none of
      the remaining nodes has coordinates.
    """

    print(f"Creating interactive network map for threshold: {threshold}")

    # Filter edges based on threshold
    filtered_edges = [(u, v, data) for u, v, data in G.edges(data=True) 
                      if data.get('weight', 0) >= threshold]

    if len(filtered_edges) == 0:
        print("No edges meet the threshold requirement")
        return None

    # Undirected view of the surviving edges
    filtered_G = nx.Graph()
    filtered_G.add_edges_from([(u, v) for u, v, _ in filtered_edges])

    # Largest connected component
    components = list(nx.connected_components(filtered_G))
    largest_component = max(components, key=len) if components else set()
    largest_component_size = len(largest_component)

    print(f"  Filtered network: {filtered_G.number_of_nodes()} nodes, {filtered_G.number_of_edges()} edges")
    print(f"  Largest component size: {largest_component_size}")

    # Build the coordinate lookup once instead of filtering the DataFrame for
    # every node and every edge; setdefault keeps the first row per node.
    coord_lookup = {}
    for node, lat, lon in zip(coordinates_df['node'], coordinates_df['lat'], coordinates_df['lon']):
        coord_lookup.setdefault(node, (lat, lon))

    # Prepare node data (nodes without coordinates cannot be drawn and are skipped)
    node_data = []
    for node in filtered_G.nodes():
        if node not in coord_lookup:
            continue
        lat, lon = coord_lookup[node]
        is_in_giant = node in largest_component
        node_data.append({
            'node': node,
            'lat': lat,
            'lon': lon,
            'degree': filtered_G.degree(node),
            'in_giant_component': is_in_giant,
            'component_size': largest_component_size if is_in_giant else 0
        })

    if not node_data:
        print("No valid node coordinate data found")
        return None

    nodes_df = pd.DataFrame(node_data)

    # Edge polyline: separate lon and lat sequences, each edge contributing
    # (source, target, None). The None breaks the line between edges. Keeping
    # the two sequences separate avoids mixing longitudes with latitudes.
    edge_lons, edge_lats = [], []
    for u, v, _ in filtered_edges:
        if u not in coord_lookup or v not in coord_lookup:
            continue
        (u_lat, u_lon), (v_lat, v_lon) = coord_lookup[u], coord_lookup[v]
        edge_lons.extend([u_lon, v_lon, None])
        edge_lats.extend([u_lat, v_lat, None])

    # Create Plotly figure
    fig = go.Figure()

    # Add edges (as lines)
    if edge_lons:
        fig.add_trace(go.Scattermapbox(
            lon=edge_lons,
            lat=edge_lats,
            mode='lines',
            line=dict(width=1, color='rgba(100, 100, 100, 0.6)'),
            showlegend=False,
            hoverinfo='skip'
        ))

    def add_node_trace(subset, base_size, size_per_degree, color, opacity, status, name):
        """Add one marker trace; the hover text reports the real degree via customdata."""
        if subset.empty:
            return
        fig.add_trace(go.Scattermapbox(
            lon=subset['lon'],
            lat=subset['lat'],
            mode='markers',
            marker=dict(
                size=base_size + subset['degree'] * size_per_degree,  # Size based on degree
                color=color,
                opacity=opacity
            ),
            text=subset['node'],
            customdata=subset['degree'].tolist(),
            hovertemplate='<b>%{text}</b><br>' +
                          'Degree: %{customdata}<br>' +
                          f'{status}<br>' +
                          '<extra></extra>',
            name=name,
            showlegend=True
        ))

    # Add nodes: giant component first, then everything else
    in_giant = nodes_df['in_giant_component']
    giant_nodes = nodes_df[in_giant]
    other_nodes = nodes_df[~in_giant]
    add_node_trace(giant_nodes, 8, 0.5, 'red', 0.8,
                   'In Giant Component', f'Giant Component ({len(giant_nodes)})')
    add_node_trace(other_nodes, 6, 0.3, 'blue', 0.6,
                   'Small Component', f'Other Components ({len(other_nodes)})')

    # Calculate map center from the drawable nodes
    center_lat = nodes_df['lat'].mean()
    center_lon = nodes_df['lon'].mean()

    # Share of the thresholded network that belongs to the giant component.
    # Both counts come from the same graph, so the value cannot exceed 100%
    # even when some nodes have no coordinates.
    giant_share = largest_component_size / filtered_G.number_of_nodes() * 100

    # Update layout
    fig.update_layout(
        title=f'Interactive Network Map - Threshold: {threshold:.6f}<br>' +
              f'Giant Component: {largest_component_size} nodes ' +
              f'({giant_share:.1f}%)',
        mapbox=dict(
            style='open-street-map',
            center=dict(lat=center_lat, lon=center_lon),
            zoom=6
        ),
        showlegend=True,
        height=700,
        font=dict(size=12)
    )

    # Save if path specified
    if save_path:
        fig.write_html(save_path)
        print(f"Interactive map saved: {save_path}")

    # Display
    fig.show()

    return fig

# Load coordinates data.
# The file must provide the columns 'node', 'lat' and 'lon' that
# create_interactive_network_map reads.
print("Loading TTWA coordinates data...")
coordinates_df = pd.read_csv('ttwa_coordinates.csv')  # Assume this file exists
print(f"Loaded coordinates for {len(coordinates_df)} TTWAs")

# Display a few sample coordinates
print("\nSample coordinates:")
print(coordinates_df.head())

# Thresholds to map: four of the values listed in reference_thresholds above
test_thresholds = [0.274173, 0.217115, 0.149683, 0.113373]

print(f"\nGenerating interactive maps for {len(test_thresholds)} thresholds...")

# One HTML file per threshold; each call also displays the figure inline.
# NOTE: this rebinds `save_filename`, previously used for the PNG file name.
for i, threshold in enumerate(test_thresholds):
    print(f"\n=== Map {i+1}/{len(test_thresholds)} ===")
    save_filename = f"interactive_network_map_threshold_{threshold:.6f}.html"
    
    create_interactive_network_map(
        G=G,
        threshold=threshold,
        coordinates_df=coordinates_df,
        save_path=save_filename
    )

print("\nAll interactive maps generated!")

In [None]:
# Simple aesthetic network visualization function - suitable for static display

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

def plot_aesthetic_network_map(G, threshold, node_positions, save_path=None):
    """
    Create aesthetic network visualization using matplotlib

    Edges of G whose 'weight' attribute is >= threshold are kept and treated
    as undirected. Nodes and links of the largest connected component are
    drawn in red; everything else is drawn in blue/gray. Nodes that have no
    entry in node_positions (and the edges touching them) are skipped instead
    of aborting the plot.

    Parameters:
    - G: NetworkX graph whose edges carry a 'weight' attribute
    - threshold: percolation threshold (minimum edge weight to keep)
    - node_positions: dictionary, {node: (x, y)}
    - save_path: save path (optional); the figure is written at 300 dpi

    Returns:
    - None. The figure is shown and then closed.
    """

    print(f"Creating aesthetic network map for threshold: {threshold}")

    # Filter network
    filtered_edges = [(u, v) for u, v, data in G.edges(data=True)
                      if data.get('weight', 0) >= threshold]

    if len(filtered_edges) == 0:
        print("No edges meet threshold requirement")
        return

    filtered_G = nx.Graph()
    filtered_G.add_edges_from(filtered_edges)

    # Calculate connected components
    components = list(nx.connected_components(filtered_G))
    largest_component = max(components, key=len) if components else set()

    print(f"  Network: {filtered_G.number_of_nodes()} nodes, {filtered_G.number_of_edges()} edges")
    print(f"  Giant component: {len(largest_component)} nodes")

    # Resolve positions BEFORE creating the figure so that the early return
    # below does not leave an empty figure open.
    filtered_positions = {node: pos for node, pos in node_positions.items()
                          if node in filtered_G}

    if not filtered_positions:
        print("No valid node positions found")
        return

    # Only edges with both endpoints positioned can be drawn. The colour list
    # is built from exactly this edge list so the two always have equal length.
    drawable_edges = [(u, v) for u, v in filtered_G.edges()
                      if u in filtered_positions and v in filtered_positions]
    # Both endpoints of an edge always belong to the same connected component,
    # so an edge is either fully inside the giant component or fully outside.
    edge_colors = ['#FF6B6B' if u in largest_component else '#95A5A6'
                   for u, _ in drawable_edges]

    giant_nodes = [node for node in filtered_G.nodes()
                   if node in largest_component and node in filtered_positions]
    other_nodes = [node for node in filtered_G.nodes()
                   if node not in largest_component and node in filtered_positions]

    # Scope the dark theme to this figure; plt.style.use() would change the
    # style of every plot created later in the notebook.
    with plt.style.context('dark_background'):
        fig, ax = plt.subplots(1, 1, figsize=(16, 12))
        fig.patch.set_facecolor('black')
        ax.set_facecolor('black')

        # Draw edges
        if drawable_edges:
            nx.draw_networkx_edges(filtered_G, filtered_positions,
                                   edgelist=drawable_edges,
                                   edge_color=edge_colors,
                                   alpha=0.6,
                                   width=0.5,
                                   ax=ax)

        # Giant component nodes
        if giant_nodes:
            nx.draw_networkx_nodes(filtered_G, filtered_positions,
                                   nodelist=giant_nodes,
                                   node_color='#FF6B6B',
                                   node_size=50,
                                   alpha=0.9,
                                   ax=ax)

        # Other nodes
        if other_nodes:
            nx.draw_networkx_nodes(filtered_G, filtered_positions,
                                   nodelist=other_nodes,
                                   node_color='#3498DB',
                                   node_size=30,
                                   alpha=0.7,
                                   ax=ax)

        # Set title and formatting (share is relative to the thresholded network)
        ax.set_title(f'Network Structure at Threshold τ = {threshold:.6f}\n'
                     f'Giant Component: {len(largest_component)} nodes '
                     f'({len(largest_component)/filtered_G.number_of_nodes()*100:.1f}%)',
                     color='white', fontsize=16, fontweight='bold')

        # Remove axes
        ax.axis('off')

        # Legend lists only the categories that can actually occur
        legend_elements = [
            plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='#FF6B6B',
                       markersize=10, label='Giant Component', linestyle='None'),
            plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='#3498DB',
                       markersize=8, label='Small Components', linestyle='None'),
            plt.Line2D([0], [0], color='#FF6B6B', linewidth=2, label='Giant Component Links'),
            plt.Line2D([0], [0], color='#95A5A6', linewidth=2, label='Small Component Links')
        ]

        ax.legend(handles=legend_elements, loc='upper right',
                  frameon=True, facecolor='black', edgecolor='white')

        plt.tight_layout()

        # Save
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight',
                        facecolor='black', edgecolor='none')
            print(f"Aesthetic map saved: {save_path}")

        plt.show()

    # This function is called once per threshold; release the large figure.
    plt.close(fig)

# If you have node position data, use it; otherwise generate positions
print("Generating/loading node positions...")

# Fixed seed so a regenerated layout is identical across kernel restarts
LAYOUT_SEED = 42

try:
    # Try to load pre-computed positions.
    # NOTE(security): allow_pickle=True unpickles arbitrary objects; only load
    # a cache file that this notebook produced itself.
    node_positions = np.load('ttwa_positions.npy', allow_pickle=True).item()
    print(f"Loaded pre-computed positions for {len(node_positions)} nodes")
except Exception as load_error:
    # Missing or unreadable cache: fall back to computing a layout. Catching
    # Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
    print(f"No usable position cache ({type(load_error).__name__}: {load_error})")

    # Generate positions using spring layout (may take time for large networks)
    print("Generating new positions using spring layout...")
    print("This may take a few minutes for large networks...")

    # Use a subset for position calculation if network is too large
    if G.number_of_nodes() > 500:
        print("Network is large, using sample for position calculation...")
        # Take a sample subgraph
        sample_nodes = list(G.nodes())[:500]
        sample_G = G.subgraph(sample_nodes)
        sample_positions = nx.spring_layout(sample_G, k=1, iterations=50, seed=LAYOUT_SEED)

        # Extend to full network (simplified approach): unsampled nodes get a
        # random position. spring_layout's default output spans roughly
        # [-1, 1] per axis, so the random points are drawn from that range.
        layout_rng = np.random.default_rng(LAYOUT_SEED)
        node_positions = {}
        for node in G.nodes():
            if node in sample_positions:
                node_positions[node] = sample_positions[node]
            else:
                node_positions[node] = layout_rng.uniform(-1.0, 1.0, size=2)
    else:
        node_positions = nx.spring_layout(G, k=1, iterations=50, seed=LAYOUT_SEED)

    # Save positions for future use (stored as a pickled dict inside the .npy)
    np.save('ttwa_positions.npy', node_positions)
    print(f"Generated and saved positions for {len(node_positions)} nodes")

# A cache written for a different network may not cover every node of G
missing_position_nodes = [node for node in G.nodes() if node not in node_positions]
if missing_position_nodes:
    print(f"Warning: {len(missing_position_nodes)} nodes of G have no position and will not be drawn")

print(f"Node positions ready: {len(node_positions)} nodes")

In [None]:
# Render one static (matplotlib) network map per representative threshold

# Thresholds to render, from the most to the least restrictive
aesthetic_thresholds = [
    0.274173,  # Highest threshold
    0.217115,  # Medium-high threshold
    0.149683,  # Medium threshold
    0.113373,  # Lower threshold
    0.051128   # Lowest threshold
]

print(f"Generating aesthetic network maps for {len(aesthetic_thresholds)} thresholds...")
print("This process may take several minutes...")

for i, threshold in enumerate(aesthetic_thresholds):
    print(f"\n=== Generating map {i+1}/{len(aesthetic_thresholds)} ===")
    print(f"Threshold: {threshold}")

    # One PNG per threshold, named with six decimals
    save_filename = f"aesthetic_map_threshold_{threshold:.6f}.png"
    plot_aesthetic_network_map(G=G,
                               threshold=threshold,
                               node_positions=node_positions,
                               save_path=save_filename)

    print(f"Map {i+1} completed")

print(f"\nAll {len(aesthetic_thresholds)} aesthetic maps generated!")
print("Files saved with prefix 'aesthetic_map_threshold_'")

In [None]:
# Advanced percolation analysis - identifying transition points and calculating critical exponents

def advanced_percolation_analysis(thresholds, giant_sizes, save_results=True):
    """
    Advanced percolation analysis including:
    1. Transition point identification
    2. Critical exponent calculation
    3. Scaling behavior analysis

    The giant-component curve is Gaussian-smoothed (sigma=2) and differentiated
    with respect to log10(threshold). The "critical threshold" is the point
    where that derivative is most negative.

    Parameters:
    - thresholds: 1-D sequence (list or array) of positive threshold values
    - giant_sizes: 1-D sequence of giant component sizes, same length as
      thresholds. The P=0.5 crossing below treats these as fractions in
      [0, 1] -- TODO confirm against the caller.
    - save_results: whether to write 'advanced_percolation_summary.csv'

    Returns:
    - results: dictionary with keys 'gamma_exponent' (float, or None when the
      fit is not possible), 'critical_threshold', 'critical_size',
      'percolation_threshold', 'transition_steepness', 'transition_width',
      'transition_points', 'first_derivative', 'second_derivative',
      'smoothed_sizes'

    Raises:
    - ValueError: if the inputs are not 1-D, differ in length, or contain
      fewer than two points
    """
    # Local imports keep this cell self-contained
    from scipy.ndimage import gaussian_filter1d
    from scipy.signal import find_peaks

    print("Starting advanced percolation analysis...")

    # Accept lists as well as arrays. Casting to float also prevents
    # gaussian_filter1d from truncating the smoothed curve to integers when
    # integer sizes are passed (its output dtype follows the input dtype).
    thresholds = np.asarray(thresholds, dtype=float)
    giant_sizes = np.asarray(giant_sizes, dtype=float)
    if thresholds.ndim != 1 or giant_sizes.shape != thresholds.shape:
        raise ValueError("thresholds and giant_sizes must be 1-D sequences of equal length")
    if thresholds.size < 2:
        raise ValueError("at least two (threshold, giant size) points are required")

    # 'gamma_exponent' is always present, even when no fit can be made
    results = {'gamma_exponent': None}

    # 1. Smooth the data
    smoothed_sizes = gaussian_filter1d(giant_sizes, sigma=2)

    # 2. Calculate derivatives with respect to log10(threshold)
    d_log_thresh = np.diff(np.log10(thresholds))
    first_derivative = np.diff(smoothed_sizes) / d_log_thresh
    second_derivative = np.diff(first_derivative) / d_log_thresh[:-1]

    # 3. Identify critical region
    # Find the point of maximum decline (steepest descent)
    max_decline_idx = int(np.argmin(first_derivative))
    critical_threshold = float(thresholds[max_decline_idx])
    critical_size = float(giant_sizes[max_decline_idx])

    print(f"Critical threshold (max decline): τc = {critical_threshold:.6f}")
    print(f"Giant component size at critical point: {critical_size:.3f}")

    # 4. Analyze scaling behavior near critical point
    # Look at behavior within ±20% of critical threshold
    window_size = critical_threshold * 0.2
    critical_region_mask = ((thresholds >= critical_threshold - window_size) &
                            (thresholds <= critical_threshold + window_size))

    # 5. Fit power law near critical point
    # For percolation: S(τ) ∝ (τ - τc)^(-γ) for τ > τc
    gamma_exponent = None
    try:
        # Look at supercritical region (τ > τc) inside the window
        supercritical_mask = critical_region_mask & (thresholds > critical_threshold)
        if np.count_nonzero(supercritical_mask) > 5:  # Need sufficient data points
            # Drop zero sizes (log undefined). A single mask keeps thresholds
            # and sizes aligned.
            fit_mask = supercritical_mask & (giant_sizes > 0)
            if np.count_nonzero(fit_mask) > 3:
                # Fit: log(S) = -γ * log(τ - τc) + const
                log_tau_diff = np.log10(thresholds[fit_mask] - critical_threshold)
                log_sizes = np.log10(giant_sizes[fit_mask])
                slope = np.polyfit(log_tau_diff, log_sizes, 1)[0]
                gamma_exponent = float(-slope)  # Negative because of power law form

        if gamma_exponent is None:
            print("Insufficient data for power law fitting")
        else:
            print(f"Critical exponent γ = {gamma_exponent:.3f}")
    except Exception as e:
        # The fit is best-effort; a failure must not abort the analysis
        print(f"Error in power law fitting: {e}")
        gamma_exponent = None
    results['gamma_exponent'] = gamma_exponent

    # 6. Calculate additional metrics
    # Steepness of transition
    transition_steepness = float(np.abs(first_derivative[max_decline_idx]))

    # Width of transition region: number of derivative samples whose magnitude
    # exceeds 50% of the maximum
    steep_region = np.abs(first_derivative) > transition_steepness * 0.5
    transition_width = int(np.count_nonzero(steep_region))

    print(f"Transition steepness: {transition_steepness:.4f}")
    print(f"Transition width: {transition_width} points")

    # 7. Identify multiple transition points
    # Peaks of the negated first derivative mark rapid changes in the curve
    peaks, _ = find_peaks(-first_derivative, height=0.01, distance=10)

    transition_points = [
        {
            'threshold': float(thresholds[peak_idx]),
            'giant_size': float(giant_sizes[peak_idx]),
            'strength': float(np.abs(first_derivative[peak_idx]))
        }
        for peak_idx in peaks
    ]

    # Sort by strength
    transition_points.sort(key=lambda point: point['strength'], reverse=True)

    print(f"\nIdentified {len(transition_points)} transition points:")
    for i, point in enumerate(transition_points[:5]):  # Show top 5
        print(f"  {i+1}. τ = {point['threshold']:.6f}, "
              f"S = {point['giant_size']:.3f}, "
              f"Strength = {point['strength']:.4f}")

    # 8. Calculate percolation probability
    # P(τ) = fraction of realizations where giant component exists
    # Here we approximate this as the giant component size itself
    percolation_probability = giant_sizes

    # Find percolation threshold (sample closest to P = 0.5)
    p_threshold_idx = int(np.argmin(np.abs(percolation_probability - 0.5)))
    p_threshold = float(thresholds[p_threshold_idx])

    print(f"\nPercolation threshold (P=0.5): τp = {p_threshold:.6f}")

    # Compile results
    results.update({
        'critical_threshold': critical_threshold,
        'critical_size': critical_size,
        'percolation_threshold': p_threshold,
        'transition_steepness': transition_steepness,
        'transition_width': transition_width,
        'transition_points': transition_points,
        'first_derivative': first_derivative,
        'second_derivative': second_derivative,
        'smoothed_sizes': smoothed_sizes
    })

    # 9. Save results
    if save_results:
        # A missing exponent is written as NaN so the column stays numeric
        gamma_for_csv = np.nan if gamma_exponent is None else gamma_exponent
        summary_df = pd.DataFrame({
            'metric': ['Critical Threshold', 'Critical Size', 'Percolation Threshold',
                       'Gamma Exponent', 'Transition Steepness', 'Transition Width'],
            'value': [critical_threshold, critical_size, p_threshold,
                      gamma_for_csv, transition_steepness, transition_width]
        })

        filename = "advanced_percolation_summary.csv"
        summary_df.to_csv(filename, index=False)

        print(f"\nResults saved to: {filename}")

    return results

# Run the advanced analysis on the final percolation curve and keep the
# returned dictionary for the plotting/export cells below
print("Executing advanced percolation analysis...")
print("=" * 60)

advanced_results = advanced_percolation_analysis(final_analysis_thresholds,
                                                 final_analysis_giant_sizes,
                                                 save_results=True)

print("=" * 60)
print("Advanced analysis completed!")

In [None]:
# Visualize advanced analysis results

def plot_advanced_analysis_results(thresholds, giant_sizes, results, save_path=None):
    """
    Comprehensive visualization of advanced percolation analysis

    Draws a 2x2 figure (percolation curve with marked thresholds, first
    derivative, second derivative, histogram of transition strengths) and
    prints a text summary.

    Parameters:
    - thresholds: sequence of threshold values (x axis, log scale)
    - giant_sizes: giant component sizes, same length as thresholds
    - results: dictionary of analysis results; keys read here are
      'smoothed_sizes', 'critical_threshold', 'critical_size',
      'percolation_threshold', 'transition_points', 'first_derivative',
      'second_derivative', 'transition_steepness', 'transition_width' and,
      optionally, 'gamma_exponent'
    - save_path: optional file path; the figure is saved at 300 dpi when given
    """

    # Create subplots
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Advanced Percolation Analysis Results', fontsize=16, fontweight='bold')

    # Color scheme
    colors = {
        'main': '#2E86AB',
        'critical': '#A23B72',
        'derivative': '#F18F01',
        'transition': '#4CAF50'
    }

    # 1. Main percolation curve with critical points
    ax1.semilogx(thresholds, giant_sizes, color=colors['main'], linewidth=2.5,
                 label='Giant Component Size')
    ax1.semilogx(thresholds, results['smoothed_sizes'], '--', color='gray',
                 alpha=0.7, label='Smoothed')

    # Mark critical threshold
    ax1.axvline(results['critical_threshold'], color=colors['critical'],
                linestyle='--', alpha=0.8, label=f"Critical τ = {results['critical_threshold']:.4f}")

    # Mark percolation threshold
    ax1.axvline(results['percolation_threshold'], color=colors['transition'],
                linestyle=':', alpha=0.8, label=f"Percolation τ = {results['percolation_threshold']:.4f}")

    # Mark the three strongest transition points; only the first marker gets
    # a label so the legend shows a single entry for them
    for i, point in enumerate(results['transition_points'][:3]):
        ax1.scatter(point['threshold'], point['giant_size'],
                    s=100, color=colors['derivative'], alpha=0.8, zorder=5,
                    label='Transition points' if i == 0 else None)

    ax1.set_xlabel('Threshold τ')
    ax1.set_ylabel('Giant Component Size')
    ax1.set_title('Percolation Curve with Critical Points')
    ax1.grid(True, alpha=0.3)
    ax1.legend()

    # 2. First derivative
    deriv_thresholds = thresholds[:-1]  # One less point due to differentiation
    ax2.semilogx(deriv_thresholds, results['first_derivative'],
                 color=colors['derivative'], linewidth=2)
    ax2.axhline(0, color='black', linestyle='-', alpha=0.3)
    ax2.axvline(results['critical_threshold'], color=colors['critical'],
                linestyle='--', alpha=0.8)

    ax2.set_xlabel('Threshold τ')
    ax2.set_ylabel('dS/d(log τ)')
    ax2.set_title('First Derivative (Rate of Change)')
    ax2.grid(True, alpha=0.3)

    # 3. Second derivative
    deriv2_thresholds = deriv_thresholds[:-1]  # One less point again
    ax3.semilogx(deriv2_thresholds, results['second_derivative'],
                 color='purple', linewidth=2)
    ax3.axhline(0, color='black', linestyle='-', alpha=0.3)
    ax3.axvline(results['critical_threshold'], color=colors['critical'],
                linestyle='--', alpha=0.8)

    ax3.set_xlabel('Threshold τ')
    ax3.set_ylabel('d²S/d(log τ)²')
    ax3.set_title('Second Derivative (Curvature)')
    ax3.grid(True, alpha=0.3)

    # 4. Transition analysis
    # Plot histogram of transition strengths
    if results['transition_points']:
        strengths = [p['strength'] for p in results['transition_points']]
        mean_strength = np.mean(strengths)
        ax4.hist(strengths, bins=20, color=colors['transition'], alpha=0.7, edgecolor='black')
        ax4.axvline(mean_strength, color='red', linestyle='--',
                    label=f'Mean = {mean_strength:.3f}')
        ax4.set_xlabel('Transition Strength')
        ax4.set_ylabel('Frequency')
        ax4.set_title('Distribution of Transition Strengths')
        ax4.legend()
    else:
        ax4.text(0.5, 0.5, 'No transition points\nidentified',
                 transform=ax4.transAxes, ha='center', va='center', fontsize=12)
        ax4.set_title('Transition Analysis')

    ax4.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Advanced analysis plot saved: {save_path}")

    plt.show()

    # Print summary statistics
    print("\nAdvanced Analysis Summary:")
    print("=" * 50)
    print(f"Critical Threshold (max decline): {results['critical_threshold']:.6f}")
    print(f"Critical Size: {results['critical_size']:.3f}")
    print(f"Percolation Threshold (50%): {results['percolation_threshold']:.6f}")

    # Test for "missing" explicitly: a plain truthiness check would report a
    # legitimately computed exponent of 0.0 as "Could not calculate".
    gamma_exponent = results.get('gamma_exponent')
    if gamma_exponent is not None and not np.isnan(gamma_exponent):
        print(f"Critical Exponent γ: {gamma_exponent:.3f}")
    else:
        print("Critical Exponent γ: Could not calculate")

    print(f"Transition Steepness: {results['transition_steepness']:.4f}")
    print(f"Transition Width: {results['transition_width']} points")
    print(f"Number of Transition Points: {len(results['transition_points'])}")
    print("=" * 50)

# Generate advanced analysis visualization
# In the cells visible here, `datetime` is only imported inside function
# bodies, so it is imported explicitly to keep this cell runnable on a fresh
# kernel (Restart & Run All).
from datetime import datetime

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
advanced_plot_filename = f"advanced_percolation_analysis_{timestamp}.png"

plot_advanced_analysis_results(
    final_analysis_thresholds,
    final_analysis_giant_sizes,
    advanced_results,
    save_path=advanced_plot_filename
)

print(f"\nAdvanced analysis visualization completed!")
print(f"Plot saved as: {advanced_plot_filename}")

In [None]:
# Network hierarchy analysis - identifying core and peripheral regions

def analyze_network_hierarchy(G, critical_thresholds, save_results=True, seed=42):
    """
    Analyze network hierarchy at different percolation thresholds

    For each threshold, edges of G with 'weight' >= threshold are kept and
    treated as undirected. Within the largest connected component every node
    gets a composite centrality score
    (0.4 * degree + 0.3 * betweenness + 0.3 * closeness) and is classified as
    core (score at or above the 80th percentile), periphery (at or below the
    20th percentile) or semi-periphery (the rest). The three classes are
    mutually exclusive; on ties core takes precedence.

    Parameters:
    - G: NetworkX graph whose edges carry a 'weight' attribute
    - critical_thresholds: list of critical threshold values
    - save_results: whether to write 'network_hierarchy_analysis.csv'
    - seed: seed for the sampled betweenness estimate used when the giant
      component has more than 100 nodes (makes results reproducible)

    Returns:
    - hierarchy_results: dictionary {threshold: per-threshold result dict};
      thresholds that keep no edges are omitted
    """

    print("Starting network hierarchy analysis...")
    print(f"Analyzing {len(critical_thresholds)} critical thresholds")

    # Betweenness is estimated from this many pivot nodes on large components
    betweenness_sample_size = 100

    hierarchy_results = {}

    for i, threshold in enumerate(critical_thresholds):
        print(f"\nAnalyzing threshold {i+1}/{len(critical_thresholds)}: τ = {threshold:.6f}")

        # Create filtered network
        kept_edges = [(u, v) for u, v, data in G.edges(data=True)
                      if data.get('weight', 0) >= threshold]

        if len(kept_edges) == 0:
            print("  No edges above threshold")
            continue

        # Undirected, unweighted view of the surviving edges
        subgraph = nx.Graph()
        subgraph.add_edges_from(kept_edges)

        # Connected components analysis (non-empty because edges exist)
        components = list(nx.connected_components(subgraph))
        giant_component = max(components, key=len)
        component_sizes = [len(comp) for comp in components]

        # Network metrics for giant component
        giant_subgraph = subgraph.subgraph(giant_component)
        num_nodes = len(giant_component)
        num_edges = giant_subgraph.number_of_edges()
        density = nx.density(giant_subgraph) if num_nodes > 1 else 0

        # Centrality measures for hierarchy identification
        print("    Computing centrality measures...")

        degree_centrality = nx.degree_centrality(giant_subgraph)

        # Betweenness centrality (pivot sampling for large networks)
        if num_nodes > betweenness_sample_size:
            betweenness = nx.betweenness_centrality(
                giant_subgraph, k=betweenness_sample_size, seed=seed
            )
        else:
            betweenness = nx.betweenness_centrality(giant_subgraph)

        # Closeness centrality on hop counts. The subgraph edges carry no
        # 'weight' attribute, so requesting weighted distances would give the
        # same values through a slower shortest-path routine.
        closeness = nx.closeness_centrality(giant_subgraph)

        # Composite centrality score (adjust weights as needed)
        composite_centrality = {
            node: (0.4 * degree_centrality.get(node, 0)
                   + 0.3 * betweenness.get(node, 0)
                   + 0.3 * closeness.get(node, 0))
            for node in giant_component
        }

        # Identify core (top 20%), periphery (bottom 20%), and semi-periphery (middle 60%)
        centrality_values = list(composite_centrality.values())
        core_threshold = np.percentile(centrality_values, 80)
        periphery_threshold = np.percentile(centrality_values, 20)

        core_nodes = [node for node, score in composite_centrality.items()
                      if score >= core_threshold]
        core_set = set(core_nodes)
        # Exclude core nodes so tied scores (e.g. a two-node component) do not
        # put the same node in both classes.
        periphery_nodes = [node for node, score in composite_centrality.items()
                           if score <= periphery_threshold and node not in core_set]
        periphery_set = set(periphery_nodes)
        semi_periphery_nodes = [node for node in giant_component
                                if node not in core_set and node not in periphery_set]

        print(f"    Core nodes: {len(core_nodes)} ({len(core_nodes)/num_nodes*100:.1f}%)")
        print(f"    Semi-periphery: {len(semi_periphery_nodes)} ({len(semi_periphery_nodes)/num_nodes*100:.1f}%)")
        print(f"    Periphery: {len(periphery_nodes)} ({len(periphery_nodes)/num_nodes*100:.1f}%)")

        # Calculate hierarchy metrics
        # Core connectivity
        core_subgraph = giant_subgraph.subgraph(core_nodes)
        core_density = nx.density(core_subgraph) if len(core_nodes) > 1 else 0

        # Core-periphery connections (set lookups keep this linear in edges)
        core_periphery_edges = sum(
            1 for u, v in giant_subgraph.edges()
            if (u in core_set and v in periphery_set)
            or (u in periphery_set and v in core_set)
        )

        # Store results for this threshold
        hierarchy_results[threshold] = {
            'threshold': threshold,
            'giant_component_size': num_nodes,
            'giant_component_edges': num_edges,
            'network_density': density,
            'num_components': len(components),
            'component_sizes': component_sizes,
            'core_nodes': core_nodes,
            'semi_periphery_nodes': semi_periphery_nodes,
            'periphery_nodes': periphery_nodes,
            'core_density': core_density,
            'core_periphery_edges': core_periphery_edges,
            'centrality_scores': composite_centrality
        }

    print(f"\nHierarchy analysis completed for {len(hierarchy_results)} thresholds")

    # Save results
    if save_results:
        # One summary row per analysed threshold
        summary_data = [
            {
                'threshold': threshold,
                'giant_size': results['giant_component_size'],
                'density': results['network_density'],
                'num_components': results['num_components'],
                'core_nodes': len(results['core_nodes']),
                'core_density': results['core_density'],
                'core_periphery_edges': results['core_periphery_edges']
            }
            for threshold, results in hierarchy_results.items()
        ]

        summary_df = pd.DataFrame(summary_data)
        summary_df.to_csv('network_hierarchy_analysis.csv', index=False)
        print("Hierarchy analysis saved to: network_hierarchy_analysis.csv")

    return hierarchy_results

# Thresholds at which the core/periphery structure is analysed
critical_thresholds_for_hierarchy = [0.274173, 0.243051, 0.217115, 0.175618, 0.149683]

print("Executing network hierarchy analysis...")
print("=" * 60)

# Results are keyed by threshold and reused by the plotting/export cells
hierarchy_results = analyze_network_hierarchy(G,
                                              critical_thresholds_for_hierarchy,
                                              save_results=True)

print("=" * 60)
print("Network hierarchy analysis completed!")

In [None]:
# Visualize network hierarchy evolution

def plot_hierarchy_evolution(hierarchy_results, save_path=None):
    """
    Plot how the network hierarchy metrics change with the threshold.

    Produces a 2x2 figure (giant component size, core share, network vs core
    density, component count), each on a log-scaled threshold axis, and then
    prints the same numbers as a text table.

    Parameters:
    - hierarchy_results: dictionary {threshold: result dict}; the keys read
      from each result are 'giant_component_size', 'core_nodes',
      'network_density', 'num_components' and 'core_density'
    - save_path: optional file path; the figure is saved at 300 dpi when given

    Returns:
    - None (also when hierarchy_results is empty, in which case nothing is drawn)
    """

    if not hierarchy_results:
        print("No hierarchy results to plot")
        return

    # Collect one series per metric, ordered by ascending threshold
    thresholds = sorted(hierarchy_results.keys())
    entries = [hierarchy_results[tau] for tau in thresholds]

    giant_sizes = [entry['giant_component_size'] for entry in entries]
    core_proportions = [
        len(entry['core_nodes']) / entry['giant_component_size']
        if entry['giant_component_size'] > 0 else 0
        for entry in entries
    ]
    densities = [entry['network_density'] for entry in entries]
    num_components = [entry['num_components'] for entry in entries]
    core_densities = [entry['core_density'] for entry in entries]

    # Figure with four panels
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    ax1, ax2, ax3, ax4 = axes.flat
    fig.suptitle('Network Hierarchy Evolution Across Thresholds', fontsize=16, fontweight='bold')

    # Color scheme
    colors = {
        'giant': '#2E86AB',
        'core': '#A23B72',
        'density': '#F18F01',
        'components': '#4CAF50'
    }
    line_style = dict(linewidth=2.5, markersize=8)

    def finish_panel(ax, ylabel, title):
        # Shared axis decoration for all four panels
        ax.set_xlabel('Threshold τ')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        ax.legend()

    # 1. Giant component size evolution
    ax1.semilogx(thresholds, giant_sizes, 'o-', color=colors['giant'],
                 label='Giant Component Size', **line_style)
    finish_panel(ax1, 'Number of Nodes', 'Giant Component Size Evolution')

    # 2. Core proportion evolution (as a percentage)
    core_percentages = [p*100 for p in core_proportions]
    ax2.semilogx(thresholds, core_percentages, 's-', color=colors['core'],
                 label='Core Proportion', **line_style)
    finish_panel(ax2, 'Core Nodes (%)', 'Core Proportion in Giant Component')

    # 3. Network density comparison
    ax3.semilogx(thresholds, densities, '^-', color=colors['density'],
                 label='Network Density', **line_style)
    ax3.semilogx(thresholds, core_densities, 'v-', color=colors['core'],
                 label='Core Density', **line_style)
    finish_panel(ax3, 'Density', 'Network vs Core Density')

    # 4. Number of components
    ax4.semilogx(thresholds, num_components, 'D-', color=colors['components'],
                 label='Number of Components', **line_style)
    finish_panel(ax4, 'Component Count', 'Network Fragmentation')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Hierarchy evolution plot saved: {save_path}")

    plt.show()

    # Text table with one row per threshold (same ascending order as the plots)
    rule = "-" * 80
    print("\nHierarchy Evolution Summary:")
    print("=" * 80)
    print(f"{'Threshold':<12} {'Giant Size':<12} {'Core %':<10} {'Density':<10} {'Core Density':<12} {'Components':<10}")
    print(rule)

    table_rows = zip(thresholds, giant_sizes, core_proportions,
                     densities, core_densities, num_components)
    for tau, size, core_share, dens, core_dens, n_comp in table_rows:
        print(f"{tau:<12.6f} {size:<12d} {core_share*100:<10.1f} "
              f"{dens:<10.3f} {core_dens:<12.3f} {n_comp:<10d}")

    print(rule)

# Draw the hierarchy evolution figure and store it next to the notebook
hierarchy_plot_filename = "network_hierarchy_evolution.png"

plot_hierarchy_evolution(hierarchy_results, save_path=hierarchy_plot_filename)

print(f"\nHierarchy evolution visualization completed!")
print(f"Plot saved as: {hierarchy_plot_filename}")

#### Dendrogram


In [None]:
# Comprehensive result export function

def export_comprehensive_results(analysis_results, hierarchy_results, 
                                advanced_results, export_dir="percolation_results"):
    """
    Export all analysis results in multiple formats (CSV, JSON, pickle, Markdown).
    
    Parameters:
    - analysis_results: main percolation analysis results; the keys read here are
      'thresholds', 'giant_sizes', 'critical_points' and 'network_info'
    - hierarchy_results: mapping of threshold -> hierarchy results for that threshold
    - advanced_results: advanced percolation analysis results
    - export_dir: directory to save results (created when it does not exist)
    
    Returns:
    - export_dir: the directory the files were written to
    
    Notes:
    - Every file name carries the same timestamp suffix, taken once at the
      start of the export.
    - Values in the JSON payload that expose ``tolist()`` (NumPy scalars and
      arrays) are converted to plain Python values, so e.g. an ``np.int64``
      count does not make ``json.dump`` raise ``TypeError``.
    - The JSON and Markdown files are written as UTF-8; the report contains
      the non-ASCII character 'τ'.
    """
    
    import os
    import json
    import pickle
    from datetime import datetime
    
    def _json_default(value):
        """Fallback used by json.dump for objects it cannot encode itself."""
        if hasattr(value, 'tolist'):
            return value.tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
    
    def _as_list(values):
        """Return `values` as a plain list (uses tolist() when available)."""
        return values.tolist() if hasattr(values, 'tolist') else list(values)
    
    # Create export directory; exist_ok avoids failing if it appears between
    # the check and the creation
    if not os.path.exists(export_dir):
        os.makedirs(export_dir, exist_ok=True)
        print(f"Created export directory: {export_dir}")
    
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    print("Exporting comprehensive analysis results...")
    
    # 1. Export main percolation data
    print("  Exporting main percolation analysis...")
    
    # Percolation curve data
    percolation_df = pd.DataFrame({
        'threshold': analysis_results['thresholds'],
        'giant_component_size': analysis_results['giant_sizes']
    })
    percolation_df.to_csv(f"{export_dir}/percolation_curve_{timestamp}.csv", index=False)
    
    # Critical points; optional fields default to an empty string
    critical_points_data = [
        {
            'threshold': point['threshold'],
            'giant_size': point['giant_size'],
            'type': point['type'],
            'reference_threshold': point.get('reference', ''),
            'error': point.get('error', ''),
            'strength': point.get('strength', ''),
            'target_fraction': point.get('target_fraction', '')
        }
        for point in analysis_results['critical_points']
    ]
    
    critical_df = pd.DataFrame(critical_points_data)
    critical_df.to_csv(f"{export_dir}/critical_points_{timestamp}.csv", index=False)
    
    # 2. Export hierarchy analysis
    print("  Exporting hierarchy analysis...")
    
    hierarchy_summary = [
        {
            'threshold': threshold,
            'giant_component_size': results['giant_component_size'],
            'giant_component_edges': results['giant_component_edges'],
            'network_density': results['network_density'],
            'num_components': results['num_components'],
            'core_nodes_count': len(results['core_nodes']),
            'semi_periphery_count': len(results['semi_periphery_nodes']),
            'periphery_count': len(results['periphery_nodes']),
            'core_density': results['core_density'],
            'core_periphery_edges': results['core_periphery_edges']
        }
        for threshold, results in hierarchy_results.items()
    ]
    
    hierarchy_df = pd.DataFrame(hierarchy_summary)
    hierarchy_df.to_csv(f"{export_dir}/hierarchy_analysis_{timestamp}.csv", index=False)
    
    # Export detailed centrality scores for each threshold (one CSV per threshold)
    for threshold, results in hierarchy_results.items():
        centrality_data = []
        for node, score in results['centrality_scores'].items():
            # Anything that is neither core nor semi-periphery is labelled periphery
            node_type = 'core' if node in results['core_nodes'] else \
                       'semi_periphery' if node in results['semi_periphery_nodes'] else 'periphery'
            
            centrality_data.append({
                'node': node,
                'centrality_score': score,
                'node_type': node_type
            })
        
        centrality_df = pd.DataFrame(centrality_data)
        # Replace the decimal point so the threshold can be embedded in a file name
        safe_threshold = str(threshold).replace('.', '_')
        centrality_df.to_csv(f"{export_dir}/centrality_threshold_{safe_threshold}_{timestamp}.csv", index=False)
    
    # 3. Export advanced analysis results
    print("  Exporting advanced analysis...")
    
    advanced_summary = {
        'critical_threshold': advanced_results['critical_threshold'],
        'critical_size': advanced_results['critical_size'],
        'percolation_threshold': advanced_results['percolation_threshold'],
        'gamma_exponent': advanced_results.get('gamma_exponent'),
        'transition_steepness': advanced_results['transition_steepness'],
        'transition_width': advanced_results['transition_width'],
        'num_transition_points': len(advanced_results['transition_points'])
    }
    
    advanced_df = pd.DataFrame([advanced_summary])
    advanced_df.to_csv(f"{export_dir}/advanced_analysis_summary_{timestamp}.csv", index=False)
    
    # Export transition points (file is only written when there is at least one)
    transition_data = [
        {
            'threshold': point['threshold'],
            'giant_size': point['giant_size'],
            'strength': point['strength']
        }
        for point in advanced_results['transition_points']
    ]
    
    if transition_data:
        transition_df = pd.DataFrame(transition_data)
        transition_df.to_csv(f"{export_dir}/transition_points_{timestamp}.csv", index=False)
    
    # Export derivatives
    # NOTE(review): the slices assume the derivatives are one / two elements
    # shorter than 'thresholds' - confirm against the code that computes them.
    if advanced_results.get('first_derivative') is not None:
        deriv_df = pd.DataFrame({
            'threshold': analysis_results['thresholds'][:-1],  # One less due to differentiation
            'first_derivative': advanced_results['first_derivative']
        })
        deriv_df.to_csv(f"{export_dir}/first_derivative_{timestamp}.csv", index=False)
    
    if advanced_results.get('second_derivative') is not None:
        second_deriv_df = pd.DataFrame({
            'threshold': analysis_results['thresholds'][:-2],  # Two less due to second differentiation
            'second_derivative': advanced_results['second_derivative']
        })
        second_deriv_df.to_csv(f"{export_dir}/second_derivative_{timestamp}.csv", index=False)
    
    # 4. Export network information
    print("  Exporting network information...")
    
    network_info = {
        'total_nodes': analysis_results['network_info']['nodes'],
        'total_edges': analysis_results['network_info']['edges'],
        'weight_range_min': analysis_results['network_info']['weight_range'][0],
        'weight_range_max': analysis_results['network_info']['weight_range'][1],
        'analysis_timestamp': timestamp,
        'num_sampling_points': len(analysis_results['thresholds']),
        'num_critical_points': len(analysis_results['critical_points'])
    }
    
    network_df = pd.DataFrame([network_info])
    network_df.to_csv(f"{export_dir}/network_information_{timestamp}.csv", index=False)
    
    # 5. Export raw data for reproducibility
    print("  Exporting raw data...")
    
    # Save as JSON for easy reading
    export_data = {
        'analysis_results': {
            'thresholds': _as_list(analysis_results['thresholds']),
            'giant_sizes': _as_list(analysis_results['giant_sizes']),
            'critical_points': analysis_results['critical_points'],
            'network_info': analysis_results['network_info']
        },
        'advanced_results': {
            'critical_threshold': advanced_results['critical_threshold'],
            'critical_size': advanced_results['critical_size'],
            'percolation_threshold': advanced_results['percolation_threshold'],
            'gamma_exponent': advanced_results.get('gamma_exponent'),
            'transition_steepness': advanced_results['transition_steepness'],
            'transition_width': advanced_results['transition_width'],
            'transition_points': advanced_results['transition_points']
        },
        'metadata': {
            'export_timestamp': timestamp,
            'analysis_type': 'network_percolation',
            'software': 'Python NetworkX',
            'description': 'Comprehensive percolation analysis of TTWA commuting network'
        }
    }
    
    # `default` converts NumPy values nested anywhere in the payload
    with open(f"{export_dir}/complete_analysis_{timestamp}.json", 'w', encoding='utf-8') as f:
        json.dump(export_data, f, indent=2, default=_json_default)
    
    # Save as pickle for exact reproduction
    with open(f"{export_dir}/complete_analysis_{timestamp}.pkl", 'wb') as f:
        pickle.dump({
            'analysis_results': analysis_results,
            'hierarchy_results': hierarchy_results,
            'advanced_results': advanced_results
        }, f)
    
    # 6. Create summary report
    print("  Creating summary report...")
    
    report_lines = [
        f"# Percolation Analysis Report",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"",
        f"## Network Information",
        f"- Total Nodes: {network_info['total_nodes']}",
        f"- Total Edges: {network_info['total_edges']}",
        f"- Weight Range: {network_info['weight_range_min']:.6f} - {network_info['weight_range_max']:.6f}",
        f"",
        f"## Main Results",
        f"- Critical Threshold (max decline): {advanced_results['critical_threshold']:.6f}",
        f"- Critical Size: {advanced_results['critical_size']:.3f}",
        f"- Percolation Threshold (50%): {advanced_results['percolation_threshold']:.6f}",
        f"- Number of Critical Points: {len(analysis_results['critical_points'])}",
        f"- Number of Transition Points: {len(advanced_results['transition_points'])}",
        f"",
        f"## Critical Points",
    ]
    
    # Only the first ten critical points are listed in the report
    for i, point in enumerate(analysis_results['critical_points'][:10]):  # Top 10
        report_lines.append(f"{i+1:2d}. τ = {point['threshold']:.6f}, Size = {point['giant_size']:.3f}, Type: {point['type']}")
    
    report_lines.extend([
        f"",
        f"## Files Exported",
        f"- percolation_curve_{timestamp}.csv",
        f"- critical_points_{timestamp}.csv", 
        f"- hierarchy_analysis_{timestamp}.csv",
        f"- advanced_analysis_summary_{timestamp}.csv",
        f"- complete_analysis_{timestamp}.json",
        f"- complete_analysis_{timestamp}.pkl"
    ])
    
    # Explicit UTF-8: the report contains 'τ', which locale-dependent default
    # encodings may be unable to encode
    with open(f"{export_dir}/analysis_report_{timestamp}.md", 'w', encoding='utf-8') as f:
        f.write('\n'.join(report_lines))
    
    print(f"\nExport completed!")
    print(f"All files saved to: {export_dir}/")
    print(f"Summary report: analysis_report_{timestamp}.md")
    
    return export_dir

# Run the export with the default output directory and keep the returned path
print("Executing comprehensive result export...")
print(60 * "=")

export_directory = export_comprehensive_results(
    analysis_results, hierarchy_results, advanced_results
)

print(60 * "=")
print("Comprehensive export completed!")

In [None]:
# Performance optimization and validation

def validate_percolation_results(analysis_results, tolerance=0.05):
    """
    Validate percolation analysis results for consistency and accuracy
    
    Parameters:
    - analysis_results: main analysis results; reads 'thresholds',
      'giant_sizes' and 'critical_points'. The first element of
      'thresholds'/'giant_sizes' is treated as the highest threshold and the
      last as the lowest (descending threshold order).
    - tolerance: how far the giant component size may drop between two
      consecutive samples before the step counts as a monotonicity violation
    
    Returns:
    - validation_report: dictionary with the lists 'passed_checks',
      'failed_checks' and 'warnings', plus 'overall_status' and (whenever at
      least one check passed or failed) 'overall_score'
    """
    
    print("Validating percolation analysis results...")
    
    validation_report = {
        'passed_checks': [],
        'failed_checks': [],
        'warnings': []
    }
    
    thresholds = analysis_results['thresholds']
    giant_sizes = analysis_results['giant_sizes']
    
    # The boundary and coverage checks index the first/last sample, so an
    # empty curve is reported as a failure instead of raising IndexError.
    if len(giant_sizes) == 0 or len(thresholds) == 0:
        validation_report['failed_checks'].append("Data quality: no sampling points to validate")
        validation_report['overall_score'] = 0.0
        validation_report['overall_status'] = 'FAILED'
        return validation_report
    
    # Check 1: Monotonicity - giant component size should generally decrease with increasing threshold
    print("  Check 1: Monotonicity test...")
    
    # Thresholds are in descending order, so walking forward through the
    # samples lowers the threshold and the giant component must not shrink.
    # A violation is therefore a DROP larger than `tolerance` (allowing for
    # some noise in the data), not an increase.
    step_count = len(giant_sizes) - 1
    
    if step_count > 0:
        decreasing_violations = sum(
            1 for i in range(1, len(giant_sizes))
            if giant_sizes[i] < giant_sizes[i-1] - tolerance
        )
        violation_rate = decreasing_violations / step_count
        
        if violation_rate < 0.1:  # Less than 10% violations
            validation_report['passed_checks'].append(f"Monotonicity: {violation_rate:.2%} violations (acceptable)")
        else:
            validation_report['failed_checks'].append(f"Monotonicity: {violation_rate:.2%} violations (too high)")
    else:
        # A single sample has no consecutive pair to compare
        validation_report['warnings'].append("Monotonicity: fewer than 2 sampling points (check skipped)")
    
    # Check 2: Boundary conditions
    print("  Check 2: Boundary conditions...")
    
    # At highest threshold, giant component should be small
    highest_threshold_size = giant_sizes[0]  # First element (highest threshold)
    if highest_threshold_size < 0.3:
        validation_report['passed_checks'].append(f"High threshold boundary: {highest_threshold_size:.3f} < 0.3")
    else:
        validation_report['warnings'].append(f"High threshold boundary: {highest_threshold_size:.3f} > 0.3 (may indicate threshold too low)")
    
    # At lowest threshold, giant component should be large
    lowest_threshold_size = giant_sizes[-1]  # Last element (lowest threshold)
    if lowest_threshold_size > 0.7:
        validation_report['passed_checks'].append(f"Low threshold boundary: {lowest_threshold_size:.3f} > 0.7")
    else:
        validation_report['warnings'].append(f"Low threshold boundary: {lowest_threshold_size:.3f} < 0.7 (may indicate incomplete range)")
    
    # Check 3: Critical points validation
    print("  Check 3: Critical points validation...")
    
    critical_points = analysis_results['critical_points']
    
    # A critical point is "valid" when its giant size lies in [0.01, 0.99]
    valid_critical_points = sum(
        1 for point in critical_points
        if 0.01 <= point['giant_size'] <= 0.99
    )
    
    # No verdict is recorded when there are no critical points at all
    if len(critical_points) > 0:
        valid_ratio = valid_critical_points / len(critical_points)
        if valid_ratio > 0.8:
            validation_report['passed_checks'].append(f"Critical points: {valid_ratio:.2%} in valid range")
        else:
            validation_report['failed_checks'].append(f"Critical points: {valid_ratio:.2%} in valid range (too low)")
    
    # Check 4: Data quality
    print("  Check 4: Data quality...")
    
    # Check for NaN or infinite values
    nan_count = np.sum(np.isnan(giant_sizes))
    inf_count = np.sum(np.isinf(giant_sizes))
    
    if nan_count == 0 and inf_count == 0:
        validation_report['passed_checks'].append("Data quality: No NaN or infinite values")
    else:
        validation_report['failed_checks'].append(f"Data quality: {nan_count} NaN, {inf_count} infinite values")
    
    # Check 5: Threshold range coverage
    print("  Check 5: Threshold range coverage...")
    
    threshold_range = thresholds[-1] / thresholds[0]  # Ratio of min to max
    if threshold_range < 0.01:  # Covers at least 2 orders of magnitude
        validation_report['passed_checks'].append(f"Threshold coverage: {threshold_range:.4f} (good dynamic range)")
    else:
        validation_report['warnings'].append(f"Threshold coverage: {threshold_range:.4f} (limited dynamic range)")
    
    # Calculate overall validation score (warnings do not enter the score)
    passed = len(validation_report['passed_checks'])
    failed = len(validation_report['failed_checks'])
    total_checks = passed + failed
    
    if total_checks > 0:
        validation_score = passed / total_checks
        validation_report['overall_score'] = validation_score
        
        if validation_score >= 0.8:
            validation_report['overall_status'] = 'PASSED'
        elif validation_score >= 0.6:
            validation_report['overall_status'] = 'PASSED_WITH_WARNINGS'
        else:
            validation_report['overall_status'] = 'FAILED'
    else:
        validation_report['overall_status'] = 'NO_CHECKS'
    
    return validation_report

def optimize_threshold_sampling(G, target_points=500, reference_thresholds=None):
    """
    Build a descending array of thresholds that mixes several sampling schemes.
    
    The candidates are the union of:
    - quantiles of the edge-weight distribution (target_points // 2 values
      between the 1% and 99% quantile),
    - log-spaced values between the smallest and largest weight
      (target_points // 3 values),
    - 20 linearly spaced values within ±10% of every reference threshold that
      lies inside the weight range.
    
    Parameters:
    - G: NetworkX graph; edges without a 'weight' attribute count as 1.0
    - target_points: target number of sampling points (upper bound on the result)
    - reference_thresholds: known critical thresholds to emphasize
    
    Returns:
    - optimized_thresholds: unique thresholds inside the weight range, sorted
      from high to low and thinned to at most target_points entries
    """
    
    print(f"Optimizing threshold sampling for {target_points} points...")
    
    # Collect the edge weights and their extremes
    edge_weights = []
    for _, _, attrs in G.edges(data=True):
        edge_weights.append(attrs.get('weight', 1.0))
    lowest = min(edge_weights)
    highest = max(edge_weights)
    
    print(f"  Weight range: {lowest:.6f} - {highest:.6f}")
    
    # Base distribution: follow the empirical weight quantiles
    by_quantile = np.quantile(edge_weights, np.linspace(0.01, 0.99, target_points // 2))
    
    # Broad coverage: logarithmic spacing across the whole range
    by_log = np.logspace(np.log10(lowest), np.log10(highest), target_points // 3)
    
    # Extra resolution around each in-range reference threshold
    near_references = []
    if reference_thresholds:
        for reference in reference_thresholds:
            if not (lowest <= reference <= highest):
                continue
            window_start = max(lowest, reference * 0.9)
            window_end = min(highest, reference * 1.1)
            near_references.extend(np.linspace(window_start, window_end, 20))
    
    # Merge, de-duplicate and clip to the observed weight range
    candidates = np.unique(np.concatenate([by_quantile, by_log, near_references]))
    inside_range = (candidates >= lowest) & (candidates <= highest)
    candidates = candidates[inside_range]
    
    # High-to-low ordering
    optimized_thresholds = np.sort(candidates)[::-1]
    
    # Thin out evenly by position when there are more candidates than requested
    if len(optimized_thresholds) > target_points:
        keep = np.linspace(0, len(optimized_thresholds)-1, target_points, dtype=int)
        optimized_thresholds = optimized_thresholds[keep]
    
    print(f"  Optimized to {len(optimized_thresholds)} sampling points")
    
    return optimized_thresholds

# Run the validation checks on the main percolation results
print("Executing result validation...")
print(60 * "=")

validation_results = validate_percolation_results(analysis_results)

print("\nValidation Results:")
print(50 * "=")

# Headline verdict; the score is only present when it was computed
print("Overall Status: {}".format(validation_results['overall_status']))
if 'overall_score' in validation_results:
    print("Overall Score: {:.2%}".format(validation_results['overall_score']))

# The passed-checks heading is always printed, even for an empty list
print("\nPassed Checks ({}):".format(len(validation_results['passed_checks'])))
for check in validation_results['passed_checks']:
    print("  ✓ {}".format(check))

# Failed checks and warnings only get a section when there is something to show
if validation_results['failed_checks']:
    print("\nFailed Checks ({}):".format(len(validation_results['failed_checks'])))
    for check in validation_results['failed_checks']:
        print("  ✗ {}".format(check))

if validation_results['warnings']:
    print("\nWarnings ({}):".format(len(validation_results['warnings'])))
    for warning in validation_results['warnings']:
        print("  ⚠ {}".format(warning))

print(60 * "=")
print("Validation completed!")

In [None]:
# Performance benchmark and comparison with reference methods

def benchmark_percolation_methods(G, num_thresholds=100):
    """
    Benchmark different percolation analysis methods for performance comparison
    
    For `num_thresholds` log-spaced thresholds between the smallest and the
    largest edge weight (processed from high to low), three ways of computing
    the largest-component fraction are timed:
    1. rebuild an undirected graph from the edges with weight >= threshold and
       take its largest connected component;
    2. sort the edges by weight once and extend a cumulative edge list as the
       threshold drops, then take the largest connected component;
    3. a union-find structure (union by rank, path compression) rebuilt for
       every threshold.
    
    Parameters:
    - G: NetworkX graph; edge direction is ignored by all three methods
    - num_thresholds: number of threshold points to test
    
    Returns:
    - benchmark_results: dict with one entry per method
      ('standard_networkx', 'optimized_filtering', 'union_find'; each holding
      'time', 'results', 'description') plus a 'summary' entry
    
    Raises:
    - ValueError: if G has no edges (there is no weight range to sample)
    
    Notes:
    - Timing uses time.perf_counter(), a monotonic high-resolution clock.
    - Speedups are reported as inf when a measured duration is zero, instead
      of raising ZeroDivisionError.
    - The union-find method counts every node of G, so with no qualifying
      edges it reports 1/len(nodes) where the other two methods report 0.
    """
    
    import time
    
    def _speedup(baseline_seconds, seconds):
        """Return baseline_seconds / seconds, or inf for a zero duration."""
        return baseline_seconds / seconds if seconds > 0 else float('inf')
    
    print("Running performance benchmark...")
    print(f"Network: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
    print(f"Testing with {num_thresholds} threshold points")
    
    # Get test thresholds
    weights = [data.get('weight', 1.0) for _, _, data in G.edges(data=True)]
    if not weights:
        raise ValueError("benchmark_percolation_methods requires a graph with at least one edge")
    min_weight, max_weight = min(weights), max(weights)
    # logspace is ascending; reverse it so thresholds run from high to low
    test_thresholds = np.logspace(np.log10(min_weight), np.log10(max_weight), num_thresholds)[::-1]
    
    benchmark_results = {}
    
    # Method 1: Standard NetworkX approach
    print("\n  Method 1: Standard NetworkX")
    start_time = time.perf_counter()
    
    standard_results = []
    for threshold in test_thresholds:
        edges_filtered = [(u, v) for u, v, data in G.edges(data=True) 
                         if data.get('weight', 0) >= threshold]
        subgraph = nx.Graph()
        subgraph.add_edges_from(edges_filtered)
        
        if subgraph.number_of_nodes() > 0:
            components = list(nx.connected_components(subgraph))
            largest_size = max(len(comp) for comp in components) if components else 0
        else:
            largest_size = 0
        
        standard_results.append(largest_size / G.number_of_nodes())
    
    standard_time = time.perf_counter() - start_time
    benchmark_results['standard_networkx'] = {
        'time': standard_time,
        'results': standard_results,
        'description': 'Standard NetworkX connected components'
    }
    
    print(f"    Time: {standard_time:.2f} seconds")
    
    # Method 2: Optimized approach with edge filtering
    print("\n  Method 2: Optimized Edge Filtering")
    start_time = time.perf_counter()
    
    # Pre-filter and sort edges by weight
    weighted_edges = [(u, v, data.get('weight', 0)) for u, v, data in G.edges(data=True)]
    weighted_edges.sort(key=lambda x: x[2], reverse=True)  # Sort by weight descending
    
    optimized_results = []
    current_edges = []
    edge_index = 0
    
    for threshold in test_thresholds:
        # Add edges that meet current threshold; because both the edges and the
        # thresholds are in descending order, edge_index only moves forward
        while edge_index < len(weighted_edges) and weighted_edges[edge_index][2] >= threshold:
            current_edges.append((weighted_edges[edge_index][0], weighted_edges[edge_index][1]))
            edge_index += 1
        
        # Create subgraph with current edges
        if current_edges:
            subgraph = nx.Graph()
            subgraph.add_edges_from(current_edges)
            components = list(nx.connected_components(subgraph))
            largest_size = max(len(comp) for comp in components) if components else 0
        else:
            largest_size = 0
        
        optimized_results.append(largest_size / G.number_of_nodes())
    
    optimized_time = time.perf_counter() - start_time
    benchmark_results['optimized_filtering'] = {
        'time': optimized_time,
        'results': optimized_results,
        'description': 'Optimized edge filtering with sorting'
    }
    
    optimized_speedup = _speedup(standard_time, optimized_time)
    
    print(f"    Time: {optimized_time:.2f} seconds")
    print(f"    Speedup: {optimized_speedup:.1f}x")
    
    # Method 3: Union-Find approach (for very large networks)
    print("\n  Method 3: Union-Find Algorithm")
    start_time = time.perf_counter()
    
    class UnionFind:
        """Disjoint-set forest that tracks the size of its largest set."""
        
        def __init__(self, n):
            self.parent = list(range(n))
            self.rank = [0] * n
            self.component_size = [1] * n
            self.max_component_size = 1
        
        def find(self, x):
            # Path compression: point x directly at its root
            if self.parent[x] != x:
                self.parent[x] = self.find(self.parent[x])
            return self.parent[x]
        
        def union(self, x, y):
            px, py = self.find(x), self.find(y)
            if px == py:
                return
            
            # Union by rank: attach the lower-rank root under the higher one
            if self.rank[px] < self.rank[py]:
                px, py = py, px
            
            self.parent[py] = px
            self.component_size[px] += self.component_size[py]
            self.max_component_size = max(self.max_component_size, self.component_size[px])
            
            if self.rank[px] == self.rank[py]:
                self.rank[px] += 1
    
    # Create node mapping
    nodes = list(G.nodes())
    node_to_idx = {node: i for i, node in enumerate(nodes)}
    
    unionfind_results = []
    
    for threshold in test_thresholds:
        uf = UnionFind(len(nodes))
        
        for u, v, data in G.edges(data=True):
            if data.get('weight', 0) >= threshold:
                uf.union(node_to_idx[u], node_to_idx[v])
        
        unionfind_results.append(uf.max_component_size / len(nodes))
    
    unionfind_time = time.perf_counter() - start_time
    benchmark_results['union_find'] = {
        'time': unionfind_time,
        'results': unionfind_results,
        'description': 'Union-Find algorithm'
    }
    
    unionfind_speedup = _speedup(standard_time, unionfind_time)
    
    print(f"    Time: {unionfind_time:.2f} seconds")
    print(f"    Speedup vs standard: {unionfind_speedup:.1f}x")
    
    # Accuracy comparison
    print("\n  Accuracy Comparison:")
    
    # Compare results between methods
    standard_arr = np.array(standard_results)
    optimized_arr = np.array(optimized_results)
    unionfind_arr = np.array(unionfind_results)
    
    opt_diff = np.mean(np.abs(standard_arr - optimized_arr))
    uf_diff = np.mean(np.abs(standard_arr - unionfind_arr))
    
    print(f"    Optimized vs Standard: Mean absolute difference = {opt_diff:.6f}")
    print(f"    Union-Find vs Standard: Mean absolute difference = {uf_diff:.6f}")
    
    # Performance summary
    print(f"\n  Performance Summary:")
    print(f"    Standard NetworkX: {standard_time:.2f}s (baseline)")
    print(f"    Optimized Filtering: {optimized_time:.2f}s ({optimized_speedup:.1f}x faster)")
    print(f"    Union-Find: {unionfind_time:.2f}s ({unionfind_speedup:.1f}x faster)")
    
    # Recommendation
    if G.number_of_nodes() > 1000:
        recommended = "Union-Find (large network)"
    elif optimized_time < standard_time * 0.8:
        recommended = "Optimized Filtering (good balance)"
    else:
        recommended = "Standard NetworkX (small network)"
    
    print(f"    Recommended method: {recommended}")
    
    benchmark_results['summary'] = {
        'network_size': G.number_of_nodes(),
        'recommended_method': recommended,
        'best_time': min(standard_time, optimized_time, unionfind_time),
        'standard_time': standard_time
    }
    
    return benchmark_results

# Benchmark the percolation methods on G (or on a sample of it when G is large)
print("Executing performance benchmark...")
print(60 * "=")

if G.number_of_nodes() <= 1000:
    # Small enough to benchmark the whole network
    benchmark_results = benchmark_percolation_methods(G, num_thresholds=100)
else:
    # Large network: benchmark the subgraph induced by the first 1000 nodes,
    # with fewer threshold points
    print("Network is large, testing with sample...")
    sample_nodes = list(G.nodes())[:1000]
    test_graph = G.subgraph(sample_nodes).copy()
    benchmark_results = benchmark_percolation_methods(test_graph, num_thresholds=50)

print(60 * "=")
print("Performance benchmark completed!")

In [None]:
# Summary and conclusion

def generate_final_summary(analysis_results, hierarchy_results, advanced_results, 
                          validation_results, benchmark_results=None):
    """
    Print a comprehensive summary of all percolation analysis results.
    
    Parameters:
    - analysis_results: main analysis results ('network_info', 'thresholds',
      'critical_points' are read)
    - hierarchy_results: mapping threshold -> hierarchy results; sections 4 and
      the hierarchy insight are skipped when it is empty
    - advanced_results: advanced percolation analysis results
    - validation_results: report with 'overall_status', 'passed_checks',
      'failed_checks', 'warnings' and optionally 'overall_score'
    - benchmark_results: optional benchmark output; section 6 is skipped when
      it is None or empty
    
    Returns:
    - None; the summary is written to standard output.
    """
    
    # Imported here so the closing timestamp does not depend on another cell
    # having brought `datetime` into the notebook namespace.
    from datetime import datetime
    
    def _percent(count, total):
        """Share of `count` in `total` as a percentage; 0.0 when total is 0."""
        return count / total * 100 if total else 0.0
    
    print("COMPREHENSIVE PERCOLATION ANALYSIS SUMMARY")
    print("=" * 80)
    
    # Network overview
    network_info = analysis_results['network_info']
    print(f"\n1. NETWORK OVERVIEW")
    print(f"   Network Type: TTWA Commuting Network")
    print(f"   Total Nodes: {network_info['nodes']:,}")
    print(f"   Total Edges: {network_info['edges']:,}")
    print(f"   Weight Range: {network_info['weight_range'][0]:.6f} - {network_info['weight_range'][1]:.6f}")
    print(f"   Analysis Points: {len(analysis_results['thresholds']):,}")
    
    # Main percolation results
    print(f"\n2. MAIN PERCOLATION RESULTS")
    print(f"   Critical Threshold (max decline): {advanced_results['critical_threshold']:.6f}")
    print(f"   Giant Component Size at Critical Point: {advanced_results['critical_size']:.3f}")
    print(f"   Percolation Threshold (50% point): {advanced_results['percolation_threshold']:.6f}")
    
    # Test against None rather than truthiness so an exponent of 0.0 is still reported
    gamma_exponent = advanced_results.get('gamma_exponent')
    if gamma_exponent is not None:
        print(f"   Critical Exponent γ: {gamma_exponent:.3f}")
    else:
        print(f"   Critical Exponent γ: Not calculable")
    
    print(f"   Transition Steepness: {advanced_results['transition_steepness']:.4f}")
    print(f"   Transition Width: {advanced_results['transition_width']} points")
    
    # Critical points summary
    print(f"\n3. CRITICAL POINTS IDENTIFIED")
    critical_points = analysis_results['critical_points']
    print(f"   Total Critical Points: {len(critical_points)}")
    
    # Group by type (insertion order of first appearance is kept)
    point_types = {}
    for point in critical_points:
        point_types.setdefault(point['type'], []).append(point)
    
    for point_type, points in point_types.items():
        print(f"   {point_type}: {len(points)} points")
    
    # Top 5 critical points (the first five entries of the list)
    print(f"\n   Top 5 Critical Points:")
    for i, point in enumerate(critical_points[:5]):
        if point['type'] == 'reference_match':
            error = point.get('error', 0) * 100
            print(f"     {i+1}. τ = {point['threshold']:.6f} (Size: {point['giant_size']:.3f}, "
                  f"Ref: {point['reference']:.6f}, Error: {error:.1f}%)")
        else:
            print(f"     {i+1}. τ = {point['threshold']:.6f} (Size: {point['giant_size']:.3f}, "
                  f"Type: {point['type']})")
    
    # Hierarchy analysis summary
    if hierarchy_results:
        print(f"\n4. NETWORK HIERARCHY ANALYSIS")
        print(f"   Analyzed Thresholds: {len(hierarchy_results)}")
        
        # Find representative threshold: the middle one of the sorted thresholds
        mid_threshold = sorted(hierarchy_results.keys())[len(hierarchy_results)//2]
        mid_results = hierarchy_results[mid_threshold]
        giant_size = mid_results['giant_component_size']
        
        # _percent guards against an empty giant component (size 0)
        print(f"   Representative Analysis (τ = {mid_threshold:.6f}):")
        print(f"     Giant Component: {giant_size} nodes")
        print(f"     Core Nodes: {len(mid_results['core_nodes'])} "
              f"({_percent(len(mid_results['core_nodes']), giant_size):.1f}%)")
        print(f"     Semi-periphery: {len(mid_results['semi_periphery_nodes'])} "
              f"({_percent(len(mid_results['semi_periphery_nodes']), giant_size):.1f}%)")
        print(f"     Periphery: {len(mid_results['periphery_nodes'])} "
              f"({_percent(len(mid_results['periphery_nodes']), giant_size):.1f}%)")
        print(f"     Network Density: {mid_results['network_density']:.4f}")
        print(f"     Core Density: {mid_results['core_density']:.4f}")
    
    # Validation results
    print(f"\n5. VALIDATION RESULTS")
    print(f"   Overall Status: {validation_results['overall_status']}")
    if 'overall_score' in validation_results:
        print(f"   Overall Score: {validation_results['overall_score']:.1%}")
    print(f"   Passed Checks: {len(validation_results['passed_checks'])}")
    print(f"   Failed Checks: {len(validation_results['failed_checks'])}")
    print(f"   Warnings: {len(validation_results['warnings'])}")
    
    # Performance summary
    if benchmark_results:
        print(f"\n6. PERFORMANCE ANALYSIS")
        summary = benchmark_results.get('summary', {})
        print(f"   Network Size: {summary.get('network_size', 'N/A')} nodes")
        print(f"   Recommended Method: {summary.get('recommended_method', 'N/A')}")
        print(f"   Best Performance: {summary.get('best_time', 0):.2f} seconds")
    
    # Key insights
    print(f"\n7. KEY INSIGHTS")
    
    # Percolation behavior, classified by the giant size at the critical point
    size_at_critical = advanced_results['critical_size']
    if size_at_critical > 0.7:
        behavior = "Sharp transition - network shows clear percolation threshold"
    elif size_at_critical > 0.3:
        behavior = "Gradual transition - network shows smooth percolation behavior"  
    else:
        behavior = "Fragmented transition - network fragments gradually"
    
    print(f"   Percolation Behavior: {behavior}")
    
    # Network robustness: critical threshold relative to the largest edge weight
    critical_threshold = advanced_results['critical_threshold']
    max_weight = network_info['weight_range'][1]
    robustness_ratio = critical_threshold / max_weight
    
    if robustness_ratio > 0.5:
        robustness = "High - network maintains connectivity under strong filtering"
    elif robustness_ratio > 0.2:
        robustness = "Medium - network shows moderate resilience"
    else:
        robustness = "Low - network is sensitive to filtering"
        
    print(f"   Network Robustness: {robustness}")
    
    # Hierarchical structure: average core share over thresholds whose giant
    # component is non-empty; the line is omitted when there is none to average
    if hierarchy_results:
        core_proportions = [len(r['core_nodes'])/r['giant_component_size'] 
                            for r in hierarchy_results.values() 
                            if r['giant_component_size'] > 0]
        
        if core_proportions:
            avg_core_prop = np.mean(core_proportions)
            
            if avg_core_prop > 0.3:
                hierarchy = "Strong hierarchical structure with dominant core"
            elif avg_core_prop > 0.15:
                hierarchy = "Moderate hierarchical structure"
            else:
                hierarchy = "Weak hierarchical structure - more distributed"
                
            print(f"   Hierarchical Structure: {hierarchy}")
    
    # Statistical significance (based only on the number of critical points)
    num_critical = len(critical_points)
    if num_critical > 10:
        significance = "High - multiple critical points identified"
    elif num_critical > 5:
        significance = "Medium - several critical points identified"
    else:
        significance = "Low - few critical points identified"
        
    print(f"   Statistical Significance: {significance}")
    
    print(f"\n8. RECOMMENDATIONS")
    print(f"   • Use critical threshold τ = {advanced_results['critical_threshold']:.6f} for network analysis")
    print(f"   • Consider percolation threshold τ = {advanced_results['percolation_threshold']:.6f} for 50% connectivity")
    
    if validation_results['overall_status'] == 'FAILED':
        print(f"   • ⚠ Review validation failures before using results")
    elif validation_results['warnings']:
        print(f"   • ⚠ Consider validation warnings in interpretation")
    
    if hierarchy_results:
        print(f"   • Focus on core nodes for network backbone analysis")
        print(f"   • Consider core-periphery structure in network modeling")
    
    print(f"   • All results and visualizations exported for further analysis")
    
    print(f"\n" + "=" * 80)
    print(f"ANALYSIS COMPLETED SUCCESSFULLY")
    print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"=" * 80)

# Print the final summary; benchmark results are passed only if that cell was run
print("Generating final comprehensive summary...")
print("\n")

generate_final_summary(
    analysis_results,
    hierarchy_results,
    advanced_results,
    validation_results,
    benchmark_results=locals().get('benchmark_results'),
)

In [None]:
# Sanity check: report which result variables exist in the current namespace
print("Quick verification test:")
print("Main analysis results available: {}".format('analysis_results' in locals()))
print("Advanced analysis completed: {}".format('advanced_results' in locals()))
print("Hierarchy analysis completed: {}".format('hierarchy_results' in locals()))
print("Validation completed: {}".format('validation_results' in locals()))

# Headline numbers are shown only when the main results are present
if 'analysis_results' in locals():
    print("Number of sampling points: {}".format(len(analysis_results['thresholds'])))
    print("Number of critical points: {}".format(len(analysis_results['critical_points'])))
    print("Network size: {} nodes".format(analysis_results['network_info']['nodes']))

print("\nAll analysis components completed successfully!")

In [None]:
# Additional analysis: Community detection at critical thresholds

def analyze_communities_at_critical_thresholds(G, critical_thresholds, max_analyze=3):
    """
    Analyze community structure at critical percolation thresholds.

    For every selected threshold, the edges of ``G`` whose 'weight' attribute
    is at least the threshold are collapsed into an unweighted, undirected
    graph. Community detection (Louvain when available, greedy modularity and
    label propagation) is then run on the largest connected component of that
    graph.

    Parameters:
    - G: NetworkX graph; edges without a 'weight' attribute count as weight 0
    - critical_thresholds: list of critical threshold values
    - max_analyze: maximum number of thresholds to analyze (the first
      ``max_analyze`` entries of ``critical_thresholds`` are used)

    Returns:
    - community_results: dictionary keyed by threshold value. Each entry holds
      the threshold, the giant component node/edge counts, the communities and
      modularity found by each algorithm (None when an algorithm was
      unavailable or failed) and the list of community sizes. Thresholds with
      no surviving edges are skipped and have no entry.
    """
    from collections import Counter

    print("Analyzing community structure at critical thresholds...")

    # Select thresholds to analyze
    selected_thresholds = critical_thresholds[:max_analyze]
    print(f"Analyzing {len(selected_thresholds)} thresholds: {[f'{t:.6f}' for t in selected_thresholds]}")

    # Resolve the optional Louvain backend once instead of once per threshold.
    # python-louvain is imported as `community`, but an unrelated package can be
    # installed under the same import name; in that case the functions used
    # below are missing, so treat the backend as unavailable rather than letting
    # an AttributeError abort the whole analysis.
    try:
        import community as community_louvain
    except ImportError:
        community_louvain = None
    else:
        if not (hasattr(community_louvain, 'best_partition')
                and hasattr(community_louvain, 'modularity')):
            community_louvain = None

    community_results = {}

    for i, threshold in enumerate(selected_thresholds):
        print(f"\nAnalyzing threshold {i+1}/{len(selected_thresholds)}: τ = {threshold:.6f}")

        # Keep only the edges that are at least as strong as the threshold
        strong_edges = [(u, v) for u, v, data in G.edges(data=True)
                        if data.get('weight', 0) >= threshold]

        if not strong_edges:
            print("  No edges above threshold")
            continue

        # Undirected, unweighted view of the surviving edges
        subgraph = nx.Graph()
        subgraph.add_edges_from(strong_edges)

        # Focus on largest connected component
        components = list(nx.connected_components(subgraph))
        if not components:
            print("  No connected components")
            continue

        largest_component = max(components, key=len)
        giant_subgraph = subgraph.subgraph(largest_component)

        print(f"  Giant component: {len(largest_component)} nodes, {giant_subgraph.number_of_edges()} edges")

        # Louvain algorithm (optional dependency)
        if community_louvain is None:
            print("  Louvain algorithm not available (install python-louvain)")
            louvain_communities = None
            louvain_modularity = None
        else:
            louvain_communities = community_louvain.best_partition(giant_subgraph)
            louvain_modularity = community_louvain.modularity(louvain_communities, giant_subgraph)

            print(f"  Louvain communities: {len(set(louvain_communities.values()))}")
            print(f"  Louvain modularity: {louvain_modularity:.3f}")

        # Greedy modularity communities (built into NetworkX)
        try:
            greedy_communities = list(nx.community.greedy_modularity_communities(giant_subgraph))
            greedy_modularity = nx.community.modularity(giant_subgraph, greedy_communities)

            print(f"  Greedy communities: {len(greedy_communities)}")
            print(f"  Greedy modularity: {greedy_modularity:.3f}")

        except Exception as e:
            # Best effort: report the failure and carry on with the other algorithms
            print(f"  Greedy algorithm failed: {e}")
            greedy_communities = None
            greedy_modularity = None

        # Label propagation algorithm
        try:
            label_prop_communities = list(nx.community.label_propagation_communities(giant_subgraph))
            label_prop_modularity = nx.community.modularity(giant_subgraph, label_prop_communities)

            print(f"  Label propagation communities: {len(label_prop_communities)}")
            print(f"  Label propagation modularity: {label_prop_modularity:.3f}")

        except Exception as e:
            # Best effort: report the failure and keep the other results
            print(f"  Label propagation failed: {e}")
            label_prop_communities = None
            label_prop_modularity = None

        # Community sizes: prefer the Louvain partition, fall back to greedy
        community_sizes = []
        if louvain_communities:
            # The Louvain result maps node -> community id; count nodes per id
            community_sizes = list(Counter(louvain_communities.values()).values())
        elif greedy_communities:
            community_sizes = [len(comm) for comm in greedy_communities]

        if community_sizes:
            print(f"  Community size statistics:")
            print(f"    Largest community: {max(community_sizes)} nodes")
            print(f"    Average community size: {np.mean(community_sizes):.1f}")
            print(f"    Community size std: {np.std(community_sizes):.1f}")

        # Store results
        community_results[threshold] = {
            'threshold': threshold,
            'giant_component_size': len(largest_component),
            'giant_component_edges': giant_subgraph.number_of_edges(),
            'louvain_communities': louvain_communities,
            'louvain_modularity': louvain_modularity,
            'greedy_communities': greedy_communities,
            'greedy_modularity': greedy_modularity,
            'label_prop_communities': label_prop_communities,
            'label_prop_modularity': label_prop_modularity,
            'community_sizes': community_sizes
        }

    print(f"\nCommunity analysis completed for {len(community_results)} thresholds")
    return community_results

# Visualize community structure
def plot_community_analysis(community_results, save_path=None):
    """
    Draw a 2x2 overview figure of the community detection results.

    Panels: modularity per algorithm, number of communities, community size
    histogram for the first stored threshold, and Louvain modularity against
    community count.

    Parameters:
    - community_results: dictionary keyed by threshold, as returned by
      analyze_communities_at_critical_thresholds
    - save_path: optional file path; when given the figure is also saved there

    Returns nothing. An empty ``community_results`` only prints a notice.
    """
    if not community_results:
        print("No community results to plot")
        return

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    (ax1, ax2), (ax3, ax4) = axes
    fig.suptitle('Community Analysis at Critical Thresholds', fontsize=16, fontweight='bold')

    # Column-wise view of the stored results; a missing modularity (None) is drawn as 0
    thresholds = list(community_results.keys())
    records = list(community_results.values())
    louvain_mods = [record['louvain_modularity'] or 0 for record in records]
    greedy_mods = [record['greedy_modularity'] or 0 for record in records]
    label_mods = [record['label_prop_modularity'] or 0 for record in records]

    # Community count: Louvain partition when present, otherwise greedy, otherwise 0
    num_communities = [
        len(set(record['louvain_communities'].values())) if record['louvain_communities']
        else len(record['greedy_communities']) if record['greedy_communities']
        else 0
        for record in records
    ]

    # Panel 1: modularity of each algorithm over the thresholds
    modularity_series = (
        (louvain_mods, 'o-', 'Louvain'),
        (greedy_mods, 's-', 'Greedy'),
        (label_mods, '^-', 'Label Propagation'),
    )
    for series, line_style, algorithm in modularity_series:
        ax1.semilogx(thresholds, series, line_style, label=algorithm, linewidth=2, markersize=8)
    ax1.set_xlabel('Threshold τ')
    ax1.set_ylabel('Modularity')
    ax1.set_title('Community Detection Modularity')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Panel 2: how many communities were found at each threshold
    ax2.semilogx(thresholds, num_communities, 'D-', color='purple', linewidth=2, markersize=8)
    ax2.set_xlabel('Threshold τ')
    ax2.set_ylabel('Number of Communities')
    ax2.set_title('Community Count vs Threshold')
    ax2.grid(True, alpha=0.3)

    # Panel 3: size histogram for the first stored threshold
    first_threshold = next(iter(community_results))
    first_sizes = community_results[first_threshold]['community_sizes']
    if first_sizes:
        ax3.hist(first_sizes, bins=20, alpha=0.7, edgecolor='black')
        ax3.set_xlabel('Community Size')
        ax3.set_ylabel('Frequency')
        ax3.set_title(f'Community Size Distribution\n(τ = {first_threshold:.6f})')
        ax3.grid(True, alpha=0.3)
    else:
        ax3.text(0.5, 0.5, 'No community\nsize data', transform=ax3.transAxes,
                 ha='center', va='center', fontsize=12)

    # Panel 4: only thresholds with a positive Louvain modularity are shown
    louvain_points = [
        (thresholds[pos], num_communities[pos], modularity)
        for pos, modularity in enumerate(louvain_mods)
        if modularity > 0
    ]
    if louvain_points:
        counts = [count for _, count, _ in louvain_points]
        modularities = [modularity for _, _, modularity in louvain_points]

        ax4.scatter(counts, modularities, s=100, alpha=0.7)
        ax4.set_xlabel('Number of Communities')
        ax4.set_ylabel('Modularity')
        ax4.set_title('Modularity vs Community Count')
        ax4.grid(True, alpha=0.3)

        # Tag every point with the threshold it belongs to
        for tau, count, modularity in louvain_points:
            ax4.annotate(f'{tau:.3f}',
                         (count, modularity),
                         xytext=(5, 5), textcoords='offset points',
                         fontsize=8, alpha=0.7)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Community analysis plot saved: {save_path}")

    plt.show()

# Run the community structure analysis
print("Executing community structure analysis...")
print("=" * 60)

# Thresholds: the first three entries of final_analysis_critical_points when
# that list exists in this session, otherwise the hard-coded fallback values.
# NOTE(review): the fallback numbers are not derived in this cell - confirm
# they match the dataset being analysed.
critical_thresholds_for_community = (
    [entry['threshold'] for entry in final_analysis_critical_points[:3]]
    if 'final_analysis_critical_points' in locals()
    else [0.274173, 0.217115, 0.149683]
)

community_analysis_results = analyze_communities_at_critical_thresholds(
    G, critical_thresholds_for_community, max_analyze=3
)

# Draw the overview figure and save it under a fixed file name
community_plot_filename = "community_analysis_results.png"
plot_community_analysis(community_analysis_results, save_path=community_plot_filename)

print("=" * 60)
print("Community structure analysis completed!")

In [None]:
# Network resilience analysis - understanding network robustness

def analyze_network_resilience(G, attack_strategies=['random', 'degree', 'betweenness'], 
                              attack_fractions=np.linspace(0, 0.5, 11), seed=None):
    """
    Analyze network resilience under different attack strategies.

    The graph is analysed as undirected. For each strategy the nodes are
    ranked once on the intact network (the ranking is not recomputed after
    removals). For each attack fraction the top ``int(fraction * n_nodes)``
    ranked nodes are removed from a fresh copy of the network, and the
    remaining connected components are measured.

    Parameters:
    - G: NetworkX graph (directed graphs are converted with to_undirected())
    - attack_strategies: list of attack strategies to test. 'degree' and
      'betweenness' remove the highest-scoring nodes first; any other name is
      treated as a random removal order. The default list is never modified.
    - attack_fractions: fractions of nodes to remove
    - seed: optional integer seed for the random removal order and for the
      sampled betweenness estimate. With the default (None) the global NumPy
      random state is used for the random order, as before.

    Returns:
    - resilience_results: dictionary mapping each strategy to a dictionary of
      parallel lists 'fractions_removed', 'largest_component_sizes',
      'num_components' and 'remaining_edges'
    """
    # Above this node count betweenness is estimated from a node sample
    BETWEENNESS_SAMPLE_SIZE = 500

    print("Analyzing network resilience...")
    print(f"Testing {len(attack_strategies)} attack strategies")
    print(f"Attack fractions: {attack_fractions}")

    # Global NumPy state by default (so np.random.seed still applies); a private
    # generator when the caller asks for a reproducible run.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Convert to undirected for connected components analysis
    if G.is_directed():
        G_undirected = G.to_undirected()
    else:
        G_undirected = G.copy()

    original_size = G_undirected.number_of_nodes()
    original_edges = G_undirected.number_of_edges()

    # Calculate initial largest component (0 for an empty graph)
    initial_largest = max((len(comp) for comp in nx.connected_components(G_undirected)), default=0)
    # Guard the ratio: an empty graph has no nodes to divide by
    initial_share = initial_largest / original_size if original_size else 0.0

    print(f"Original network: {original_size} nodes, {original_edges} edges")
    print(f"Initial largest component: {initial_largest} nodes ({initial_share:.1%})")

    nodes = list(G_undirected.nodes())
    resilience_results = {}

    for strategy in attack_strategies:
        print(f"\n  Testing {strategy} attack strategy...")

        strategy_results = {
            'fractions_removed': [],
            'largest_component_sizes': [],
            'num_components': [],
            'remaining_edges': []
        }

        # Removal order for this strategy, decided once on the intact network
        if strategy == 'degree':
            node_scores = dict(G_undirected.degree())
            sorted_nodes = sorted(nodes, key=lambda x: node_scores[x], reverse=True)
        elif strategy == 'betweenness':
            # For large networks estimate betweenness from k sampled source
            # nodes. This scores every node of the full graph, whereas computing
            # it on an induced subgraph of sampled nodes would rank all
            # non-sampled nodes last regardless of their real centrality.
            sample_size = BETWEENNESS_SAMPLE_SIZE if len(nodes) > BETWEENNESS_SAMPLE_SIZE else None
            node_scores = nx.betweenness_centrality(G_undirected, k=sample_size, seed=seed)
            sorted_nodes = sorted(nodes, key=lambda x: node_scores[x], reverse=True)
        else:  # random
            sorted_nodes = nodes.copy()
            rng.shuffle(sorted_nodes)

        for fraction in attack_fractions:
            # Calculate number of nodes to remove
            num_to_remove = int(fraction * original_size)

            # Every fraction is applied to a fresh copy, so fractions need not
            # be sorted and results do not depend on the previous fraction
            G_work = G_undirected.copy()
            if num_to_remove > 0:
                G_work.remove_nodes_from(sorted_nodes[:num_to_remove])

            # Analyze remaining network (all zeros when nothing is left)
            components = list(nx.connected_components(G_work))
            largest_component_size = max((len(comp) for comp in components), default=0)

            # Store results
            strategy_results['fractions_removed'].append(fraction)
            strategy_results['largest_component_sizes'].append(largest_component_size)
            strategy_results['num_components'].append(len(components))
            strategy_results['remaining_edges'].append(G_work.number_of_edges())

        resilience_results[strategy] = strategy_results

        # Print strategy summary (skipped when no attack fraction was given)
        if strategy_results['largest_component_sizes']:
            last_fraction = strategy_results['fractions_removed'][-1]
            final_size = strategy_results['largest_component_sizes'][-1]
            final_share = final_size / original_size if original_size else 0.0
            print(f"    After {last_fraction:.0%} attack: {final_size} nodes remaining "
                  f"({final_share:.1%} of original)")

    print(f"\nResilience analysis completed")
    return resilience_results

def plot_resilience_analysis(resilience_results, save_path=None):
    """
    Visualize network resilience analysis results.

    Draws a 2x2 figure (largest component size, its normalized form, number
    of components, normalized edge count) with one line per attack strategy,
    then prints for each strategy the first removal fraction at which the
    largest component falls below half of its first recorded size.

    Parameters:
    - resilience_results: dictionary mapping strategy name to a dictionary with
      the lists 'fractions_removed', 'largest_component_sizes',
      'num_components' and 'remaining_edges'
    - save_path: optional file path; when given the figure is also saved there

    Returns nothing. An empty ``resilience_results`` only prints a notice.
    """
    if not resilience_results:
        print("No resilience results to plot")
        return

    def _relative_to_first(values):
        # Normalize by the first entry; fall back to 1 when the series is empty
        # or starts at 0 so the division can never fail.
        baseline = values[0] if values and values[0] else 1
        return [value / baseline for value in values]

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Network Resilience Analysis', fontsize=16, fontweight='bold')

    colors = {'random': '#3498DB', 'degree': '#E74C3C', 'betweenness': '#2ECC71'}

    # (axis, result key, normalize?, line style, y label, title) for each panel
    panels = [
        (ax1, 'largest_component_sizes', False, 'o-',
         'Largest Component Size', 'Network Fragmentation Under Attack'),
        (ax2, 'largest_component_sizes', True, 'o-',
         'Relative Largest Component Size', 'Normalized Network Fragmentation'),
        (ax3, 'num_components', False, 's-',
         'Number of Components', 'Network Fragmentation Pattern'),
        (ax4, 'remaining_edges', True, '^-',
         'Relative Number of Edges', 'Edge Retention Under Attack'),
    ]

    for ax, result_key, normalize, line_style, y_label, title in panels:
        for strategy, results in resilience_results.items():
            values = results[result_key]
            if normalize:
                values = _relative_to_first(values)

            ax.plot(results['fractions_removed'], values, line_style,
                    color=colors.get(strategy, 'gray'),
                    linewidth=2.5, markersize=6,
                    label=f'{strategy.capitalize()} Attack')

        ax.set_xlabel('Fraction of Nodes Removed')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Resilience analysis plot saved: {save_path}")

    plt.show()

    # Print resilience summary
    print("\nResilience Summary:")
    print("=" * 50)

    for strategy, results in resilience_results.items():
        sizes = results['largest_component_sizes']
        if not sizes:
            # Nothing was measured for this strategy (no attack fractions)
            print(f"{strategy.capitalize():>12} attack: No data")
            continue

        original_size = sizes[0]

        # Find critical point (where largest component drops below 50% of original)
        critical_fraction = next(
            (fraction for fraction, size in zip(results['fractions_removed'], sizes)
             if size < original_size * 0.5),
            None
        )

        if critical_fraction is not None:
            print(f"{strategy.capitalize():>12} attack: Critical point at {critical_fraction:.1%} removal")
        else:
            print(f"{strategy.capitalize():>12} attack: No critical point within test range")

    print("=" * 50)

# Run the network resilience analysis
print("Executing network resilience analysis...")
print("=" * 60)

# Networks of up to 1000 nodes are analysed whole with all three strategies.
# Larger ones are reduced to the subgraph induced by the first 1000 nodes in
# G's node order (a prefix, not a random sample) and tested with fewer
# strategies and fewer attack fractions.
if G.number_of_nodes() <= 1000:
    resilience_results = analyze_network_resilience(
        G,
        attack_strategies=['random', 'degree', 'betweenness'],
        attack_fractions=np.linspace(0, 0.5, 11),
    )
else:
    print("Network is large, testing with sample...")
    sample_nodes = list(G.nodes())[:1000]
    test_graph = G.subgraph(sample_nodes).copy()

    resilience_results = analyze_network_resilience(
        test_graph,
        attack_strategies=['random', 'degree'],
        attack_fractions=np.linspace(0, 0.4, 9),
    )

# Draw the overview figure and save it under a fixed file name
resilience_plot_filename = "network_resilience_analysis.png"
plot_resilience_analysis(resilience_results, save_path=resilience_plot_filename)

print("=" * 60)
print("Network resilience analysis completed!")

In [None]:
# Final status check: report which analysis stages left results in this session.
# Each stage is detected by looking up its result variable by name, so the
# report reflects what was actually executed.

print("FINAL ANALYSIS STATUS CHECK")
print("=" * 60)

# Check what analysis components were completed
completed_analyses = []
available_results = []

if 'G' in locals():
    print(f"✓ Network loaded: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
    completed_analyses.append("Network Loading")

if 'analysis_results' in locals():
    print("✓ Main percolation analysis completed")
    print(f"  - Sampling points: {len(analysis_results['thresholds'])}")
    print(f"  - Critical points: {len(analysis_results['critical_points'])}")
    completed_analyses.append("Main Percolation Analysis")
    available_results.append("analysis_results")

if 'advanced_results' in locals():
    print("✓ Advanced percolation analysis completed")
    print(f"  - Critical threshold: {advanced_results['critical_threshold']:.6f}")
    print(f"  - Transition points: {len(advanced_results['transition_points'])}")
    completed_analyses.append("Advanced Analysis")
    available_results.append("advanced_results")

if 'hierarchy_results' in locals():
    print("✓ Network hierarchy analysis completed")
    print(f"  - Analyzed thresholds: {len(hierarchy_results)}")
    completed_analyses.append("Hierarchy Analysis")
    available_results.append("hierarchy_results")

if 'validation_results' in locals():
    print("✓ Validation analysis completed")
    print(f"  - Status: {validation_results['overall_status']}")
    completed_analyses.append("Validation")
    available_results.append("validation_results")

if 'community_analysis_results' in locals():
    print("✓ Community analysis completed")
    print(f"  - Analyzed thresholds: {len(community_analysis_results)}")
    completed_analyses.append("Community Analysis")
    available_results.append("community_analysis_results")

if 'resilience_results' in locals():
    print("✓ Resilience analysis completed")
    print(f"  - Attack strategies: {len(resilience_results)}")
    completed_analyses.append("Resilience Analysis")
    available_results.append("resilience_results")

if 'benchmark_results' in locals():
    print("✓ Performance benchmark completed")
    completed_analyses.append("Performance Benchmark")
    available_results.append("benchmark_results")

print("\nSUMMARY:")
print(f"Total completed analyses: {len(completed_analyses)}")
print(f"Available result sets: {len(available_results)}")

# The pipeline counts as complete only when the four core stages are present.
# Checking them by name matters: a plain count of four would also be reached
# by optional stages (network, community, resilience, benchmark) alone.
core_components = [
    "Main Percolation Analysis",
    "Advanced Analysis",
    "Hierarchy Analysis",
    "Validation",
]
missing_core_components = [
    component for component in core_components
    if component not in completed_analyses
]

if not missing_core_components:
    print("✓ ANALYSIS COMPLETE - All core components finished")
else:
    print("⚠ ANALYSIS PARTIAL - Some components may be missing")
    print(f"  Missing core components: {', '.join(missing_core_components)}")

print("\nCompleted analysis components:")
for i, analysis in enumerate(completed_analyses, 1):
    print(f"  {i}. {analysis}")

# Memory cleanup recommendations
print("\nMEMORY USAGE NOTES:")
print("- Large analysis variables are retained for further use")
print("- Run 'del variable_name' to free memory if needed")
print("- All results have been exported to files")

print("=" * 60)
print("PERCOLATION ANALYSIS PIPELINE COMPLETED")
print("=" * 60)

In [None]:
# Overview table of the major variables this notebook creates, and whether
# each one exists in the current session.

print("AVAILABLE ANALYSIS VARIABLES")
print("=" * 60)

# (variable name, description) for every major variable of the analysis.
# Availability is looked up in a plain loop at cell level on purpose:
# inside a comprehension locals() would refer to the comprehension's own scope.
variables_info = []
for var_name, description in (
    ("G", "Main network graph"),
    ("analysis_results", "Main percolation analysis results"),
    ("advanced_results", "Advanced analysis with critical exponents"),
    ("hierarchy_results", "Network hierarchy analysis"),
    ("validation_results", "Analysis validation results"),
    ("community_analysis_results", "Community structure analysis"),
    ("resilience_results", "Network resilience analysis"),
    ("benchmark_results", "Performance benchmark results"),
    ("final_analysis_thresholds", "Final threshold array"),
    ("final_analysis_giant_sizes", "Final giant component sizes"),
    ("final_analysis_critical_points", "Final critical points"),
):
    variables_info.append((var_name, description, var_name in locals()))

print("{:<30} {:<40} {:<10}".format('Variable', 'Description', 'Available'))
print("-" * 80)

# One table row per variable
for var_name, description, available in variables_info:
    status = ("✗ No", "✓ Yes")[bool(available)]
    print(f"{var_name:<30} {description:<40} {status:<10}")

available_count = sum(1 for entry in variables_info if entry[2])

print("-" * 80)
print(f"Total available: {available_count}/{len(variables_info)}")

# Headline numbers, shown only for the result sets that exist
if 'analysis_results' in locals():
    print("\nKEY RESULTS SUMMARY:")
    print(f"Network size: {analysis_results['network_info']['nodes']} nodes")
    print(f"Critical points identified: {len(analysis_results['critical_points'])}")

if 'advanced_results' in locals():
    print(f"Critical threshold: {advanced_results['critical_threshold']:.6f}")
    print(f"Percolation threshold: {advanced_results['percolation_threshold']:.6f}")

print("\nFor detailed results, see exported files and comprehensive documentation.")
print("=" * 60)