# 🗺️ Multi-View Graph Analysis

This notebook analyzes the three graph views constructed for TRAF-GNN:
- **Physical Topology**: Road network connections
- **Spatial Proximity**: k-NN based on geographic distance
- **Traffic Correlation**: Similarity in traffic patterns

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from pathlib import Path
import pandas as pd
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): the bare 'ignore' action applies to all categories, so
# deprecation and runtime warnings (e.g. divide-by-zero in the statistics
# cells) are hidden too — consider narrowing this to specific categories.
warnings.filterwarnings('ignore')

# Global plot appearance shared by all figures in this notebook.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require
# Matplotlib >= 3.6 — confirm against the project's pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
# IPython magic: render figures inline in the notebook output.
%matplotlib inline

# Simple marker that the setup cell ran to completion.
print("‚úÖ Imports successful!")

## 1. Load Graph Adjacency Matrices

In [None]:
# Load the three adjacency matrices (one per graph view) from disk.
graphs_dir = Path('../graphs')

A_physical = np.load(graphs_dir / 'metr-la_A_physical.npy')
A_proximity = np.load(graphs_dir / 'metr-la_A_proximity.npy')
A_correlation = np.load(graphs_dir / 'metr-la_A_correlation.npy')

# Fail fast on malformed inputs: the cells below treat each array as a square
# node-by-node matrix and compare edges across views by node index, so all
# three views must be 2-D, square, and defined over the same number of nodes.
for _view_name, _view_adj in (
    ('Physical', A_physical),
    ('Proximity', A_proximity),
    ('Correlation', A_correlation),
):
    assert _view_adj.ndim == 2 and _view_adj.shape[0] == _view_adj.shape[1], (
        f"{_view_name} adjacency must be a square 2-D matrix, got shape {_view_adj.shape}"
    )
    assert _view_adj.shape == A_physical.shape, (
        f"{_view_name} adjacency shape {_view_adj.shape} does not match "
        f"Physical adjacency shape {A_physical.shape}"
    )

print("üìä Graph Shapes:")
print(f"  Physical: {A_physical.shape}")
print(f"  Proximity: {A_proximity.shape}")
print(f"  Correlation: {A_correlation.shape}")

## 2. Basic Graph Statistics

In [None]:
def graph_stats(adj_matrix, name):
    """Print summary statistics for one graph view and return its node degrees.

    An edge is any off-diagonal entry with a strictly positive weight. Entries
    are counted individually, so a symmetric (undirected) matrix contributes
    two entries per connection; density is normalised by ``n * (n - 1)`` to
    match that convention.

    Self-loops are excluded by masking the diagonal explicitly, so the result
    is correct whether or not the matrix stores self-loops.

    Args:
        adj_matrix: Square 2-D NumPy adjacency matrix (not modified).
        name: Label printed in the report header.

    Returns:
        1-D integer array with, for each node, the number of positive
        off-diagonal entries in its row.
    """
    n = adj_matrix.shape[0]

    # `adj_matrix > 0` allocates a fresh boolean array, so clearing its
    # diagonal leaves the caller's matrix untouched.
    has_edge = adj_matrix > 0
    np.fill_diagonal(has_edge, False)

    edges = int(has_edge.sum())
    # A graph with fewer than two nodes has no possible off-diagonal edges.
    density = edges / (n * (n - 1)) if n > 1 else 0.0
    degrees = has_edge.sum(axis=1)

    print(f"\n{'='*50}")
    print(f"{name} Graph Statistics")
    print(f"{'='*50}")
    print(f"  Nodes: {n}")
    print(f"  Edges: {edges}")
    print(f"  Density: {density:.4f}")
    print(f"  Avg Degree: {np.mean(degrees):.2f}")
    print(f"  Max Degree: {np.max(degrees):.0f}")
    print(f"  Min Degree: {np.min(degrees):.0f}")
    print(f"  Std Degree: {np.std(degrees):.2f}")

    return degrees

# Report statistics for each view in turn, keeping the per-node degree vectors
# for the degree-distribution plots later in the notebook.
deg_physical, deg_proximity, deg_correlation = [
    graph_stats(view_adj, view_label)
    for view_adj, view_label in (
        (A_physical, "Physical Topology"),
        (A_proximity, "Spatial Proximity"),
        (A_correlation, "Traffic Correlation"),
    )
]

## 3. Visualize Adjacency Matrices

In [None]:
# One heatmap per graph view, side by side, each with its own colour scale.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

graphs = [
    (A_physical, 'Physical Topology', 'Reds'),
    (A_proximity, 'Spatial Proximity', 'Blues'),
    (A_correlation, 'Traffic Correlation', 'Greens'),
]

for panel, (matrix, heading, palette) in zip(axes, graphs):
    heatmap = panel.imshow(
        matrix,
        cmap=palette,
        aspect='auto',
        interpolation='nearest',
    )
    panel.set_title(heading, fontsize=14, fontweight='bold')
    panel.set(xlabel='Node ID', ylabel='Node ID')
    # Attach the colour bar to this panel only, so the scales stay independent.
    fig.colorbar(heatmap, ax=panel, fraction=0.046, pad=0.04)

fig.tight_layout()
plt.show()

## 4. Degree Distribution Analysis

In [None]:
# Histogram of node degrees for each view, with the mean marked by a dashed line.
fig, axes = plt.subplots(1, 3, figsize=(18, 4))

degrees_list = [
    (deg_physical, 'Physical Topology', 'red'),
    (deg_proximity, 'Spatial Proximity', 'blue'),
    (deg_correlation, 'Traffic Correlation', 'green'),
]

for panel, (view_degrees, heading, shade) in zip(axes, degrees_list):
    mean_degree = np.mean(view_degrees)

    panel.hist(view_degrees, bins=30, color=shade, alpha=0.7, edgecolor='black')
    panel.axvline(
        mean_degree,
        color='black',
        linestyle='--',
        linewidth=2,
        label=f'Mean: {mean_degree:.1f}',
    )
    panel.set(xlabel='Degree', ylabel='Frequency', title=heading)
    panel.legend()
    panel.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 5. Graph Overlap Analysis

In [None]:
# Calculate edge overlaps
def get_edges(adj):
    """Return the set of directed edges ``(i, j)`` present in an adjacency matrix.

    An edge exists where ``adj[i, j] > 0`` and ``i != j`` (diagonal entries,
    i.e. self-loops, are excluded). Both orientations of a symmetric
    connection are returned as separate tuples.

    Args:
        adj: 2-D NumPy adjacency matrix.

    Returns:
        Set of ``(row, col)`` tuples of plain Python ints.
    """
    # Vectorised lookup of all positive entries instead of a Python double loop.
    rows, cols = np.nonzero(np.asarray(adj) > 0)
    off_diagonal = rows != cols
    # .tolist() converts NumPy integers to built-in ints for clean set members.
    return set(zip(rows[off_diagonal].tolist(), cols[off_diagonal].tolist()))

# Directed edge sets (self-loops excluded) for each view.
edges_physical = get_edges(A_physical)
edges_proximity = get_edges(A_proximity)
edges_correlation = get_edges(A_correlation)

# Pairwise and three-way intersections of the edge sets.
overlap_phys_prox = edges_physical & edges_proximity
overlap_phys_corr = edges_physical & edges_correlation
overlap_prox_corr = edges_proximity & edges_correlation
overlap_all = edges_physical & edges_proximity & edges_correlation


def _share_pct(part, whole):
    """Size of `part` as a percentage of `whole`; 0.0 when `whole` is empty.

    The guard avoids a ZeroDivisionError when a view has no off-diagonal edges.
    """
    return 100 * len(part) / len(whole) if whole else 0.0


# Percentages are relative to the first view named on each line; the
# three-way overlap is reported relative to the physical edge set.
print("\nüìä Edge Overlap Analysis:")
print(f"  Physical edges: {len(edges_physical)}")
print(f"  Proximity edges: {len(edges_proximity)}")
print(f"  Correlation edges: {len(edges_correlation)}")
print(f"\n  Physical ‚à© Proximity: {len(overlap_phys_prox)} ({_share_pct(overlap_phys_prox, edges_physical):.1f}%)")
print(f"  Physical ‚à© Correlation: {len(overlap_phys_corr)} ({_share_pct(overlap_phys_corr, edges_physical):.1f}%)")
print(f"  Proximity ‚à© Correlation: {len(overlap_prox_corr)} ({_share_pct(overlap_prox_corr, edges_proximity):.1f}%)")
print(f"\n  All three: {len(overlap_all)} ({_share_pct(overlap_all, edges_physical):.1f}%)")

In [None]:
# Venn diagram data
from matplotlib.patches import Circle

fig, ax = plt.subplots(figsize=(10, 6))

# Edges that appear in exactly one view.
unique_physical = len(edges_physical - (edges_proximity | edges_correlation))
unique_proximity = len(edges_proximity - (edges_physical | edges_correlation))
unique_correlation = len(edges_correlation - (edges_physical | edges_proximity))

# Pairwise bars exclude edges shared by all three views, so every edge is
# counted in exactly one of the seven categories.
shared_by_all = len(overlap_all)
data = {
    'Physical Only': unique_physical,
    'Proximity Only': unique_proximity,
    'Correlation Only': unique_correlation,
    'Phys ‚à© Prox': len(overlap_phys_prox) - shared_by_all,
    'Phys ‚à© Corr': len(overlap_phys_corr) - shared_by_all,
    'Prox ‚à© Corr': len(overlap_prox_corr) - shared_by_all,
    'All Three': shared_by_all,
}

# One bar per category, in dictionary order.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'cyan', 'magenta']
bars = ax.bar(
    list(data.keys()),
    list(data.values()),
    color=colors,
    alpha=0.7,
    edgecolor='black',
)
ax.set_ylabel('Number of Edges')
ax.set_title('Graph Edge Overlaps', fontsize=14, fontweight='bold')
ax.tick_params(axis='x', rotation=45)
ax.grid(True, alpha=0.3, axis='y')

fig.tight_layout()
plt.show()

# Heuristic verdict: the views count as complementary when the physical-only
# edges outnumber the edges common to all three views.
print("\nüîç Key Insight:")
insight = (
    "   ‚úì Graphs capture DIFFERENT relationships (good for multi-view learning!)"
    if unique_physical > shared_by_all
    else "   ‚ö†Ô∏è High overlap - graphs may be redundant"
)
print(insight)

## 6. Network Properties

In [None]:
# Convert to NetworkX for advanced analysis
def analyze_network(adj, name):
    """Print connectivity, clustering and path-length properties of one graph view.

    The adjacency matrix is converted with ``nx.from_numpy_array`` using its
    default graph type. No ``weight`` argument is passed to the diameter or
    shortest-path calls below.

    For a connected graph the diameter and average shortest path length are
    reported for the whole graph. For a disconnected graph the number of
    components, the size of the largest one, and the diameter of that largest
    component are reported instead.

    NOTE(review): ``from_numpy_array`` creates an edge for every nonzero
    entry, whereas the other cells in this notebook only treat entries ``> 0``
    as edges — confirm the matrices contain no negative weights.

    Args:
        adj: Square 2-D NumPy adjacency matrix.
        name: Label printed in the report header.

    Returns:
        None. Results are printed.
    """
    G = nx.from_numpy_array(adj)

    print(f"\n{'='*50}")
    print(f"{name} Network Properties")
    print(f"{'='*50}")

    # Connectivity
    is_connected = nx.is_connected(G)
    print(f"  Connected: {is_connected}")

    largest_component_nodes = None
    if not is_connected:
        # Enumerate the components once and reuse the largest one for the
        # diameter below instead of traversing the graph a second time.
        components = list(nx.connected_components(G))
        largest_component_nodes = max(components, key=len)
        print(f"  Components: {len(components)}")
        print(f"  Largest component: {len(largest_component_nodes)} nodes")

    # Clustering
    clustering = nx.average_clustering(G)
    print(f"  Clustering coefficient: {clustering:.4f}")

    # Path-length metrics are only defined on a connected graph, so fall back
    # to the largest component when the full graph is disconnected.
    if is_connected:
        diameter = nx.diameter(G)
        avg_path_length = nx.average_shortest_path_length(G)
        print(f"  Diameter: {diameter}")
        print(f"  Avg shortest path: {avg_path_length:.2f}")
    else:
        largest_cc = G.subgraph(largest_component_nodes)
        diameter = nx.diameter(largest_cc)
        print(f"  Diameter (largest component): {diameter}")

# Print the network-property report for each of the three views in turn.
for view_adj, view_label in (
    (A_physical, "Physical Topology"),
    (A_proximity, "Spatial Proximity"),
    (A_correlation, "Traffic Correlation"),
):
    analyze_network(view_adj, view_label)

## 7. Visualize Sample Neighborhoods

In [None]:
# Pick one fixed example node and visualize its neighbors in each graph.
sample_node = 100
assert 0 <= sample_node < A_physical.shape[0], (
    f"sample_node={sample_node} is out of range for {A_physical.shape[0]} nodes"
)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

graphs_data = [
    (A_physical, 'Physical', 'red'),
    (A_proximity, 'Proximity', 'blue'),
    (A_correlation, 'Correlation', 'green')
]

# Neighbor count per view, filled in by the loop so the printed summary below
# always agrees with the plot titles (and never assumes a self-loop exists).
neighbor_counts = {}

for ax, (adj, title, color) in zip(axes, graphs_data):
    # Get neighbors: positive-weight entries in the node's row, minus itself
    neighbors = np.where(adj[sample_node] > 0)[0]
    neighbors = neighbors[neighbors != sample_node]  # Exclude self
    neighbor_counts[title] = len(neighbors)

    # Create subgraph; the sample node is placed first, so it is index 0 below
    nodes = [sample_node] + list(neighbors)
    subgraph_adj = adj[np.ix_(nodes, nodes)]

    # Draw with a seeded layout so the figure is reproducible
    G = nx.from_numpy_array(subgraph_adj)
    pos = nx.spring_layout(G, seed=42)

    # Highlight the sample node; neighbors are drawn in gray
    node_colors = [color if i == 0 else 'lightgray' for i in range(len(nodes))]

    nx.draw(G, pos, ax=ax, node_color=node_colors,
            node_size=500, with_labels=False,
            edge_color='gray', alpha=0.6, width=2)

    ax.set_title(f"{title}\nNode {sample_node}: {len(neighbors)} neighbors",
                fontweight='bold')

plt.tight_layout()
plt.show()

print(f"\nüéØ Node {sample_node} Neighborhood Analysis:")
print(f"  Physical neighbors: {neighbor_counts['Physical']}")
print(f"  Proximity neighbors: {neighbor_counts['Proximity']}")
print(f"  Correlation neighbors: {neighbor_counts['Correlation']}")

## 8. Summary & Insights

In [None]:
# Closing report: a static description of the three views, followed by a
# verdict derived from the three-way edge overlap computed earlier.
banner = "=" * 60

print(banner)
print("üìù MULTI-VIEW GRAPH ANALYSIS SUMMARY")
print(banner)

view_descriptions = (
    "\n‚úÖ Three Complementary Graph Views Created:",
    "\n1. Physical Topology Graph",
    "   - Captures road network structure",
    "   - Sparse, low degree",
    "   - Represents direct connections",
    "\n2. Spatial Proximity Graph",
    "   - Based on geographic distance",
    "   - k-NN structure",
    "   - Captures nearby sensors",
    "\n3. Traffic Correlation Graph",
    "   - Based on historical patterns",
    "   - Learns functional relationships",
    "   - May connect distant but correlated nodes",
)
print(*view_descriptions, sep="\n")

print("\nüéØ Key Findings:")
# Share of physical edges that also appear in both other views.
overlap_pct = 100 * len(overlap_all) / len(edges_physical)
if overlap_pct >= 30:
    print(f"   ‚ö†Ô∏è High overlap ({overlap_pct:.1f}%) - graphs may be redundant")
else:
    print(f"   ‚úì Low overlap ({overlap_pct:.1f}%) - graphs provide DIVERSE information")
    print("   ‚úì Multi-view learning will be beneficial!")

next_steps = (
    "\nüìã Next Steps:",
    "   1. These graphs will be used in the Multi-View GNN model",
    "   2. Each view will have its own graph convolution layer",
    "   3. Features will be fused using attention mechanism",
    "   4. Start Phase 3: Model Architecture Implementation",
)
print(*next_steps, sep="\n")
print(banner)