# Centrality in Networks

This notebook demonstrates various centrality measures in network analysis using NetworkX.

## Learning Objectives
- Understand different centrality measures
- Implement centrality calculations using NetworkX
- Visualize centrality distributions
- Compare different centrality measures

In [None]:
# Import required libraries
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Set style for better plots: start from Matplotlib's 'default' style sheet,
# then switch seaborn's color palette to "husl".
plt.style.use('default')
sns.set_palette("husl")

# For better display in notebooks: the IPython magic below selects the inline
# backend so figures are rendered in the cell output.
%matplotlib inline
# Default (width, height) for figures that do not pass their own figsize.
plt.rcParams['figure.figsize'] = (12, 8)

## 1. Creating Example Networks

Let's start by creating simple example networks to demonstrate centrality measures.

In [None]:
# Edge lists for the two example graphs (same node set 1..6 in both)
edges_undirected = [(1, 2), (2, 3), (2, 4), (6, 3), (4, 5), (4, 6), (5, 6)]
edges_directed = [(1, 2), (2, 4), (2, 3), (4, 5), (4, 6), (5, 6), (6, 3)]

# Build each graph straight from its edge list; for the directed graph every
# pair is read as (source, target)
G_undirected = nx.Graph(edges_undirected)
G_directed = nx.DiGraph(edges_directed)

# Report the size of both graphs
print(f"Undirected graph: {G_undirected.number_of_nodes()} nodes, {G_undirected.number_of_edges()} edges")
print(f"Directed graph: {G_directed.number_of_nodes()} nodes, {G_directed.number_of_edges()} edges")

In [None]:
# Draw both example graphs side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Layouts are seeded so the node positions are reproducible; pos1/pos2 are
# reused by the later centrality plots
pos1 = nx.spring_layout(G_undirected, seed=42)
pos2 = nx.spring_layout(G_directed, seed=42)

# Drawing options shared by both panels
node_style = dict(with_labels=True, node_size=500, font_size=12, font_weight='bold')
title_style = dict(fontsize=14, fontweight='bold')

# Left panel: undirected graph
nx.draw(G_undirected, pos1, node_color='lightblue', ax=ax1, **node_style)
ax1.set_title('Undirected Graph', **title_style)

# Right panel: directed graph
nx.draw(G_directed, pos2, node_color='lightgreen', ax=ax2, **node_style)
ax2.set_title('Directed Graph', **title_style)

plt.tight_layout()
plt.show()

## 2. Degree Centrality

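Degree centrality is the simplest centrality measure: it is based on the number of connections each node has. NetworkX's `degree_centrality` reports that count normalized by the maximum possible degree, $n - 1$, whereas the in-degree and out-degree values shown for the directed graph are raw edge counts.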

In [None]:
# Degree centrality of every node in the undirected graph
degree_centrality_undir = nx.degree_centrality(G_undirected)

# Per-node in-degree and out-degree of the directed graph, as plain dicts
in_degree = dict(G_directed.in_degree())
out_degree = dict(G_directed.out_degree())

print("Degree Centrality (Undirected):")
for node, centrality in degree_centrality_undir.items():
    print(f"Node {node}: {centrality:.3f}")

# Both directed listings share the same layout, so print them in one loop
for heading, degrees in (
    ("\nIn-Degree (Directed):", in_degree),
    ("\nOut-Degree (Directed):", out_degree),
):
    print(heading)
    for node, degree in degrees.items():
        print(f"Node {node}: {degree}")

In [None]:
# Plot degree information as node color on the two example graphs
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Drawing options shared by both panels
node_style = dict(with_labels=True, node_size=500, font_size=12, font_weight='bold')
title_style = dict(fontsize=14, fontweight='bold')

# One color value per node, in the graph's own node order
node_colors_undir = [degree_centrality_undir[node] for node in G_undirected]
node_colors_dir = [in_degree[node] for node in G_directed]

# Left panel: undirected graph colored by degree centrality
nx.draw(G_undirected, pos1, node_color=node_colors_undir, cmap=plt.cm.Reds,
        ax=ax1, **node_style)
ax1.set_title('Degree Centrality (Undirected)', **title_style)

# Right panel: directed graph colored by in-degree
nx.draw(G_directed, pos2, node_color=node_colors_dir, cmap=plt.cm.Greens,
        ax=ax2, **node_style)
ax2.set_title('In-Degree (Directed)', **title_style)

plt.tight_layout()
plt.show()

## 3. Closeness Centrality

Closeness centrality measures how close a node is to all other nodes in the network.

In [None]:
# Closeness centrality for both example graphs
closeness_centrality_undir = nx.closeness_centrality(G_undirected)
closeness_centrality_dir = nx.closeness_centrality(G_directed)

# Print one "Node <id>: <score>" line per node under each heading
for heading, scores in (
    ("Closeness Centrality (Undirected):", closeness_centrality_undir),
    ("\nCloseness Centrality (Directed):", closeness_centrality_dir),
):
    print(heading)
    for node, centrality in scores.items():
        print(f"Node {node}: {centrality:.3f}")

In [None]:
# Plot closeness centrality as node color on the two example graphs
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Drawing options shared by both panels
node_style = dict(with_labels=True, cmap=plt.cm.Blues, node_size=500,
                  font_size=12, font_weight='bold')
title_style = dict(fontsize=14, fontweight='bold')

# One color value per node, in the graph's own node order
node_colors_closeness_undir = [closeness_centrality_undir[node] for node in G_undirected]
node_colors_closeness_dir = [closeness_centrality_dir[node] for node in G_directed]

# Left panel: undirected graph
nx.draw(G_undirected, pos1, node_color=node_colors_closeness_undir, ax=ax1, **node_style)
ax1.set_title('Closeness Centrality (Undirected)', **title_style)

# Right panel: directed graph
nx.draw(G_directed, pos2, node_color=node_colors_closeness_dir, ax=ax2, **node_style)
ax2.set_title('Closeness Centrality (Directed)', **title_style)

plt.tight_layout()
plt.show()

## 4. Betweenness Centrality

Betweenness centrality measures how often a node lies on the shortest paths between other nodes.

In [None]:
# Betweenness centrality for both example graphs
betweenness_centrality_undir = nx.betweenness_centrality(G_undirected)
betweenness_centrality_dir = nx.betweenness_centrality(G_directed)

# Print one "Node <id>: <score>" line per node under each heading
for heading, scores in (
    ("Betweenness Centrality (Undirected):", betweenness_centrality_undir),
    ("\nBetweenness Centrality (Directed):", betweenness_centrality_dir),
):
    print(heading)
    for node, centrality in scores.items():
        print(f"Node {node}: {centrality:.3f}")

In [None]:
# Plot betweenness centrality as node color on the two example graphs
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Drawing options shared by both panels
node_style = dict(with_labels=True, cmap=plt.cm.Purples, node_size=500,
                  font_size=12, font_weight='bold')
title_style = dict(fontsize=14, fontweight='bold')

# One color value per node, in the graph's own node order
node_colors_between_undir = [betweenness_centrality_undir[node] for node in G_undirected]
node_colors_between_dir = [betweenness_centrality_dir[node] for node in G_directed]

# Left panel: undirected graph
nx.draw(G_undirected, pos1, node_color=node_colors_between_undir, ax=ax1, **node_style)
ax1.set_title('Betweenness Centrality (Undirected)', **title_style)

# Right panel: directed graph
nx.draw(G_directed, pos2, node_color=node_colors_between_dir, ax=ax2, **node_style)
ax2.set_title('Betweenness Centrality (Directed)', **title_style)

plt.tight_layout()
plt.show()

## 5. PageRank Centrality

PageRank is a variant of eigenvector centrality that includes a damping factor.

In [None]:
# PageRank scores for both example graphs (library default parameters)
pagerank_centrality_undir = nx.pagerank(G_undirected)
pagerank_centrality_dir = nx.pagerank(G_directed)

# Print one "Node <id>: <score>" line per node under each heading
for heading, scores in (
    ("PageRank Centrality (Undirected):", pagerank_centrality_undir),
    ("\nPageRank Centrality (Directed):", pagerank_centrality_dir),
):
    print(heading)
    for node, centrality in scores.items():
        print(f"Node {node}: {centrality:.3f}")

## 6. Real-World Example: Les Miserables Network

Let's apply centrality measures to a real network - the character network from Les Miserables.

In [None]:
# Load the Les Miserables character network that ships with NetworkX
G_lesmis = nx.les_miserables_graph()
print(f"Les Miserables network: {G_lesmis.number_of_nodes()} nodes, {G_lesmis.number_of_edges()} edges")

# Compute the four centrality measures; each result maps node -> score
degree_lesmis = nx.degree_centrality(G_lesmis)
closeness_lesmis = nx.closeness_centrality(G_lesmis)
betweenness_lesmis = nx.betweenness_centrality(G_lesmis)
pagerank_lesmis = nx.pagerank(G_lesmis)

# Assemble one row per character and one column per measure, with every
# column listed in the graph's node order
lesmis_characters = list(G_lesmis.nodes())
lesmis_measures = {
    'Degree': degree_lesmis,
    'Closeness': closeness_lesmis,
    'Betweenness': betweenness_lesmis,
    'PageRank': pagerank_lesmis,
}
centrality_df = pd.DataFrame({
    'Character': lesmis_characters,
    **{
        column: [scores[name] for name in lesmis_characters]
        for column, scores in lesmis_measures.items()
    },
})

# Show the ten highest-scoring characters for each measure in turn
for heading, column in (
    ("\nTop 10 characters by Degree Centrality:", 'Degree'),
    ("\nTop 10 characters by Closeness Centrality:", 'Closeness'),
    ("\nTop 10 characters by Betweenness Centrality:", 'Betweenness'),
    ("\nTop 10 characters by PageRank:", 'PageRank'),
):
    print(heading)
    print(centrality_df.nlargest(10, column)[['Character', column]])

In [None]:
# Visualize the Les Miserables network with degree centrality.
# An explicit figure/axes pair is used so the colorbar can be attached to a
# known Axes instead of relying on pyplot's implicit "current axes".
fig_lesmis, ax_lesmis = plt.subplots(figsize=(15, 10))
pos_lesmis = nx.spring_layout(G_lesmis, seed=42)  # seeded for a reproducible layout

# Color and size every node by its degree centrality (graph node order)
node_colors_lesmis = [degree_lesmis[node] for node in G_lesmis.nodes()]
node_sizes_lesmis = [degree_lesmis[node] * 3000 for node in G_lesmis.nodes()]

# A single normalization shared by the nodes and the colorbar. Without it the
# colorbar's ScalarMappable falls back to a 0-1 range, which does not match
# the min-max range the node colors are actually scaled to.
degree_norm = plt.Normalize(vmin=min(node_colors_lesmis), vmax=max(node_colors_lesmis))

nx.draw(G_lesmis, pos_lesmis, with_labels=True, node_color=node_colors_lesmis,
        cmap=plt.cm.Reds, vmin=degree_norm.vmin, vmax=degree_norm.vmax,
        node_size=node_sizes_lesmis, font_size=8,
        font_weight='bold', alpha=0.8, ax=ax_lesmis)

ax_lesmis.set_title('Les Miserables Character Network - Degree Centrality',
                    fontsize=16, fontweight='bold')

# The ScalarMappable is not attached to any Axes, so `ax=` must be given:
# newer Matplotlib releases raise ValueError when it cannot infer which Axes
# to take space from.
fig_lesmis.colorbar(plt.cm.ScalarMappable(norm=degree_norm, cmap=plt.cm.Reds),
                    ax=ax_lesmis, label='Degree Centrality')
plt.show()

## 7. Correlation Between Centrality Measures

Let's examine how different centrality measures correlate with each other.

In [None]:
# Pairwise correlation between the four centrality columns
centrality_columns = ['Degree', 'Closeness', 'Betweenness', 'PageRank']
correlation_matrix = centrality_df[centrality_columns].corr()

# Render the matrix as an annotated heatmap with the color scale centered on zero
plt.figure(figsize=(10, 8))
heatmap_options = dict(annot=True, cmap='coolwarm', center=0,
                       square=True, linewidths=0.5)
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title('Correlation Between Centrality Measures', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

## 8. Summary and Key Insights

### Key Takeaways:

1. **Degree Centrality**: Identifies the most connected nodes
2. **Closeness Centrality**: Finds nodes that can efficiently spread information
3. **Betweenness Centrality**: Identifies bridge nodes that control network flow
4. **PageRank**: Combines random walk with teleportation for robust ranking

### When to Use Each Measure:
- **Degree**: Simple analysis of connectivity
- **Closeness**: Information spreading or accessibility
- **Betweenness**: Network resilience and flow control
- **PageRank**: Web-like structures and directed networks