## Setup

Run this cell to install required packages (only needed on Google Colab).

In [None]:
# Install dependencies (for Google Colab)
import sys
if 'google.colab' in sys.modules:
    !pip install -q rustworkx matplotlib numpy scipy

In [None]:
# Import libraries
import random
import math
import os
import numpy as np
from scipy.stats import binom, poisson
import matplotlib.pyplot as plt
from itertools import combinations

import rustworkx as rx
from rustworkx.visualization import mpl_draw

# Visualization settings: the three colors used by every figure below
NS_PURPLE, NS_GREEN, NS_ORANGE = '#8e44ad', '#2ecc71', '#FF9800'

# Default figure geometry, plus hiding the top and right axes spines
plt.rcParams.update({
    'figure.figsize': (8, 5),
    'figure.dpi': 100,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

In [None]:
def set_seed(seed=None):
    """Seed Python's ``random`` and NumPy's global RNG, returning the seed used.

    When ``seed`` is omitted, four bytes from ``os.urandom`` are turned into
    an unsigned big-endian integer and used instead, so the returned value
    can be passed back in later to reproduce the run.
    """
    chosen = int.from_bytes(os.urandom(4), 'big') if seed is None else seed
    # Same value for both generators, standard library first
    for seeder in (random.seed, np.random.seed):
        seeder(chosen)
    return chosen

---
## 3.2 The Random Network Model

A **random network** (Erdős–Rényi model) consists of $N$ nodes where each pair of nodes is connected with probability $p$.

### Key Definitions

| Term | Description |
|------|-------------|
| $G(N, p)$ | Each pair of $N$ labeled nodes is connected independently with probability $p$ |
| $G(N, L)$ | $N$ labeled nodes are connected by $L$ randomly placed links |
| $N$ | Number of nodes in the network |
| $p$ | Probability of connection between any two nodes |

### G(N, p) Model

Each pair of nodes is connected with probability $p$.

In [None]:
set_seed(2)

N, p = 5, 0.3

nodes = list(range(N))
possible_edges = list(combinations(nodes, 2))

# One independent trial per candidate pair, visited in pair order:
# keep the pair when the uniform draw falls below p.
edges = []
for pair in possible_edges:
    if random.random() < p:
        edges.append(pair)

print(f"Number of nodes: {N}")
print(f"Possible edges: {len(possible_edges)}")
print(f"Generated edges: {len(edges)} → {edges}")

### G(N, L) Model

Exactly $L$ links are randomly placed among all possible pairs.

In [None]:
set_seed(3)

N, L = 5, 3

nodes = list(range(N))
all_possible = [*combinations(nodes, 2)]
n_possible = len(all_possible)

# Draw L distinct pair indices (no replacement), then look up the pairs
selected_indices = np.random.choice(n_possible, size=L, replace=False)
edges = [all_possible[idx] for idx in selected_indices]

print(f"Number of nodes: {N}")
print(f"Possible edges: {n_possible}")
print(f"Selected {L} edges: {edges}")

### Visualizing Random Networks

Different realizations of the same $G(N, p)$ model produce different networks.

In [None]:
# set_seed only seeds Python's `random` and NumPy. The rustworkx generator
# takes its own `seed` argument, so pass one explicitly to make the figure
# reproducible.
base_seed = set_seed(4)

N, p = 10, 0.3

# Generate three different realizations (a distinct seed for each one)
graphs = [
    rx.undirected_gnp_random_graph(N, p, seed=base_seed + offset)
    for offset in range(3)
]

fig, axes = plt.subplots(1, 3, figsize=(12, 4))

for i, (g, ax) in enumerate(zip(graphs, axes)):
    # Circular layout keeps node positions identical across panels,
    # so only the edges differ between realizations.
    layout = rx.circular_layout(g)
    mpl_draw(g, pos=layout, ax=ax, node_color=NS_PURPLE, edge_color=NS_GREEN,
             with_labels=False, node_size=400)
    ax.set_title(f"Realization {i+1}: {g.num_edges()} edges")
    ax.set_axis_off()

plt.suptitle(f"Three G(N={N}, p={p}) Random Networks", fontsize=14)
plt.tight_layout()
plt.show()

---
## 3.3 Number of Links

In the $G(N, p)$ model, the number of links $L$ follows a binomial distribution. The probability that a random network has exactly $L$ links is:

$$p_L = \binom{\frac{N(N-1)}{2}}{L} p^L (1 - p)^{\frac{N(N-1)}{2} - L}$$

This is the product of:
- $\binom{\frac{N(N-1)}{2}}{L}$: Number of ways to place $L$ links among all pairs
- $p^L$: Probability that $L$ attempts result in links
- $(1-p)^{\frac{N(N-1)}{2} - L}$: Probability that the remaining $\frac{N(N-1)}{2} - L$ attempts do not create links

In [None]:
def prob_L_links(N, p, L):
    """Probability that a G(N, p) random network has exactly L links.

    This is the binomial PMF with ``N(N-1)/2`` trials and success
    probability ``p``. It is evaluated in log space so that the very large
    binomial coefficient and the very small powers of ``p`` and ``1 - p``
    cancel before exponentiation; multiplying them directly raises
    ``OverflowError`` once the coefficient no longer fits in a float.

    Args:
        N: Number of nodes.
        p: Link probability for each node pair, in [0, 1].
        L: Number of links.

    Returns:
        The probability as a float; 0.0 when ``L`` exceeds the number of
        node pairs.

    Raises:
        ValueError: If ``p`` is outside [0, 1] or ``L`` is negative.
    """
    if not 0 <= p <= 1:
        raise ValueError("p must be in [0, 1]")
    if L < 0:
        raise ValueError("L must be a non-negative integer")

    n_possible = math.comb(N, 2)  # N(N-1)/2
    if L > n_possible:
        return 0.0

    # Degenerate cases, handled separately because log(0) is undefined
    if p == 0:
        return 1.0 if L == 0 else 0.0
    if p == 1:
        return 1.0 if L == n_possible else 0.0

    # log C(n, L) via log-gamma: lgamma(m + 1) == log(m!)
    log_n_ways = (math.lgamma(n_possible + 1)
                  - math.lgamma(L + 1)
                  - math.lgamma(n_possible - L + 1))
    log_prob = (log_n_ways
                + L * math.log(p)
                + (n_possible - L) * math.log1p(-p))
    return math.exp(log_prob)

# Example
N, p, L = 10, 0.1, 5
print(f"P(L={L}) = {prob_L_links(N, p, L):.4f}")

### Distribution of Link Counts

In [None]:
N, p = 10, 0.3

# Support of the distribution: 0 .. N(N-1)/2 links
n_possible = math.comb(N, 2)
Ls = [*range(n_possible + 1)]
pmf = [prob_L_links(N, p, n_links) for n_links in Ls]

# Expected number of links
expected_L = n_possible * p

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(Ls, pmf, color=NS_PURPLE, edgecolor='white')
# Dashed marker at the mean of the distribution
ax.axvline(expected_L, color=NS_ORANGE, linestyle='--', linewidth=2,
           label=f'⟨L⟩ = {expected_L:.1f}')
ax.set_xlabel('L (number of links)')
ax.set_ylabel('P(L)')
ax.set_title(f'Link Distribution for G(N={N}, p={p})')
ax.legend()
plt.show()

### Expected Number of Links

$$\langle L \rangle = p \cdot \frac{N(N-1)}{2}$$

In [None]:
N = 10
p = 0.1

# Every unordered pair of nodes is a potential link; each exists with probability p
n_possible = math.comb(N, 2)
expected_links = n_possible * p

print(f"With N={N} nodes, there are {n_possible} possible edges")
print(f"Expected number of links: ⟨L⟩ = {expected_links}")

### Average Degree

$$\langle k \rangle = \frac{2 \langle L \rangle}{N} = p(N - 1)$$

In [None]:
N, p = 10, 0.25

expected_links = p * math.comb(N, 2)
avg_degree_v1 = 2 * expected_links / N  # ⟨k⟩ = 2⟨L⟩ / N
avg_degree_v2 = p * (N - 1)             # ⟨k⟩ = p(N - 1)

print(f"Average degree (from ⟨L⟩): {avg_degree_v1}")
print(f"Average degree (formula):  {avg_degree_v2}")
# The two expressions are algebraically equal but use different
# floating-point operations, so compare with a tolerance rather than ==;
# exact equality can fail for other choices of N and p.
assert math.isclose(avg_degree_v1, avg_degree_v2)

---
## 3.4 Degree Distribution

The degree distribution $p_k$ is the probability that a randomly chosen node has degree $k$.

### 3.4.1 Binomial Distribution

The exact degree distribution of a random network follows the binomial distribution:

$$p_k = \binom{N-1}{k} p^k (1 - p)^{N-1-k}$$

Each node can connect to $N-1$ other nodes, and each connection occurs with probability $p$.

In [None]:
set_seed(42)

N, p = 20, 0.25
n = N - 1  # Each node can connect to N-1 others

# Simulate: draw 10,000 degrees from Binomial(n, p) and turn the
# tally of each observed value into a relative frequency
samples = np.random.binomial(n, p, size=10000)
values, counts = np.unique(samples, return_counts=True)
empirical_pk = counts / samples.size

# Theoretical binomial PMF over the full support 0..n
ks = np.arange(n + 1)
theoretical_pk = binom.pmf(ks, n, p)

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(values, empirical_pk, color=NS_PURPLE, edgecolor='white', alpha=0.7,
       label='Empirical (10,000 samples)')
ax.plot(ks, theoretical_pk, 'o-', color=NS_GREEN, markersize=8,
        label='Theoretical Binomial')
ax.set_xlabel('Degree k')
ax.set_ylabel('$p_k$')
ax.set_title(f'Degree Distribution: Binomial(n={n}, p={p})')
ax.legend()
plt.show()

### 3.4.2 Poisson Approximation

For **sparse networks** where $\langle k \rangle \ll N$, the degree distribution is well approximated by the Poisson distribution:

$$p_k = e^{-\langle k \rangle} \frac{\langle k \rangle^k}{k!}$$

In [None]:
set_seed(11)

avg_degree = 5  # ⟨k⟩

# Sample 10,000 degrees from Poisson(⟨k⟩) and convert the tally of each
# observed value into a relative frequency
samples = np.random.poisson(avg_degree, size=10000)
values, counts = np.unique(samples, return_counts=True)
empirical_pk = counts / samples.size

# Theoretical Poisson PMF on k = 0..14
ks = np.arange(15)
theoretical_pk = poisson.pmf(ks, mu=avg_degree)

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(values, empirical_pk, color=NS_PURPLE, edgecolor='white', alpha=0.7,
       label='Empirical (10,000 samples)')
ax.plot(ks, theoretical_pk, 'o', color=NS_GREEN, markersize=10,
        label='Theoretical Poisson')
ax.set_xlabel('Degree k')
ax.set_ylabel('$p_k$')
ax.set_title(f'Poisson Degree Distribution: ⟨k⟩ = {avg_degree}')
ax.legend()
plt.show()

### Binomial vs Poisson: When Does Poisson Work?

The Poisson approximation works well when $\langle k \rangle \ll N$. As $N$ increases with fixed $\langle k \rangle$, the binomial converges to Poisson.

In [None]:
avg_degree = 5
Ns = [10, 100, 1000, 10000]
ks = np.arange(16)

# Poisson (the limit)
pk_poisson = poisson.pmf(ks, mu=avg_degree)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(ks, pk_poisson, color=NS_PURPLE, edgecolor='white', alpha=0.6,
       label='Poisson')

# One marker style per network size; a small horizontal shift per series
# keeps markers at the same k from sitting on top of each other.
markers = ['x', '^', 'o', 's']
offsets = np.linspace(-0.2, 0.2, len(Ns))

for N, marker, dx in zip(Ns, markers, offsets):
    n = N - 1
    p = avg_degree / n  # Recover p from ⟨k⟩ = p(N-1)
    pk_binom = binom.pmf(ks, n=n, p=p)
    ax.plot(ks + dx, pk_binom, linestyle='none', marker=marker, markersize=7,
            label=f'Binomial N={N}')

ax.set(
    xlabel='Degree k',
    ylabel='$p_k$',
    xlim=(-0.5, 15.5),
    ylim=(0, 0.2),
    title=f'Binomial → Poisson as N increases (⟨k⟩ = {avg_degree})',
)
ax.legend(frameon=False)
plt.show()

### When Poisson Fails: Dense Networks

When $\langle k \rangle$ is not much smaller than $N$, the Poisson approximation breaks down.

In [None]:
avg_degree = 80  # High average degree
Ns = [100, 1000, 10000]
ks = np.arange(50, 110)

# Poisson reference with the same mean
pk_poisson = poisson.pmf(ks, mu=avg_degree)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(ks, pk_poisson, color=NS_PURPLE, edgecolor='white', alpha=0.6,
       label='Poisson')

# One marker style per network size; a small horizontal shift per series
# keeps markers at the same k from sitting on top of each other.
markers = ['^', 'o', 's']
offsets = np.linspace(-0.3, 0.3, len(Ns))

for N, marker, dx in zip(Ns, markers, offsets):
    n = N - 1
    p = avg_degree / n  # p chosen so that p(N-1) equals the target ⟨k⟩
    pk_binom = binom.pmf(ks, n=n, p=p)
    ax.plot(ks + dx, pk_binom, linestyle='none', marker=marker, markersize=6,
            label=f'Binomial N={N}')

ax.set(
    xlabel='Degree k',
    ylabel='$p_k$',
    xlim=(55, 105),
    ylim=(0, 0.06),
    title=f'Poisson fails when ⟨k⟩ = {avg_degree} is NOT ≪ N',
)
ax.legend(frameon=False)
plt.show()

---
## Summary

| Section | Key Concepts |
|---------|--------------|
| **3.2** | Random network models: $G(N,p)$ and $G(N,L)$ |
| **3.3** | Number of links follows binomial distribution; $\langle L \rangle = p \cdot N(N-1)/2$ |
| **3.4** | Degree distribution: Binomial (exact) or Poisson (sparse networks where $\langle k \rangle \ll N$) |

---

*Companion notebook for [Network Science](http://networksciencebook.com/) by Albert-László Barabási*