The Law of Large Numbers

In [None]:
# Demonstration of the Law of Large Numbers (LLN) in Python
# This code simulates two examples (Bernoulli and Normal), computes the running (sample) mean,
# and plots how the running mean converges to the theoretical expectation as sample size grows.
# Requirements for plots: uses matplotlib (no seaborn), each plot is a separate figure.
# Run in a Jupyter environment to see the plots inline.

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Seed NumPy's global random state so the simulated samples below
# (and therefore the printed summaries and plots) are reproducible across runs.
np.random.seed(42)

def simulate_and_plot(dist_name, samples, theoretical_mean):
    """Summarize and plot the running mean of ``samples``.

    Prints the theoretical mean, the final sample mean and their absolute
    difference, shows a small preview table of (n, sample, running_mean),
    and draws the running mean against n with a dashed horizontal line at
    ``theoretical_mean``.

    Args:
        dist_name: Label used in the printed header, table title and plot title.
        samples: Non-empty sequence or array of numeric draws (expected to be
            one-dimensional).
        theoretical_mean: Value the running mean is compared against.

    Raises:
        ValueError: If ``samples`` contains no observations.
    """
    samples = np.asarray(samples)
    if samples.size == 0:
        # Fail early with a clear message instead of an IndexError on
        # running_mean[-1] after part of the summary was already printed.
        raise ValueError("samples must contain at least one observation")

    # Sample counts 1..N; reused for the running mean, the table and the plot.
    counts = np.arange(1, len(samples) + 1)
    running_mean = np.cumsum(samples) / counts
    final_mean = running_mean[-1]

    # create dataframe for display (first few and last few values)
    df_preview = pd.DataFrame({
        "n": counts,
        "sample": samples,
        "running_mean": running_mean
    })

    # Show the first 5 and last 5 rows; for 10 rows or fewer, head() and
    # tail() would overlap and repeat rows, so show the whole table instead.
    if len(df_preview) <= 10:
        display_df = df_preview
    else:
        display_df = pd.concat([df_preview.head(), df_preview.tail()])

    print(f"--- {dist_name} example ---")
    print(f"Theoretical mean: {theoretical_mean}")
    print(f"Final sample mean (n={len(samples)}): {final_mean:.6f}")
    print(f"Absolute error: {abs(final_mean - theoretical_mean):.6f}\n")

    # display a small table
    try:
        # Optional helper; presumably only present in some hosted notebook
        # environments, hence the deliberate best-effort fallback below.
        from caas_jupyter_tools import display_dataframe_to_user
        display_dataframe_to_user(f"{dist_name} sample preview", display_df)
    except Exception:
        # Fallback: print the table if the helper is not available
        print(display_df.to_string(index=False))

    # Plot the running mean
    plt.figure(figsize=(10, 5))
    plt.plot(counts, running_mean)
    # horizontal theoretical mean line
    plt.axhline(theoretical_mean, linestyle='--')
    plt.title(f"Running sample mean — {dist_name} (Law of Large Numbers)")
    plt.xlabel("Number of samples (n)")
    plt.ylabel("Running sample mean")
    # annotate final value; the label sits left of and slightly above the last point
    plt.annotate(f"final mean = {final_mean:.4f}",
                 xy=(len(samples), final_mean),
                 xytext=(len(samples)*0.6, final_mean + 0.1),
                 arrowprops=dict(arrowstyle="->"))
    plt.grid(True)
    plt.show()


# Example 1: Bernoulli(p) draws are 0/1 with success probability p,
# so the value the running mean should approach is p itself.
p = 0.3
n = 20000
bern_samples = np.random.binomial(n=1, p=p, size=n)
simulate_and_plot(f"Bernoulli(p={p})", bern_samples, p)

# Example 2: Normal(mu, sigma) draws; the running mean should approach mu.
mu, sigma = 5.0, 2.0
n2 = 20000
normal_samples = np.random.normal(loc=mu, scale=sigma, size=n2)
simulate_and_plot(f"Normal(mu={mu}, sigma={sigma})", normal_samples, mu)

# Optional: convergence-rate view for the Bernoulli run.
# The squared error (Xbar_n - p)^2 of this single run is used as a rough,
# one-realization stand-in for the mean squared error E[(Xbar_n - p)^2].
running_mse = (np.cumsum(bern_samples) / np.arange(1, n + 1) - p) ** 2
# Keep only 1000 evenly spaced indices so the plotted line stays light.
idx = np.linspace(0, n - 1, num=1000, dtype=int)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(idx + 1, running_mse[idx])  # idx is 0-based; the sample count is idx + 1
ax.set_yscale('log')
ax.set_title("Squared error of running mean (Bernoulli) — log scale")
ax.set_xlabel("Number of samples (n)")
ax.set_ylabel("(running_mean - p)^2 (log scale)")
ax.grid(True)
plt.show()

print(
    "Done. The plots above demonstrate the Law of Large Numbers: as n increases, the sample mean\n"
    "converges to the theoretical expectation."
)


--- Bernoulli(p=0.3) example --- 
Theoretical mean: 0.3 
Final sample mean (n=20000): 0.297950 
Absolute error: 0.002050 
--- Normal(mu=5.0, sigma=2.0) example --- 
Theoretical mean: 5.0 
Final sample mean (n=20000): 4.987747 
Absolute error: 0.012253 
Done. The plots above demonstrate the Law of Large Numbers: as n increases, the sample mean converges to the theoretical expectation.