In [1]:
%matplotlib inline

In [2]:
import os
import numpy as np
import scipy.stats as stats

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as grid

In [3]:
# Restore matplotlib's rcParams to the values from the rc file that was
# loaded when matplotlib was imported, so the figures below start from a
# known style baseline.
mpl.rc_file_defaults()

In [4]:
# Make sure the output directory for the saved figures exists.
# exist_ok=True makes this idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs("./plots", exist_ok=True)

In [5]:
def generate_samples(dist, n=100, N=1000, **args):
    """Draw ``N`` independent samples of size ``n`` from a named distribution.

    Sampling uses the legacy global ``np.random`` generator, so results are
    reproducible only if the caller seeds it (``np.random.seed``).

    Parameters
    ----------
    dist : str
        One of "Binomial", "Poisson", "Geometric", "Uniform", "Beta",
        "Gamma", "Normal", "Chi-Squared" or "Cauchy".
    n : int
        Size of each sample (number of columns in the result).
    N : int
        Number of samples (number of rows in the result).
    **args
        Distribution parameters:

        * "Binomial": ``p`` (required) -- drawn with a single trial.
        * "Poisson": ``lam`` (required).
        * "Geometric": ``p`` (required).
        * "Uniform": ``low`` (default 0), ``high`` (default 1).
        * "Beta": ``a`` (default 1), ``b`` (default 2).
        * "Gamma": ``shape`` (default 3), ``scale`` (default 1).
        * "Normal": ``mu`` and ``std`` (both required).
        * "Chi-Squared": ``df`` (required).
        * "Cauchy": none (standard Cauchy).

        Keys that a distribution does not use are ignored.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, n)``.

    Raises
    ------
    NotImplementedError
        If ``dist`` is not one of the supported names.
    KeyError
        If a required parameter for ``dist`` is missing from ``args``.
    """
    size = (N, n)

    # Samplers are wrapped in lambdas so that only the requested distribution
    # is evaluated (and only its parameters are looked up).
    samplers = {
        "Binomial": lambda: np.random.binomial(1, args["p"], size),
        "Poisson": lambda: np.random.poisson(args["lam"], size),
        "Geometric": lambda: np.random.geometric(args["p"], size),
        # The next three used to hard-code their parameters; the previous
        # constants are kept as defaults so existing calls behave the same.
        "Uniform": lambda: np.random.uniform(
            args.get("low", 0), args.get("high", 1), size
        ),
        "Beta": lambda: np.random.beta(
            args.get("a", 1), args.get("b", 2), size
        ),
        "Gamma": lambda: np.random.gamma(
            args.get("shape", 3), args.get("scale", 1), size
        ),
        "Normal": lambda: np.random.normal(args["mu"], args["std"], size),
        "Chi-Squared": lambda: np.random.chisquare(args["df"], size),
        "Cauchy": lambda: np.random.standard_cauchy(size),
    }

    if dist not in samplers:
        raise NotImplementedError(
            "Unsupported distribution %r; expected one of: %s"
            % (dist, ", ".join(sorted(samplers)))
        )

    return samplers[dist]()

In [6]:
def qq_plot(dist, ax, n=100, N=1000, **args):
    """Draw a normal Q-Q plot of sample means on ``ax``.

    ``N`` samples of size ``n`` are drawn from ``dist`` with
    ``generate_samples``; the mean of each sample is then plotted against
    normal quantiles using ``scipy.stats.probplot``.

    Parameters
    ----------
    dist : str
        Distribution name understood by ``generate_samples``.
    ax : matplotlib.axes.Axes
        Axes to draw on. The title and line styling are applied to this axes.
    n : int
        Size of each sample.
    N : int
        Number of samples, i.e. number of points in the Q-Q plot.
    **args
        Distribution parameters forwarded to ``generate_samples``.
    """
    samples = generate_samples(dist, n=n, N=N, **args)
    # probplot orders the data itself, so no pre-sorting is needed here.
    sample_means = samples.mean(axis=1)

    stats.probplot(sample_means, dist="norm", plot=ax)

    # probplot adds two lines to the axes: the sample markers, then the
    # least-squares fit. Taking the last two keeps the styling correct even
    # if the axes already held other lines.
    points, fit_line = ax.get_lines()[-2:]

    fit_line.set_linewidth(1.5)
    fit_line.set_color("#cc9955")
    fit_line.set_alpha(0.5)

    points.set_markersize(1)
    points.set_color("#9955cc")
    points.set_alpha(0.5)

    # Title the axes that was passed in rather than pyplot's "current" axes,
    # which is not guaranteed to be the same object.
    ax.set_title("%s (Sample Size - %d)" % (dist, n))

In [7]:
# (name, keyword arguments) pairs passed to generate_samples / qq_plot.
distributions = [
    ("Binomial", {"p": 0.5}),
    ("Poisson", {"lam": 5}),
    ("Geometric", {"p": 0.5}),
    # These three previously carried a "p" entry that generate_samples never
    # reads; they are sampled as Uniform(0, 1), Beta(1, 2) and
    # Gamma(shape=3, scale=1) respectively.
    ("Uniform", {}),
    ("Beta", {}),
    ("Gamma", {}),
    ("Chi-Squared", {"df": 2}),
    ("Cauchy", {}),
    ("Normal", {"mu": 0, "std": 1})
]

# Size of each sample whose mean is plotted; one subplot per entry.
sample_sizes = [2, 5, 10, 50]

In [8]:
# Seed the global NumPy generator (the one generate_samples draws from) so
# re-running this cell reproduces the same figures.
np.random.seed(42)

# Lay the subplots out two per row, with as many rows as sample_sizes needs.
n_cols = 2
n_rows = (len(sample_sizes) + n_cols - 1) // n_cols

for dist, args in distributions:
    # One explicit figure per distribution instead of reusing pyplot's
    # implicit current figure.
    fig = plt.figure()
    gs = grid.GridSpec(n_rows, n_cols)

    for i, ss in enumerate(sample_sizes):
        ax = fig.add_subplot(gs[i])
        qq_plot(dist, ax, n=ss, **args)

    fig.tight_layout()
    fig.savefig("plots/%s.png" % dist)
    # Close rather than clear: releases the figure's memory and avoids
    # leaving an empty figure behind for the notebook to display.
    plt.close(fig)

<Figure size 768x576 with 0 Axes>