## Central Limit Theorem

In [53]:
# !pip install ipywidgets

# !pip install voila

# !jupyter labextension list

# !jupyter labextension install @jupyter-widgets/jupyterlab-manager

In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from ipywidgets import widgets
from IPython.display import display

%matplotlib inline

In [2]:
# Number of values drawn from each distribution to act as the population.
pop_subset_size = 100000

# Catalogue of the selectable distributions. Every entry is a 3-tuple:
#   (numpy sampling function, keyword arguments for that function, display name)
dist_param_map = (
    (
        np.random.gamma,
        dict(shape=5.0, scale=10.0, size=pop_subset_size),
        'Gamma',
    ),
    (
        np.random.poisson,
        dict(lam=15.0, size=pop_subset_size),
        'Poisson',
    ),
    (
        np.random.uniform,
        dict(low=5.0, high=10.0, size=pop_subset_size),
        'Uniform',
    ),
    (
        np.random.normal,
        dict(loc=5.0, scale=1.0, size=pop_subset_size),
        'Gaussian',
    ),
)

In [3]:
# Sample sizes to compare: each entry is the number of observations drawn per
# sample, and the comparison figure gets one row of subplots per entry.
sample_sizes = [10, 25, 100, 250]

In [4]:
def replicate(times, func, *args):
    """Call ``func(*args)`` repeatedly and collect the return values in a list.

    Parameters
    ----------
    times : int
        How many calls to make. Zero or a negative value is treated as 1,
        so ``func`` is always called at least once.
    func : callable
        Function to invoke on every repetition.
    *args
        Positional arguments forwarded unchanged to ``func`` on each call.

    Returns
    -------
    list
        The return values of the calls, in call order.
    """
    repetitions = 1 if times <= 0 else times
    return [func(*args) for _ in range(repetitions)]

In [5]:
# sample with replacement => independent
def sample_mean(*args):
    """Draw a random sample via ``np.random.choice`` and return its mean.

    All positional arguments are forwarded unchanged to ``np.random.choice``
    (for example the population followed by the sample size), which samples
    with replacement unless told otherwise.
    """
    drawn = np.random.choice(*args)
    return np.mean(drawn)

In [6]:
# sample space distribution
def plot_distribution_of_sample_space(samples, size, axis):
    """Plot the sample space as a density histogram with a KDE curve on top.

    Parameters
    ----------
    samples : array-like
        Values whose distribution is drawn.
    size : int
        Number shown in the subplot title and in the histogram label.
    axis : matplotlib.axes.Axes
        Axes that receives both the histogram and the KDE curve.
    """
    histogram_options = {
        'bins': 30,
        'stat': 'density',
        'color': 'orange',
        'edgecolor': 'white',
        'alpha': 0.3,
        # the label only becomes visible if a legend is added to the axes
        'label': f'sample space size = {size}',
    }
    sns.histplot(samples, ax=axis, **histogram_options)
    sns.kdeplot(samples, ax=axis, color='blueviolet', lw=2)

    # titles and axis labels
    axis.set_title(f'Sample space size = {size}', fontsize=18)
    axis.set_xlabel('Bins', fontsize=14)
    axis.yaxis.label.set_size(14)


In [7]:
# sampling distribution
def plot_sampling_distribution_of_sample_means(statistic, size, axis):
    """Plot the sampling distribution as a density histogram with a KDE curve on top.

    Parameters
    ----------
    statistic : array-like
        Computed sample statistics (one value per drawn sample).
    size : int
        Sample size shown in the subplot title and in the histogram label.
    axis : matplotlib.axes.Axes
        Axes that receives both the histogram and the KDE curve.
    """
    histogram_options = {
        'bins': 30,
        'stat': 'density',
        'color': 'darkgreen',
        'edgecolor': 'white',
        'alpha': 0.3,
        # the label only becomes visible if a legend is added to the axes
        'label': f'sample size = {size}',
    }
    sns.histplot(statistic, ax=axis, **histogram_options)
    sns.kdeplot(statistic, ax=axis, color='darkblue', lw=2)

    # titles and axis labels
    axis.set_title(f'Sample size = {size}', fontsize=18)
    axis.set_xlabel('Bins', fontsize=14)
    axis.yaxis.label.set_size(14)


In [8]:
def update_clt_for_selected_distribution(index, output_layer):
    """Redraw the sample-size comparison figure for one distribution.

    A population is generated from the selected distribution. For every entry
    in ``sample_sizes`` one row of two subplots is drawn: the population on
    the left and the distribution of repeated sample means on the right.

    Parameters
    ----------
    index : int
        Position of the distribution in ``dist_param_map``.
    output_layer : ipywidgets.Output
        Output widget that is cleared and then receives the new figure.
    """
    # if all follow same distribution => identically distributed
    dist_func, dist_kwargs, dist_name = dist_param_map[index]
    sample_space = dist_func(**dist_kwargs)

    # repeat sample statistic calculation these many times
    rep = 5000

    with output_layer:
        # clear previous output
        output_layer.clear_output()

        # create subplot axes; squeeze=False keeps `axes` two-dimensional so the
        # row-wise loop below also works when there is only one sample size
        sns.set_style('white')
        n_rows = len(sample_sizes)
        fig, axes = plt.subplots(n_rows, 2, figsize=(7 * 2, 7 * n_rows), squeeze=False)

        # sample mean as the statistic for sampling distribution
        plt_title = '\n'.join([f'{dist_name} Distribution', 'sample size comparison'])
        fig.suptitle(plt_title, y=0.9, verticalalignment='bottom', fontsize=20)
        for (population_axis, means_axis), sample_size in zip(axes, sample_sizes):
            sample_statistic = replicate(rep, sample_mean, sample_space, sample_size)
            plot_distribution_of_sample_space(sample_space, pop_subset_size, population_axis)
            plot_sampling_distribution_of_sample_means(sample_statistic, sample_size, means_axis)

        # show figure in output layer; pyplot.show takes no figure argument,
        # it displays the open figures
        plt.show()

In [9]:
# define the output plot layer: one module-level Output widget that the event
# handler passes to the plotting routine (or clears) and that the render
# function displays
output_plot = widgets.Output()

# define the event handler
def on_change(change):
    """React to a change notification coming from the distribution dropdown.

    Only notifications of type ``'change'`` for the ``'value'`` trait are
    handled; anything else is ignored. The first dropdown position is the
    selection hint, so position ``n > 0`` maps to distribution ``n - 1``.
    Choosing the hint entry clears the plot output.

    Parameters
    ----------
    change : dict
        Change notification; ``change['owner']`` must expose ``index``.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return

    # position of the selected entry, including the leading hint option
    selected_position = change["owner"].index
    if selected_position > 0:
        # adjust offset for additional hint option
        update_clt_for_selected_distribution(selected_position - 1, output_plot)
    else:
        output_plot.clear_output()

# create the dropdown and render it
def render_distribution_selection_dropdown():
    """Build the distribution dropdown and display it with the plot output below.

    The dropdown lists a selection hint followed by the display names found in
    ``dist_param_map``. ``on_change`` is registered as its handler, and the
    shared ``output_plot`` widget is cleared before it is displayed.
    """
    # selection hint first, then the distribution names from their map
    dist_names = ['Make a selection', *(entry[2] for entry in dist_param_map)]

    # create the drop down with distribution names
    dd_dist = widgets.Dropdown(
        options=dist_names,
        index=0,
        description='Distribution:',
        disabled=False
    )

    # attach the event handler for `value` changes only, so it is not invoked
    # for every other trait of the widget that changes
    dd_dist.observe(on_change, names='value')

    # render
    display(dd_dist)
    output_plot.clear_output()
    display(output_plot)
    

---

In [10]:
# show the dropdown with the plot output below it; the figure is drawn once a
# distribution is selected
render_distribution_selection_dropdown()

Dropdown(description='Distribution:', options=('Make a selection', 'Gamma', 'Poisson', 'Uniform', 'Gaussian'),…

Output()

---