A notebook to illustrate/test `kmod.mctest.DC_FSSD`.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import kmod
import kgof
import kgof.goftest as gof
# submodules
from kmod import data, density, kernel, util, plot
from kmod import mctest as mct
import matplotlib
import matplotlib.pyplot as plt
import autograd.numpy as np
import scipy.stats as stats

In [None]:
# Apply the default matplotlib options provided by kmod's plot module.
plot.set_default_matplotlib_options()
# Manual rc settings, left commented out (not executed):
# # font options
# font = {
#     #'family' : 'normal',
#     #'weight' : 'bold',
#     'size'   : 18
# }

# plt.rc('font', **font)
# plt.rc('lines', linewidth=2)
# matplotlib.rcParams['pdf.fonttype'] = 42
# matplotlib.rcParams['ps.fonttype'] = 42

## Simple problem: 1D normal distributions

Data are drawn from $r=\mathcal{N}(\mu_r, \sigma_r^2)$. Two models: $p=\mathcal{N}(\mu_p, \sigma_p^2)$ and $q=\mathcal{N}(\mu_q, \sigma_q^2)$.

In [None]:
# Data generating distribution
# Base seed. It is also read as a global inside visual_test_1dGauss.
seed = 21

# Mean and variance of the data distribution r (a 1D normal).
# Both are also read as globals inside visual_test_1dGauss for the plot range.
mu_r = 0
var_r = 1
r = density.IsotropicNormal(np.array([mu_r]), var_r)
ds_r = r.get_datasource()
# sample data
n = 500
dat = ds_r.sample(n, seed=seed+1)
# X is the raw array stored in the sampled data object.
X = dat.data()

In [None]:
def visual_test_1dGauss(X, mu_p, var_p, mu_q, var_q, J=5):
    """
    Assume that both models p,q are 1D normal distributions.
    Plot p, q, and the data, and run the FSSD-based model
    comparison test (mct.DC_FSSD) on that same data.

    Besides its arguments, this function reads the notebook-level globals
    `seed`, `mu_r` and `var_r` (set in the data-generation cell): `seed` is
    used to draw the test locations, `mu_r`/`var_r` only to pick the range
    of the x-axis.

    :param X: data matrix (presumably n x 1 since the models are 1D). It is
        both plotted as a histogram and used as the sample for the test.
    :param mu_p: mean of the model p
    :param var_p: variance of the model p
    :param mu_q: mean of the model q
    :param var_q: variance of the model q
    :param J: number of test locations. The same J locations are shared by
        the two models.

    :returns the test object
    """
    # Function-scope imports keep the function usable regardless of which
    # later notebook cells have been run.
    from IPython.display import display
    from kgof.data import Data

    p = density.IsotropicNormal(np.array([mu_p]), var_p)
    q = density.IsotropicNormal(np.array([mu_q]), var_q)

    # kernel: Gaussian, bandwidth derived from util.meddistance on the data.
    # The same kernel object is used for both models.
    med = util.meddistance(X, subsample=1000)
    k = kernel.KGauss(sigma2=med**2/2.0)
    l = k

    # numbers of test locations
    Jp = J
    Jq = Jp
    # test locations: a single set of Jp points (drawn by
    # util.fit_gaussian_draw from X), shared by p and q
    locs = util.fit_gaussian_draw(X, Jp, seed=seed+1)
    V = locs
    W = V
    assert W.shape[0] == Jq

    # Plot p, q, data
    min_mean = min(mu_r, mu_p, mu_q)
    max_mean = max(mu_r, mu_p, mu_q)
    max_sd = max(var_r, var_p, var_q)**0.5

    # Plot range: two (largest) standard deviations beyond the extreme means.
    dom = np.linspace(min_mean-2*max_sd, max_mean+2*max_sd, 200)
    den_p = np.exp(p.log_normalized_den(dom[:, np.newaxis]))
    den_q = np.exp(q.log_normalized_den(dom[:, np.newaxis]))

    plt.figure(figsize=(10, 5))
    # `density=True` replaces the `normed=True` keyword, which was removed
    # from matplotlib's hist().
    plt.hist(X, bins=20, density=True, label='Data', color='k')
    plt.plot(dom, den_p, 'r-', label='p')
    plt.plot(dom, den_q, 'b-', label='q')
    plt.title('H1: q fits better than p')
    plt.legend(loc='best')

    # test
    alpha = 0.01
    mcfssd = mct.DC_FSSD(p, q, k, l, V, W, alpha=alpha)
    # Test on the data passed in as X (wrapped in a Data object) rather than
    # on a notebook global, so the plot and the test always agree.
    test_result = mcfssd.perform_test(Data(X))
    display(test_result)
    return mcfssd

In [None]:
# two competing models. H0: FSSD^2(p) <= FSSD^2(q) i.e., p is better than q.
# mu_p, var_p, mu_q, var_q stay in the notebook namespace and are reused by later cells.
mu_p, var_p = 1.5, 2
mu_q, var_q = 0.5, 1
mcfssd = visual_test_1dGauss(X, mu_p, var_p, mu_q, var_q, J=5)

In [None]:
# Show what get_H1_mean_variance returns for the sampled data
# (presumably the mean and variance of the test statistic under H1 -- confirm against kmod.mctest).
mcfssd.get_H1_mean_variance(dat)

In [None]:
# Interactive version of the test: sliders control the means and variances of p and q.
# NOTE(review): ipywidgets is imported twice (plain and as `widgets`) and
# `interactive` is not used in this cell; consider moving these imports to the top cell.
import ipywidgets
from ipywidgets import interact, interactive, fixed
from IPython.display import display
import ipywidgets as widgets

# Sliders for the model means, in steps of 0.5 over [-3, 3].
mup_slide = ipywidgets.FloatSlider(value=1, min=-3, max=3, step=0.5)
muq_slide = ipywidgets.FloatSlider(value=0.5, min=-3, max=3.0, step=0.5)
# X is held fixed; the (1e-4, 5, 0.5) tuples are ipywidgets slider abbreviations
# for the variances (presumably read as (min, max, step) -- confirm in the ipywidgets docs).
vs = interact(visual_test_1dGauss, X=fixed(X), mu_p=mup_slide, var_p=(1e-4, 5, 0.5),
                mu_q=muq_slide, var_q=(1e-4, 5, 0.5))
# NOTE(review): interact() is documented to return the wrapped function, so this
# call likely only shows the function's repr -- confirm and drop if so.
display(vs)

----------------

## Stein witness functions

In [None]:
# Rebuild the two models from the mu_p/var_p/mu_q/var_q globals set in an earlier cell.
p = density.IsotropicNormal(mean=np.array([mu_p]), variance=var_p)
q = density.IsotropicNormal(mean=np.array([mu_q]), variance=var_q)
# Fresh sample of 500 points from the data source ds_r, with its own fixed seed.
rdat = ds_r.sample(n=500, seed=8)

# Gaussian kernel whose bandwidth is derived from util.meddistance on the new sample.
med = util.meddistance(rdat.data(), subsample=1000)
k = kernel.KGauss(sigma2=med**2/2.0)

In [None]:
# Stein witness objects for each model, built with the kernel k and the sample rdat.
wit_pr = gof.SteinWitness(p, k, rdat)
wit_qr = gof.SteinWitness(q, k, rdat)
# Evaluate both witnesses on a grid of 100 points in [-5, 5] (passed as a column).
dom = np.linspace(-5, 5, 100)
wit_pr_evals = wit_pr(dom[:, np.newaxis])
wit_qr_evals = wit_qr(dom[:, np.newaxis])
# Pointwise difference of the squared witness values (p minus q).
diff_wit2 = wit_pr_evals**2 - wit_qr_evals**2

In [None]:
# Draw the densities, the histogram of rdat and the test result, then overlay
# the witness curves on the figure created by visual_test_1dGauss.
visual_test_1dGauss(rdat.data(), mu_p, var_p, mu_q, var_q)

plt.plot(dom, wit_pr_evals, 'r--', linewidth=3, label='wit p,r')
plt.plot(dom, wit_qr_evals, 'b--', linewidth=3, label='wit q,r')
# The difference curve is multiplied by 90 before plotting (presumably so it is
# visible at the scale of the other curves). The label is a raw string so that
# the LaTeX backslash is not parsed as a Python escape sequence.
plt.plot(dom, diff_wit2*90, 'm-', label=r'$\mathrm{wit}^2$ Diff')
# Put the legend outside the axes, to the right.
plt.legend(loc='lower left', bbox_to_anchor=(1., 0.))
plt.savefig('dcfssd_stein_diff.pdf', bbox_inches='tight')