# Demonstrate our estimator for Binomial observations

In [None]:
import numpy as np
from estimator import KS_test, estimateEntireLine
from sampling_utils import getSamples_binomial
from syntheticExperimentWrappers import drawAndEstimate_continuousDist
from utils import get_counts
from FWER_utils import estimateZeta_FWER_manyThresholds_parallel
import matplotlib.pyplot as plt
import scipy
import os

In [None]:
# Load the stats submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is reachable as an attribute on every SciPy version, so
# relying on it only works if some other module happened to import it first.
import scipy.stats

# The true distribution nu* has 80% of the mass at P(success)=1/2 (the 'null' in this setting),
#   and 20% of the mass beta-distributed at higher values.
# Note, we add 1/2 to all means when we draw from the binomial, so that 0 is the 'null hypothesis' of P(success)=1/2.
# On this shifted scale, bernoulli(0) is a point mass at 0 and the beta component is supported on [0.05, 0.5].
dist = [scipy.stats.bernoulli(0),
        scipy.stats.beta(a=4, b=8, scale=0.45, loc=0.05)]
prop = [0.8, 0.2]   # Mixing weights, in the same order as `dist`

n = 100000
t = 20   # 20 trials for each X_i
# Ask "what fraction of observations have true P(success) above gamma?" for 30 regularly-spaced gamma, and one large value
# (so we can get detail in the region where our estimator is nonzero, without wasting computation on a bunch of zeros)
gammas = list(np.linspace(0.5, 0.75, 30)) + [1]
tolerance = 0.01
alpha = 0.05

# Run the simulated experiment.
# NOTE(review): drawAndEstimate_continuousDist is a project helper; it presumably draws the
# n observations and returns one estimate per entry of `gammas` -- confirm against its definition.
observations, zetaHats = drawAndEstimate_continuousDist(distributionComponents=dist,
                                                        mixingProportions=prop,
                                                        n=n,
                                                        tolerance=tolerance,
                                                        alpha=alpha,
                                                        gammas=gammas,
                                                        t=t,
                                                        discretization=1000,
                                                        distribution="binom")

# Second estimate on the same observations and thresholds, used as the comparison curve later on.
numCores = 10   # For parallelizing this computation across thresholds
zetaHats_FWER = estimateZeta_FWER_manyThresholds_parallel(observations, gammas,
                                                          alpha=alpha, t=t,
                                                          distribution='binom',
                                                          numCores=numCores)

In [None]:
# Histogram of the simulated test statistics, written to disk as PNG and EPS.
# The rcParams set here are global, so they also apply to figures created afterwards.
plt.rcParams["figure.figsize"] = [4,3.5]
plt.rcParams["font.size"] = 16
ax = plt.subplot(111)
plt.gcf().subplots_adjust(bottom=0.15, left=0.25)
# Hide the top/right frame lines and the y-axis ticks for a cleaner figure.
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
plt.gca().axes.get_yaxis().set_ticks([])
plt.tight_layout(pad=2)

plt.hist(observations)
plt.xlabel("Test statistic")
plt.ylabel("Count")

folderName = "binomPlots"
# exist_ok=True makes the directory creation idempotent and avoids the
# check-then-create race of os.path.exists() followed by os.makedirs().
os.makedirs(folderName, exist_ok=True)
plt.savefig(folderName+"/bin-testStats.png")
plt.savefig(folderName+"/bin-testStats.eps")
plt.show()

In [None]:
# Plot the true tail-mass curve together with both estimates, then save the
# figure as PNG, EPS and PDF in `folderName`.
ax = plt.subplot(111)
plt.gcf().subplots_adjust(bottom=0.15, left=0.25)
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)

plt.tight_layout(pad=2)

plottingGrid = np.linspace(0.5, 1, 500)
# The mixture components are defined on the scale shifted down by 1/2.
shiftedGrid = plottingGrid - 0.5
# For each non-null component: weight times (1 - CDF), i.e. the mass that
# component places above each threshold of the grid.
tailMassPerComponent = np.array(
    [p - p * c.cdf(shiftedGrid) for (p, c) in zip(prop[1:], dist[1:])]
)
trueZeta = sum(tailMassPerComponent)
ax.plot(plottingGrid, trueZeta, label=r'$\zeta_{\nu_*}$', linewidth=2)

# Overlay the two estimated curves, evaluated at the thresholds in `gammas`.
for estimate, texLabel, lineColor, lineStyle in (
    (zetaHats, r'$\widehat{\zeta}_{KS}$', "purple", "--"),
    (zetaHats_FWER, r'$\widehat{\zeta}_{FWER}$', "red", ":"),
):
    ax.plot(gammas, estimate, label=texLabel, color=lineColor,
            linestyle=lineStyle, linewidth=2)

ax.legend()
ax.set_ylabel("Fraction of mass\nabove threshold")
ax.set_xlabel("Threshold")

for fileName in ("/bin-est.png", "/bin-est.eps", "/bin-est.pdf"):
    plt.savefig(folderName + fileName)
plt.show()