# Demonstrate our estimator for Poisson observations

In [None]:
import numpy as np
from estimator import KS_test, estimateEntireLine
from sampling_utils import getSamples_poisson
from syntheticExperimentWrappers import drawAndEstimate_continuousDist
from utils import get_counts
from FWER_utils import estimateZeta_FWER_manyThresholds_parallel
import matplotlib.pyplot as plt
import scipy
import os

In [None]:
# The true distribution nu* is a two-component mixture: 85% of the mass sits at rate 1
#   (the 'null' in this setting), and 15% of the mass is beta-distributed at higher values.
dist = [scipy.stats.bernoulli(1),  # The null hypothesis; p=1 makes every draw equal 1 (a point mass at 1)
        scipy.stats.beta(a=6, b=8, scale=6, loc=2)]  # The alternative; loc/scale put its support on [2, 8]
prop = [0.85, 0.15]  # Mixing proportions, in the same order as `dist`

# Ask "what fraction of observations have true rate above gamma?" for 30 regularly-spaced gamma, and one large value
# (so we can get detail in the region where our estimator is nonzero, without wasting computation on a bunch of zeros)
mu2 = 5
gammas = list(np.linspace(1, 4.1, 30)) + [mu2 + 1]
n = 100000
alpha = 0.05
tolerance = 0.01

# Run the simulated experiment: draw the observations and compute the estimate at every threshold in `gammas`
observations, zetaHats = drawAndEstimate_continuousDist(
    distributionComponents=dist,
    mixingProportions=prop,
    n=n,
    tolerance=tolerance,
    alpha=alpha,
    gammas=gammas,
    discretization=1000,
    distribution="poi",
)

# FWER-based estimate on the same observations and thresholds.
# It is given the same `alpha` as the call above (rather than a second hard-coded 0.05),
#   so changing `alpha` keeps both estimators at the same level.
numCores = 10   # For parallelizing this computation across thresholds
zetaHats_FWER = estimateZeta_FWER_manyThresholds_parallel(
    observations,
    gammas,
    alpha=alpha,
    distribution="poi",
    numCores=numCores,
)

In [None]:
# Histogram of the simulated observations, saved under `folderName` as PNG, EPS and PDF.
plt.rcParams["figure.figsize"] = [4, 3.5]
#plt.rcParams["figure.figsize"] = [6,3]   # For the poster
plt.rcParams["font.size"] = 16
ax = plt.subplot(111)
plt.gcf().subplots_adjust(bottom=0.15, left=0.25)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.get_yaxis().set_ticks([])  # No y tick marks; the axis label alone is shown

ax.hist(observations)
ax.set_xlabel("Test statistic")
ax.set_ylabel("Count")
plt.tight_layout(pad=2)

folderName = "poissonPlots"
# exist_ok=True creates the folder if needed without the check-then-create race
#   of testing os.path.exists first.
os.makedirs(folderName, exist_ok=True)
for ext in ("png", "eps", "pdf"):
    plt.savefig(f"{folderName}/poi-testStats.{ext}")
plt.show()

In [None]:
# Compare the true fraction of mass above each threshold with the two estimates,
#   and save the figure under `folderName` as PNG, EPS and PDF.
ax = plt.subplot(111)
plt.gcf().subplots_adjust(bottom=0.15, left=0.25)
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)

plottingGrid = np.linspace(1, mu2+1, 500)

# True curve: for every non-null component, its mixing weight times the mass it puts
#   above each grid point (p - p*cdf), summed over those components.
trueZeta = sum(p-p*c.cdf(plottingGrid) for (p, c) in zip(prop[1:], dist[1:]))

ax.plot(plottingGrid, trueZeta, label=r'$\zeta_{\nu_*}$', linewidth=2)
ax.plot(gammas, zetaHats,
        label=r'$\widehat{\zeta}_{KS}$', color="purple", linestyle="--", linewidth=2)
ax.plot(gammas, zetaHats_FWER,
        label=r'$\widehat{\zeta}_{FWER}$', color="red", linestyle=":", linewidth=2)
#plt.legend(loc="upper right")  #Comment in for the poster
plt.ylabel("Fraction of mass\nabove threshold")
plt.xlabel("Threshold")
plt.tight_layout(pad=2)
for ext in ("png", "eps", "pdf"):
    plt.savefig(folderName+"/poi-est."+ext)
plt.show()