# Explore the performance of our estimator in the two-spike Gaussian setting, for different values of gamma* (the alternate mean), as n increases

In [None]:
import os
import pickle
from multiprocessing import Pool, freeze_support
from time import time

import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.stats

from syntheticExperimentWrappers import drawAndEstimate_GaussianTwoSpike

In [None]:
# Number of worker processes handed to multiprocessing.Pool when the
# estimation jobs are run in parallel.
NUM_CORES = 30

In [None]:
# Folder in which the cached estimates are stored; create it if it is missing.
# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the folder and creating it.
folderName = "zetaHatVsN"
os.makedirs(folderName, exist_ok=True)

In [None]:
# Plot the estimate zetaHat(0) as n increases, for different separation
# regimes (values of the alternate mean mu2).
#
# For every (mu2, n) pair, numSamples independent estimates are produced by
# drawAndEstimate_GaussianTwoSpike and cached on disk as a pickle, so an
# interrupted run can pick up where it left off. The figure shows, per mu2,
# the median estimate against n together with a band between the 6th-smallest
# and 6th-largest estimates, plus a dashed horizontal line at the true zeta.

discretization = 1000
sigma = 1
zeta = 0.1
tolerance = 0.001
numSamples = 100
alpha = 0.05
linestyleList = ["-", "--", ":", "-."]
threshold = 0

# Directory holding the cached estimates. Create it up front so that a missing
# folder cannot make us lose a freshly computed batch when it is dumped.
cacheDir = "./zetaHatVsN"
os.makedirs(cacheDir, exist_ok=True)

# The sample sizes are the same for every mu2, so build them once.
nVals = [2**k for k in range(7, 20)]

zetaHatDict = dict()
plt.rcParams["font.size"] = 16
plt.rcParams["figure.figsize"] = [7,4.5]
ax = plt.subplot(111)
plt.tight_layout(pad=2)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
for i, mu2 in enumerate([0.5, 1, 3, 5]):
    print(mu2)
    zetaHatDict[mu2] = dict()
    avgZetaHats = []
    lbZetaHats = []
    ubZetaHats = []
    theoryLBs = []

    # Standard normal mass on [-mu2/2, mu2/2]; it only depends on mu2, so it
    # is computed once per curve rather than once per n.
    centralMass = scipy.stats.norm.cdf(0.5*mu2) - scipy.stats.norm.cdf(-0.5*mu2)

    for n in nVals:
        cachePath = cacheDir + "/zetaHats_" + str(n) + str(mu2) + ".p"
        try:
            # We may want to start and stop this task; this allows us to pick up where we left off.
            # NOTE: pickle must only be used on files this notebook wrote itself.
            with open(cachePath, 'rb') as cacheFile:
                zetaHats = pickle.load(cacheFile)
        except (FileNotFoundError, EOFError, pickle.UnpicklingError):
            # The file is missing, or truncated by an interrupted run: re-collect it.
            # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
            t = time()
            print(n)
            # Redraw the values every time
            jobs = [(threshold, n, zeta, mu2, discretization, sigma, tolerance, "KS", alpha) for _ in range(numSamples)]
            with Pool(NUM_CORES) as p:
                zetaHats = p.starmap(drawAndEstimate_GaussianTwoSpike, jobs)

            print('    (Elapsed time: {0:8.1f} minutes)'.format((time()-t)/60))
            with open(cachePath, 'wb') as cacheFile:
                pickle.dump(zetaHats, cacheFile)

        zetaHatDict[mu2][n] = zetaHats
        avgZetaHats.append(np.median(zetaHats))
        zetaHats = np.sort(zetaHats)
        lbZetaHats.append(zetaHats[5]) # 5th percentile, if we have 100 samples
        ubZetaHats.append(zetaHats[-6]) # 95th percentile, with 100 samples

        # Theoretical lower bound on the estimate, clipped at zero.
        lb = zeta - np.sqrt(2*np.log(2/(alpha**2))/(n*centralMass**2))
        theoryLBs.append(max(0.0, lb))

    # You could also choose to plot the theoretical lower bound, which gives a nice
    # agreement. But it makes the plot too busy.
    plt.semilogx(nVals, avgZetaHats, label=str(mu2), linestyle=linestyleList[i])
    plt.fill_between(nVals, lbZetaHats, ubZetaHats, alpha=0.2)
    #plt.semilogx(nVals, theoryLBs, color='C'+str(i), linestyle=':')


# Reference line at the true zeta. xmin/xmax are axes fractions in [0, 1]; the
# defaults already span the full width of the axes.
plt.axhline(y=zeta, linestyle='--', color="k")
plt.legend(title=r'$\gamma_*$', loc="lower right")
plt.xlabel("Number of hypotheses tested, n")
plt.ylabel(r'Estimated mass above zero, $\hat{\zeta}_n(0)$')
#plt.title(r'Median $\hat{\zeta}_n(0)$ with empirical 90% CIs from '+str(numSamples)+' samples')#\nwith theoretical lower bounds')
plt.savefig("./ZetaVsN.png")
plt.savefig("./ZetaVsN.eps")
plt.savefig("./ZetaVsN.pdf")