### Generate the training dataset

To obtain the estimator, we need a training dataset $\{d_{j}, c_{j}, \alpha_{j}\}$.
1. $\alpha_{j}$ is the nominal level, sampled from $p(\alpha)$ (uniform by default)

2. $d_{j}$ is the noisy data stream
    1. it is sampled from $p(d|\theta)$, where the $\theta_{j}$ are samples from the prior distribution $p(\theta)$
    2. noise is included when generating $d_{j}$, i.e. $d_{j}=h(\theta_{j})+\text{noise}$

3. $c_{j}$ is the indicator of whether the approximate Bayesian credible set contains $\theta_{j}$

#### The following cell generates 5000 training samples and the test signal $d_{o}$

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from tqdm import tqdm

from utils import PowerSpectralDensity, FFT, freq_PSD, inner_prod, waveform
from cal_fun import deriv_waveform,Gaussian,llike
#----------------------------------------------------
###
# step 1
###
# Simulation settings shared by the test signal (step 2) and the
# training-set loop (step 3).
#----------------------------------------------------
prior_sample_num = 5000     # number of training samples to simulate
prior_range = 1e-13         # half-width of the uniform prior interval for fdot
fdot_true = 1e-8            # fdot used to build the noise-free test signal
eps_true = 1e-6             # eps handed to the approximate waveform and its derivative

tmax = 120 * 3600                   # upper end of the time grid
fs = 2e-3                           # labelled "sampling rate"; only used to derive delta_t
delta_t = np.floor(0.01 / fs)       # spacing between consecutive time samples
# Time grid starting at 0 with step delta_t. np.arange leaves out the stop
# value, so the last sample is tmax - delta_t rather than tmax itself.
t = np.arange(0, tmax, delta_t)

#----------------------------------------------------
###
# step 2
###
# Build the test signal d_{o} (no noise is added to it) and allocate
# the arrays that the simulation loop fills in.
#----------------------------------------------------
h_true_t = waveform(fdot_true, t, 0)    # time-domain signal evaluated at fdot_true
h_true_f = FFT(h_true_t)                # the same signal after the FFT helper
data_f = h_true_f                       # d_{o}: the signal alone, without a noise term

# One entry per simulated sample: coverage indicator c_{j} and nominal level alpha_{j}.
c = np.zeros(prior_sample_num)
nominal_l = np.zeros(prior_sample_num)

# Row 0 holds the real part of d_{o}; rows 1..prior_sample_num are reserved
# for the simulated data streams d_{j}.
n_rows = prior_sample_num + 1
y_sample = np.zeros((n_rows, len(data_f)))
y_sample[0] = data_f.real

#------------------------------------------------------------
###
# step 3
###
# Simulation process: every loop iteration produces one training
# triple {\alpha_j, d_j, c_j}.
#------------------------------------------------------------

# Quantities that depend only on the time grid (t, delta_t) from step 1 and
# on eps_true. They are identical for every sample, so they are computed once
# here instead of once per iteration.
N_t = int(2**(np.ceil(np.log2(len(t)))))   # len(t) rounded up to a power of two

# NOTE(review): hoisting this call assumes freq_PSD is deterministic and does
# not draw from np.random -- confirm against utils.freq_PSD.
freq, PSD = freq_PSD(t, delta_t)           # frequency bins and PSD

variance_noise_f = N_t * PSD / (4 * delta_t)    # noise variance used for both real and imaginary parts
N_f = len(variance_noise_f)                     # number of frequency-domain samples
noise_std_f = np.sqrt(variance_noise_f)

# Parameter-independent factor of the fdot derivative of the waveform.
deriv_prefactor = (0.5) * (2*np.pi * t**2)*(1-eps_true)

for s in tqdm(range(prior_sample_num)):

    # Per-sample seed so that each sample is reproducible on its own.
    # The order of the random draws below (prior, noise real part, noise
    # imaginary part, nominal level) determines the generated dataset and
    # must not be changed.
    np.random.seed(s*2023)

    #------------------------------------------------------------
    # draw theta_j = fdot from the uniform prior centred on fdot_true
    #------------------------------------------------------------
    fdot_prior_sample = np.random.uniform(fdot_true-prior_range, fdot_true+prior_range)
    true_params = fdot_prior_sample

    #------------------------------------------------------------
    # generate simulated signal d_j = h(theta_j) + noise
    #------------------------------------------------------------
    h_true_t = waveform(true_params, t, 0)
    h_true_f = FFT(h_true_t)         # signal in the frequency domain

    # Complex frequency-domain noise: independent zero-mean normal draws for
    # the real and imaginary parts with the same per-bin standard deviation.
    noise_f = np.random.normal(0, noise_std_f, N_f) + 1j*np.random.normal(0, noise_std_f, N_f)

    # sample from p(d|theta)
    data_f = h_true_f + noise_f

    # Only the real part is stored; |d_j|^2 and |d_j| were also tried but did
    # not improve the results.
    y_sample[s+1] = data_f.real

    #------------------------------------------------------------
    # posterior approximation via the Fisher matrix
    #------------------------------------------------------------
    exact_deriv_fdot_est = deriv_prefactor * deriv_waveform(true_params, t, np.pi/2, eps_true)
    deriv_fdot_fft_est = FFT(exact_deriv_fdot_est)
    deriv_vec_est = deriv_fdot_fft_est
    Fisher_Matrix_est = inner_prod(deriv_vec_est, deriv_vec_est, PSD, delta_t, N_t)
    Cov_Matrix_est = 1/Fisher_Matrix_est        # single parameter: covariance is the reciprocal
    precision_est = np.sqrt(Cov_Matrix_est)     # square root of the covariance (standard deviation)

    # Offset of the estimate from theta_j: project the residual between the
    # signal (eps = 0) and the approximate model (eps = eps_true), plus the
    # noise, onto the derivative and scale by the inverse Fisher matrix.
    h_approx_t = waveform(true_params, t, eps_true)
    h_approx_f = FFT(h_approx_t)
    residuals_f = h_true_f - h_approx_f+noise_f

    biass = inner_prod(deriv_vec_est, residuals_f, PSD, delta_t, N_t)
    bias_para = biass/Fisher_Matrix_est

    # sample from p(alpha)
    nominal = np.random.uniform(0.78, 0.97)
    nominal_l[s] = nominal

    #------------------------------------------------------------
    # c_j: does the two-sided credible interval at level alpha_j (Gaussian
    # approximation of the posterior) contain theta_j?
    #------------------------------------------------------------
    if np.abs(bias_para) <= norm.ppf(1-(1-nominal)/2)*precision_est:
        c[s] = 1

# Persist the training set; row 0 of y_sample is the test signal d_{o}.
np.save("./c_samples.npy", c)
np.save("./y_samples.npy", y_sample)
np.save("./nominal_samples.npy", nominal_l)

100%|██████████| 5000/5000 [03:28<00:00, 23.97it/s]
