This notebook estimates the noise level using auto-correlation visibilities and compares it with the posterior probability of the empirical standard deviation of the noise.

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from hera_cal.io import HERAData, HERACal
from scipy.optimize import curve_fit
import hera_pspec as hp
from pyuvdata import UVData
from scipy.stats import norm

In [None]:
# PDF of the complex double Gaussian (CNN) distribution,
# which describes the real part of the power of the noise.
def real_pdf(z, s):
    """Evaluate the density (1/s) * exp(-|2z| / s).

    Parameters
    ----------
    z : scalar or array
        Point(s) at which the density is evaluated.
    s : scalar or array
        Scale parameter; it is used as a divisor, so it must be non-zero.

    Returns
    -------
    Same kind as the broadcast of ``z`` and ``s``: the density value(s).
    """
    inv_scale = 1 / s
    return inv_scale * np.exp(-np.abs(2 * z) / s)

# Compute logarithm of the posterior probability function
def log_prob(data, sigma, min_sig, max_sig):
    """Evaluate the log-posterior of the scale parameter on a 1000-point grid.

    The likelihood of each sample ``z`` is the density ``(1/s) * exp(-|2z| / s)``
    and the prior on ``s`` is a Gaussian centred on ``sigma`` with unit standard
    deviation.  The log-likelihood is evaluated in closed form,

        sum_j log p(z_j | s) = -n * log(s) - sum_j |2 z_j| / s,

    rather than as ``log(pdf)``, so it stays finite when ``exp(-|2z| / s)``
    would underflow to zero, and it is computed for the whole grid at once.

    Parameters
    ----------
    data : array-like
        Samples; every element contributes one likelihood term.
    sigma : scalar or length-1 array
        Centre of the Gaussian prior.
    min_sig, max_sig : scalar or length-1 array
        End points of the grid, passed to ``np.linspace``.

    Returns
    -------
    list
        ``[sigmas, log_posterior]``: the grid and the log-posterior evaluated on
        it, both with the shape produced by ``np.linspace(min_sig, max_sig, 1000)``.
        Grid values that are not strictly positive give non-finite entries.
    """
    sigmas = np.linspace(min_sig, max_sig, 1000)

    # log of the Gaussian prior (unit width) at every grid point
    log_prior = norm.logpdf(sigmas, loc=sigma, scale=1)

    # Sufficient statistics of the samples: their count and the sum of |2 z|.
    samples = np.asarray(data)
    n_samples = samples.size
    abs_total = np.sum(np.abs(2 * samples))

    # Closed-form log-likelihood, broadcast over the sigma grid.
    log_likelihood = -n_samples * np.log(sigmas) - abs_total / sigmas

    # return the posterior probability for each parameter sigma
    return [sigmas, np.asarray(log_likelihood + log_prior)]

In [None]:
# load beam model
# `beamfile` is a bare file name, so it is resolved relative to the working directory.
beamfile = 'HERA_NF_dipole_power.beamfits'
# Cosmology object built with no arguments; it is handed to the beam below.
cosmo = hp.conversions.Cosmo_Conversions()
# Beam object later used to obtain the Jy -> mK conversion factors.
uvb = hp.pspecbeam.PSpecBeamUV(beamfile, cosmo=cosmo)

In [None]:
# load data into UVData objects
# `dfile` is a bare file name, so it is resolved relative to the working directory.
dfile = 'zen.2458101.clean-002.uvh5'
# Create an empty UVData container and fill it from the file.
uvd = UVData()
uvd.read(dfile)

In [None]:
# find conversion factor from Jy to mK
# One factor per unique value in uvd.freq_array, requested for the 'XX' polarization.
Jy_to_mK = uvb.Jy_to_mK(np.unique(uvd.freq_array), pol='XX')
# Scale the visibilities in place; the indexing lines the factors up with the third
# axis of the 4-D data_array (presumably the frequency axis -- confirm).
# NOTE: this is an in-place multiplication, so re-running this cell applies the
# factor a second time; reload `uvd` before executing it again.
uvd.data_array *= Jy_to_mK[None, None, :, None]

In [None]:
# first and last channel index of the spectral window
spw_range = [520, 690]

# integration time of the first sample [s]
t = uvd.integration_time[0]

# mean channel width across the spectral window [Hz]:
# frequency difference between its end channels divided by the number of steps
freqs = uvd.freq_array[0]
b = (freqs[spw_range[1]] - freqs[spw_range[0]]) / (spw_range[1] - spw_range[0])
# alternative: spacing of the first two channels, freqs[1] - freqs[0]

In [None]:
# auto-correlation visibilities of antennas 83 and 84 ('xx' polarization)
v1, v2 = (uvd.get_data([ant, ant, 'xx']) for ant in (83, 84))

# average each of them over the first axis (time)
avg_v1, avg_v2 = v1.mean(axis=0), v2.mean(axis=0)

In [None]:
# Noise level in frequency space from the auto-visibilities: product of the two
# time-averaged autos divided by (channel width * integration time).
s_n = avg_v1 * avg_v2 / (b * t)

# Carry s_n into delay space by scaling with the number of channels (Parseval's theorem).
# NOTE(review): the count is the full length of avg_v1, not the width of spw_range
# used for the power spectrum -- confirm this is intended.
n_chan = len(avg_v1)
s_fftn = n_chan * s_n

During the derivation of the $\mathcal{CNN}$ distribution, we assumed that the standard deviation of the (noise) power is the product of the standard deviations of the visibilities. Let $\sigma_p$ be the standard deviation of the power; then $\sigma_p = \sqrt{\left(\frac{\sigma^2_{\tilde{n}}}{B_{full}}\right)^2}$, where $B_{full}$ is the full bandwidth and $\sigma^2_{\tilde{n}}$ is the variance of the noise in delay space.

In [None]:
# divided by fullband width to get the noise level in pspec
# NOTE(review): the divisor below is the difference between the first two entries
# of uvd.freq_array[0] (one channel spacing), whereas this comment and the
# preceding text speak of the full bandwidth -- confirm which one is intended.
# The sqrt(x**2) form follows the sigma_p expression given in the text.
s_p = np.sqrt((s_fftn / (uvd.freq_array[0][1] - uvd.freq_array[0][0]))**2)

# get the maximum and minimum values of the real part of s_p
max_s = max(s_p.real)
min_s = min(s_p.real)

In [None]:
# New UVData object restricted to the unique times with indices 16 through 44
# (the slice end, 45, is exclusive); `uvd` itself is left untouched.
unique_times = np.unique(uvd.time_array)
uvd1 = uvd.select(times=unique_times[16:45], inplace=False)

In [None]:
# estimate auto-baseline power spectrum *without normalization*
# Both entries of `dsets` are the same object (uvd1), and no beam or weights are supplied.
# NOTE(review): crossing a dataset with itself -- confirm this is the intended estimator.
ds = hp.PSpecData(dsets=[uvd1, uvd1], wgts=[None, None], beam=None)
ds.rephase_to_dset(0)
# The visibilities were scaled to mK earlier, so label both datasets accordingly.
ds.dsets[0].vis_units = 'mK'
ds.dsets[1].vis_units = 'mK'
baselines = [(83, 84)]
# The spectral window comes from `spw_range` (defined with the channel width) so
# the power spectrum and the noise estimate cannot drift apart.
uvp = ds.pspec(baselines, baselines, (0, 1), [('xx', 'xx')], spw_ranges=[tuple(spw_range)],
               input_data_weight='identity', norm='I',
               taper='none', verbose=True)

# Pull out the single spectral window / baseline-pair / polarization that was computed.
spw = 0
# delays scaled by 1e9 (presumably seconds -> nanoseconds; confirm)
dlys = uvp.get_dlys(spw) * 1e9
# baseline pair built from `baselines` instead of repeating the antenna numbers
blp = (baselines[0], baselines[0])
key = (spw, blp, 'xx')
# keep only the real part of the power
power = np.real(uvp.get_data(key))

In [None]:
# Take delay bins 110-149 (the high delays) and average the power over time (axis 0).
data = power[:, 110:150].mean(axis=0)

# Fit the CNN density to the time-averaged high-delay power to get an empirical std:
# histogram the samples as a density and evaluate it at the bin centres ...
y, bin_edges = np.histogram(data, bins='auto', density=True)
x = 0.5 * (bin_edges[:-1] + bin_edges[1:])
# ... then fit real_pdf to it, starting from the sample standard deviation.
# `sig` is the array of fitted parameters (one entry).
sig, sig_cov = curve_fit(real_pdf, x, y, p0=np.std(data))

# log of the posterior of the empirical std on a grid spanning sig - 50 to sig + 50
log_p = log_prob(data, sig, min_sig=sig - 50, max_sig=sig + 50)

In [None]:
# plot sigma_n and sigma_p
# Vertical extent of the marker lines: the finite range of the log-posterior.
# nanmin/nanmax ignore NaN entries (e.g. from non-positive grid values), which the
# builtin min/max handle in an order-dependent way.
log_post_lo = np.nanmin(log_p[1])
log_post_hi = np.nanmax(log_p[1])

# Raw strings keep the LaTeX backslashes from being read as (invalid) escape
# sequences; the rendered label text is unchanged.
plt.figure(figsize=(8, 5))
# vertical lines at the largest and smallest auto-correlation noise estimates
plt.plot([max_s, max_s], [log_post_lo, log_post_hi], label=r'max $\sigma_{auto}$=%.2f' % max_s)
plt.plot([min_s, min_s], [log_post_lo, log_post_hi], label=r'min $\sigma_{auto}$=%.2f' % min_s)
# log-posterior curve of the fitted std; `sig` holds one fitted value
plt.plot(log_p[0], log_p[1], label=r'$\sigma_{pspec}$=%.2f' % tuple(sig))
plt.xlabel(r"$\sigma$", fontsize=14)
plt.legend(fontsize=10)
plt.ylabel(r"$\ln(Pr(\sigma|P))$", fontsize=14)
plt.show()