In [None]:
%run fit_diagnostics.py
%run red_likelihood.py
%run red_utils.py
%run plot_utils.py
%run test_fns.py

In [None]:
%config Completer.use_jedi = False

## Final flagging from smooth_abs calfits

In [None]:
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

In [None]:
plt.rcParams['figure.figsize'] = (12, 8)
%matplotlib inline

In [None]:
plot_figs = True
if plot_figs:
    import matplotlib as mpl
    mpl.rcParams['figure.dpi'] = 300

In [None]:
# Reference dataset (JD1) and the integer Julian day it is compared against.
JD1 = 2458098.43869
JD_comp = 2458099
# match_lst is evaluated once per tint value (0, then -1).
# NOTE(review): presumably these are the two JD_comp datasets that straddle JD1
# in LST -- confirm against match_lst.
JD2, JD3 = (match_lst(JD1, JD_comp, tint=shift) for shift in (0, -1))

In [None]:
band_edge = 50  # frequency channels to cut at each end of the band
add_ba = 14  # additional bad antennas

# Same recipe for all three datasets: average the smooth_abs flags over axis 2,
# truncate to int, and drop band_edge channels from both ends of axis 0.
# NOTE(review): axis 2 is presumably the antenna axis -- confirm in calfits_to_flags.
flags1, flags2, flags3 = (
    np.mean(
        calfits_to_flags(jd, 'smooth_abs', pol='ee', add_bad_ants=add_ba), axis=2
    ).astype(int)[band_edge:-band_edge, :]
    for jd in (JD1, JD2, JD3)
)

In [None]:
last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
last1 = last_df[last_df['JD_time'] == JD1]['LASTs'].values[0]
last2 = last_df[last_df['JD_time'] == JD2]['LASTs'].values[0]
_, offset = find_nearest(last2, last1[0])

In [None]:
flagsc = np.concatenate((flags2[:, offset:], flags3[:, :offset]), axis=1) * 2
flagsf = flags1 + flagsc

In [None]:
fig, ax = plt.subplots(figsize=(11, 7))

# One colour per flag code, in order 0, 1, 2, 3
colors = ((1.0, 1.0, 1.0), 'C2', 'C10', (1., 0., 0.))
cmap = LinearSegmentedColormap.from_list('Custom', colors, len(colors))

# Transposed so that rows are time integrations and columns frequency channels
ax = sns.heatmap(flagsf.T, cmap=cmap, vmin=0, vmax=3)

ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=-50))
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set(xlabel='Frequency channel', ylabel='Time integration')

# Put one tick at the centre of each of the four equal-width bands of [0, 3]
colorbar = ax.collections[0].colorbar
colorbar.set_ticks(0.375 + 0.75 * np.arange(4))
colorbar.set_ticklabels(['False', '98', '99', 'Both'])

plt.tight_layout()
plt.show()

## Negative log-likelihood histograms

In [None]:
deg_df_path = 'deg_dfs'
rel_df_path = 'rel_dfs'

In [None]:
deg_df_j = find_deg_df(JD1, 'ee', 'jd', 'gaussian', dir=deg_df_path)
df_j = pd.read_pickle(deg_df_j)

In [None]:
flagsr = flagsf.ravel(order='F').astype(int).astype(bool)
deg_values = df_j['fun'].values.astype(float)

In [None]:
flagged_hist(deg_values, flagsr, xlabel=r'$-\ln(\mathcal{L}^G_\mathrm{deg})$', lower_cut=None, \
             upper_cut=0.008, bin_width=0.0002, hist_start=0, ylim=(0, 7000))

In [None]:
suspect_threshold = 0.009
suspect_slices = []
for i, n in enumerate(deg_values):
    if ~flagsr[i]:
        if n > suspect_threshold:
            suspect_slices.append(df_j.index.values[i] + (i, n))
suspect_slices            

### Median absolute normalized residual

In [None]:
# abs_residuals yields two values per row of 'norm_residual'; they are stored
# as the Re and Im columns.
df_j[['med_abs_norm_res_Re', 'med_abs_norm_res_Im']] = df_j.apply(
    lambda row: pd.Series(abs_residuals(row['norm_residual'])),
    axis=1,
)

# Combine the two components in quadrature
df_j['med_abs_norm_res_comb'] = np.sqrt(
    (df_j['med_abs_norm_res_Re'] ** 2 + df_j['med_abs_norm_res_Im'] ** 2).values
)

rman_values = df_j['med_abs_norm_res_comb'].to_numpy().astype(float)

In [None]:
%run plot_utils.py

In [None]:
flagged_hist(rman_values, flagsr, xlabel=r'$\mathcal{R}_{man}$', lower_cut=None, \
             upper_cut=0.2, bin_width=0.0025, hist_start=0, ylim=(0, 9000), figsize=(6,6))

In [None]:
suspect_threshold = 0.2
suspect_slices = []
for i, n in enumerate(rman_values):
    if ~flagsr[i]:
        if n > suspect_threshold:
            suspect_slices.append(df_j.index.values[i] + (i, n))
suspect_slices            

In [None]:
plot_res_heatmap(df_j, 'fun', index='time_int1', columns='freq', clip=True, clip_pctile=95, vmin=0)

### Normalize negative log-likelihoods by visibility amplitude mean/median

In [None]:
plot_res_heatmap(df_j, 'fun', index='time_int1', columns='freq', clip=True, clip_pctile=95)

In [None]:
rel_df_j = pd.read_pickle(find_rel_df(JD1, 'ee', 'gaussian', dir=rel_df_path))

In [None]:
# Metadata pickle stored in the relative-dataframe directory for JD1 / 'ee'.
# The directory comes from rel_df_path so that it cannot drift from the
# location passed to find_rel_df.
# NOTE(review): pickle.load can execute arbitrary code; only open metadata files
# produced by this project.
md_file = os.path.join(rel_df_path, 'rel_df.{}.{}.md.pkl'.format(JD1, 'ee'))
with open(md_file, 'rb') as f:
    md = pickle.load(f)

no_unq_bls = md['no_unq_bls']
no_min_p = 5 # number of columns in df that are attributes of the SciPy OptimizeResult
# Keep the 2 * no_unq_bls columns that follow the optimizer columns. An explicit
# copy is taken because new columns are assigned to vis_df in later cells;
# assigning into a bare iloc slice triggers pandas' SettingWithCopyWarning.
vis_df = rel_df_j.iloc[:, no_min_p:no_unq_bls*2+no_min_p].copy()

In [None]:
# Per-row mean and median of the visibility amplitudes. Only the first
# 2 * no_unq_bls entries of each row are used, so the statistic columns
# added here never feed back into the computation.
for _name, _reduce in (('vamp_mean', np.mean), ('vamp_median', np.median)):
    vis_df[_name] = vis_df.apply(
        lambda row: _reduce(np.abs(makeCArray(row[:no_unq_bls*2].values.astype(float)))),
        axis=1,
    )

In [None]:
# Relative dataframes of the two comparison-day datasets
rel_df_j2a = pd.read_pickle(find_rel_df(JD2, 'ee', 'gaussian', dir=rel_df_path))
rel_df_j2b = pd.read_pickle(find_rel_df(JD3, 'ee', 'gaussian', dir=rel_df_path))

Nfreqs = rel_df_j.index.get_level_values('freq').unique().size
Ntints = rel_df_j.index.get_level_values('time_int').unique().size

indices = ['freq', 'time_int']

# Build each piece as a non-mutating chain: the original filtered the frame and
# then modified the result in place, which raises SettingWithCopyWarning.
# NOTE(review): np.tile assumes rows are ordered by freq first, then time_int,
# with every freq holding the same number of integrations -- confirm.

# JD2 contributes integrations offset.., renumbered to 0 .. Ntints-offset-1
rel_df_j2a = (
    rel_df_j2a[rel_df_j2a.index.get_level_values('time_int') >= offset]
    .reset_index()
    .assign(time_int=np.tile(np.arange(Ntints - offset), Nfreqs))
    .set_index(indices)
)

# JD3 contributes integrations 0..offset-1, renumbered to Ntints-offset .. Ntints-1
rel_df_j2b = (
    rel_df_j2b[rel_df_j2b.index.get_level_values('time_int') < offset]
    .reset_index()
    .assign(time_int=np.tile(np.arange(Ntints - offset, Ntints), Nfreqs))
    .set_index(indices)
)

rel_df_j2 = pd.concat([rel_df_j2a, rel_df_j2b])

In [None]:
vis_df2 = rel_df_j2.iloc[:, no_min_p:no_unq_bls*2+no_min_p]

In [None]:
vis_df2['vamp_mean'] = vis_df2.apply(lambda row: \
                       np.mean(np.abs(makeCArray(row[:no_unq_bls*2].values.astype(float)))), \
                       axis=1)

vis_df2['vamp_median'] = vis_df2.apply(lambda row: \
                         np.median(np.abs(makeCArray(row[:no_unq_bls*2].values.astype(float)))), \
                         axis=1)

In [None]:
vis_df['vamp_mean_xdmean'] = (vis_df['vamp_mean'] + vis_df2['vamp_mean']) / 2
vis_df['vamp_median_xdmean'] = (vis_df['vamp_median'] + vis_df2['vamp_median']) / 2

In [None]:
plot_res_heatmap(vis_df, 'vamp_mean', clip=True, clip_pctile=97)

In [None]:
plot_res_heatmap(vis_df, 'vamp_median', clip=True, clip_pctile=97)

In [None]:
plot_res_heatmap(vis_df, 'vamp_mean_xdmean', clip=True, clip_pctile=97)

In [None]:
plot_res_heatmap(vis_df, 'vamp_median_xdmean', clip=True, clip_pctile=97)

In [None]:
vis_df.reset_index(inplace=True)
vis_df.rename(columns={'time_int': 'time_int1'}, inplace=True)
vis_df.set_index(['time_int1', 'freq'], inplace=True)

In [None]:
df_j['mean_nrm_nll'] = df_j['fun'] / vis_df['vamp_mean']**2
df_j['median_nrm_nll'] = df_j['fun'] / vis_df['vamp_median']**2

In [None]:
plot_res_heatmap(df_j, 'mean_nrm_nll', index='time_int1', columns='freq', clip=True, clip_pctile=97)

In [None]:
plot_res_heatmap(df_j, 'median_nrm_nll', index='time_int1', columns='freq', clip=True, clip_pctile=97)

In [None]:
mean_nrm_nll = df_j['mean_nrm_nll'].values.astype(float)
median_nrm_nll = df_j['median_nrm_nll'].values.astype(float)

In [None]:
flagged_hist(mean_nrm_nll, flagsr, xlabel=r'$-\ln(\mathcal{L}^G_\mathrm{deg}) \; / \; \overline{\left| V_\mathrm{obs} \right|}$', \
             lower_cut=None, upper_cut=25, bin_width=0.5, hist_start=0, ylim=None)

In [None]:
flagged_hist(median_nrm_nll, flagsr, xlabel=r'$-\ln(\mathcal{L}^G_\mathrm{deg}) \; / \; \mathrm{med} \left( \left| V_\mathrm{obs} \right| \right)}$', lower_cut=None, \
             upper_cut=7, bin_width=0.1, hist_start=0, ylim=None)

In [None]:
# Unflagged entries above the threshold; each record is the dataframe index
# tuple extended with the offending value.
suspect_threshold = 7.5
suspect_slices = [
    df_j.index.values[i] + (n,)
    for i, n in enumerate(median_nrm_nll)
    if not flagsr[i] and n > suspect_threshold
]
suspect_slices

### Noise from autos

In [None]:
# Load inferred noise on data
noise_file = os.path.join('..', 'zen.{}.HH.noise_std.uvh5'.format(JD1))
hd_noise = HERAData(noise_file)
noise, noise_flags, _  = hd_noise.read()
RedG = md['redg']

In [None]:
bl_var = np.empty((RedG.shape[0], md['Ntimes'], md['Nfreqs']), dtype=complex)
for i in range(bl_var.shape[0]):
    bl_var[i, ...] = noise[(int(RedG[i, 1]), int(RedG[i, 1]), 'ee')] * \
                     noise[(int(RedG[i, 2]), int(RedG[i, 2]), 'ee')]

In [None]:
fig, ax = plt.subplots(figsize=(11, 7))
arr = np.absolute(bl_var.mean(axis=0))[:, 50:-50]
vmax = numpy.ceil(numpy.nanpercentile(arr, 92)*100)/100
ax = sns.heatmap(arr, vmax=vmax, vmin=None, cmap=sns.cm.rocket_r, center=None)
ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=-50))
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set_xlabel('freq')
ax.set_ylabel('time_int')
plt.tight_layout()
plt.show()

In [None]:
noise_nrm_nll = df_j['fun'].values.astype(float) / arr.ravel()

In [None]:
fig, ax = plt.subplots(figsize=(11, 7))
arr = noise_nrm_nll.reshape((60, 924))
vmax = numpy.ceil(numpy.nanpercentile(arr, 95)*10000)/10000
ax = sns.heatmap(arr, vmax=vmax, vmin=0, cmap=sns.cm.rocket_r, center=None)
ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=-50))
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set_xlabel('freq')
ax.set_ylabel('time_int')
plt.tight_layout()
plt.show()

In [None]:
# Histogram of the noise-normalised values, cut at the colour-scale cap (vmax)
# of the preceding heatmap. The label previously contained an unmatched '}'
# after '^2', which the mathtext parser rejects; it has been removed.
flagged_hist(
    noise_nrm_nll,
    flagsr,
    xlabel=r'$-\ln(\mathcal{L}^G_\mathrm{deg}) \; / \; \sigma_{98}^2 $',
    lower_cut=None,
    upper_cut=vmax,
    bin_width=None,
    hist_start=0,
    ylim=None,
)

In [None]:
# Unflagged entries above the threshold; each record is the dataframe index
# tuple extended with the offending value.
suspect_threshold = 0.0001
suspect_slices = [
    df_j.index.values[i] + (n,)
    for i, n in enumerate(noise_nrm_nll)
    if not flagsr[i] and n > suspect_threshold
]
suspect_slices

In [None]:
# Load the inferred noise for the two comparison-day datasets (JD2, then JD3)
# and build, for each, the same per-baseline product of 'ee' auto-correlation
# noise entries as for JD1.
_day2_products = []
for _jd in (JD2, JD3):
    noise_file = os.path.join('..', 'zen.{}.HH.noise_std.uvh5'.format(_jd))
    hd_noise = HERAData(noise_file)
    noise, _, _  = hd_noise.read()

    _product = np.empty((RedG.shape[0], md['Ntimes'], md['Nfreqs']), dtype=complex)
    for i in range(_product.shape[0]):
        _product[i, ...] = noise[(int(RedG[i, 1]), int(RedG[i, 1]), 'ee')] * \
                           noise[(int(RedG[i, 2]), int(RedG[i, 2]), 'ee')]
    _day2_products.append(_product)

bl_var2a, bl_var2b = _day2_products

In [None]:
bl_var2 = np.concatenate((bl_var2a[:, offset:, :], bl_var2a[:, :offset, :]), axis=1)

In [None]:
bl_varca = (bl_var + bl_var2) / 2

In [None]:
from scipy.stats import mode
hd = HERAData(find_zen_file(JD1))
# Redundant baseline groups for 'ee', filtered against the bad antennas of JD1
reds = fltBad(get_reds(hd.antpos, pols=['ee']), get_bad_ants(find_zen_file(JD_time=JD1)))
bl_types = RedG[:, 0]
# Select the modal baseline type. Depending on the SciPy version, mode() returns
# either a length-1 array or a scalar for 1-D input; atleast_1d handles both,
# whereas the previous `[0][0]` fails on the scalar form.
slct_bl_type_id = np.atleast_1d(mode(bl_types)[0])[0]
slct_bl_type = reds[slct_bl_type_id][0]
print(slct_bl_type) # 14 m EW baselines

In [None]:
ew_14_bls = numpy.where(RedG[:, 0] == slct_bl_type_id)[0]

In [None]:
bl_varcr = bl_varca[ew_14_bls, ...] # selecting only 14m EW baselines
bl_varcr = numpy.mean(np.abs(bl_varcr), axis=0) # average over 14m EW baselines

In [None]:
from scipy.signal import savgol_filter
from astropy.stats import sigma_clip

In [None]:
def nan_helper(y):
    """Return a NaN mask for ``y`` and a function mapping a mask to positions.

    Returns a tuple ``(nans, index_of)`` where ``nans`` is ``np.isnan(y)`` and
    ``index_of(mask)`` gives the integer positions at which ``mask`` is
    non-zero (first axis only), which is convenient for ``np.interp``.
    """
    def _positions(z):
        return z.nonzero()[0]

    return np.isnan(y), _positions

In [None]:
# Clean each column of bl_varcr independently:
#   1. 4-sigma clip about the median, turning rejected samples into NaN,
#   2. fill the NaNs by linear interpolation along the column,
#   3. smooth with a Savitzky-Golay filter (window 17, cubic).
bl_varc = np.empty_like(bl_varcr)
for i in range(bl_varcr.shape[1]):
    column = sigma_clip(bl_varcr[:, i], sigma=4, cenfunc='median').filled(fill_value=np.nan)
    nans, x = nan_helper(column)
    column[nans] = np.interp(x(nans), x(~nans), column[~nans])
    bl_varc[:, i] = savgol_filter(column, window_length=17, polyorder=3, mode='interp')

# Replace negative values left by the smoothing with a small positive floor
bl_varc[bl_varc < 0] = 1e-8

In [None]:
# Raw vs. cleaned values along the first axis at a single test column
test_freq = 381
for _curve in (bl_varcr, bl_varc):
    plt.plot(_curve[:, test_freq])
plt.show()

In [None]:
from statsmodels.nonparametric.kernel_regression import KernelReg

In [None]:
clf = KernelReg(bl_varc[test_tint, :], numpy.arange(hd.Nfreqs), var_type='c', reg_type='ll', bw = 'cv_ls')

In [None]:
y_pred = clf.fit()[0]

In [None]:
mf = numpy.ma.masked_array(bl_varcr[test_tint, :], fff)

In [None]:
test_tint = 30
fff = numpy.append(numpy.zeros(50), numpy.append(flagsf[:, test_tint], numpy.zeros(50))).astype(bool)
plt.plot(bl_varcr[test_tint, :])
plt.plot(mf)
plt.plot(y_pred)
plt.ylim((0, 300))
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(11, 7))
arr = np.absolute(bl_varca.mean(axis=0))[:, 50:-50]
vmax = numpy.ceil(numpy.nanpercentile(arr, 93)*100)/100
ax = sns.heatmap(arr, vmax=vmax, vmin=0, cmap=sns.cm.rocket_r, center=None)
ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=-50))
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set_xlabel('freq')
ax.set_ylabel('time_int')
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(11, 7))
snoise_arr = np.absolute(bl_varc)[:, 50:-50]
vmax = numpy.ceil(numpy.nanpercentile(snoise_arr, 93)*100)/100
ax = sns.heatmap(snoise_arr, vmax=vmax, vmin=0, cmap=sns.cm.rocket_r, center=None)
ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=-50))
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set_xlabel('freq')
ax.set_ylabel('time_int')
plt.tight_layout()
plt.show()

In [None]:
noise_nrm_nll = df_j['fun'].values.astype(float) / snoise_arr.ravel()

In [None]:
fig, ax = plt.subplots(figsize=(11, 7))
arr = noise_nrm_nll.reshape((60, 924))
vmax = numpy.ceil(numpy.nanpercentile(arr, 95)*10000)/10000
ax = sns.heatmap(arr, vmax=vmax, vmin=0, cmap=sns.cm.rocket_r, center=None)
ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter(useOffset=-50))
ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set_xlabel('freq')
ax.set_ylabel('time_int')
plt.tight_layout()
plt.show()

In [None]:
%run plot_utils.py

In [None]:
flagged_hist(noise_nrm_nll, flagsr, xlabel=r'$-\ln(\mathcal{L}^G_\mathrm{deg}) \; / \; \sigma_{14m}^2 $', lower_cut=None, \
             upper_cut=0.000098, bin_width=vmax/50, hist_start=0, logy=False, ylim=None, figsize=(6, 6))

In [None]:
snoise_arr[1, 311]

In [None]:
noise_nrm_nll[34472] / snoise_arr.ravel()[34472]

In [None]:
0.000098

In [None]:
# Unflagged entries above the threshold. Each record is the dataframe index
# tuple extended with (flat position, normalised value, combined residual,
# 'fun' value, smoothed noise); the list is ordered by the second index entry.
suspect_threshold = 0.000098
_snoise_flat = snoise_arr.ravel()
suspect_slices = sorted(
    (
        df_j.index.values[i] + (i, n, rman_values[i], deg_values[i], _snoise_flat[i],)
        for i, n in enumerate(noise_nrm_nll)
        if not flagsr[i] and n > suspect_threshold
    ),
    key=lambda rec: rec[1],
)

In [None]:
suspect_slices

In [None]:
[(44, 908, 41514, 0.2005497625082851), (31, 910, 29504, 0.20712143320814252)]

In [None]:
i = 41514
print(df_j.index.values[i] + (i, noise_nrm_nll[i], rman_values[i], deg_values[i], snoise_arr.ravel()[i],))

In [None]:
vis_list = list(map(str, np.arange(md['no_unq_bls']*2).tolist()))

In [None]:
visC_df = vis_df[vis_list].apply(lambda row: makeCArray(row.values), axis=1)
visC_df = pd.DataFrame(visC_df.values.tolist(), index=visC_df.index)
# visC_tint_df = visC_df.loc[pd.IndexSlice[:, test_tint], :].droplevel(level=1)
# visamp_tint_df = np.abs(visC_tint_df)
# visphase_tint_df = visC_tint_df.apply(np.angle)

In [None]:
visC_14ew = visC_df[2]

In [None]:
%gui qt

In [None]:
import napari

In [None]:
a = visC_14ew.loc[:, 311].values
np.var(a.real) + np.var(a.imag)

In [None]:
plt.plot(np.angle(a))

In [None]:
plt.plot(np.abs(a))

In [None]:
a = vis_df[vis_list[::2]].groupby(level=['freq']).std()

In [None]:
a.mean(axis=1)[51]

In [None]:
a.loc[50, ]