In [None]:
import matplotlib
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt

from plot_utils import df_heatmap
from red_likelihood import decomposeCArray, degVis, makeCArray, red_ant_sep
from red_utils import find_nearest, find_deg_df, find_rel_df, \
match_lst

In [None]:
%matplotlib inline

In [None]:
# --- Notebook parameters ---
# JD (with fractional part) of the 1st dataset, and the integer JD of the
# anchor day it is compared against
jd_time, jd_anchor = 2458098.43869, 2458099

# Polarization and noise-distribution labels passed to the dataframe finders
pol, dist = 'ee', 'gaussian'

# Not referenced by any cell visible in this notebook
coords = 'cartesian'

# Directory handed to find_rel_df / find_deg_df
# NOTE(review): absolute, machine-specific path - consider making configurable
dir_path = '/Users/matyasmolnar/Downloads/HERA_Data/robust_cal/simpleredcal'

In [None]:
# Toggle for high-resolution figures: when enabled, the default figure DPI is
# raised to 300 for every plot drawn afterwards
plot_figs = False
if plot_figs:
    # Reuse the top-of-notebook `import matplotlib` instead of re-importing the
    # package under a second alias inside the conditional
    matplotlib.rcParams['figure.dpi'] = 300

# Comparing a pair of datasets

## Loading 1st relatively calibrated dataframe

In [None]:
# Metadata of the 1st dataset, read from a pickle in the working directory
# NOTE(review): unpickling can execute arbitrary code - only load trusted files
md_path = 'rel_df.{}.{}.md.pkl'.format(jd_time, pol)
with open(md_path, 'rb') as md_file:
    md = pickle.load(md_file)

indices = ['freq', 'time_int']
resid_cols = ['residual', 'norm_residual']

# Column labels are stringified integers: the first 2*no_unq_bls label the
# visibility columns, the following 2*no_ants label the gain columns
no_vis_cols = md['no_unq_bls']*2
no_param_cols = (md['no_unq_bls'] + md['no_ants'])*2
vis_list = np.arange(no_vis_cols).astype(str).tolist()
cvis_list = ['C' + label for label in np.arange(md['no_unq_bls']).astype(str).tolist()]
gain_list = np.arange(no_vis_cols, no_param_cols).astype(str).tolist()

# Relative calibration results of the 1st dataset, without the residual columns
rel_df_path = find_rel_df(jd_time, pol, dist, dir_path)
rel_df = pd.read_pickle(rel_df_path).drop(columns=resid_cols)

# Number of distinct frequencies and time integrations in the 1st dataset
Nfreqs, Ntints = (rel_df.index.get_level_values(level).unique().size
                  for level in indices)

## Loading 2nd relatively calibrated dataframe

Due to an offset in LAST between the two days, the 2nd dataset is built by merging two consecutive relatively calibrated dataframes, each cut in LAST so that the merged dataframe is aligned with the 1st one.

In [None]:
# Find the dataset on the anchor JD that contains visibilities at the same
# LAST as the 1st dataset
jd_time2 = match_lst(jd_time, jd_anchor)
rel_df_path2 = find_rel_df(jd_time2, pol, dist, dir_path)

# Aligning datasets in LAST: `offset` is the second value returned by
# find_nearest for the first LAST of dataset 1 looked up in the LASTs of
# dataset 2; it is used below as the time integration at which to cut
last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
last1 = last_df[last_df['JD_time'] == jd_time]['LASTs'].values[0]
last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
_, offset = find_nearest(last2, last1[0])


def _relabel_tints(df, keep, new_tints):
    """Keep the rows of `df` selected by `keep` and renumber their time_int.

    Parameters
    ----------
    df : DataFrame indexed by ('freq', 'time_int')
    keep : boolean array selecting the rows to retain
    new_tints : time_int values assigned, in order, within every frequency

    Returns
    -------
    New DataFrame indexed by `indices`; `df` itself is left untouched.

    The rows are sorted by index before relabelling because the tiled
    assignment is purely positional: it is only correct if the rows are
    ordered by frequency and then by time integration.
    """
    kept = df[keep].sort_index().reset_index()
    kept['time_int'] = np.tile(new_tints, Nfreqs)
    return kept.set_index(indices)


# Integrations >= offset of the matched dataset become 0 .. Ntints-offset-1
rel_df2 = pd.read_pickle(rel_df_path2)
rel_df2 = _relabel_tints(rel_df2,
                         rel_df2.index.get_level_values('time_int') >= offset,
                         np.arange(Ntints - offset))

# Integrations < offset of the dataset in the following row of last_df become
# Ntints-offset .. Ntints-1
# NOTE(review): assumes last_df rows are ordered in time and that the following
# row belongs to the same night - confirm
next_row = np.where(last_df['JD_time'] == jd_time2)[0][0] + 1
rel_df_path3 = find_rel_df(last_df.iloc[next_row]['JD_time'], pol,
                           dist, dir_path)
rel_df3 = pd.read_pickle(rel_df_path3)
rel_df3 = _relabel_tints(rel_df3,
                         rel_df3.index.get_level_values('time_int') < offset,
                         np.arange(Ntints - offset, Ntints))

# Combined results dataframe that is now aligned in LAST by row number with
# rel_df; residual and gain columns are not needed further on
rel_df_c = (pd.concat([rel_df2, rel_df3])
            .sort_index()
            .drop(columns=resid_cols + gain_list))

## Degenerate transformation of the 1st dataframe

In [None]:
# Columns carried over unchanged from the relative calibration results
# (presumably the minimizer output fields - confirm against the solver)
min_list = ['success', 'status', 'message', 'fun', 'nit']

# Skeleton for the degenerately transformed results: the columns above taken
# from rel_df, followed by still-empty (NaN) visibility columns
rel_df_d = rel_df[min_list].reindex(columns=min_list + vis_list)
rel_df_d.sample(5).sort_index()

In [None]:
# Degenerate-comparison dataframe of the 1st dataset with respect to the
# anchor JD
anchor_tag = 'jd.{}'.format(jd_anchor)
deg_df_path = find_deg_df(jd_time, pol, anchor_tag, dist, dir_path)
deg_df = pd.read_pickle(deg_df_path)

In [None]:
# Degenerate parameters: columns '0', '1', '2' of deg_df renamed to
# amp / tilt_x / tilt_y and re-indexed by (freq, time_int) so that they can be
# joined row-by-row onto rel_df
deg_param_cols = ['amp', 'tilt_x', 'tilt_y']
deg_df_d = (
    deg_df[['0', '1', '2']]
    .reset_index()
    .rename(columns={'time_int1': 'time_int', '0': 'amp', '1': 'tilt_x', '2': 'tilt_y'})
    .set_index(indices)
    .sort_index()
)

# Replace the gain columns of rel_df by the degenerate parameters.
# errors='ignore' together with also dropping any previously joined parameter
# columns keeps this cell re-runnable: on a second run the gain columns are
# already gone and the parameter columns already present, which would otherwise
# raise in drop() and join() respectively.
# The three parameters must stay the LAST columns of rel_df - the degenerate
# transformation further down reads them positionally.
rel_df = (rel_df
          .drop(columns=gain_list + deg_param_cols, errors='ignore')
          .join(deg_df_d))
rel_df.sample(5).sort_index()

In [None]:
ant_sep = red_ant_sep(md['redg'], md['antpos'])


def _deg_transform_row(row):
    """Apply degVis to the visibility columns of one rel_df row.

    The visibility columns are taken positionally (directly after the min_list
    columns) and the three degenerate parameters are taken as the last three
    entries of the row; the result of degVis is passed through decomposeCArray
    and returned as a Series.
    """
    vis_start = len(min_list)
    vis_stop = vis_start + md['no_unq_bls']*2
    vis_comps = row.iloc[vis_start:vis_stop].values.astype(float)
    deg_params = row.iloc[-3:].values.astype(float)
    return pd.Series(decomposeCArray(degVis(ant_sep, makeCArray(vis_comps), *deg_params)))


rel_df_d[vis_list] = rel_df.apply(_deg_transform_row, axis=1)

In [None]:
# Spot-check: 5 randomly sampled rows of rel_df_d, shown in index order
rel_df_d.sample(5).sort_index()

## Combining degenerately consistent dataframes

In [None]:
# Tag each dataframe with its integer JD, stack them and make the JD the third
# index level after freq and time_int
rel_df_d['JD'] = int(jd_time)
rel_df_c['JD'] = int(jd_anchor)

rel_df_t = (
    pd.concat([rel_df_d, rel_df_c])
    .reset_index()
    .set_index(['freq', 'time_int', 'JD'])
    .sort_index()
)

In [None]:
# Spot-check: 5 randomly sampled rows of the combined dataframe, in index order
rel_df_t.sample(5).sort_index()

## Statistics on combined dataframe

In [None]:
# Mean of every visibility column over the rows sharing (freq, time_int),
# i.e. across the JDs in the combined dataframe
rel_df_t.groupby(level=['freq', 'time_int'])[vis_list].mean()

In [None]:
# Standard deviation of every visibility column over the rows sharing
# (freq, time_int), i.e. across the JDs in the combined dataframe
rel_df_t.groupby(level=['freq', 'time_int'])[vis_list].std()

## Plots

### Single time integration

In [None]:
# Value of the time_int index level selected for the single-integration plots
time_integration = 40

In [None]:
# Rows of the chosen time integration; the time_int level is dropped, leaving
# (freq, JD) as the index
rel_df_tint = rel_df_t.xs(time_integration, level='time_int')


def _row_to_complex(row):
    """Pass the visibility columns of one row through makeCArray."""
    return pd.Series(makeCArray(row[vis_list].values.astype(float)))


# One column per output element of makeCArray
rel_df_tintc = rel_df_tint.apply(_row_to_complex, axis=1)

In [None]:
# Display the visibilities of the selected time integration after makeCArray
rel_df_tintc

In [None]:
# Absolute value of each visibility, averaged over the rows of each frequency
vis_abs_mean = rel_df_tintc.abs().groupby(level='freq').mean()

In [None]:
# Heatmap of the mean amplitudes with frequency along the columns; the colour
# scale runs from 0 to the 95th percentile (NaNs ignored)
piv = pd.pivot_table(vis_abs_mean, columns='freq')
vmin, vmax = 0, np.nanpercentile(piv.values, 95)
df_heatmap(
    piv, xbase=50, ybase=5, vmax=vmax, vmin=vmin,
    title='Mean visibility amplitudes for time_integration {}'.format(time_integration),
    xlabel='Frequency',
    ylabel='Redundant Baseline Group',
)

In [None]:
# Per-frequency variance of the even- and odd-positioned visibility columns,
# summed column-by-column
# NOTE(review): presumably even = real and odd = imaginary component - confirm
# against makeCArray / decomposeCArray
vis_only = rel_df_tint.drop(columns=min_list)
no_vis_cols = 2*md['no_unq_bls']
bl_ids = np.arange(md['no_unq_bls'])

re_df = vis_only.iloc[:, np.arange(0, no_vis_cols, 2)].groupby('freq').var()
im_df = vis_only.iloc[:, np.arange(1, no_vis_cols, 2)].groupby('freq').var()

# Relabel both frames 0 .. no_unq_bls-1 so the sum pairs matching columns
re_df.columns = bl_ids
im_df.columns = bl_ids
var_df = re_df + im_df

In [None]:
# Heatmap of the visibility variance with frequency along the columns; the
# colour scale runs from 0 to the 95th percentile (NaNs ignored)
piv = pd.pivot_table(var_df, columns='freq')
vmin, vmax = 0, np.nanpercentile(piv.values, 95)
df_heatmap(
    piv, xbase=50, ybase=5, vmax=vmax, vmin=vmin,
    title='Visibility variance for time_integration {}'.format(time_integration),
    xlabel='Frequency',
    ylabel='Redundant Baseline Group',
)

# Statistics over multiple JDs

Starting from the JDs of the previous section, we add further JDs that cover the same LAST range by aligning them in LAST and degenerately transforming them to be consistent with the anchor day (JD 2458099 in this case).

In [None]:
# JDs listed for IDR2: 2458098, 2458099, the contiguous run 2458101-2458116
# and 2458140
# 2458109 to be removed from idr2_jds as it has different antennas (antenna 14
# flagged there)
idr2_jds = [2458098, 2458099] + list(range(2458101, 2458117)) + [2458140]

# JDs to add to the comparison in the cell below
jd_comp = [2458101]

In [None]:
# Add one further JD (the first entry of jd_comp) to the combined dataframe:
# align its relative calibration results in LAST with the 1st dataset,
# degenerately transform them with the parameters found against the anchor JD,
# and append the result to rel_df_t

jd_ci = jd_comp[0]


def _cut_and_relabel(df, tint_level, offset, first_part):
    """Cut `df` at time integration `offset` and renumber what is kept.

    Parameters
    ----------
    df : DataFrame whose index contains the levels 'freq' and `tint_level`
    tint_level : name of the time-integration index level
        ('time_int' for rel frames, 'time_int1' for deg frames)
    offset : time integration at which the frame is cut
    first_part : if True keep integrations >= offset and renumber them
        0 .. Ntints-offset-1; otherwise keep integrations < offset and
        renumber them Ntints-offset .. Ntints-1

    Returns
    -------
    New DataFrame indexed by ['freq', tint_level]; `df` is left untouched.

    Rows are sorted by (freq, tint_level) before relabelling because the tiled
    assignment is purely positional.
    """
    tints = df.index.get_level_values(tint_level)
    if first_part:
        keep = tints >= offset
        new_tints = np.arange(Ntints - offset)
    else:
        keep = tints < offset
        new_tints = np.arange(Ntints - offset, Ntints)
    idx_levels = ['freq', tint_level]
    kept = df[keep].sort_index(level=idx_levels).reset_index()
    kept[tint_level] = np.tile(new_tints, Nfreqs)
    return kept.set_index(idx_levels)


def _deg_transform_aligned_row(row):
    """Apply degVis to the visibility columns of one rel_dfk row.

    Visibility columns are read positionally (directly after the min_list
    columns); the degenerate parameters are the last three entries of the row.
    """
    vis_start = len(min_list)
    vis_stop = vis_start + md['no_unq_bls']*2
    vis_comps = row.iloc[vis_start:vis_stop].values.astype(float)
    deg_params = row.iloc[-3:].values.astype(float)
    return pd.Series(decomposeCArray(degVis(ant_sep, makeCArray(vis_comps), *deg_params)))


#### load rel cal dataframes and align in LAST ####

# dataset on jd_ci that contains visibilities at the same LAST as jd_time
jd_timei = match_lst(jd_time, jd_ci)
lasti = last_df[last_df['JD_time'] == jd_timei]['LASTs'].values[0]
_, offset = find_nearest(lasti, last1[0])

# dataset in the following row of last_df supplies the remaining integrations
# NOTE(review): assumes last_df rows are ordered in time and that the following
# row belongs to the same night - confirm
next_row = np.where(last_df['JD_time'] == jd_timei)[0][0] + 1
jd_timei2 = last_df.iloc[next_row]['JD_time']

# own path names, so that rel_df_path2 from the earlier section is not clobbered
rel_df_pathi = find_rel_df(jd_timei, pol, dist, dir_path)
rel_df_pathj = find_rel_df(jd_timei2, pol, dist, dir_path)
rel_dfi = _cut_and_relabel(pd.read_pickle(rel_df_pathi), 'time_int', offset, True)
rel_dfj = _cut_and_relabel(pd.read_pickle(rel_df_pathj), 'time_int', offset, False)

# combined results dataframe that is now aligned in LAST by row number with
# rel_df
rel_dfk = (pd.concat([rel_dfi, rel_dfj])
           .sort_index()
           .drop(columns=resid_cols + gain_list))


#### degenerate transformation ####

# load the deg dataframes of the same two datasets and align them identically
anchor_tag = 'jd.{}'.format(jd_anchor)
deg_indices = ['freq', 'time_int1']
deg_df_pathi = find_deg_df(jd_timei, pol, anchor_tag, dist, dir_path)
deg_df_pathj = find_deg_df(jd_timei2, pol, anchor_tag, dist, dir_path)
deg_dfi = _cut_and_relabel(pd.read_pickle(deg_df_pathi), 'time_int1', offset, True)
deg_dfj = _cut_and_relabel(pd.read_pickle(deg_df_pathj), 'time_int1', offset, False)

deg_dfk = pd.concat([deg_dfi, deg_dfj]).sort_index()

# degenerate parameters re-indexed by (freq, time_int) and joined onto the
# results; they must end up as the last three columns of rel_dfk
deg_dfk = (
    deg_dfk[['0', '1', '2']]
    .reset_index()
    .rename(columns={'time_int1': 'time_int', '0': 'amp', '1': 'tilt_x', '2': 'tilt_y'})
    .set_index(indices)
    .sort_index()
)
rel_dfk = rel_dfk.join(deg_dfk)

# The min_list columns must come from rel_dfk (the dataset being transformed),
# not from rel_df (the 1st dataset); otherwise the rows of this JD would carry
# the minimizer output of a different dataset
rel_df_di = rel_dfk[min_list].reindex(columns=min_list + vis_list)
rel_df_di[vis_list] = rel_dfk.apply(_deg_transform_aligned_row, axis=1)


#### merging dataframes ####

rel_df_di['JD'] = int(jd_timei)
rel_df_di = (rel_df_di
             .reset_index()
             .set_index(['freq', 'time_int', 'JD'])
             .sort_index())

# Drop rows already present for this JD so that re-running the cell does not
# append duplicates to the combined dataframe
rel_df_t = rel_df_t[rel_df_t.index.get_level_values('JD') != int(jd_timei)]
rel_df_t = pd.concat([rel_df_t, rel_df_di]).sort_index()

In [None]:
# Display the combined dataframe, now including the additional JD
rel_df_t