## Send MWA data to JSON format for training

In [1]:
import sys
sys.path.append('/users/jmduchar/data/jmduchar/Research/mcgill25/rfi_characterization/python/')
from utils import *

import numpy as np
import yaml
import os
import glob
import json
from astropy.io import fits
import h5py
import matplotlib.pyplot as plt
from mapper import bg_subtract
plt.style.use('seaborn-v0_8')

In [2]:
def get_pointing(data, pointing):
    """
    Collect every obsid whose recorded pointing equals the requested one.

    Parameters:
        data (dict): Maps each obsid to its pointing value.
        pointing (int): The pointing to select.

    Returns:
        list: The matching obsids (the keys of `data`), in the dictionary's
            iteration order. Empty when no obsid has that pointing.
    """
    return [
        obsid
        for obsid, obs_pointing in data.items()
        if obs_pointing == pointing
    ]

In [3]:
# Get data
# NOTE: absolute, machine-specific directory; must be changed to run elsewhere.
dirpath = '/users/jmduchar/data/jmduchar/Research/mcgill25/ssins_data/'
# Obsid used to pick which night is loaded. Alternatives that were tried:
# reference_obs = '1093799552'  # known to contain RFI
# reference_obs = '1094662784'
reference_obs = '1090867840'

# Presumably an obsid -> pointing mapping (the shape get_pointing expects) -- TODO confirm.
with open(dirpath+'gridpoint_dict.yaml', 'r') as file:
    data = yaml.safe_load(file)
    
# get_night is not defined in this notebook (presumably provided by `from utils import *`).
# As used here it yields a (pointing, obsids) pair for target_obsid -- verify against utils.
pointing, obsids = get_night(data=data, target_obsid=reference_obs)
# Alternative selection: every obsid at a given pointing, sorted.
# obsids = get_pointing(data, pointing=0)
# obsids.sort()

# Show how many observations were selected.
len(obsids)

15

In [None]:
# Background-subtract the selected observations with the project's
# mapper.bg_subtract helper (N_terms is passed through to it unchanged).
subtracted_data = bg_subtract(
    obsids=obsids,
    data_dir=dirpath+'tars/',
    N_terms=24,
)
# Keep only the non-NaN samples; boolean-mask indexing yields a 1-D array.
subtracted_data = subtracted_data[np.logical_not(np.isnan(subtracted_data))]

# Display the number of samples that survived the NaN filter.
subtracted_data.shape

channel_width not available in file, computing it from the freq_array spacing.
Antenna metadata are missing for this file. Since this is MWA data, the best way to fill in these metadata is to pass in an mwa_metafits_file which contains information about which antennas were connected when the data were taken. Since that was not passed, the antenna metadata will be filled in from a static csv file containing all the antennas that could have been connected.
channel_width not available in file, computing it from the freq_array spacing.
Antenna metadata are missing for this file. Since this is MWA data, the best way to fill in these metadata is to pass in an mwa_metafits_file which contains information about which antennas were connected when the data were taken. Since that was not passed, the antenna metadata will be filled in from a static csv file containing all the antennas that could have been connected.
invalid value encountered in multiply
invalid value encountered in divide
sig_arra

In [None]:
# Coarse-band channel mask; it is inverted below to select the channels to keep.
# Assumes a boolean array -- TODO confirm, since inverting an integer array
# would be a bitwise NOT rather than a logical one.
cb = np.load('../data/coarse_bands_1D.npy')
ssins_list = []

for obsid in obsids:
    file = dirpath + f"tars/{obsid}_SSINS_data.h5"

    # Read the whole metric array into memory and keep polarization index 0
    # (the xx polarization); nothing below needs the file to stay open.
    with h5py.File(file, "r") as hf:
        metric_xx = hf['Data']['metric_array'][:][:, :, 0]

    # Drop the coarse-band channels, then average channels 350-524 of what
    # remains (the DTV7 band) to get one value per time step.
    band_mean = metric_xx[:, ~cb][:, 350:525].mean(axis=1)

    # Discard infinite samples; this can shorten the series for this obsid.
    ssins_rep = band_mean[np.logical_not(np.isinf(band_mean))]

    ssins_list.append(ssins_rep)

In [None]:
# Join the per-observation series end to end into one 1-D time series.
# np.concatenate is used instead of np.asarray(...).flatten() because each
# series has its infinite samples removed separately, so their lengths can
# differ, and a ragged list cannot be converted into a regular 2-D array.
# For equal-length series the result is identical to the flatten() form.
ssins = np.concatenate(ssins_list) if ssins_list else np.empty(0)
# Centre the series on zero by removing its median.
ssins -= np.median(ssins)

In [None]:
# Build the night label from `reference_obs` so the titles always name the
# night that was actually loaded, rather than a hard-coded obsid.
night_label = f"night of {reference_obs}"

# Figure 1: median-subtracted SSINS amplitude.
fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(ssins, '.-')
ax.set_title(f"Amplitude of SSINS averaged across DTV7 ({night_label})", y=1.02)
ax.set_xlabel("time-step")
ax.set_ylabel("amplitude (median-subtracted)")
# ax.set_yscale('symlog')
ax.set_ylim(-15, 15)  # fixed window; samples outside it are not shown
# fig.savefig("./sample.png", dpi=300, bbox_inches='tight')
plt.show()

# Figure 2: the background-subtracted series.
fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(subtracted_data, '.-')
ax.set_title(
    f"Amplitude of SSINS averaged across DTV7 ({night_label})\nBackground-subtracted",
    y=1.02,
)
ax.set_xlabel("time-step")
ax.set_ylabel("amplitude")
# ax.set_yscale('symlog')
ax.set_ylim(-15, 20)  # fixed window; samples outside it are not shown
# fig.savefig("./sample.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Assemble the input for Stan: "N" is the number of samples and "y" holds the
# background-subtracted values as a plain Python list (JSON-serialisable).
stan_data = dict(
    N=subtracted_data.shape[0],
    y=subtracted_data.tolist(),
)

In [None]:
# Display the dictionary before it is written out (this prints the entire "y" list).
stan_data

In [None]:
# Serialise the Stan input dictionary and save it as a JSON file,
# overwriting any existing file at that path.
with open("../data/three_state.data.json", "w") as f:
    f.write(json.dumps(stan_data))

In [None]:
# Median of the background-subtracted series (presumably a check of its baseline level).
np.median(subtracted_data)