In [1]:
# grandwin/flagging/load_outliers.py

import h5py
import numpy as np
import pandas as pd
import argparse
import os

def load_outliers_from_h5(file_path, polarizations=None):
    """Load an outlier mask from HDF5 and return its flagged cells as a table.

    The file must provide three datasets: ``outliers_mask`` (4-D; the axes
    are used as time index, antenna, frequency and polarization, in that
    order), ``obs_id`` and ``time_blocks`` (both indexed by time index).

    Parameters
    ----------
    file_path : str or path-like
        HDF5 file to read.
    polarizations : sequence of str, str, or None
        Polarization columns (out of ``XX``, ``XY``, ``YX``, ``YY``) that
        decide whether a cell is kept: a row is returned when at least one
        of them is set. ``None`` selects all four.

    Returns
    -------
    pandas.DataFrame
        One row per kept (time, antenna, frequency) cell, in row-major order
        of the mask, with columns ``XX, XY, YX, YY, obs_id, time_index,
        frequency, antenna, timeblock``.

    Raises
    ------
    KeyError
        If ``polarizations`` names an unknown column.
    ValueError
        If the mask is not 4-D with four polarization entries, or if
        ``obs_id`` / ``time_blocks`` are shorter than the time axis.
    """
    pol_columns = ["XX", "XY", "YX", "YY"]

    if polarizations is None:
        selected = list(pol_columns)
    elif isinstance(polarizations, str):
        selected = [polarizations]
    else:
        selected = list(polarizations)
    unknown = [pol for pol in selected if pol not in pol_columns]
    if unknown:
        raise KeyError(f"Unknown polarizations {unknown}; expected a subset of {pol_columns}")

    with h5py.File(file_path, "r") as f:
        outliers_mask = f["outliers_mask"][:]
        obs_id = f["obs_id"][:].astype(str)
        time_blocks = f["time_blocks"][:]

    print("Outliers: ", outliers_mask.shape)

    if outliers_mask.ndim != 4 or outliers_mask.shape[-1] != len(pol_columns):
        raise ValueError(
            "outliers_mask must have shape (time, antenna, frequency, 4); "
            f"got {outliers_mask.shape}"
        )
    n_time = outliers_mask.shape[0]
    if len(obs_id) < n_time or len(time_blocks) < n_time:
        raise ValueError("obs_id and time_blocks must cover every time index of outliers_mask")

    # Pick the flagged cells first instead of materialising one DataFrame row
    # per mask cell. np.nonzero walks the array in row-major order, which is
    # the same order a reshape(-1, 4) of the mask would produce.
    pol_idx = [pol_columns.index(pol) for pol in selected]
    hit = outliers_mask[..., pol_idx].astype(bool).any(axis=-1)
    time_idx, antenna_idx, freq_idx = np.nonzero(hit)

    # Per-time lookups, converted once rather than once per row.
    obs_id_by_time = np.array([int(value) for value in obs_id], dtype=np.int64)
    timeblock_by_time = np.array([int(value) for value in time_blocks], dtype=np.int64)

    df_outliers = pd.DataFrame(outliers_mask[time_idx, antenna_idx, freq_idx], columns=pol_columns)
    df_outliers["obs_id"] = obs_id_by_time[time_idx]
    df_outliers["time_index"] = time_idx
    df_outliers["frequency"] = freq_idx
    df_outliers["antenna"] = antenna_idx
    df_outliers["timeblock"] = timeblock_by_time[time_idx]

    return df_outliers

In [None]:
# grandwin/flagging/apply_flags.py

from astropy.io import fits

def expand_timeblocks(timeblocks, win_step, uv_step):
    """Expand coarse time-block indices into the finer indices they span.

    With ``factor`` fine steps per coarse step, block ``tb`` expands to
    ``tb * factor, tb * factor + 1, ..., tb * factor + factor - 1``.

    Parameters
    ----------
    timeblocks : array-like of int
        Coarse block indices.
    win_step, uv_step : number
        Coarse and fine step sizes, in the same unit.

    Returns
    -------
    numpy.ndarray
        1-D array of fine indices, block by block in input order.
    """
    ratio = win_step / uv_step
    nearest = round(ratio)
    # A ratio that is an integer up to floating-point error (0.6 / 0.2 gives
    # 2.9999999999999996) must not be truncated to the integer below it.
    # Genuinely fractional ratios keep the truncating behaviour.
    if np.isclose(ratio, nearest, rtol=1e-9, atol=0.0):
        factor = int(nearest)
    else:
        factor = int(ratio)

    blocks = np.asarray(timeblocks).reshape(-1, 1)
    return (blocks * factor + np.arange(factor)).ravel()

def flag_uvfits_data(obs_id, uvfits_path, df_outliers, output_path, win_step, uv_step, output_dir=None):
    """Flag the time/frequency cells listed in ``df_outliers`` in a UVFITS file.

    For every distinct ``timeblock`` in ``df_outliers`` the block is expanded
    with ``expand_timeblocks`` and matched against the rank of each group's
    ``DATE`` value. For the matching groups, the listed frequency channels
    and polarization indices 0-3, element 2 of the last data axis is replaced
    by minus its absolute value. The code counts negative values of that
    element as flags (presumably it is the UVFITS weight - confirm).

    The ``antenna`` column of ``df_outliers`` is not consulted: every group
    in a matching time range is flagged.

    Parameters
    ----------
    obs_id : int or str
        Observation identifier; used for the log file name and log line.
    uvfits_path : str
        Input UVFITS file.
    df_outliers : pandas.DataFrame
        Must contain ``timeblock`` and ``frequency`` columns.
    output_path : str
        Destination of the flagged UVFITS file (overwritten if present).
    win_step, uv_step : number
        Passed to ``expand_timeblocks``.
    output_dir : str, optional
        Directory for ``<obs_id>.log``. Defaults to the directory of
        ``output_path``.

    Side effects
    ------------
    Writes ``output_path`` and appends one line
    ``obs_id, flags_before, flags_after, cells_touched, total_cells`` to the
    log file.
    """
    if output_dir is None:
        output_dir = os.path.dirname(output_path)
    # os.path.join keeps the path valid whether or not output_dir ends with a separator.
    log_path = os.path.join(output_dir, f"{obs_id}.log")

    total_flagged = 0

    # The context manager closes the file even if flagging or writing fails.
    with fits.open(uvfits_path) as uv:
        vis = uv[0].data.data

        flagsb = np.count_nonzero(vis[:, :, :, :, :, 2] < 0)
        datatot = flagsb + np.count_nonzero(vis[:, :, :, :, :, 2] >= 0)
        print("Data shape before: ", vis.shape)
        print("Flags before: ", flagsb)
        print("Total data: ", datatot)

        # unique_id[k] is the rank of group k's DATE among the distinct DATE values.
        _, unique_id = np.unique(uv[0].data['DATE'], return_inverse=True)

        for timeblock in np.unique(df_outliers['timeblock']):
            dfreqs = np.unique(df_outliers.loc[df_outliers['timeblock'] == timeblock, 'frequency'])
            print(dfreqs)

            extimeblocks = expand_timeblocks(np.array([timeblock]), win_step, uv_step)
            print(extimeblocks)

            blindices = np.where(np.isin(unique_id, extimeblocks))[0]
            print(blindices)

            cells = np.ix_(blindices, [0], [0], dfreqs, [0, 1, 2, 3], [2])
            vis[cells] = -np.abs(vis[cells])

            # Number of cells touched, whether or not they were already negative.
            touched = len(dfreqs) * len(blindices) * 4
            print("The data that being flagged ", touched)
            total_flagged += touched

        flagsa = np.count_nonzero(vis[:, :, :, :, :, 2] < 0)

        print("Data shape after: ", vis.shape)
        print("Flags after: ", flagsa)
        print(total_flagged)

        uv.writeto(output_path, overwrite=True)

    g = str(obs_id) + ', ' + str(flagsb) + ', ' + str(flagsa) + ', ' + str(total_flagged) + ', ' + str(datatot) + '\n'
    with open(log_path, mode='a', newline='\n') as fflagged:
        fflagged.write(g)



In [None]:
# scripts/flag_data.py

def main():
    """Load the detected outliers and write a flagged copy of each UVFITS file.

    Paths and integration settings are hard-coded below. Only the observation
    ids listed in ``unique_obs_ids`` are processed.
    """
    uvfits_dir = "/Users/eormacstudio/Documents/GitHub/GRANDWin/data/raw/uvfits_raw/"
    outlier_file = "/Users/eormacstudio/Documents/GitHub/GRANDWin/data/processed/detected_outliers/3/outliers_location_day_1_grid_0_integration_8_real.h5"
    output_dir = "/Users/eormacstudio/Documents/GitHub/GRANDWin/data/processed/uvfits_update/"
    # Step sizes handed to flag_uvfits_data as uv_step / win_step
    # (presumably integration times in seconds - confirm).
    uv_integration = 2
    win_integration = 8
    # Rows are kept when any of these polarization columns is flagged.
    polarizations = ["XX", "YY"]

    # load_outliers_from_h5 takes the polarization selection as its second argument.
    df_outliers = load_outliers_from_h5(outlier_file, polarizations)

    print("Outliers: ", df_outliers)
    print("Observation id: ", df_outliers["obs_id"].unique())
    unique_obs_ids = [1095451432] #df_outliers["obs_id"].unique()

    for obs_id in unique_obs_ids:
        uvfits_path = os.path.join(uvfits_dir, f"{obs_id}_w_no_flags059-078.uvfits")
        output_path = os.path.join(output_dir, f"{obs_id}_w_no_flags059-078_flagged.uvfits")
        flag_uvfits_data(
            obs_id,
            uvfits_path,
            df_outliers[df_outliers["obs_id"] == obs_id].reset_index(drop=True),
            output_path,
            win_integration,
            uv_integration,
            output_dir,  # directory that receives the per-observation log file
        )

if __name__ == "__main__":
    main()

In [15]:
# Load the outlier table, keeping only rows where XX or YY is flagged.
outlier_file = "/Users/eormacstudio/Documents/GitHub/GRANDWin/data/processed/detected_outliers/outliers_location_day_1_grid_0_integration_8_real.h5"
polarizations = ['XX', 'YY']
df_outliers = load_outliers_from_h5(outlier_file, polarizations)

Outliers:  (224, 128, 640, 4)


In [16]:
# Restrict the outlier table to a single observation.
# NOTE: this rebinds df_outliers, so the full table is only recoverable by
# re-running the loading cell.
obs_id = 1095451432
df_outliers = df_outliers[df_outliers['obs_id'] == obs_id].reset_index(drop=True)

In [17]:
# Display the outlier rows kept for the selected observation.
df_outliers

Unnamed: 0,XX,XY,YX,YY,obs_id,time_index,frequency,antenna,timeblock
0,True,False,False,False,1095451432,85,59,0,1
1,False,False,False,True,1095451432,85,11,3,1
2,False,False,False,True,1095451432,85,450,3,1
3,True,False,False,False,1095451432,85,86,5,1
4,True,False,False,False,1095451432,85,87,5,1
...,...,...,...,...,...,...,...,...,...
1971,True,True,True,False,1095451432,96,625,120,12
1972,True,False,True,True,1095451432,96,625,121,12
1973,True,False,False,False,1095451432,96,625,123,12
1974,True,False,False,False,1095451432,96,625,124,12


In [None]:
# Interactive version of flag_uvfits_data for the observation selected above,
# limited to the first three timeblocks.
#
# NOTE(review): result_directory, file_flagged, raw_directory and finished_file
# are not defined in any visible cell; they must exist in the kernel before
# this cell runs - confirm where they are meant to come from.
uvfits_dir = "/Users/eormacstudio/Documents/GitHub/GRANDWin/data/raw/uvfits_raw/"
uvfits_path = os.path.join(uvfits_dir, f"{obs_id}_w_no_flags059-078.uvfits")

total_flags = []

# The context manager closes the FITS file even if a step below fails.
with fits.open(uvfits_path) as uv:

    flagsb = np.count_nonzero(uv[0].data.data[:, :, :, :, :, 2] < 0)
    datatot = np.count_nonzero(uv[0].data.data[:, :, :, :, :, 2] < 0) + np.count_nonzero(uv[0].data.data[:, :, :, :, :, 2] >= 0)
    print("Data shape before: ", uv[0].data.data.shape)
    print("Flags before: ", flagsb)
    print("Total data: ", datatot)

    # unique_id[k] is the rank of group k's DATE among the distinct DATE values.
    date_unique, unique_id = np.unique(uv[0].data['DATE'], return_inverse=True)

    dtimeblocks = np.unique(df_outliers['timeblock'])

    # Only the first three timeblocks are processed; the bound is clamped so a
    # table with fewer timeblocks does not raise IndexError.
    for j in range(min(3, len(dtimeblocks))): #len(dtimeblocks)):
        dfreqs = np.unique(df_outliers[df_outliers['timeblock'] == dtimeblocks[j]]['frequency'])
        print(dfreqs)

        extimeblocks = expand_timeblocks(df_outliers[df_outliers['timeblock'] == dtimeblocks[j]]['timeblock'].unique(), 8, 2)
        print(extimeblocks)

        blindices = np.where(np.isin(unique_id, extimeblocks))[0]
        print(blindices)

        # Replace element 2 of the last axis by minus its absolute value for the
        # selected groups, channels and all four polarization indices.
        uv[0].data.data[np.ix_(blindices, [0], [0], dfreqs, [0,1,2,3], [2])] = np.abs(uv[0].data.data[np.ix_(blindices, [0], [0], dfreqs, [0,1,2,3], [2])]) * -1

        print("The data that being flagged ", len(dfreqs)*len(blindices)*4)
        total_flags.append(len(dfreqs)*len(blindices)*4)

    flagsa = np.count_nonzero(uv[0].data.data[:, :, :, :, :, 2] < 0)
    print("Data shape after: ", uv[0].data.data.shape)
    print("Flags after: ", flagsa)
    print(np.sum(total_flags))

    g = str(obs_id) + ', ' + str(flagsb) + ', ' + str(flagsa) + ', ' + str(np.sum(total_flags)) + ', ' + str(datatot) + '\n'
    with open(result_directory+file_flagged, mode='a', newline='\n') as fflagged:
        fflagged.write(g)

    uv.writeto(raw_directory + "%s/raw_%s_w_flag059-078_updated.uvfits" %(obs_id, obs_id), overwrite=True)

print("Add observationid %s for flagged observation!" %(obs_id))
with open(finished_file, mode='a', newline='') as file:
    file.write("%s \n" %(obs_id))


In [11]:
# Expand timeblock 3 with win_step=8 and uv_step=2 (four fine steps per block).
expand_timeblocks(df_outliers[df_outliers['timeblock'] == 3]['timeblock'].unique(), 8, 2)

array([12, 13, 14, 15])

In [14]:
# Indices of the groups whose DATE rank falls in the expanded timeblocks.
# NOTE(review): dextimeblocks is not assigned in any visible cell (the flagging
# cell uses the name extimeblocks) - confirm which value is intended.
np.where(np.isin(unique_id, dextimeblocks))[0]

array([ 97536,  97537,  97538, ..., 130045, 130046, 130047],
      shape=(32512,))

In [10]:
# Display the current outlier table.
df_outliers

Unnamed: 0,XX,XY,YX,YY,obs_id,time_index,frequency,antenna,timeblock
0,False,False,False,True,1095451432,85,3,0,1
1,False,False,False,True,1095451432,85,25,0,1
2,True,False,False,False,1095451432,85,59,0,1
3,False,False,False,True,1095451432,85,164,0,1
4,True,False,False,False,1095451432,85,260,0,1
...,...,...,...,...,...,...,...,...,...
15812,True,False,False,False,1095451432,96,450,125,12
15813,True,True,False,False,1095451432,96,625,125,12
15814,True,False,False,False,1095451432,96,300,126,12
15815,True,False,False,False,1095451432,96,348,126,12


In [None]:
# Print the columns and data of the first two HDUs when more than one is present.
# NOTE(review): hdul is not defined in any visible cell - presumably an opened
# FITS HDU list; confirm where it is created.
obs_id = 1095451432


if len(hdul) > 1:
    print(hdul[1].columns)
    print(hdul[1].data)
    print(hdul[0].columns)
    print(hdul[0].data)

In [None]:
# Show the BASELINE field of the first HDU.
# NOTE(review): hdul is not defined in any visible cell - confirm its origin.
hdul[0].data['BASELINE']

In [None]:
# Show the POLTYA field of the second HDU.
# NOTE(review): hdul is not defined in any visible cell - confirm its origin.
hdul[1].data['POLTYA']

In [None]:
# The obs_id column holds integers (load_outliers_from_h5 converts each id with
# int()), so comparing it with a string never matches and yields an empty frame.
df_outliers[df_outliers['obs_id'] == int(obs_id)]