# Do daily visual checks of data quality
## L. Caffarello, July 2022
### Updates by A. Ordog, August-September 2022
### August 25, 2022 (AO): 
#### - changed filenames to account for survey phases
#### - changed frequency channels read in to all channels (instead of every 12th)
#### - included 5 MHz to each side of the central frequency in the single-scan, 1D timeseries plots
#### - included persistent RFI mask indication (grey background) on timeseries plots
### September 12, 2022 (AO):
#### - option to mask out persistent RFI
#### - indicate elevation of scans
#### - scan IDs indicated in waterfall plot columns

## Import packages:

In [1]:
# import dva_sdhdf_combine
import imp
import os
import subprocess
import h5py
import numpy as np
from astropy.time import Time
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
import datetime
import matplotlib.dates as mdates
from matplotlib.dates import HourLocator as HourLocator
from matplotlib.dates import MinuteLocator as MinuteLocator
from mpl_toolkits.axes_grid1 import make_axes_locatable
from astropy import units as u
from astropy.time import TimeDelta
from ipywidgets import interact
from ipywidgets import interactive_output

#### Change the directory to where the files are located ####
# Survey day as a string; it is appended to 'DVAsurvey_phase1_day0' to
# build the name of the scan-list text file.
day ='01'
# Folder holding the scan-list file and the raw .h5 scan files. Paths are
# built by plain string concatenation, so keep the trailing '/'.
directory = '/srv/data/dva/survey_azimuth_scans/'
# Alternative data locations (uncomment one to use it instead):
#directory = '/srv/data/dva/survey_azimuth_scans/day_45_lightning/'
#directory = '../DVA/Data_Files/DVA_Day_Surveys/'

#TODO: actually do the scan properties definition in the beginning
##############################################################

## Read in the file listing azimuth scan start and stop times:

In [3]:
# Per-scan bookkeeping, filled in from the scan-list file:
scan_id = []     # scan ID number (int)
scan_start = []  # scan start time (UTC), as 'date' + 'T' + 'time'
scan_stop = []   # scan stop time (UTC), same format
scan_el = []     # first two characters of the elevation column

# Each row is split on whitespace once. Columns used: 0 = scan ID,
# 1/2 = start date/time, 3/4 = stop date/time, 5 = elevation.
# Times are truncated to 12 characters.
with open(directory+'DVAsurvey_phase1_day0'+day+'.txt') as fp:
    for line in fp:
        fields = line.split()
        scan_id.append(int(fields[0]))
        scan_start.append(fields[1]+'T'+fields[2][0:12])
        scan_stop.append(fields[3]+'T'+fields[4][0:12])
        scan_el.append(fields[5][0:2])

# Echo the parsed table (zero-padded ID, start, stop, elevation):
for sid, start, stop, el in zip(scan_id, scan_start, scan_stop, scan_el):
    print(f"{sid:04}",start,stop,el)

# Start and stop times as Modified Julian Day (MJD); the plots and the
# per-scan sample selection both work on MJD values:
scan_start_mjd = Time(scan_start, format='isot',scale='utc').mjd
scan_stop_mjd  = Time(scan_stop,  format='isot',scale='utc').mjd


0041 2022-06-07T04:16:05.291 2022-06-07T04:34:07.198 49
0042 2022-06-07T04:36:26.049 2022-06-07T04:54:28.279 49
1555 2022-06-07T04:59:10.310 2022-06-07T05:17:12.390 20
0260 2022-06-07T05:21:54.456 2022-06-07T05:39:56.371 49
0261 2022-06-07T05:42:15.375 2022-06-07T06:00:17.472 49
0262 2022-06-07T06:02:00.228 2022-06-07T06:20:02.213 49
0263 2022-06-07T06:21:44.857 2022-06-07T06:39:46.723 49
0264 2022-06-07T06:42:05.621 2022-06-07T07:00:07.724 49
0265 2022-06-07T07:01:50.183 2022-06-07T07:19:52.104 49
1778 2022-06-07T07:24:34.427 2022-06-07T07:42:36.183 20
0483 2022-06-07T07:47:54.539 2022-06-07T08:05:56.863 49
0484 2022-06-07T08:07:39.391 2022-06-07T08:25:41.423 49
1997 2022-06-07T08:30:23.775 2022-06-07T08:48:25.852 20
1998 2022-06-07T08:50:08.338 2022-06-07T09:08:10.522 20
0703 2022-06-07T09:13:28.600 2022-06-07T09:31:30.723 49
0704 2022-06-07T09:33:13.442 2022-06-07T09:51:15.300 49
0705 2022-06-07T09:53:33.755 2022-06-07T10:11:36.030 49
0706 2022-06-07T10:13:18.543 2022-06-07T10:31:20

## Read in scan files and stitch them together:

In [5]:
# Per-sample metadata, grown scan by scan in the loop below:
t_set = []
az_set = []
dec_set = []
ra_set = []
el_set = []
noise_set = []
trim_flag = []

scan0 = f"{scan_id[0]:04}"

# Stride along the frequency axis; 1 keeps every channel.
freq_channel_increment = 1

# Use one of the scans to get the list of frequencies (stored values are
# divided by 1e6). The context manager closes the HDF5 file as soon as
# the axis has been read into memory:
with h5py.File(directory+'dva_survey_phase1_raw_'+scan0+'.h5','r') as file:
    freq = file['data']['beam_0']['band_SB0']['frequency'][::freq_channel_increment]/1e6

# Create empty arrays for the power data (rows = time samples,
# columns = frequency channels):
RR_set = np.empty([0,len(freq)])
LL_set = np.empty([0,len(freq)])
reRL_set = np.empty([0,len(freq)])
imRL_set = np.empty([0,len(freq)])

# Loop through all the scans in the "scan_id" list:
for i in scan_id:
#for i in scan_id[0:5]:
    print(i)
    # Open the scan file. It is closed again at the end of each iteration,
    # so file handles do not pile up over a full day of scans:
    with h5py.File(directory+'dva_survey_phase1_raw_'+f"{i:04}"+'.h5','r') as file:
        print(file)

        # access the correct location in the file structure:
        dataset = file['data']['beam_0']['band_SB0']['scan_0']
        metadata = dataset['metadata']
        spectra = dataset['data']

        # np.concatenate copies the values into new in-memory arrays while
        # the file is still open, so nothing below refers back to the file
        # after it has been closed.

        # Add the position and time data to the corresponding arrays:
        dec_set = np.concatenate([dec_set,metadata['declination']])
        ra_set = np.concatenate([ra_set,metadata['right_ascension']])
        el_set = np.concatenate([el_set,metadata['elevation']])
        az_set = np.concatenate([az_set,metadata['azimuth']])
        t_set = np.concatenate([t_set,metadata['utc']])
        # noise_state is a "mask" for noise regions: 1 = noise, 0 = all good
        noise_set = np.concatenate([noise_set,metadata['noise_state']])
        trim_flag = np.concatenate([trim_flag,metadata['trim_scan_flag']])

        # Add the spectrometer power data to the corresponding arrays
        # (second axis of 'data': 0 = RR, 1 = LL, 2 = Re(RL), 3 = Im(RL)):
        RR_set = np.concatenate([RR_set,spectra[:,0,::freq_channel_increment]],axis=0)
        LL_set = np.concatenate([LL_set,spectra[:,1,::freq_channel_increment]],axis=0)
        reRL_set = np.concatenate([reRL_set,spectra[:,2,::freq_channel_increment]],axis=0)
        imRL_set = np.concatenate([imRL_set,spectra[:,3,::freq_channel_increment]],axis=0)

# Sample times as MJD, the time axis used by every plot:
t_plt = Time(t_set, format='isot',scale='utc').mjd

41
<HDF5 file "dva_survey_phase1_raw_0041.h5" (mode r)>
42
<HDF5 file "dva_survey_phase1_raw_0042.h5" (mode r)>
1555
<HDF5 file "dva_survey_phase1_raw_1555.h5" (mode r)>
260
<HDF5 file "dva_survey_phase1_raw_0260.h5" (mode r)>
261
<HDF5 file "dva_survey_phase1_raw_0261.h5" (mode r)>
262
<HDF5 file "dva_survey_phase1_raw_0262.h5" (mode r)>
263
<HDF5 file "dva_survey_phase1_raw_0263.h5" (mode r)>
264
<HDF5 file "dva_survey_phase1_raw_0264.h5" (mode r)>
265
<HDF5 file "dva_survey_phase1_raw_0265.h5" (mode r)>
1778
<HDF5 file "dva_survey_phase1_raw_1778.h5" (mode r)>
483
<HDF5 file "dva_survey_phase1_raw_0483.h5" (mode r)>
484
<HDF5 file "dva_survey_phase1_raw_0484.h5" (mode r)>
1997
<HDF5 file "dva_survey_phase1_raw_1997.h5" (mode r)>
1998
<HDF5 file "dva_survey_phase1_raw_1998.h5" (mode r)>
703
<HDF5 file "dva_survey_phase1_raw_0703.h5" (mode r)>
704
<HDF5 file "dva_survey_phase1_raw_0704.h5" (mode r)>
705
<HDF5 file "dva_survey_phase1_raw_0705.h5" (mode r)>
706
<HDF5 file "dva_survey_ph

## Polarized Intensity

In [6]:
# Polarized intensity, sqrt(Re(RL)^2 + Im(RL)^2), computed one time
# sample (row) at a time and then stacked into a single 2-D array:
polarized = [np.sqrt((re_row**2)+(im_row**2))
             for re_row, im_row in zip(reRL_set,imRL_set)]
polarized_plot = np.array(polarized)

# Shape check: the power array should be (time samples, channels).
for label, arr in (("LL_set:", LL_set), (" t_plt:", t_plt), ("  freq:", freq)):
    print(label, np.shape(arr))

LL_set: (41481, 8250)
 t_plt: (41481,)
  freq: (8250,)


## Read in persistent RFI mask

In [7]:
# Frequency-channel indices flagged as persistent RFI: the first
# whitespace-separated field of every line after the first one
# (the first line is skipped, presumably a header).
RFI_mask_idx = []
with open('/srv/data/dva/RFIpersist_mask/RFIpersist_mask.txt') as fp:
    for line_number, line in enumerate(fp):
        if line_number == 0:
            continue
        RFI_mask_idx.append(int(line.split()[0]))

## Leo's original code:

In [None]:
# Frequency step between the first two channels (same units as freq);
# used below as the channel-lookup tolerance and as the slider step.
df = freq[1]-freq[0]

def DVA_Waterfall_View():
    """Draw the LL power waterfall (frequency vs. time) for all scans.

    Every scan in scan_id is drawn as its own image, placed on the time
    axis by its start/stop MJD; all images share one colour scale in dB.
    """
    #TODO: Add another DVA_Waterfall interactive_function such that I can change between LL_Set, RR_set, etc...
    db_lo, db_hi = 66, 78   # colour-scale limits (AO changed the lower one from 70)
    label_size = 16

    fig, ax = plt.subplots(1,1,figsize=(15,10))

    for k in range(len(scan_id)):
        t0, t1 = scan_start_mjd[k], scan_stop_mjd[k]
        # Time samples recorded during this scan:
        in_scan = np.where((t_plt>=t0) & (t_plt<=t1))[0]
        im = ax.imshow(10*np.log10(LL_set[in_scan,:].T),aspect='auto',
                       vmin=db_lo,vmax=db_hi,origin='lower',
                       extent=[t0,t1,freq[0],freq[-1]],cmap='viridis')

    # Colour bar in a thin axis attached to the right of the plot
    # (uses the image of the last scan drawn):
    cax = make_axes_locatable(ax).append_axes('right', size='2%', pad=0.05)
    cbar = fig.colorbar(im, cax=cax, orientation='vertical')
    cbar.ax.tick_params(labelsize=label_size)
    cbar.set_label('Power (dB)', fontsize=label_size)

    ax.set_xlim(t_plt[0],t_plt[-1])
    ax.set_ylim(freq[0],freq[-1])
    ax.tick_params(axis='both', labelsize=label_size)
    # Show the time axis as clock time:
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    ax.fmt_xdata = mdates.DateFormatter('%H:%M:%S')
    ax.set_xlabel('Time (UTC)',fontsize=label_size)
    ax.set_ylabel('Frequency (MHz)',fontsize=label_size)


def DVA_Cross_Sections(freq_chosen, time_idx, freq_cross_section):
    """Plot LL and RR power (dB) as a spectrum or as a time series.

    freq_chosen        -- frequency to inspect; the first channel lying
                          within one channel width (df) of it is used
    time_idx           -- index into the time samples
    freq_cross_section -- True: spectrum at time_idx;
                          False: time series at the selected channel
    """
    channel = np.where(abs(freq-freq_chosen)<df)[0][0]

    label_size = 12
    db_lo, db_hi = 66, 78   # AO added limits (log scale)
    fig, ax = plt.subplots(1,1,figsize=(16,6))
    # TODO: use the log of LL_set_clean
    if not freq_cross_section:
        # Time series at the selected channel (AO: log scale, RR added):
        ax.scatter(t_plt, 10*np.log10(LL_set[:,channel]), label='LL',s=0.5)
        ax.scatter(t_plt, 10*np.log10(RR_set[:,channel]), label='RR',s=0.5)
        # Red marker at the selected time sample:
        ax.vlines(t_plt[time_idx], 0 , 100e9, color = 'red')

        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
        ax.set_ylim(db_lo,db_hi)
        ax.fmt_xdata = mdates.DateFormatter('%H:%M:%S')
        ax.set_xlabel('Time (UTC)',fontsize=label_size)
        ax.set_xlim(t_plt[0],t_plt[-1])
    else:
        # Spectrum at the selected time sample (AO: log scale, RR added):
        ax.plot(freq,10*np.log10(LL_set[time_idx,:]), label='LL')
        ax.plot(freq,10*np.log10(RR_set[time_idx,:]), label='RR')
        # Red marker at the selected frequency:
        ax.vlines(freq_chosen, 0 , 100e9, color = 'red')
        ax.set_ylim(db_lo,db_hi)
        ax.set_xlim(350,1050)
        ax.set_xlabel('Frequency',fontsize=label_size)
    ax.set_ylabel('Power',fontsize=label_size)
    ax.legend()
    ax.grid() # AO added grid

def DVA_Visualization(waterfall_enabled):
    """Show the waterfall view if waterfall_enabled, else the cross-sections.

    The cross-section view gets a frequency slider (350 to 1000 in steps
    of df) and a time-index slider covering every sample in t_plt.
    """
    if not waterfall_enabled:
        interact(DVA_Cross_Sections, freq_chosen = (350, 1000, df), freq_cross_section = False, time_idx = (0,len(t_plt)-1))
        return
    interact(DVA_Waterfall_View)


# Launch the original viewer; the waterfall_enabled toggle (initially True)
# switches between the waterfall view and the cross-section view.
interact(DVA_Visualization, waterfall_enabled = True)

#TODO: plot power in DB

## Updated visualization, including single-scan option
### Changes made by AO:
#### - power bounds in dB to see lower power signal
#### - changed to log scale and added RR to time series plots and spectra
#### - added grids to 1D plots
#### - added option for single-scan plots (both waterfall and 1D)

In [8]:
# Settings shared by the plotting functions defined in this cell:
df = freq[1]-freq[0]  # frequency step between the first two channels
power_min = 65  # lower limit of the power axis / colour scale (dB)
power_max = 79  # upper limit of the power axis / colour scale (dB)
fs = 14  # font size for labels, ticks and annotations

def DVA_Waterfall_View(RFI_masked):
    """Draw the LL power waterfall (dB) for all scans, labelled by scan ID.

    RFI_masked -- if true, the channels listed in RFI_mask_idx are set
                  to NaN before plotting

    Scan-ID labels are blue for scans whose elevation string is '49' and
    red for all other scans.
    """
    #TODO: implement toggling between RR, LL,reRL, imRL

    fig, ax = plt.subplots(1,1,figsize=(15,10))

    for k, sid in enumerate(scan_id):
        t0, t1 = scan_start_mjd[k], scan_stop_mjd[k]
        # Time samples recorded during this scan:
        in_scan = np.where((t_plt>=t0) & (t_plt<=t1))[0]

        label_colour = 'blue' if scan_el[k] == '49' else 'red'

        # Rows = frequency channels, columns = time samples:
        power_db = 10*np.log10(LL_set[in_scan,:].T)
        if RFI_masked:
            power_db[RFI_mask_idx,:] = np.nan

        im = ax.imshow(power_db,aspect='auto',vmin=power_min,vmax=power_max,
                       origin='lower',extent=[t0,t1,freq[0],freq[-1]],cmap='viridis')

        # Scan ID written at y = 1038, slightly after the scan start:
        ax.text(t0+3e-3,1038,f"{sid:04}",rotation=45,fontsize=fs,color=label_colour)

    # Colour bar in a thin axis attached to the right of the plot
    # (uses the image of the last scan drawn):
    cax = make_axes_locatable(ax).append_axes('right', size='2%', pad=0.05)
    cbar = fig.colorbar(im, cax=cax, orientation='vertical')
    cbar.ax.tick_params(labelsize=fs)
    cbar.set_label('Power (dB)', fontsize=fs)

    ax.set_xlim(t_plt[0],t_plt[-1])
    ax.set_ylim(freq[0],freq[-1])
    ax.tick_params(axis='both', labelsize=fs)
    # Show the time axis as clock time:
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    ax.fmt_xdata = mdates.DateFormatter('%H:%M:%S')
    ax.set_xlabel('Time (UTC)',fontsize=fs)
    ax.set_ylabel('Frequency (MHz)',fontsize=fs)

def DVA_Waterfall_View_singlescan(scan_chosen, RFI_masked):
    """Draw the LL power waterfall (dB) for one scan.

    scan_chosen -- scan ID to plot; must be present in scan_id
    RFI_masked  -- if true, the channels listed in RFI_mask_idx are set
                   to NaN before plotting

    Raises ValueError if scan_chosen is not in scan_id.
    """
    #TODO: implement toggling between RR, LL,reRL, imRL

    # Position of the scan in the scan list, as a plain integer. If the
    # same ID is listed more than once, the first entry is used (the
    # title below has always used the first entry).
    matches = np.flatnonzero(np.asarray(scan_id) == scan_chosen)
    if matches.size == 0:
        raise ValueError(f"Scan {scan_chosen} is not in scan_id")
    scan_idx = int(matches[0])
    t_start = scan_start_mjd[scan_idx]
    t_stop = scan_stop_mjd[scan_idx]

    # Time samples recorded during the scan:
    w = np.where((t_plt>=t_start) & (t_plt<=t_stop))[0]
    extent = [t_start,t_stop,freq[0],freq[-1]]

    fig,axs = plt.subplots(1,1,figsize=(15,10))

    # Rows = frequency channels, columns = time samples:
    data_plot = 10*np.log10(LL_set[w,:].T)
    if RFI_masked:
        data_plot[RFI_mask_idx,:] = np.nan

    im = axs.imshow(data_plot,aspect='auto',vmin=power_min,vmax=power_max,
                    origin='lower',extent=extent,cmap='viridis')

    # Colour bar in a thin axis attached to the right of the plot:
    divider = make_axes_locatable(axs)
    cax = divider.append_axes('right', size='2%', pad=0.05)
    cbar = fig.colorbar(im, cax=cax, orientation='vertical')
    cbar.ax.tick_params(labelsize=fs)
    cbar.set_label('Power (dB)', fontsize=fs)

    axs.set_xlim(extent[0],extent[1])
    axs.set_ylim(freq[0],freq[-1])
    axs.tick_params(axis='both', labelsize=fs)
    # Show the time axis as clock time:
    axs.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
    axs.fmt_xdata = mdates.DateFormatter('%H:%M:%S')
    axs.set_xlabel('Time (UTC)',fontsize=fs)
    axs.set_ylabel('Frequency (MHz)',fontsize=fs)
    axs.set_title('Scan '+str(scan_chosen)+' , el = '+str(scan_el[scan_idx]),fontsize=fs)


def DVA_Cross_Sections(freq_chosen, time_idx, freq_cross_section,RFI_masked):
    """Plot LL and RR power (dB) over all scans, as a spectrum or time series.

    freq_chosen        -- frequency to inspect; the nearest channel is used
    time_idx           -- index into the time samples (t_plt)
    freq_cross_section -- True: spectrum at time_idx;
                          False: time series at the selected channel
    RFI_masked         -- spectrum view only: if true, the channels listed
                          in RFI_mask_idx are set to NaN. The time-series
                          view does not use it; there a grey background
                          marks a selected channel that is in RFI_mask_idx.
    """
    # Nearest channel to the requested frequency. argmin always yields a
    # valid index, also for a request between two channels, outside the
    # band, or with a descending frequency axis.
    freq_measured = int(np.argmin(np.abs(freq-freq_chosen)))

    fig,axs1 = plt.subplots(1,1,figsize=(16,6))

    if freq_cross_section:

        # log10 returns new arrays, so masking them leaves LL_set/RR_set intact:
        data_plot_L = 10*np.log10(LL_set[time_idx,:])
        data_plot_R = 10*np.log10(RR_set[time_idx,:])

        if RFI_masked:
            data_plot_L[RFI_mask_idx] = np.nan
            data_plot_R[RFI_mask_idx] = np.nan

        axs1.plot(freq,data_plot_L,label='LL',color='blue')
        axs1.plot(freq,data_plot_R,label='RR',color='red')
        # Purple marker at the selected frequency:
        axs1.vlines(freq_chosen, 0 , 100e9, color = 'purple')
        axs1.set_ylim(power_min,power_max)
        axs1.set_xlim(350,1050)
        axs1.set_xlabel('Frequency',fontsize=fs)
        axs1.tick_params(axis='both',labelsize=fs)
    else:
        axs1.scatter(t_plt, 10*np.log10(LL_set[:,freq_measured]),label='LL',s=0.8,zorder=1,color='blue')
        axs1.scatter(t_plt, 10*np.log10(RR_set[:,freq_measured]),label='RR',s=0.8,zorder=1,color='red')

        # Purple marker at the selected time sample:
        axs1.vlines(t_plt[time_idx], 0 , 100e9, color = 'purple')

        # Shade each scan by elevation ('49' vs. anything else) and write
        # its ID just above the top of the plot:
        for i, sid in enumerate(scan_id):
            elclr = 'C0' if scan_el[i] == '49' else 'C1'
            axs1.axvspan(scan_start_mjd[i],scan_stop_mjd[i],color=elclr,alpha=0.2,zorder=0)
            axs1.text(scan_start_mjd[i]+3e-3,power_max+0.5,f"{sid:04}",rotation=45,fontsize=fs)

        # Show the time axis as clock time:
        axs1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
        axs1.set_ylim(power_min,power_max)
        axs1.fmt_xdata = mdates.DateFormatter('%H:%M:%S')
        axs1.set_xlabel('Time (UTC)',fontsize=fs)
        axs1.set_xlim(t_plt[0],t_plt[-1])
        axs1.tick_params(axis='both',labelsize=fs)

        # Grey background flags a channel in the persistent-RFI mask:
        if freq_measured in RFI_mask_idx:
            axs1.set_facecolor('lightgray')

    axs1.set_ylabel('Power',fontsize=fs)
    axs1.legend(fontsize=fs,markerscale=5)
    axs1.grid()

    
def DVA_Cross_Sections_singlescan(freq_chosen,scan_chosen,RFI_masked,freq_cross_section,time_idx):
    """Plot LL and RR power (dB) for one scan, as a spectrum or time series.

    freq_chosen        -- frequency to inspect; the nearest channel is used
    scan_chosen        -- scan ID to plot; must be present in scan_id
    RFI_masked         -- spectrum view only: if true, the channels listed
                          in RFI_mask_idx are set to NaN
    freq_cross_section -- True: spectrum at time_idx;
                          False: time series at the selected channel, with
                          the channels within 5 frequency units on either
                          side drawn faintly behind it
    time_idx           -- sample index within the scan; values beyond the
                          last sample of the scan are clamped to it

    Raises ValueError if scan_chosen is not in scan_id or if no time
    samples fall inside the scan's start/stop window.
    """
    # Nearest channel to the requested frequency. argmin always yields a
    # valid index, also for a request between two channels, outside the
    # band, or with a descending frequency axis.
    freq_measured = int(np.argmin(np.abs(freq-freq_chosen)))
    # Neighbouring channels strictly below / above the chosen frequency:
    freq_below = np.where((freq>freq_chosen-5) & (freq<freq_chosen))[0]
    freq_above = np.where((freq<freq_chosen+5) & (freq>freq_chosen))[0]

    # Position of the scan in the scan list, as a plain integer. If the
    # same ID is listed more than once, the first entry is used (the
    # title below has always used the first entry).
    matches = np.flatnonzero(np.asarray(scan_id) == scan_chosen)
    if matches.size == 0:
        raise ValueError(f"Scan {scan_chosen} is not in scan_id")
    scan_idx = int(matches[0])

    # Time samples recorded during the scan:
    w = np.where((t_plt>=scan_start_mjd[scan_idx]) & (t_plt<=scan_stop_mjd[scan_idx]))[0]
    if w.size == 0:
        raise ValueError(f"No time samples found for scan {scan_chosen}")
    # The slider range is fixed by the caller and can exceed the number of
    # samples in a short scan, so keep the index inside the scan:
    time_idx = min(time_idx, w.size-1)

    fig,axs1 = plt.subplots(1,1,figsize=(16,6))

    if freq_cross_section:

        # log10 returns new arrays, so masking them leaves LL_set/RR_set intact:
        data_plot_L = 10*np.log10(LL_set[w[time_idx],:])
        data_plot_R = 10*np.log10(RR_set[w[time_idx],:])

        if RFI_masked:
            data_plot_L[RFI_mask_idx] = np.nan
            data_plot_R[RFI_mask_idx] = np.nan

        axs1.plot(freq,data_plot_L, label='LL',color='blue')
        axs1.plot(freq,data_plot_R, label='RR',color='red')
        # Purple marker at the selected frequency:
        axs1.vlines(freq_chosen, 0 , 100e9, color = 'purple')
        axs1.set_ylim(power_min,power_max)
        axs1.set_xlim(350,1030)
        axs1.set_xlabel('Frequency',fontsize=fs)
        axs1.tick_params(axis='both',labelsize=fs)
    else:
        t_scan = t_plt[w]
        axs1.plot(t_scan, 10*np.log10(LL_set[w,freq_measured]), label='LL',color='blue',zorder=1)
        axs1.plot(t_scan, 10*np.log10(RR_set[w,freq_measured]), label='RR',color='red',zorder=1)
        # Neighbouring channels, drawn faintly behind the selected one
        # (channels above first, then channels below):
        for neighbours, ll_colour, rr_colour in ((freq_above,'C0','C1'),
                                                 (freq_below,'C9','salmon')):
            for ifreq in neighbours:
                axs1.plot(t_scan, 10*np.log10(LL_set[w,ifreq]), alpha=0.5,color=ll_colour,zorder=0)
                axs1.plot(t_scan, 10*np.log10(RR_set[w,ifreq]), alpha=0.5,color=rr_colour,zorder=0)
        # Purple marker at the selected time sample:
        axs1.vlines(t_scan[time_idx], 0 , 100e9, color = 'purple')

        # Show the time axis as clock time:
        axs1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
        axs1.set_ylim(power_min,power_max)
        axs1.fmt_xdata = mdates.DateFormatter('%H:%M:%S')
        axs1.set_xlabel('Time (UTC)',fontsize=fs)
        axs1.set_xlim(t_scan[0],t_scan[-1])
        axs1.tick_params(axis='both',labelsize=fs)
        axs1.set_title('Scan '+str(scan_chosen)+' , el = '+str(scan_el[scan_idx]),fontsize=fs)

        # Grey background flags a channel in the persistent-RFI mask:
        if freq_measured in RFI_mask_idx:
            axs1.set_facecolor('lightgray')

    axs1.set_ylabel('Power',fontsize=fs)
    axs1.legend(fontsize=fs,markerscale=5)
    axs1.grid()
        
def DVA_Visualization(waterfall_enabled,singlescan):
    """Open the interactive view selected by the two toggles.

    waterfall_enabled -- true: waterfall plot; false: 1-D cross-sections
    singlescan        -- true: one scan, picked from scan_id;
                         false: all scans together
    """
    if waterfall_enabled and singlescan:
        interact(DVA_Waterfall_View_singlescan,scan_chosen = scan_id, RFI_masked = False)
    elif waterfall_enabled:
        interact(DVA_Waterfall_View, RFI_masked = False)
    elif singlescan:
        # Frequency slider 350 to 1030 in steps of df; time-index slider 0 to 1802.
        interact(DVA_Cross_Sections_singlescan, freq_chosen = (350, 1030, df),
                 scan_chosen = scan_id, RFI_masked = False, freq_cross_section = False,
                 time_idx = (0,1802))
    else:
        # Time-index slider covers every sample in t_plt.
        interact(DVA_Cross_Sections, freq_chosen = (350, 1030, df),
                 freq_cross_section = False, time_idx = (0,len(t_plt)-1), RFI_masked = False)

In [9]:
# Launch the updated viewer. Two checkboxes, both initially unticked,
# choose waterfall vs. cross-section plots and single-scan vs. all-scan mode.
interact(DVA_Visualization, waterfall_enabled = False, singlescan = False)


interactive(children=(Checkbox(value=False, description='waterfall_enabled'), Checkbox(value=False, descriptio…

<function __main__.DVA_Visualization(waterfall_enabled, singlescan)>