# Convert DVA scans into SPASS format
## A. Ordog, March 2023
### (1) Explore data structure in example file
### (2) Write out example DVA file in SPASS format

In [None]:
from astropy.io import fits
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate
import h5py
from astropy.time import Time

# (1) Explore data/metadata structure in SPASS files

## Example of Ettore's data files:

In [None]:
# Change directory as needed on elephant
# Open the example SPASS scan and dump the headers of HDU 0 and HDU 1,
# separated by one blank line.
scan = fits.open('/home/ordoga/Python/DVA2/east.2008-07-19_1350_spass_39D_1934.sdfits')
print(repr(scan[0].header), '', repr(scan[1].header), sep='\n')

## Organize metadata by indices

In [None]:
# Each entry pairs a field position within one scan[1].data row with a
# human-readable label. The label lists (headersN) and the position arrays
# (idxN) used by the table-printing cells are derived from these pairs.

# Group 1: fields printed as constant in time within a scan.
_fields1 = [
    (0, 'scan name'),
    (1, 'bandwidth (Hz)'),
    (2, 'scan date'),
    (3, 'dt'),
    (4, 'Tsys'),
    (5, '?'),
    (9, 'f0 (Hz)'),
    (10, 'df (Hz)'),
    (17, 'focus tan'),
    (18, 'focus rot'),
]
headers1 = [label for _, label in _fields1]
idx1 = np.array([position for position, _ in _fields1])

# Group 2: time-varying fields whose meaning is understood.
_fields2 = [
    (6, 't bin'),
    (7, 't [s past MJD start]'),
    (11, 'RA'),
    (12, 'dec'),
    (13, 'az'),
    (14, 'el'),
]
headers2 = [label for _, label in _fields2]
idx2 = np.array([position for position, _ in _fields2])

# Group 3: time-varying fields whose meaning is not yet understood.
_fields3 = [
    (15, 'par. angle'),
    (16, 'focus axi'),
]
headers3 = [label for _, label in _fields3]
idx3 = np.array([position for position, _ in _fields3])

## Print out metadata for 5 different timestamps
## Indexing convention: scan[1].data[t_idx][idx], where t_idx selects the timestamp (row) and idx selects the metadata field within that row

In [None]:
# Print one table per metadata group. Each table has the field positions and
# labels as its first two rows, followed by the values at five sample
# timestamps (rows 0, 1, 2, 500 and 1887 of scan[1].data).
sample_rows = (0, 1, 2, 500, 1887)
metadata_groups = (
    ('Values that stay constant with time within the aziuth scan:', idx1, headers1),
    ('Values that change with time within the azimuth scan (understood)', idx2, headers2),
    ('Values that change with time within the azimuth scan (NOT understood)', idx3, headers3),
)

for group_number, (title, positions, labels) in enumerate(metadata_groups):
    if group_number > 0:
        # Blank line between consecutive tables.
        print('')
    print(title)
    # Each record is converted to an object array so that the mixed-type
    # fields can be picked out with the integer position array.
    table = [positions, labels] + [
        np.array(scan[1].data[row], dtype='object')[positions]
        for row in sample_rows
    ]
    print(tabulate(table))

## Print out Stokes data for single time stamp

In [None]:
# Dump the spectra stored in field 8 of row 120, one Stokes parameter at a
# time; the index after [8][0][0] selects I, Q, U, V in that order.
print('Data in 30 frequency channels at single timestamp (120)')
for stokes_index, stokes_label in enumerate(('Stokes I', 'Stokes Q', 'Stokes U', 'Stokes V')):
    print('')
    print(stokes_label)
    print(scan[1].data[120][8][0][0][stokes_index])
# Also show only the first three channels of Stokes V.
print(scan[1].data[120][8][0][0][3][0:3])

## Plot Stokes I,Q,U,V,PI vs t at single frequency for sanity check

In [None]:
# Collect, for every row of the scan, the time stamp (field 7) and the
# Stokes I, Q, U, V values of frequency channel 10 (field 8), then plot
# them together with the polarized intensity sqrt(Q^2 + U^2).
n_rows = len(scan[1].data)
t = np.empty(n_rows)
StokesI, StokesQ, StokesU, StokesV = (np.empty(n_rows) for _ in range(4))

for k in range(n_rows):
    rec = scan[1].data[k]
    t[k] = rec[7]
    spectra = rec[8][0][0]    # indexed as [Stokes parameter][frequency channel]
    StokesI[k] = spectra[0][10]
    StokesQ[k] = spectra[1][10]
    StokesU[k] = spectra[2][10]
    StokesV[k] = spectra[3][10]

plt.figure(figsize=(12,4))
for values, series_label in ((StokesI, 'Stokes I'),
                             (StokesQ, 'Stokes Q'),
                             (StokesU, 'Stokes U'),
                             (StokesV, 'Stokes V')):
    plt.scatter(t, values, s=5, label=series_label)
plt.scatter(t, np.sqrt(StokesU**2 + StokesQ**2), s=5, label='PI')
plt.legend()
plt.grid()

In [None]:
# Collect the pointing coordinates of the example scan (fields 13, 14, 11
# and 12 are az, el, RA and dec) and plot them against the time axis `t`
# built in the previous cell.
n_rows = len(scan[1].data)
az, el, RA, dec = (np.empty(n_rows) for _ in range(4))

# The loop length is taken from `t`, which must already exist.
for k in range(len(t)):
    rec = scan[1].data[k]
    az[k] = rec[13]
    el[k] = rec[14]
    RA[k] = rec[11]
    dec[k] = rec[12]

# Range of samples to display.
i1 = 0
i2 = 1888
plt.figure(figsize=(12,10))
for values, series_label in ((az, 'az'), (el, 'el'), (RA, 'RA'), (dec, 'dec')):
    plt.plot(t[i1:i2], values[i1:i2], label=series_label)
plt.legend()
plt.grid()
plt.ylim(-100,200)

# Questions:
### (1) Why does azimuth value increment in jumps while RA/dec change smoothly?
### (2) Is it correct that most of the metadata is not used in the map-making code?
### (3) Why are RA and dec taken directly from the data and used to compute az/el, rather than using the telescope-generated az/el?

# (2) Generate example DVA file in SPASS format

## Read in example SPASS file

In [None]:
# Change directory as needed on elephant
# Example SPASS scan; the conversion cells reuse its table layout.
spass_path = '/home/ordoga/Python/DVA2/east.2008-07-19_1350_spass_39D_1934.sdfits'
spass_file = fits.open(spass_path)


## Read in example DVA file

In [None]:
# Open the DVA HDF5 file read-only and pick scan_0 of beam_0 / band_SB0.
dva_file = h5py.File('/media/ordoga/15m_band1_survey/dva_phase1/survey_phase1_day09/dva_survey_phase1_raw_0906.h5','r')
band = dva_file['data']['beam_0']['band_SB0']
dva_dataset = band['scan_0']

# Coordinates, times and noise flag:
metadata = dva_dataset['metadata']
dec = metadata['declination']
ra = metadata['right_ascension']
el = metadata['elevation']
az = metadata['azimuth']
t = metadata['utc']
noise = metadata['noise_state']

# Time in MJD (the 'utc' entries are parsed as ISO-T strings):
t_mjd = Time(t, format='isot',scale='utc').mjd

# For the binned version, the following would be replaced
# by the medians calculated for each selected frequency.
# Here three arbitrary channels are grabbed: 100, 101 and 102
# (the slice 100:103 excludes its end point).
chan = slice(100, 103)
spectra = dva_dataset['data']
# The second axis selects the correlation product.
RR = spectra[:, 0, chan]
LL = spectra[:, 1, chan]
reRL = spectra[:, 2, chan]
imRL = spectra[:, 3, chan]
freq = band['frequency'][chan]

print(RR.shape)
print(freq.shape)

## Set up values from DVA to go into SPASS file
### The only one that needs to be changed here is 'scanname' to include East or West

In [None]:
# --- Descriptive constants -------------------------------------------------
scanname = 'phase1_0906'    # Just a descriptive string. Should include East or West.
scannum = 1    # Don't know what this is. Don't think it matters.
Tsys = 1.0    # Don't know what this is. Don't think it matters.
bandwidth = 680e6    # Full survey bandwidth
freq0 = 350e6    # Leave as this for now. Doesn't get used.
dfreq = 1e6    # Leave as this for now. Doesn't get used.

# --- Quantities derived from the DVA time stamps ---------------------------
dim = len(t)    # number of datapoints (timestamps)
first_stamp = t[0]
date = str(first_stamp)[2:12]    # Take date from first datapoint (extract just date, not time)
dt_days = t_mjd[1] - t_mjd[0]
t_samp = dt_days*24*3600    # Time sampling in seconds
mjd_day_start = np.floor(t_mjd[0])
time_s = (t_mjd - mjd_day_start)*24*3600    # Seconds since the start of the first sample's MJD day

# --- Stokes parameters from the correlation products -----------------------
StokesI = 0.5*(RR + LL)
StokesV = 0.5*(RR - LL)    # Something like that. Good enough for now
StokesQ = reRL.copy()
StokesU = imRL.copy()

# A few sanity checks:
print(dim,date,t_samp,StokesI.shape,time_s)

## Replace data in SPASS example file with DVA data

In [None]:
# Build the DVA output by reusing the SPASS example as a template: keep its
# table layout, trim it to the DVA length and overwrite the row contents.

# The template can only be trimmed, not extended, so fail early with a clear
# message instead of hitting an IndexError part-way through the loop below.
n_template_rows = len(spass_file[1].data)
if dim > n_template_rows:
    raise ValueError(
        'DVA scan has %d samples but the SPASS template only has %d rows'
        % (dim, n_template_rows))

# Copy SPASS file and trim it down to DVA length.
# NOTE: HDUList.copy() is a shallow copy, so dva_new_file and spass_file
# share the same HDU objects; the edits below also show up in spass_file.
dva_new_file = spass_file.copy()
dva_new_file[1].data = spass_file[1].data[0:dim]
print(dva_new_file[1].data.shape)

# Stokes arrays in the order they are stored in field 8 of each row.
stokes_planes = (StokesI, StokesQ, StokesU, StokesV)
n_chan = StokesI.shape[1]    # number of DVA frequency channels being written

# Loop through time data points and populate new file:
for i in range(0, dim):
    row = dva_new_file[1].data[i]
    row[0] = scanname
    row[1] = bandwidth
    row[2] = date
    row[3] = t_samp
    row[4] = Tsys
    row[5] = scannum
    row[7] = time_s[i]

    # Fill the leading channels with our Stokes values. The template keeps
    # its original number of frequency channels, so the remaining channels
    # are zeroed (we will just not make maps with these channels).
    spectra = row[8][0][0]
    for k, plane in enumerate(stokes_planes):
        spectra[k][0:n_chan] = plane[i, :]
        spectra[k][n_chan:] = 0

    row[9] = freq0
    row[10] = dfreq
    # Use the DVA right ascension (`ra`). The previous code wrote `RA`, the
    # array read from the SPASS example scan, so the output carried the
    # template's RA instead of the DVA pointing.
    # NOTE(review): confirm the DVA ra/dec units match what SPASS expects.
    row[11] = ra[i]
    row[12] = dec[i]
    row[13] = az[i]
    row[14] = el[i]

## Write out new file

In [None]:
# Destination of the converted scan; overwrite=True replaces any existing
# file at this path.
new_dva_filename = '/home/ordoga/Python/DVA2/test_dva_spass_file.fits'
dva_new_file.writeto(new_dva_filename, overwrite=True)

## Check to see how new file was populated

In [None]:
# Re-print the metadata tables and one Stokes dump, this time from the newly
# populated dva_new_file, to check that the values were written as intended.

# (position, label) pairs for each metadata group; positions index into one
# dva_new_file[1].data row.
_fields1 = [
    (0, 'scan name'),
    (1, 'bandwidth (Hz)'),
    (2, 'scan date'),
    (3, 'dt'),
    (4, 'Tsys'),
    (5, '?'),
    (9, 'f0 (Hz)'),
    (10, 'df (Hz)'),
    (17, 'focus tan'),
    (18, 'focus rot'),
]
headers1 = [label for _, label in _fields1]
idx1 = np.array([position for position, _ in _fields1])

_fields2 = [
    (6, 't bin'),
    (7, 't [s past MJD start]'),
    (11, 'RA'),
    (12, 'dec'),
    (13, 'az'),
    (14, 'el'),
]
headers2 = [label for _, label in _fields2]
idx2 = np.array([position for position, _ in _fields2])

_fields3 = [
    (15, 'par. angle'),
    (16, 'focus axi'),
]
headers3 = [label for _, label in _fields3]
idx3 = np.array([position for position, _ in _fields3])

# One table per group: positions, labels, then values at five sample rows.
sample_rows = (0, 1, 2, 200, 500)
metadata_groups = (
    ('Values that stay constant with time within the aziuth scan:', idx1, headers1),
    ('Values that change with time within the azimuth scan (understood)', idx2, headers2),
    ('Values that change with time within the azimuth scan (NOT understood)', idx3, headers3),
)
for group_number, (title, positions, labels) in enumerate(metadata_groups):
    if group_number > 0:
        print('')
    print(title)
    table = [positions, labels] + [
        np.array(dva_new_file[1].data[row], dtype='object')[positions]
        for row in sample_rows
    ]
    print(tabulate(table))

# Stokes spectra at row 120 of the new file.
print('')
print('')
print('Data in 30 frequency channels at single timestamp (120)')
for stokes_index, stokes_label in enumerate(('Stokes I', 'Stokes Q', 'Stokes U', 'Stokes V')):
    print('')
    print(stokes_label)
    print(dva_new_file[1].data[120][8][0][0][stokes_index])