In [1]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
import sys
from pyuvdata import UVData, UVCal
import hera_cal as hc
from hera_cal.data import DATA_PATH
from collections import OrderedDict as odict
from pyuvdata import utils as uvutils
import copy
import uvtools as uvt
from hera_cal.datacontainer import DataContainer
import aipy
import operator
import astropy

In [2]:
def make_dictionary(JD, path='/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/'):
    """Map every (time, LST) integration of one day to the file that holds it.

    Parameters
    ----------
    JD : str
        Day identifier as a string, e.g. '2458098'. It names both the
        sub-directory under ``path`` and the prefix of the file names.
    path : str, optional
        Root directory containing one sub-directory per day. Defaults to the
        location that was previously hard-coded in this function.

    Returns
    -------
    dict
        ``{(time, lst): filename}`` with one entry per unique value of
        ``uvd.time_array`` found in the matching files.
    """
    # load in all files for that day
    filenames = sorted(glob.glob(os.path.join(path, JD + '/zen.' + JD + '*.HH.uvh5')))

    # create dictionary
    master_dict = {}

    for filename in filenames:
        uvd = UVData()
        uvd.read(filename, read_data=False, read_metadata=True)

        # Take each LST from the same row as its time. Sorting the unique LSTs
        # and the unique times independently pairs them wrongly as soon as the
        # LST wraps around inside a file.
        times, first_rows = np.unique(uvd.time_array, return_index=True)
        lsts = uvd.lst_array[first_rows]

        for time, lst in zip(times, lsts):
            master_dict[(time, lst)] = filename

    return master_dict

In [3]:
def find_files(JD, LSTstart, LSTend):
    """Return the files of day ``JD`` that have an integration in an LST range.

    The (time, LST) -> file dictionary is rebuilt on every call through
    ``make_dictionary(JD)``.

    Parameters
    ----------
    JD : str
        Day identifier passed on to ``make_dictionary``.
    LSTstart, LSTend : float
        Range limits, inclusive at both ends. They may be given in either
        order.

    Returns
    -------
    numpy.ndarray
        Sorted, de-duplicated file names (the result of ``np.unique``).
    """
    lst_low = min(LSTstart, LSTend)
    lst_high = max(LSTstart, LSTend)

    master_Dict = make_dictionary(JD)

    # Walk keys and values together; indexing list(master_Dict.values()) inside
    # the loop rebuilt the whole value list for every single key.
    files = [filename
             for (_, lst), filename in master_Dict.items()
             if lst_low <= lst <= lst_high]

    return np.unique(files)

In [4]:
%%time
# Look up the files of day 2458098 that cover this LST range; find_files
# rebuilds the whole file dictionary via make_dictionary on each call.
find_files('2458098', 5.6926909028766195, 6.173443655032846)

CPU times: user 14.2 s, sys: 787 ms, total: 14.9 s
Wall time: 38 s


array(['/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.13298.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.14043.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.14789.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.15535.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.16280.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.17026.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.17772.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.18517.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.19263.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.20008.HH.uvh5'],
      dtype='<U75')

In [4]:
def find_files_ND(JD, LSTstart, LSTend, master_Dict):
    """Find files covering an LST range without making a new dictionary.

    Parameters
    ----------
    JD : str
        Not used by the lookup; kept so the signature parallels ``find_files``.
    LSTstart, LSTend : float
        Range limits, inclusive at both ends. They may be given in either
        order.
    master_Dict : dict
        Mapping whose keys are 2-tuples with the LST as second element and
        whose values are file names.

    Returns
    -------
    numpy.ndarray
        Sorted, de-duplicated file names (the result of ``np.unique``).
    """
    lst_low = min(LSTstart, LSTend)
    lst_high = max(LSTstart, LSTend)

    # Both limits are inclusive, so an LST that sits exactly on a shared edge
    # is reported for both neighbouring ranges.
    # Keys and values are walked together; indexing list(master_Dict.values())
    # inside the loop rebuilt the whole value list for every single key.
    files = [filename
             for (_, lst), filename in master_Dict.items()
             if lst_low <= lst <= lst_high]

    return np.unique(files)

In [6]:
%%time
# Build the (time, LST) -> file dictionary once so that later lookups can reuse it.
mast = make_dictionary ('2458098')

CPU times: user 13.7 s, sys: 147 ms, total: 13.9 s
Wall time: 14 s


In [7]:
%%time
# Same LST range as the earlier find_files call, but looked up in the prebuilt dictionary `mast`.
find_files_ND('2458098', 5.6926909028766195, 6.173443655032846, mast)

CPU times: user 17.1 ms, sys: 1 ms, total: 18.1 ms
Wall time: 17.5 ms


array(['/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.13298.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.14043.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.14789.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.15535.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.16280.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.17026.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.17772.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.18517.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.19263.HH.uvh5',
       '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.20008.HH.uvh5'],
      dtype='<U75')

In [5]:
def max_min_LSTs(dict_list):
    """Return the largest and smallest LST among a collection of keys.

    Parameters
    ----------
    dict_list : iterable of 2-tuples
        Pairs whose second element is the LST, e.g. ``list(master_dict)``.

    Returns
    -------
    tuple
        ``(Max, Min)`` -- note the order: maximum first.

    Raises
    ------
    ValueError
        If ``dict_list`` is empty, because no extremum exists.
    """
    lsts = [lst for _, lst in dict_list]
    if not lsts:
        raise ValueError('max_min_LSTs needs at least one (time, LST) pair')

    # The built-ins replace the hand-written loop and drop the 0 / 1e99
    # sentinels, which gave a wrong maximum when every value was negative.
    return max(lsts), min(lsts)

In [6]:
def slice_day(JD, interval):
    """Split one day into consecutive LST bins and list the files for each bin.

    Parameters
    ----------
    JD : str
        Day identifier passed on to ``make_dictionary``.
    interval : float
        Width of each bin, in the same units as the LSTs stored in the
        dictionary. Must be positive.

    Returns
    -------
    sliced : dict
        ``{(JD, start, start + interval): files}`` where ``files`` is the
        array returned by ``find_files_ND`` for that bin.
    master_Dict : dict
        The (time, LST) -> file dictionary built for the day.

    Raises
    ------
    ValueError
        If ``interval`` is not positive.
    """
    if interval <= 0:
        raise ValueError('interval must be positive')

    master_Dict = make_dictionary(JD)
    sliced = {}
    if not master_Dict:
        # No files matched, so there is nothing to slice.
        return sliced, master_Dict

    # max, min LST values
    Max, Min = max_min_LSTs(list(master_Dict))

    starts = np.arange(Min, Max, interval)
    if len(starts) == 0:
        # Min == Max (a single LST): np.arange is empty, so make one bin by hand.
        starts = np.array([Min])

    last = len(starts) - 1
    for n, start in enumerate(starts):
        stop = start + interval
        # The last bin always reaches Max, even if floating-point rounding
        # leaves start + interval marginally below it.
        query_stop = max(stop, Max) if n == last else stop
        sliced[(JD, start, stop)] = find_files_ND(JD, start, query_stop, master_Dict)

    return sliced, master_Dict

In [10]:
# TODO: Are the chunks exactly 2 minutes long? If not, what should be done with the leftovers:
# put them into a new bin of their own, or add them to the closest existing bin?

In [46]:
# Keys of the dictionary as a list of (J, L) tuples.
dic = list(mast)
# Transpose into one tuple holding the first elements and one holding the second.
J, L = zip(*dic)
# Array copy of the second elements, used for vectorised comparisons in a later cell.
LS = np.array(L)
print(L)

(5.6926909028766195, 5.6934738878774285, 5.694256872878221, 5.695039857879055, 5.695822842879846, 5.696605827880649, 5.697388812881452, 5.698171797882268, 5.69895478288304, 5.699737767883854, 5.7005207528846515, 5.701303737885449, 5.702086722886259, 5.702869707887054, 5.703652692887834, 5.704435677888656, 5.705218662889461, 5.706001647890237, 5.7067846328910266, 5.707567617891814, 5.708350602892602, 5.709133587893402, 5.709916572894174, 5.710699557894958, 5.711482542895739, 5.712265527896536, 5.713048512897316, 5.713831497898082, 5.7146144828988605, 5.715397467899652, 5.716180452900428, 5.716963437901217, 5.717746422902019, 5.718529407902764, 5.71931239290355, 5.720095377904322, 5.720878362905076, 5.721661347905873, 5.722444329972781, 5.723227314973548, 5.724010299974299, 5.724793284975077, 5.72557626997584, 5.726359254976601, 5.727142239977362, 5.727925224978108, 5.7287082099788815, 5.729491194979639, 5.730274179980409, 5.7310571649811655, 5.731840149981934, 5.732623134982686, 5.73340

In [47]:
# Loop-free range test: positions in LS whose value lies inside the closed interval.
np.where(np.logical_and(LS >= 5.6934738878774284 , LS <= 5.695822842879847))[0]

array([1, 2, 3, 4])

In [48]:
# The keys converted to an array and back to a list of rows; not used again in the visible cells.
dicti = list(np.array((dic)))
# Structured array with two float fields per key.
Dict = np.array(dic, dtype='float, float')
print(Dict[1])

(2458098.12570586, 5.69347389)


In [49]:
# Check in the dictionary which file the data is in, then go into that file and do a "select".

#uvd = UVData()
#uvd.read(file, read_data=False, read_metadata=True)
#
#
#
#
#

EVERYTHING BELOW THIS POINT TESTS SLICING UP THE DATA, BUT USING ONLY TWO FILES

In [17]:
def test_dict():
    """Build the (time, LST) -> file dictionary for two fixed files only.

    Small-scale counterpart of ``make_dictionary``, used for testing.

    Returns
    -------
    dict
        ``{(time, lst): filename}`` with one entry per unique value of
        ``uvd.time_array`` found in the two files.
    """
    # make dictionary code but only for two files
    files = ['/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5',
             '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.13298.HH.uvh5']

    # create dictionary
    master_dict = {}

    for filename in files:
        uvd = UVData()
        uvd.read(filename, read_data=False, read_metadata=True)

        # Take each LST from the same row as its time. Sorting the unique LSTs
        # and the unique times independently pairs them wrongly as soon as the
        # LST wraps around inside a file.
        times, first_rows = np.unique(uvd.time_array, return_index=True)
        lsts = uvd.lst_array[first_rows]

        for time, lst in zip(times, lsts):
            master_dict[(time, lst)] = filename

    return master_dict

In [7]:
def test_sliced(JD, interval, master_Dict):
    """Split a prebuilt dictionary into LST bins and list the files per bin.

    Parameters
    ----------
    JD : str
        Day identifier; stored in each bin key and passed to ``find_files_ND``.
    interval : float
        Width of each bin, in the same units as the LSTs stored in
        ``master_Dict``. Must be positive.
    master_Dict : dict
        Mapping from (time, LST) keys to file names.

    Returns
    -------
    sliced : dict
        ``{(JD, start, start + interval): files}`` where ``files`` is the
        array returned by ``find_files_ND`` for that bin.
    master_Dict : dict
        The dictionary that was passed in, unchanged.

    Raises
    ------
    ValueError
        If ``interval`` is not positive.
    """
    if interval <= 0:
        raise ValueError('interval must be positive')

    sliced = {}
    if not master_Dict:
        # Nothing to slice.
        return sliced, master_Dict

    # max, min LST values
    Max, Min = max_min_LSTs(list(master_Dict))

    starts = np.arange(Min, Max, interval)
    if len(starts) == 0:
        # Min == Max (a single LST): np.arange is empty, so make one bin by hand.
        starts = np.array([Min])

    last = len(starts) - 1
    for n, start in enumerate(starts):
        stop = start + interval
        # The last bin always reaches Max, even if floating-point rounding
        # leaves start + interval marginally below it.
        query_stop = max(stop, Max) if n == last else stop
        sliced[(JD, start, stop)] = find_files_ND(JD, start, query_stop, master_Dict)

    return sliced, master_Dict

In [18]:
# NOTE(review): this rebinds the name `test_dict` from the function to the
# dictionary it returns, so the function cannot be called again afterwards and
# re-running this cell fails. Consider a different variable name.
test_dict = test_dict()

In [19]:
# Dump the whole two-file dictionary: (time, LST) key -> file name.
print(test_dict)

{(2458098.125581584, 5.6926909028766195): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.1257058596, 5.6934738878774285): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.1258301353, 5.694256872878221): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.125954411, 5.695039857879055): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.1260786867, 5.695822842879846): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.1262029624, 5.696605827880649): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.126327238, 5.697388812881452): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.1264515137, 5.698171797882268): '/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5', (2458098.1265757894, 5.69895478288304): 

In [54]:
# Spot check of the lookup on the two-file dictionary with a narrow LST range.
find_files_ND('2458098', 5.69269090287661954, 5.6934738878774285, test_dict)

array(['/lustre/aoc/projects/hera/H1C_IDR2/IDR2_2/2458098/zen.2458098.12552.HH.uvh5'],
      dtype='<U75')

In [55]:
# NOTE(review): test_sliced returns a (sliced, master_Dict) tuple, so `sliced`
# holds that whole tuple here rather than only the bin dictionary.
sliced = test_sliced('2458098', 0.000581776, test_dict)

`split_data` is the general function; `test_dict` and `test_sliced` are temporary helpers used only for testing.

In [9]:
def split_data(JD, interval):
    """Write one uvh5 file per LST bin of width ``interval``.

    The bins and their source files come from ``test_sliced`` (to be replaced
    by ``slice_day``). For each bin all source files are read together in a
    single ``read`` call, the integrations falling inside the bin are
    selected, and the result is written to
    ``zen.<JD>.<start>-<end>.HH.uvh5`` in the current directory.

    Parameters
    ----------
    JD : str
        Day identifier, used in the bin keys and the output file names.
    interval : float
        Width of each LST bin.

    Returns
    -------
    list of str
        Names of the files that were written, in order of increasing LST.
    """
    # make dictionary of LST intervals to file(s)
    sliced, master_dict = test_sliced(JD, interval, test_dict) #will be slice_day

    bins = sorted(sliced, key=lambda key: key[1])
    written = []

    for n, key in enumerate(bins):
        _, lst_start, lst_end = key
        files = [str(name) for name in sliced[key]]
        if not files:
            continue

        # One UVData object per bin; a list of files is read in a single call.
        # The data itself must be read (not only the metadata) to be written.
        uvd = UVData()
        uvd.read(files)

        # Take each LST from the same row as its time so the two stay aligned.
        times, first_rows = np.unique(uvd.time_array, return_index=True)
        lsts = uvd.lst_array[first_rows]

        if n == len(bins) - 1:
            # The last bin is closed at the top so the maximum LST is kept.
            in_bin = lsts >= lst_start
        else:
            # Half-open bins: an integration lying exactly on a shared edge
            # is written once, not twice.
            in_bin = np.logical_and(lsts >= lst_start, lsts < lst_end)
        if not in_bin.any():
            continue

        uvd.select(times=times[in_bin])
        outfile = 'zen.{}.{}-{}.HH.uvh5'.format(JD, lst_start, lst_end)
        uvd.write_uvh5(outfile)
        written.append(outfile)

    return written

In [10]:
def read_write_file(files, LSTmin, LSTmax, outfile=None):
    """Read file(s), keep the integrations in an LST range and write them out.

    Parameters
    ----------
    files : str or sequence of str
        File name(s) to read.
    LSTmin, LSTmax : float
        Range limits, inclusive at both ends.
    outfile : str, optional
        Name of the uvh5 file to write. When omitted, the name is built as
        ``zen.<integer part of first selected time>.<LSTmin>-<LSTmax>.HH.uvh5``.

    Returns
    -------
    str or None
        The name of the written file, or None when no integration falls
        inside the range (nothing is written in that case).
    """
    if isinstance(files, np.ndarray):
        # e.g. the array returned by find_files_ND
        files = [str(name) for name in files]

    # The data itself must be read (not only the metadata) to be written.
    uvd = UVData()
    uvd.read(files)

    # Take each LST from the same row as its time so the two stay aligned.
    times, first_rows = np.unique(uvd.time_array, return_index=True)
    lsts = uvd.lst_array[first_rows]

    in_range = np.logical_and(lsts >= LSTmin, lsts <= LSTmax)
    if not in_range.any():
        return None

    selected_times = times[in_range]
    uvd.select(times=selected_times)

    if outfile is None:
        outfile = 'zen.{}.{}-{}.HH.uvh5'.format(int(selected_times[0]), LSTmin, LSTmax)
    uvd.write_uvh5(outfile)
    return outfile

In [None]:
def split_data_2files(JD, interval):
    """Write one uvh5 file per LST bin, reading the source files one at a time.

    Variant of ``split_data`` for bins that span more than one file: each
    source file is read separately and trimmed to the bin, and the trimmed
    pieces are then combined before writing. The output goes to
    ``zen.<JD>.<start>-<end>.HH.uvh5`` in the current directory.

    Parameters
    ----------
    JD : str
        Day identifier, used in the bin keys and the output file names.
    interval : float
        Width of each LST bin.

    Returns
    -------
    list of str
        Names of the files that were written, in order of increasing LST.
    """
    # make dictionary of LST intervals to file(s)
    sliced, master_dict = test_sliced(JD, interval, test_dict) #will be slice_day

    bins = sorted(sliced, key=lambda key: key[1])
    written = []

    for n, key in enumerate(bins):
        _, lst_start, lst_end = key
        is_last = (n == len(bins) - 1)

        combined = None
        for file in sliced[key]:
            # A fresh object per file: reading a second file into the same
            # object would not append to what was read before.
            uvd = UVData()
            uvd.read(str(file))

            # Take each LST from the same row as its time so they stay aligned.
            times, first_rows = np.unique(uvd.time_array, return_index=True)
            lsts = uvd.lst_array[first_rows]

            if is_last:
                # The last bin is closed at the top so the maximum LST is kept.
                in_bin = lsts >= lst_start
            else:
                # Half-open bins: an integration lying exactly on a shared
                # edge is written once, not twice.
                in_bin = np.logical_and(lsts >= lst_start, lsts < lst_end)
            if not in_bin.any():
                continue

            uvd.select(times=times[in_bin])
            combined = uvd if combined is None else combined + uvd

        if combined is None:
            continue

        outfile = 'zen.{}.{}-{}.HH.uvh5'.format(JD, lst_start, lst_end)
        combined.write_uvh5(outfile)
        written.append(outfile)

    return written