In [1]:
import numpy as np
import pandas as pd
import glob
import os
from astropy.time import Time

In [2]:
dat_files = glob.glob("gbt_dats/traas_dats/full_lband/*dat")
dat_files[0]

'gbt_dats/traas_dats/full_lband/spliced_blc4041424344454647_guppi_58844_09167_HIP95194_0023.gpuspec.0000.dat'

In [3]:
def format_directory(dir_path, band):
    dat_files = glob.glob(dir_path)
    file_names = np.empty_like(dat_files)
    julian_day = np.empty_like(dat_files)
    julian_second = np.empty_like(dat_files)
    band_name = []
    for i in range(len(dat_files)):
        file_names[i] = os.path.basename(dat_files[i])
        julian_day[i] = file_names[i].split("_")[3]
        julian_second[i] = file_names[i].split("_")[4]
        band_name.append(band)
    data_dict = {"filename":file_names, "band":band_name, "julian day":julian_day, "julian second":julian_second}
    df = pd.DataFrame(data_dict)
    return df

In [4]:
l_band = format_directory("gbt_dats/traas_dats/full_lband/*dat", "L")
s_band = format_directory("gbt_dats/traas_dats/full_sband/*dat", "S")
c_band = format_directory("gbt_dats/traas_dats/full_cband/*dat", "C")
x_band = format_directory("gbt_dats/traas_dats/full_xband/*dat", "X")

In [5]:
all_files = pd.concat([l_band, s_band, c_band, x_band], axis=0, ignore_index=True)
all_files

Unnamed: 0,filename,band,julian day,julian second
0,spliced_blc4041424344454647_guppi_58844_09167_...,L,58844,09167
1,spliced_blc4041424344454647_guppi_58885_62397_...,L,58885,62397
2,spliced_blc4041424344454647_guppi_58885_59164_...,L,58885,59164
3,spliced_blc5051525354555657_guppi_58892_34784_...,L,58892,34784
4,spliced_blc4041424344454647_guppi_58844_06267_...,L,58844,06267
...,...,...,...,...
646,spliced_blc00010203040506o7o0111213141516o0212...,X,58806,42870
647,spliced_blc00010203040506o7o0111213141516o7o02...,X,58810,10766
648,spliced_blc10111213141516o7o0212223242526o7o03...,X,58868,40636
649,spliced_blc00010203040506o7o0111213141516o7o02...,X,58810,17084


In [6]:
def julian_to_yyyymmdd(julian_day, julian_second):
    total_date = np.empty_like(julian_day)
    for i in range(len(julian_day)):
        total_date[i] = julian_day[i] + "." + julian_second[i]
    times = list(total_date)
    t = Time(times, format="mjd")
    return t.isot

In [7]:
times = julian_to_yyyymmdd(all_files["julian day"].values, all_files["julian second"])
all_files["yyyymmdd"] = times

In [8]:
all_files.sort_values("yyyymmdd")

Unnamed: 0,filename,band,julian day,julian second,yyyymmdd
477,spliced_blc40414243444546o7o0515253545556o7o06...,C,58744,13593,2019-09-18T03:15:44.352
474,spliced_blc40414243444546o7o0515253545556o7o06...,C,58744,13914,2019-09-18T03:20:21.696
478,spliced_blc40414243444546o7o0515253545556o7o06...,C,58744,14235,2019-09-18T03:24:59.040
399,spliced_blc40414243444546o7o0515253545556o7o06...,C,58744,14558,2019-09-18T03:29:38.112
432,spliced_blc40414243444546o7o0515253545556o7o06...,C,58744,14881,2019-09-18T03:34:17.184
...,...,...,...,...,...
118,spliced_blc0001020304050607_guppi_58965_60374_...,L,58965,60374,2020-04-26T14:29:23.136
61,spliced_blc0001020304050607_guppi_58965_60691_...,L,58965,60691,2020-04-26T14:33:57.024
48,spliced_blc0001020304050607_guppi_58965_61011_...,L,58965,61011,2020-04-26T14:38:33.504
124,spliced_blc0001020304050607_guppi_58965_61330_...,L,58965,61330,2020-04-26T14:43:09.120


## Save DataFrame to csv

In [9]:
all_files.to_csv("file_dates.csv")

## Here are the earliest and most recently observed files from the bl_tess dataset

In [10]:
all_files.sort_values("yyyymmdd")["yyyymmdd"].iloc[0], all_files.sort_values("yyyymmdd")["yyyymmdd"].iloc[-1]

('2019-09-18T03:15:44.352', '2020-04-26T14:47:45.600')