# In [1]:
import pandas as pd
import numpy as np
import os
from astropy.utils.data import download_file
import ssl
# WARNING: this swaps the ssl module's default HTTPS context factory for the
# unverified one, so HTTPS requests relying on the default context are made
# without certificate verification for the rest of this process.
# NOTE(review): presumably a workaround for certificate errors during the
# light-curve downloads -- confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
import shutil


def duration_data_to_df():
    """
    Strip the BATSE duration data down to the required columns and pickle it.

    Reads "summary/Duration_BATSE_new.dat" and "summary/Basic_BATSE_new.dat"
    (whitespace separated, lines starting with '#' ignored), joins them on
    their first column and keeps six columns, named 'Trig_id', 'GRBname',
    'T50', 'start_T50', 'T90' and 'start_T90'. The four numeric columns are
    converted with errors coerced to NaN, duplicated 'Trig_id' rows are
    dropped and the table, indexed by 'Trig_id', is written to
    "DataFrames/duration_data_BATSE.dat".

    Returns None; the result is the pickle file on disk.
    """
    # Create the output directory (no error if it is already there)
    os.makedirs("DataFrames", exist_ok=True)

    # Load the tables. The separator is a regex, so it must be a raw string:
    # "\s" in a plain literal is an invalid escape sequence.
    DF = pd.read_table("summary/Duration_BATSE_new.dat", sep=r"\s+", comment='#', header=None, index_col=0)
    GRB_Names = pd.read_table("summary/Basic_BATSE_new.dat", sep=r"\s+", comment='#', header=None, usecols=[0, 1, 2], index_col=0)

    # The name is spread over two whitespace-separated fields; glue them
    # back together into a single column
    GRB_Names[1] = GRB_Names[1] + " " + GRB_Names[2]
    del GRB_Names[2]

    # Put the name next to the duration values (rows are matched on the index)
    DF = pd.concat([GRB_Names, DF], axis=1)
    DF.reset_index(inplace=True)
    # Plain positional labels; a list (not a set) so the order is guaranteed.
    # This also fails loudly if the table does not have exactly 8 columns.
    DF.columns = list(range(8))

    # Only take required columns; copy so the assignments below act on an
    # independent frame rather than on a slice of the full table
    DF = DF.iloc[:, [0, 1, 2, 4, 5, 7]].copy()
    DF.columns = ['Trig_id', 'GRBname', 'T50', 'start_T50', 'T90', 'start_T90']  # Name columns

    # Convert columns to numeric values
    for col in ['T50', 'start_T50', 'T90', 'start_T90']:
        # If not possible write NaN
        DF[col] = pd.to_numeric(DF[col], errors='coerce')

    DF.drop_duplicates(subset='Trig_id', inplace=True)  # Drop duplicate data
    DF.set_index('Trig_id', inplace=True, drop=True)  # Set index to Trig_id

    # Save data
    DF.to_pickle("DataFrames/duration_data_BATSE.dat")

    return None

def fluence_data_to_df():
    """
    Build the hardness table from the two BATSE fluence files and pickle it.

    Both "summary/Fluence_BATSE.dat" and "summary/Fluence_BATSE_cont.dat" are
    read in five passes each; every pass keeps the raw lines sharing one
    remainder modulo 5, and the passes are glued side by side. The two
    resulting tables are stacked, and the per-trigger fluence, hardness and
    peak flux are derived from fixed column positions. Only the hardness is
    written, to "DataFrames/hardness_data_BATSE.dat".
    """
    def read_every_fifth(path, remainder):
        # One pass over the file keeping raw lines with line % 5 == remainder
        return pd.read_table(
            path,
            sep=' ',
            comment='#',
            header=None,
            skipinitialspace=True,
            skiprows=lambda line_no: line_no % 5 != remainder,
        )

    def read_records(path, remainders):
        # Passes are placed side by side, in the order the remainders are given
        passes = [read_every_fifth(path, remainder) for remainder in remainders]
        return pd.concat(passes, axis=1, ignore_index=True)

    # The first file is read starting at remainder 1, the continuation file
    # starting at remainder 0
    first_part = read_records("summary/Fluence_BATSE.dat", (1, 2, 3, 4, 0))
    second_part = read_records("summary/Fluence_BATSE_cont.dat", (0, 1, 2, 3, 4))
    table = pd.concat([first_part, second_part], axis=0, ignore_index=True)

    trigger = table.iloc[:, 0]

    def indexed_by_trigger(values, label):
        # Two-column frame (trigger, values) turned into a trigger-indexed frame
        frame = pd.concat([trigger, values], axis=1, ignore_index=True)
        frame.columns = ["Trigger", label]
        return frame.set_index('Trigger', drop=True)

    # Sum of columns 1, 3, 5 and 7 (computed, but currently not saved)
    total = table.iloc[:, 1] + table.iloc[:, 3] + table.iloc[:, 5] + table.iloc[:, 7]
    fluence = indexed_by_trigger(total, "fluence")

    # Ratio of column 3 to column 1; zeros and infinities become NaN
    ratio = table.iloc[:, 3] / table.iloc[:, 1]
    hardness = indexed_by_trigger(ratio, "hardness")
    hardness = hardness.replace([-np.inf, 0, np.inf], np.nan)

    # Column 9 (computed, but currently not saved)
    peak_flux = indexed_by_trigger(table.iloc[:, 9], "peak_flux")

    # Save file
    if 'DataFrames' not in os.listdir():
        os.mkdir("DataFrames")

    #fluence.to_pickle("DataFrames/fluence_data_BATSE.dat")
    hardness.to_pickle("DataFrames/hardness_data_BATSE.dat")
    #peak_flux.to_pickle("DataFrames/peak_flux_BATSE.dat")


def get_LC(trig):
    """
    Download the light curve file of one trigger and store it in "BATSE2".

    trig: trigger id, as a string of digits or an integer.

    The file is fetched from the HEASARC archive, whose folders group the
    triggers in blocks of 200 (00001_00200, 00201_00400, ...), and is moved to
    "BATSE2/GRB<trig>.fits".

    Returns True on success. On any download or file-system error a hint is
    printed and False is returned; nothing is raised for such failures.
    """
    # Accept integer ids as well; the URL is built with string methods
    trig = str(trig)

    # First trigger of the 200-wide archive folder holding this trigger, minus one
    interval = (int(trig) - 1) // 200 * 200
    folder = f"{str(interval + 1).zfill(5)}_{str(interval + 200).zfill(5)}"
    lc_url = (
        "https://heasarc.gsfc.nasa.gov/FTP/compton/data/batse/trigger/"
        f"{folder}/{trig.zfill(5)}_burst/discsc_bfits_{trig}.fits.gz"
    )
    batlc_path = f"BATSE2/GRB{trig}.fits"

    try:
        # Make sure the target folder is there so the move cannot fail on it
        os.makedirs("BATSE2", exist_ok=True)
        tmp_path = download_file(lc_url)
        shutil.move(tmp_path, batlc_path)
    except Exception:
        # Best effort: report and carry on. Not a bare "except:", so that
        # Ctrl-C / SystemExit still stop a long download run.
        print(f"Download GRB{trig} manually (not automatically downloaded)")
        return False
    return True


def update_LCs():
    """
    Download every light curve that is not yet present in "BATSE2".

    The trigger ids are taken from the index of
    "DataFrames/duration_data_BATSE.dat"; that file is created with
    duration_data_to_df() when missing. A trigger counts as already downloaded
    when "BATSE2/GRB<trig>.fits" exists; all others are fetched with get_LC().
    Failed downloads are written to "Error_log_BATSE2.txt", which is
    overwritten on every call.

    Returns a dict with the keys 'Downloaded', 'Error' and 'Existed', each
    holding the list of trigger ids (as strings) that ended in that state.
    """
    # Make sure the required files are downloaded
    #if 'Duration_BATSE.dat' not in os.listdir('summary'):
    #    get_summary_files()
    # isfile() instead of listing the folder: also works when "DataFrames"
    # does not exist yet
    if not os.path.isfile(os.path.join("DataFrames", "duration_data_BATSE.dat")):
        duration_data_to_df()
    os.makedirs("BATSE2", exist_ok=True)

    # Load trig_ids from file
    trigs = list(pd.read_pickle("DataFrames/duration_data_BATSE.dat").index)

    # Already downloaded files: get_LC() stores them as "GRB<trig>.fits", so
    # strip exactly that prefix and suffix to get back the trigger id
    prefix, suffix = "GRB", ".fits"
    downloaded = {
        fname[len(prefix):-len(suffix)]
        for fname in os.listdir("BATSE2")
        if fname.startswith(prefix) and fname.endswith(suffix)
    }

    operations = {'Downloaded': [], 'Error': [], 'Existed': []}
    error_lines = []

    # Loop through trigger ids
    for trig in trigs:
        trig = str(trig)
        if trig in downloaded:
            operations['Existed'].append(trig)
            continue

        # Not downloaded yet: fetch it and log the outcome
        if get_LC(trig):
            operations['Downloaded'].append(trig)
        else:
            print(f"{trig} not downloaded")
            operations['Error'].append(trig)
            error_lines.append(f"{trig} \t downloading error \n")

    # Write errors to log
    with open("Error_log_BATSE2.txt", "w") as err_file:
        err_file.write("".join(error_lines))

    # List of operations
    return operations

if __name__ == "__main__":
    # Create the working folders that are not there yet
    for folder in ("summary", "DataFrames"):
        if folder not in os.listdir():
            os.mkdir(folder)

    # Update the lightcurves (only the fluence/hardness step is enabled)
    #duration_data_to_df()
    fluence_data_to_df()
    #log = update_LCs()