In [1]:
'''Swift specific but good for inspiration'''
import numpy as np
import pandas as pd
from astropy.io import fits
import os

# Load the pre-computed catalogue tables used by the functions below.
# NOTE: read_pickle unpickles arbitrary objects -- only load trusted files.
duration_data, fluence_data, data_problems, outliers = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "DataFrames/duration_data_BATSE.dat",
        "DataFrames/fluence_data_BATSE.dat",
        "DataFrames/data_problems_BATSE.dat",
        "DataFrames/outliers_BATSE.dat",
    )
)


def cut_norm_lc(filename):
    """Prepare a single light curve: cut it to T90 and normalise by fluence.

    The trigger number is parsed from the file name (``GRB<trigger>.fits``).
    The ``TIMES`` and ``RATES`` fields of HDU 2 are placed side by side, rows
    outside ``[start_T90, start_T90 + T90]`` are discarded, the remainder is
    resampled onto a 64 ms grid (empty bins are forward-filled) and the rate
    columns are divided by the burst fluence.

    Parameters
    ----------
    filename : str
        Path to the FITS file, e.g. ``"BATSE/GRB1008.fits"``.

    Returns
    -------
    (int, pandas.DataFrame)
        Number of rows of the prepared light curve and the light curve
        itself (columns labelled 2, 3, 4, 5).

    Raises
    ------
    ValueError
        If no trigger number can be parsed from the file name.
    KeyError
        If the trigger is missing from ``duration_data`` or ``fluence_data``.
    """
    # Parse the trigger from the base name so the result does not depend on
    # the length of the directory prefix.
    stem = os.path.splitext(os.path.basename(filename))[0]
    trigger = int(stem[3:] if stem.startswith("GRB") else stem)

    # Open the file once and release the handle as soon as the table is read.
    with fits.open(filename) as hdul:
        table = hdul[2].data
        lc = pd.concat(
            [pd.DataFrame(table.field('TIMES')), pd.DataFrame(table.field('RATES'))],
            axis=1,
            ignore_index=True,
        )

    if str(trigger) in data_problems.index:
        # NOTE(review): this scales the whole row(s) at 'location', i.e. the
        # time columns as well as the rates -- confirm this is intended.
        lc.loc[data_problems.loc[str(trigger), 'location']] /= 100

    # Look the T90 window up once instead of once per row.
    t90_start = duration_data.loc[trigger, 'start_T90']
    t90_end = t90_start + duration_data.loc[trigger, 'T90']
    in_t90 = lc[0].between(t90_start, t90_end)  # inclusive on both ends

    # Only column 0 (resampling key) and the returned columns are needed;
    # .copy() gives an independent frame that is safe to modify.
    rate_cols = [2, 3, 4, 5]
    lc = lc.loc[in_t90, [0] + rate_cols].copy()

    # Replace the column outright: a float column cannot hold timedeltas
    # in place.
    lc[0] = pd.to_timedelta(lc[0], unit='s')

    # ffill() is the non-deprecated spelling of pad().
    lc = lc.resample('64ms', on=0).mean().ffill()
    lc.reset_index(drop=True, inplace=True)

    # A zero fluence produces inf values here; prepare_lcs drops such rows.
    lc = lc.loc[:, rate_cols] / float(fluence_data.loc[trigger, 'fluence'])
    return len(lc), lc  # Return length and the cut lightcurve


def prepare_lcs():
    """Cut, normalise and zero-pad every light curve in the ``BATSE/`` folder.

    Every file in the folder is passed to ``cut_norm_lc``. The resulting
    curves are zero-padded to the length of the longest one, flattened into
    one row per burst and collected in a DataFrame indexed by the file name
    without its ``.fits`` suffix. Rows containing inf or NaN are dropped
    before the dataset is saved.

    Side effects
    ------------
    * ``backup_BATSE.dat``: pickle of ``[unpadded_curves, grbnames, errors,
      max_len]``; reload with ``pd.read_pickle`` for debugging.
    * ``non_fft_dataset_BATSE.dat``: pickle of the final dataset.
    * ``Error_log_BATSE.txt``: one line per skipped file, with the reason.
    * Progress and the final dataset are printed to stdout.
    """
    path = "BATSE/"

    unpadded_curves = []
    grbnames = []
    errors = []
    log_lines = []
    max_len = 0  # Record longest burst

    # os.listdir order is arbitrary; sort so the dataset row order is
    # reproducible between runs and machines.
    for done, file in enumerate(sorted(os.listdir(path))):
        # `done` files have been fully handled at this point.
        if done and done % 100 == 0:
            print(f"{done} files done")

        name = file[:-5]  # strip ".fits"
        try:
            length, lc = cut_norm_lc(path + file)
        except Exception as exc:
            # Log the failure together with its cause and carry on.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            errors.append(file)
            log_lines.append(
                f"{name} \t Couldn't cut and normalize \t {exc!r} \n"
            )
            print(f"error with {file}")
            continue

        if length < 1:
            log_lines.append(f"{name} \t Too short \n")
            continue

        unpadded_curves.append(lc)
        grbnames.append(name)
        max_len = max(max_len, length)

    # Save backup for debugging purposes
    print("LightCurves normalised and cut")
    pd.to_pickle([unpadded_curves, grbnames, errors, max_len], "backup_BATSE.dat")

    # Zero-pad every curve to max_len rows and flatten it row-wise.
    prepared_lcs = []
    for count, lc in enumerate(unpadded_curves, start=1):
        padded = np.zeros(shape=(max_len, 4))
        padded[:len(lc), :] = np.asarray(lc)
        prepared_lcs.append(padded.reshape(-1))

        if count % 100 == 0:
            print(f"{count} lightcurves padded")

    # Free the unpadded curves before building the big DataFrame.
    del unpadded_curves

    # One row per burst; drop every row containing inf or NaN.
    prepared_dataset = pd.DataFrame(prepared_lcs, index=grbnames)
    prepared_dataset = prepared_dataset.replace([np.inf, -np.inf], np.nan).dropna()
    prepared_dataset.to_pickle('non_fft_dataset_BATSE.dat')
    print(prepared_dataset)

    # Write errors to log
    with open("Error_log_BATSE.txt", "w") as err_file:
        err_file.write("".join(log_lines))

# Run the preparation only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    prepare_lcs()
    
# Any following error message is probably due to missing fluence data -- nothing we can do about it

error with GRB1008.fits
error with GRB1071.fits
error with GRB1110.fits
error with GRB1118.fits
error with GRB1121.fits
error with GRB1142.fits
error with GRB1212.fits
error with GRB1297.fits
error with GRB1301.fits
error with GRB1307.fits
error with GRB1310.fits
error with GRB1311.fits
error with GRB1319.fits
error with GRB1321.fits
error with GRB1365.fits
error with GRB1379.fits
error with GRB1388.fits
error with GRB1413.fits
error with GRB142.fits
error with GRB1422.fits
100 files done
error with GRB1430.fits
error with GRB1459.fits
error with GRB1462.fits
error with GRB1465.fits
error with GRB1466.fits
error with GRB1469.fits
error with GRB1473.fits
error with GRB1479.fits
error with GRB1482.fits
error with GRB1484.fits
error with GRB1485.fits
error with GRB1489.fits
error with GRB1493.fits
error with GRB1506.fits
error with GRB1517.fits
error with GRB1524.fits
error with GRB1525.fits
error with GRB1526.fits
error with GRB1530.fits
error with GRB1544.fits
error with GRB1547.fits
er

error with GRB6229.fits
error with GRB6236.fits
error with GRB6240.fits
error with GRB6260.fits
error with GRB6262.fits
1600 files done
error with GRB6294.fits
error with GRB6296.fits
error with GRB6302.fits
error with GRB6358.fits
error with GRB6366.fits
error with GRB6382.fits
error with GRB6389.fits
error with GRB6393.fits
error with GRB640.fits
1700 files done
error with GRB6428.fits
error with GRB6432.fits
error with GRB6433.fits
error with GRB6476.fits
error with GRB6479.fits
error with GRB6491.fits
error with GRB6492.fits
error with GRB6500.fits
error with GRB6514.fits
error with GRB6516.fits
error with GRB6526.fits
error with GRB6532.fits
error with GRB6545.fits
error with GRB6563.fits
error with GRB6567.fits
error with GRB6570.fits
error with GRB6581.fits
error with GRB6599.fits
1800 files done
error with GRB6608.fits
error with GRB6640.fits
error with GRB6651.fits
error with GRB666.fits
error with GRB6680.fits
error with GRB6690.fits
error with GRB6691.fits
error with GRB6699