In [1]:
'''Swift specific but good for inspiration'''
import numpy as np
import pandas as pd
from astropy.io import fits
from sklearn.preprocessing import StandardScaler
from math import ceil, floor
import os

# Read files
# Both tables are looked up by integer trigger number via .loc in cut_norm_lc:
#   duration_data -> columns 'start_T90' and 'T90'
#   fluence_data  -> column 'fluence'
# NOTE(review): read_pickle executes arbitrary code on load; only use trusted files.
duration_data = pd.read_pickle("DataFrames/duration_data_BATSE.dat")
fluence_data = pd.read_pickle("DataFrames/fluence_data_BATSE.dat")

def cut_norm_lc(filename):
    """Prepare a single light curve: detrend, cut to T90 and normalise by fluence.

    The four light-curve columns each get a straight line through their first
    and last sample subtracted, the rows outside the T90 window are dropped,
    and the result is divided by the burst fluence.

    Parameters
    ----------
    filename : str
        Path to a whitespace-separated light-curve file named
        ``GRB<trigger>.csv``. The first line is skipped, the second line holds
        the four header integers ('trig#', 'npts', 'nlasc', '1preb') and the
        remaining lines hold the four data columns.

    Returns
    -------
    tuple
        ``(length, lc)`` where ``lc`` is the cut, normalised DataFrame with a
        fresh 0-based index and ``length`` is its number of rows (0 if the
        T90 window selects nothing).

    Raises
    ------
    ValueError
        If no trigger number can be parsed from the file name.
    KeyError
        If the trigger is missing from ``duration_data`` or ``fluence_data``.
    OSError
        If the light-curve file or ``BATSE/GRB<trigger>.fits`` cannot be read.

    Notes
    -----
    A zero fluence does not raise; it yields inf/NaN values in the result.
    """
    # Take the trigger number from the file name itself instead of from fixed
    # character offsets, so the function works for any directory prefix.
    stem = os.path.splitext(os.path.basename(filename))[0]
    trigger = int(stem[3:] if stem.startswith("GRB") else stem)

    # Raw string: '\s' in a plain string literal is an invalid escape sequence.
    raw = pd.read_csv(filename, skiprows=1, header=None, sep=r'\s+')

    # Close the FITS file deterministically; pull out the only two numbers
    # needed while the (possibly memory-mapped) data is still open.
    with fits.open(f'BATSE/GRB{trigger}.fits') as hdul:
        times = pd.DataFrame(hdul[2].data.field('TIMES'))
        first_time = times.min().min()
        last_time = times.max().max()

    header = pd.Series(data=raw.loc[0].values,
                       index=['trig#', 'npts', 'nlasc', '1preb'],
                       dtype='int64')
    # NOTE(review): the +32 offset is taken over unchanged -- presumably the
    # number of bins between the 'nlasc' marker and the trigger; confirm.
    trig_time = header['nlasc'] + 32

    # Work in float from the start so the detrended (non-integer) values can be
    # written back without an implicit dtype upcast.
    lc = raw.drop(0).reset_index(drop=True).astype(float)

    # Subtract the straight line joining the first and last sample of each column.
    n_bins = len(lc)
    bin_index = np.arange(n_bins)
    for col in range(4):
        b1 = lc.iloc[0, col]
        b2 = lc.iloc[-1, col]
        lc.iloc[:, col] = lc.iloc[:, col] - (b1 + (b2 - b1) / (n_bins - 1) * bin_index)

    # Convert the T90 window (clipped to the time range covered by the FITS
    # file) into row labels. 0.064 is presumably the bin width in seconds.
    bin_width = 0.064
    t90_start = duration_data.loc[trigger, 'start_T90']
    t90_end = t90_start + duration_data.loc[trigger, 'T90']
    start = trig_time + floor(max([first_time, t90_start]) / bin_width)
    end = trig_time + ceil(min([last_time, t90_end]) / bin_width) - 1

    # .loc slicing is label based and inclusive at both ends.
    lc = lc.loc[start:end].reset_index(drop=True)
    lc = lc.iloc[:, [0, 1, 2, 3]] / float(fluence_data.loc[trigger, 'fluence'])
    return len(lc), lc  # Return length and the cut lightcurve


def prepare_lcs():
    """Prepare every light curve found in ``LC/`` and save the padded dataset.

    Each file is passed to ``cut_norm_lc``. Curves that fail or come back empty
    are skipped and recorded in the error log. The remaining curves are
    zero-padded to the length of the longest one, flattened to one row each,
    and stored in a DataFrame indexed by GRB name. Rows containing inf or NaN
    are dropped before the DataFrame is pickled.

    Files written (all in the current working directory):
      * ``backup_BATSE_LC.dat``          -- unpadded curves, names, failed files, max length
      * ``non_fft_dataset_BATSE_LC.dat`` -- final padded dataset
      * ``Error_log_BATSE_LC.txt``       -- one line per skipped file

    Returns
    -------
    None
    """
    path = "LC/"

    unpadded_curves = []
    grbnames = []
    errors = []
    error_lines = []  # joined once at the end instead of repeated string +=
    max_len = 0  # Record longest burst

    # os.listdir order is arbitrary; sorting makes the run (and the row order
    # of the resulting dataset) reproducible.
    for count, file in enumerate(sorted(os.listdir(path)), start=1):
        if count % 100 == 0:
            print(f"{count} files done")
        try:
            length, lc = cut_norm_lc(path + file)
        except Exception as e:
            # Best effort: one bad file must not abort the whole run, so log it
            # and move on.
            errors.append(file)
            error_lines.append(f"{file[:-4]} \t {e} \n")
            print(f"error with {file}")
            continue

        if length < 1:
            error_lines.append(f"{file[:-4]} \t Too short \n")
            continue

        unpadded_curves.append(lc)
        grbnames.append(file[3:-4])  # strip leading "GRB" and the 4-char extension
        max_len = max(max_len, length)

    # Save backup for debugging purposes
    print("LightCurves normalised and cut")
    pd.to_pickle([unpadded_curves, grbnames, errors, max_len], "backup_BATSE_LC.dat")

    # Zero-pad: one row per burst, max_len * 4 columns, each curve flattened
    # row-major into the leading part of its row.
    padded = np.zeros(shape=(len(unpadded_curves), max_len * 4))
    for count, lc in enumerate(unpadded_curves, start=1):
        padded[count - 1, :len(lc) * 4] = lc.to_numpy().reshape(-1)
        if count % 100 == 0:
            print(f"{count} lightcurves padded")

    del unpadded_curves  # free the per-burst frames before building the DataFrame

    # Curves with inf (e.g. from a zero fluence) or NaN values are unusable: drop them.
    prepared_dataset = pd.DataFrame(padded, index=grbnames)
    prepared_dataset = prepared_dataset.replace([np.inf, -np.inf], np.nan).dropna()
    prepared_dataset.to_pickle('non_fft_dataset_BATSE_LC.dat')
    print(prepared_dataset)

    # Write errors to log
    with open("Error_log_BATSE_LC.txt", "w") as err_file:
        err_file.write("".join(error_lines))

# Run the full preparation only when executed as a script / notebook cell,
# not when this module is imported.
if __name__ == "__main__":
    prepare_lcs()
    
# Any following error message is probably due to missing fluence data -- nothing we can do about it

error with basic_table.csv
error with duration_table.csv
error with GRB1200.csv
100 files done
error with GRB1518.csv
error with GRB1533.csv
error with GRB1540.csv
error with GRB1541.csv
error with GRB1546.csv
error with GRB1553.csv
error with GRB1558.csv
error with GRB1561.csv
error with GRB1567.csv
error with GRB1586.csv
error with GRB1588.csv
error with GRB1590.csv
200 files done
error with GRB2213.csv
300 files done
400 files done
error with GRB2463.csv
error with GRB2504.csv
error with GRB2513.csv
error with GRB2529.csv
error with GRB2536.csv
error with GRB254.csv
error with GRB2542.csv
error with GRB2551.csv
500 files done
600 files done
error with GRB3099.csv
700 files done
error with GRB3251.csv
error with GRB3253.csv
error with GRB3431.csv
800 files done
error with GRB3580.csv
error with GRB3709.csv
error with GRB3711.csv
error with GRB3803.csv
900 files done
error with GRB3911.csv
error with GRB3915.csv
error with GRB3938.csv
1000 files done
error with GRB5496.csv
1100 files 