In [1]:
'''Swift specific, but good for inspiration'''
import numpy as np
import pandas as pd
import os

# Read the lookup tables used by the functions below.
# duration_data: indexed by GRB name; cut_norm_lc reads T100_start / T100_end.
# fluence_data:  indexed by GRB name; cut_norm_lc reads the fluence column.
# discarded:     table with a `discarded` column that prepare_lcs checks
#                file names against.
# NOTE(review): read_pickle executes pickled code; only load trusted files.
duration_data = pd.read_pickle('DataFrames/duration_data.dat')
fluence_data = pd.read_pickle('DataFrames/fluence_data.dat')
discarded = pd.read_table('DataFrames/discarded_bursts_Swift.txt')


def cut_norm_lc(filename):
    """Load one light curve, cut it to T100 and normalise it by fluence.

    The GRB name is the file's base name with its last seven characters
    removed (presumably the ``_lc.dat`` suffix seen in the light-curve
    folder). It is used as the key into the module-level ``duration_data``
    (``T100_start`` / ``T100_end``) and ``fluence_data`` (``fluence``).

    Parameters
    ----------
    filename : str
        Path to a space-separated light-curve file without a header row.
        Column 0 is compared against the T100 bounds; columns 1, 3, 5 and 7
        are kept as the light-curve values.

    Returns
    -------
    tuple
        ``(n_rows, lc)``: the number of rows inside the T100 window and a
        DataFrame holding those rows of the four kept columns, each divided
        by the burst's fluence.

    Raises
    ------
    KeyError
        If the GRB name is missing from ``duration_data`` or ``fluence_data``.
    """
    # Derive the name from the base name so the directory prefix may have any
    # length (the previous fixed slice only worked for a 12-character prefix).
    grbname = os.path.basename(filename)[:-7]

    # Look the per-burst constants up once instead of once per row.
    t_start = duration_data.T100_start[grbname]
    t_end = duration_data.T100_end[grbname]
    fluence = float(fluence_data.fluence[grbname])

    # Cut lightcurve
    lc = pd.read_csv(filename, sep=' ', header=None)
    lc = lc.loc[:, [0, 1, 3, 5, 7]]
    # Series.between is inclusive on both ends, matching start <= x <= end.
    in_t100 = lc.loc[:, 0].between(t_start, t_end)
    lc = lc.loc[in_t100].reset_index(drop=True)

    # Drop the first kept column (column 0) and normalise the remaining four.
    lc = lc.iloc[:, [1, 2, 3, 4]] / fluence
    return len(lc), lc  # Return length and the cut lightcurve
    


def prepare_lcs():
    """Cut, normalise and zero-pad every light curve in ``LightCurves/``.

    Each file in the folder is passed to ``cut_norm_lc``. Curves that end up
    with no rows, files listed in ``discarded.discarded`` and files that raise
    an error are skipped and recorded in the error log. The remaining curves
    are zero-padded to the length of the longest one, flattened to one row
    each and stored in a DataFrame indexed by GRB name.

    Side effects
    ------------
    * Writes ``backup.dat`` (pickled list of the unpadded curves, names,
      failed file names and the maximum length).
    * Writes ``non_fft_dataset.dat`` (pickled prepared DataFrame).
    * Writes ``Error_log.txt``.
    * Prints progress messages and the final DataFrame.

    Returns
    -------
    None
    """
    # Go through all LightCurves in the folder Light Curve and prepare them
    path = "LightCurves/"

    unpadded_curves = []
    grbnames = []
    errors = []
    error_lines = []  # joined once at the end instead of repeated str +=
    max_len = 0  # Record longest burst

    # Build the lookup once; membership tests on a set are O(1).
    discarded_files = set(discarded.discarded)

    # Go through all the files
    for count, file in enumerate(os.listdir(path), start=1):
        if count % 100 == 0:
            print(f"{count} files done")

        try:
            length, lc = cut_norm_lc(path + file)
        except Exception:
            # If we receive an error we log it. Catching Exception (not a
            # bare except) keeps Ctrl-C / SystemExit able to stop the run.
            errors.append(file)
            error_lines.append(f"{file[:-7]} \t Couldn't cut and normalize \n")
            print(f"error with {file}")
            continue

        if length < 1:
            error_lines.append(f"{file[:-7]} \t Too short \n")
            continue
        if file in discarded_files:
            error_lines.append(f"{file[:-7]} \t Discarded \n")
            continue

        unpadded_curves.append(lc)
        grbnames.append(file[:-7])
        max_len = max(max_len, length)

    # save backup for debugging purposes
    print("LightCurves normalised and cut")
    pd.to_pickle([unpadded_curves, grbnames, errors, max_len], "backup.dat")

    # Go through and pad: every curve becomes a (max_len, 4) array, with
    # zeros after its last row, flattened to a single 1-D row.
    prepared_lcs = []
    for count, lc in enumerate(unpadded_curves, start=1):
        padded = np.zeros(shape=(max_len, 4))
        padded[:len(lc), :] = lc
        prepared_lcs.append(padded.reshape(-1))
        if count % 100 == 0:
            print(f"{count} lightcurves padded")

    # Release the unpadded copies before building the DataFrame.
    del unpadded_curves

    # Make to DataFrame
    prepared_dataset = pd.DataFrame(prepared_lcs)
    prepared_dataset.index = grbnames
    prepared_dataset = prepared_dataset.dropna()
    prepared_dataset.to_pickle('non_fft_dataset.dat')
    print(prepared_dataset)

    # Write errors to log; the context manager closes the file even on error.
    with open("Error_log.txt", "w") as err_file:
        err_file.write("".join(error_lines))

# Run the full preparation pipeline only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    prepare_lcs()

100 files done
200 files done
300 files done
400 files done
500 files done
600 files done
700 files done
800 files done
900 files done
1000 files done
1100 files done
1200 files done
1300 files done
error with GRB211211A_lc.dat
LightCurves normalised and cut
100 lightcurves padded
200 lightcurves padded
300 lightcurves padded
400 lightcurves padded
500 lightcurves padded
600 lightcurves padded
700 lightcurves padded
800 lightcurves padded
900 lightcurves padded
1000 lightcurves padded
1100 lightcurves padded
1200 lightcurves padded
1300 lightcurves padded
                    0              1              2              3      \
GRB041217    25716.431168   25332.490276   72727.340123   17187.207667   
GRB041219C   44786.689496   29409.994404   23371.116606  -21551.961499   
GRB041220    99854.528414  209384.474307  176684.421948   45550.694780   
GRB041223     1762.756386    2897.753465    3444.927054    1583.759293   
GRB041224     5434.651316    2783.723920    2852.123422    1764.7357