In [1]:
'''Swift specific but good for inspiration'''
import numpy as np
import pandas as pd
from astropy.io import fits
import os

# Read the catalogue tables once at import time. The first CSV column is used
# as the row index in both tables.
# duration_data is not referenced in the visible part of this file.
duration_data = pd.read_csv('DataFrames/duration_data_Fermi.csv', index_col=0)
# fluence_data is looked up by normalize_lc with row labels of the form
# 'GRB<name>' and the column 'fluence'.
fluence_data = pd.read_csv('DataFrames/fluence_data_Fermi.csv', index_col=0)
# Disabled manual override of a single burst's fluence, kept for reference:
#fluence_data.loc['GRB200826187','fluence'] = '4.8E-06'

def normalize_lc(filename): # Normalize by fluence
    """Load one light curve from a FITS file and divide it by the burst's fluence.

    The burst name is derived from the file's base name by dropping its first
    two and last four characters (the same slicing ``prepare_lcs`` uses to
    label bursts), so the lookup does not depend on the length of the
    directory part of ``filename``.

    Parameters
    ----------
    filename : str
        Path to the FITS file. The 'NRGFLUX' field of HDU index 2 is read.

    Returns
    -------
    tuple
        ``(len(lc), lc)`` where ``lc`` is the pandas object holding the
        flux values divided by ``fluence_data.loc['GRB<name>', 'fluence']``.

    Raises
    ------
    KeyError
        If ``'GRB<name>'`` has no row in ``fluence_data``.
    """
    grbname = os.path.basename(filename)[2:-4]
    # Open the file in a context manager so the handle is released on every
    # call; without this, looping over thousands of files leaks descriptors.
    with fits.open(filename) as hdul:
        # Copy the column while the file is still open so the returned data
        # does not depend on the (possibly memory-mapped) file afterwards.
        flux = np.array(hdul[2].data.field('NRGFLUX'))
    fluence = float(fluence_data.loc[f'GRB{grbname}', 'fluence'])
    lc = pd.DataFrame(flux).squeeze() / fluence
    return len(lc), lc # Return length and the normalized lightcurve
    


def prepare_lcs():
    """Normalize, zero-pad and store every light curve found in ``Fermi/``.

    Steps:
      1. Each file in ``Fermi/`` is passed to ``normalize_lc``. Files that
         raise an error, or whose light curve contains missing values, are
         skipped and recorded in the error log.
      2. The intermediate result is pickled to ``backup_Fermi.dat`` as
         ``[unpadded_curves, grbnames, errors, max_len]``.
      3. Every light curve is copied into a zero array of shape
         ``(max_len, 2)`` (``max_len`` = longest accepted light curve) and
         flattened to one row.
      4. The rows are stored as a DataFrame indexed by burst name in
         ``non_fft_dataset_Fermi.dat`` and the error log is written to
         ``Error_log_Fermi.txt``.

    Returns
    -------
    None
    """
    # Go through all LightCurves in the folder Fermi and prepare them
    path = "Fermi/"

    unpadded_curves = []
    grbnames = []
    errors = []
    error_lines = []  # joined once at the end instead of repeated string +=
    max_len = 0  # Record longest burst

    for done, file in enumerate(os.listdir(path), start=1):
        grbname = f"GRB{file[2:-4]}"
        try:
            length, lc = normalize_lc(path + file)
            has_missing = lc.isnull().values.any()
        except Exception:
            # Best effort: log the failing file and carry on. Catching
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            errors.append(file)
            error_lines.append(f"{grbname} \t Couldn't cut and normalize \n")
            print(f"error with {file}")
        else:
            if has_missing:
                error_lines.append(f"{grbname} \t Missing data \n")
            else:
                unpadded_curves.append(lc)
                grbnames.append(grbname)
                max_len = max(max_len, length)
        # Report only after the file has actually been handled, so the
        # number printed matches the number of files done.
        if done % 100 == 0:
            print(f"{done} files done")

    # save backup for debugging purposes
    print("LightCurves normalised and cut")
    pd.to_pickle([unpadded_curves, grbnames, errors, max_len], "backup_Fermi.dat")
    # To resume from the backup instead of re-reading the files:
    # (unpadded_curves, grbnames, errors, max_len) = pd.read_pickle("backup_Fermi.dat")

    # Go through and pad every curve with trailing zeros up to max_len rows
    prepared_lcs = []
    for padded, lc in enumerate(unpadded_curves, start=1):
        temp = np.zeros(shape=(max_len, 2))
        temp[:len(lc), :] = lc
        prepared_lcs.append(temp.reshape(-1))
        if padded % 100 == 0:
            print(f"{padded} lightcurves padded")

    # Release the unpadded copies before building the DataFrame
    del unpadded_curves

    # Make to DataFrame; grbnames has exactly one entry per accepted curve
    prepared_dataset = pd.DataFrame(prepared_lcs)
    prepared_dataset.index = grbnames
    prepared_dataset.to_pickle('non_fft_dataset_Fermi.dat')
    print(prepared_dataset)

    # Write errors to log; the context manager closes the file even on error
    with open("Error_log_Fermi.txt", "w") as err_file:
        err_file.write("".join(error_lines))

# Run the full preparation pipeline only when executed as a script
if __name__ == "__main__":
    prepare_lcs()

100 files done
200 files done
300 files done
400 files done
500 files done
600 files done
700 files done
800 files done
900 files done
1000 files done
1100 files done
1200 files done
1300 files done
1400 files done
1500 files done
1600 files done
1700 files done
1800 files done
1900 files done
2000 files done
2100 files done
2200 files done
2300 files done
2400 files done
2500 files done
2600 files done
2700 files done
2800 files done
2900 files done
3000 files done
3100 files done
LightCurves normalised and cut
100 lightcurves padded
200 lightcurves padded
300 lightcurves padded
400 lightcurves padded
500 lightcurves padded
600 lightcurves padded
700 lightcurves padded
800 lightcurves padded
900 lightcurves padded
1000 lightcurves padded
1100 lightcurves padded
1200 lightcurves padded
1300 lightcurves padded
1400 lightcurves padded
1500 lightcurves padded
1600 lightcurves padded
1700 lightcurves padded
1800 lightcurves padded
1900 lightcurves padded
2000 lightcurves padded
2100 lightc