# Catalog generators

**Caden Gobat**, The George Washington University

In [2]:
import pandas as pd, numpy as np, requests
from bs4 import BeautifulSoup as bs
from code.utilities import split_filters
from code.xrt import XRT_lightcurve, get_photonIndex, get_temporalIndex, get_columnDensity, grb_list

# Character classes used when cleaning catalog strings:
# all ASCII letters (upper case first, then lower case) ...
alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
# ... and the characters that may appear in a plain decimal number.
numeric = "." + "0123456789"

## Short GRB sample

In [3]:
# Download the Swift GRB table from the GSFC archive; the catalog is the first table on the page.
swift_tables = pd.read_html("https://swift.gsfc.nasa.gov/archive/grb_table/fullview/")
swift = swift_tables[0] # get latest Swift catalog
# Keep only the first element of every column label (reduce/flatten MultiIndex).
swift.columns = [label[0] for label in swift.columns]

In [4]:
# Rows whose TriggerNumber does not parse as a number (labelled "non-Swift bursts" by the
# original author) get their T90 copied from the burst's own archive page, when one is listed.
non_swift = swift["TriggerNumber"].apply(pd.to_numeric,errors="coerce").isna()
for grb in swift.loc[non_swift,"GRB"]: # non-Swift bursts
    bat_data = pd.read_html(f"https://swift.gsfc.nasa.gov/archive/grb_table/fullview/{grb}/")[0]
    # first column holds the row label, second column the value
    T90 = pd.to_numeric(bat_data.loc[bat_data[0]=="T90: c",1].values, errors="coerce")
    if len(T90) == 0:
        # No T90 row on this page. Assigning a zero-length array to the matched
        # rows would raise a ValueError, so leave the catalog value untouched.
        continue
    # assign a scalar so it broadcasts over every catalog row matching this burst name
    swift.loc[swift["GRB"]==grb,"BAT T90[sec]"] = T90[0]

In [5]:
# Burst list published at swift.ac.uk; after dropping the first field the columns are
# "GRB" and "TriggerNumber". It is used below as a fallback source of trigger numbers.
# NOTE: this rebinds `grb_list`, replacing the object of the same name imported from code.xrt.
grb_list = pd.read_table("https://www.swift.ac.uk/xrt_curves/grb.list",
                         sep=" |\t",header=None,engine="python",
                         names=["_","GRB","TriggerNumber"]).drop("_",axis=1)

# Create the column up front so the fill step below works even if every GCN lookup fails.
swift["T90"] = np.nan

# Pass 1: scrape the T90 quoted in each burst's BAT GCN notice page.
for i,row in swift.iterrows():
    print()
    print(row["GRB"],end=": ")
    print(row["BAT T90[sec]"],end=" ")
    try:
        trig = int(row["TriggerNumber"])
    except (TypeError, ValueError):
        # `value in Series` tests the index labels, not the values, so look the
        # burst name up explicitly and take the first matching trigger number.
        known = grb_list.loc[grb_list["GRB"]==row["GRB"],"TriggerNumber"]
        if known.empty:
            continue
        try:
            trig = int(known.iloc[0])
        except (TypeError, ValueError):
            continue
    url = f"https://gcn.gsfc.nasa.gov/notices_s/{trig}/BA/"
    try:
        page = requests.get(url, timeout=30) # never hang the whole loop on one stalled request
        soup = bs(page.content,"html.parser")
        lines = soup.find("pre").text.split("\n")
        t90_line = [line.strip() for line in lines if "T90" in line]
        # second whitespace-separated token of the first line mentioning "T90";
        # a missing <pre> block, missing line or non-numeric token all land in the except
        real_t90 = pd.to_numeric(t90_line[0].split()[1])
    except Exception: # best effort: skip this burst, but let KeyboardInterrupt through
        continue
    swift.loc[i,"T90"] = round(real_t90,3)

# Pass 2: echo the values found above, then fall back to the catalog's BAT T90
# (coerced to a number, NaN when unparseable) wherever no GCN value was found.
have_gcn_t90 = swift["T90"].notna()
for value in swift.loc[have_gcn_t90,"T90"]:
    print(value)
bat_t90 = pd.to_numeric(swift["BAT T90[sec]"],errors="coerce")
swift["T90"] = swift["T90"].fillna(bat_t90)


220412B: 0.14 
220412A: 41.66 
220408A: 17.25 
220404A: 2.54 
220403B: 27.0 
220325A: 3.50 
220319A: 6.44 
220306B: 12.34 
220305A: 21.12 
220302A: nan 
220219B: nan 
220218A: nan 
220210A: nan 
220118A: 10.61 
220117C: 8.55 
220117B: 24.37 
220117A: 49.81 
220107B: nan 
220107A: nan 
220101A: 173.36 
211229A: nan 
211227A: 83.79 
211225B: 121.54 
211223C: 15.00 
211221A: 671.67 
211211A: 51.37 
211207A: 3.73 
211129A: 113.01 
211107B: nan 
211106A: 1.75 
211025A: 103.5 
211024B: 603.5 
211024A: nan 
211023B: 1.30 
210930A: 11.81 
210928A: nan 
210919A: 0.16 
210912A: 28.35 
210905A: nan 
210901A: 46.77 
210827A: nan 
210824A: 37.55 
210822A: 180.8 
210820A: 196.8 
210818A: 73.56 
210807C: 89.97 
210807A: 156.30 
210802A: nan 
210731A: 22.51 
210730A: 3.86 
210726A: 0.39 
210725B: 48.00 
210725A: 53.54 
210724A: 50.57 
210723A: 48.54 
210722A: 50.20 
210712A: 136.41 
210708B: 298.43 
210708A: 2.77 
210706A: nan 
210704A: nan 
210702A: 138.2 
210626A: nan 
210622A: nan 
210619B: 60.90 

101030A: 92.0 
101024A: 18.7 
101023A: 80.8 
101020A: 175.0 
101017A: 70.0 
101011A: 71.5 
101008A: 104 
100928A: 3.3 
100924A: 96.0 
100917A: 66 
100915A: 200 
100909A: nan 
100906A: 114.4 
100905A: 3.4 
100904A: 31.3 
100902A: 428.8 
100901A: 439 
100823A: 16.9 
100816A: 2.9 
100814A: 174.5 
100807A: 7.9 
100805A: 15.0 
100802A: 487 
100728B: 12.1 
100728A: 198.5 
100727A: 84 
100725B: 200 
100725A: 141 
100724A: 1.4 
100719A: 36.0 
100713A: nan 
100704A: 197.5 
100702A: 0.16 
100628A: 0.036 
100625A: 0.33 
100621A: 63.6 
100619A: 97.5 
100615A: 39 
100614A: 225 
100606A: 480 
100528A: nan 
100526B: 64.0 
100526A: 102 
100522A: 35.3 
100518A: nan 
100514A: nan 
100513A: 84 
100508A: 52 
100504A: 97.3 
100427A: nan 
100425A: 37.0 
100424A: 104 
100423A: nan 
100420A: 48 
100418A: 7.0 
100414A: nan 
100413B: nan 
100413A: 191 
100401A: nan 
100331B: nan 
100324A: nan 
100316D: nan 
100316C: 9.3 
100316B: 3.8 
100316A: 7.0 
100305A: 69.7 
100302A: 17.9 
100224A: nan 
100219A: 18.8 
1002

050713A: 124.700 
050712: 51.600 
050701: 21.800 
050607: 26.400 
050603: 12.400 
050528: 11.300 
050525A: 8.800 
050522: nan 
050520: nan 
050509B: 0.073 
050509A: 11.400 
050507: >15 
050505: 58.900 
050504: nan 
050502B: 17.700 
050422: 59.300 
050421: 15.000 
050418: 82.300 
050416B: 3.400 
050416A: 2.500 
050412: 26.500 
050410: 42.500 
050408: nan 
050406: 5.400 
050401: 33.300 
050326: 29.300 
050319: 152.500 
050318: 32 
050315: 95.600 
050306: 158.300 
050223: 22.500 
050219B: 30.700 
050219A: 23.700 
050215B: 8.100 
050215A: 87.300 
050202: 0.270 
050128: 19.200 
050126: 24.800 
050124: 4.000 
050117: 166.600 
041228: 55.400 
041226: 89.700 
041224: 177.200 
041223: 109.100 
041220: 5.600 
041219C: 4.8 
041219B: 30 
041219A: 520 
041217: 5.8 0.14
41.66
17.248
2.536
26.992
3.504
6.44
12.336
21.12
6.836
270.224
10.608
24.368
51.744
173.672
83.792
121.544
15.0
671.668
50.324
3.728
113.012
103.512
603.476
1.296
11.812
0.164
778.08
46.768
90.0
180.784
196.824
73.56
89.972
156.304


In [12]:
# Names of every burst listed in the previously generated XRT observations product.
xrt_observations = pd.read_csv("./products/all_XRT_observations.csv")
XRT_obs = xrt_observations["GRB"].tolist()

# Hand-maintained list of burst names flagged from GCNs (see trailing comment).
# Entries are plain strings; some carry a suffix letter (e.g. '081226B') and some do not
# (e.g. '040924'). The list is not sorted. Further down this cell it is intersected
# with XRT_obs and pruned by `ruled_out`.
GCN_flagged = ['040924', '051227', '051221', '051211', '051210', '051114', '051105', '051103',
               '050925', '050815', '050813', '050724', '050709', '050603', '050509', '061217',
               '061210', '061201', '061021', '061006', '060912', '060801', '060717', '060502',
               '060429', '060427', '060313', '060121', '071227', '071112', '071017', '070923',
               '070810', '070809', '070729', '070724', '070714', '070707', '070610', '070429',
               '070406', '070209', '070208', '070201', '070124', '081226B', '081226A', '081223',
               '081216', '081211B', '081211', '081105', '081024B', '081024A', '080919', '080913',
               '080905', '080503', '080426', '080413', '080123', '080121', '091126B', '091126A',
               '091117A', '091109B', '090929A', '090927A', '090916A', '090831A', '090715A', '090621B',
               '090621A', '090607', '090531B', '090515', '090510', '090426', '090423', '090417A',
               '101224A', '101219A', '101129A', '100816A', '100724A', '100703A', '100702A', '100628A',
               '100625A', '100216A', '100213A', '100206A', '100117A', '111222A', '111121A', '111117A',
               '111026A', '111020A', '110802A', '110715A', '110420B', '110402A', '110112B', '110112A',
               '110106A', '121226A', '120830A', '120817B', '120811B', '120804A', '120630A', '120521A',
               '120403A', '120305A', '120229A', '131224A', '131126A', '131125A', '131004A', '131002A',
               '130912A', '130822A', '130716A', '130626A', '130603B', '130515A', '130313A', '141212A',
               '141205A', '141202A', '141102A', '140930B', '140903A', '140831A', '140622A', '140619B',
               '140611A', '140606A', '140604A', '140516A', '140428B', '140414A', '140320A', '140209A',
               '140129B', '151229A', '151228A', '151221A', '151127A', '150922A', '150906B', '150831A',
               '150728A', '150710A', '150424A', '150423A', '150301A', '150120A', '150118C', '150101A',
               '161129A', '161104A', '161004A', '161001A', '160927A', '160829A', '160825A', '160822A',
               '160821B', '160820A', '160714A', '160709A', '160624A', '160620A', '160612A', '160601A',
               '160425A', '160411A', '160410A', '160408A', '160406A', '160307A', '160303A', '160228A',
               '160219A', '160111A', '171223A', '171211A', '171106A', '171103A', '171030A', '171007A',
               '170921B', '170827B', '170827A', '170826A', '170825A', '170822A', '170817A', '170816A',
               '170805B', '170805A', '170728B', '170728A', '170708A', '170616A', '170524A', '170428A',
               '170403A', '170325A', '170222A', '170220A', '170219A', '170206A', '170127C', '170127B',
               '170112A', '181225A', '181222B', '181126B', '181126A', '181123B', '181121A', '180824A',
               '180805B', '180728B', '180727A', '180718A', '180716A', '180715B', '180715A', '180703B',
               '180626C', '180618A', '180529A', '180523B', '180418A', '180402A', '180317A', '180204A',
               '191221A', '191203A', '191116A', '191101B', '191031D', '191031C', '191017C', '190913A',
               '190903A', '190831B', '190830B', '190813A', '190810A', '190724A', '190719C', '190630A',
               '190627A', '190626B', '190610A', '190606A', '190427A', '190331C', '190326A', '190206A',
               '190121A', '211227A', '211225C', '211224A', '211222A', '211221A', '211207A', '211203A',
               '211124A', '211106A', '211104A', '211031A', '211024A', '211023B', '210929A', '210927A',
               '210924A', '210923A', '210919B', '210919A', '210909A', '210904A', '210903B', '210822B',
               '210727A', '210726A', '210725B', '210708A', '210707A', '210704A', '210622A', '210621A',
               '210619A', '210618A', '210605A', '210601A', '210529B', '210529A', '210528A', '210510A',
               '210506A', '210424B', '210425A', '210421C', '210421B', '210413B', '210410A', '210326A',
               '210323A', '210307B', '210217A', '210205B', '210124B', '210119A', '201227A', '201222A',
               '201221D', '201221B', '201221A', '201214B', '201130A', '201111A', '201109A', '201108A',
               '201103A', '201015A', '201010A', '201006A', '200928A', '200923A', '200920B', '200920A',
               '200916B', '200917A', '200908A', '200907B', '200907A', '200903C', '200826A', '200824A',
               '200815A', '200817A', '200805A', '200729A', '200718A', '200716C', '200714B', '200710A',
               '200704A', '200706A', '200703A', '200701A', '200626A', '200623B', '200623A', '200605A',
               '200522A', '200521A', '200517A', '200514B', '200512A', '200509B', '200506B', '200501A',
               '200423A', '200420A', '200415A', '200411A', '200409A', '200405B', '200401A', '200327A',
               '200325A', '200313B', '200308A', '200307A', '200306B', '200224C', '200221A', '200219A',
               '200212A', '200203A', '200129A', '200128B', '200128A', '200103A'] # flagged GCNs

# Bursts to exclude from the flagged sample even though they appear in GCN_flagged.
ruled_out = [
    "050603", "050815", "051227", "060717", "061021",
    "070208", "080426", "100724A", "110715A", "131002A",
    "140129B", "140209A", "160228A", "191031C", "201221A",
]

# Keep flagged bursts that also have XRT observations, then remove the ruled-out ones.
# Both numpy set operations return sorted, de-duplicated arrays.
flagged_with_xrt = np.intersect1d(GCN_flagged, XRT_obs)
GCN_flagged = np.setdiff1d(flagged_with_xrt, ruled_out)

# Classification table read from the local data directory (file name: Jespersen_Table1.csv).
class_tbl = pd.read_csv("./data/Jespersen_Table1.csv")
# Names of the rows whose Class is "S", with the first three characters removed
# (presumably a leading "GRB" prefix -- confirm against the CSV).
class_S_names = class_tbl.loc[class_tbl["Class"]=="S","GRB"]
jesp = [full_name[3:] for full_name in class_S_names]

# Burst names taken from the literature, kept as two named sub-lists and then
# concatenated (duplicates between the two are retained, Fong entries first).
published_fong = [
    '050202', '050509B', '050709', '050724A', '050813', '050906', '050925',
    '051210', '051221A', '060121', '060313', '060502B', '060801', '061006',
    '061201', '061210', '061217', '070209', '070406', '070429B', '070707',
    '070714B', '070724A', '070729', '070809', '070810B', '071017', '071112B',
    '071227', '080121', '080123', '080426', '080503', '080702A', '080905A',
    '080919', '081024A', '081024B', '081226A', '081226B', '090305', '090305A',
    '090426', '090426A', '090510', '090515', '090607', '090621B', '090916',
    '091109B', '091117', '100117A', '100206A', '100213', '100625A', '100628A',
    '100702A', '101219A', '101224A', '110112A', '110112B', '110420B', '111020A',
    '111117A', '111121A', '111222A', '120229A', '120305A', '120521A', '120630A',
    '120804A', '120817B', '121226A', '130313A', '130515A', '130603B', '130626A',
    '130716A', '130822A', '130912A', '131004A', '131125A', '131126A', '131224A',
    '140129B', '140320A', '140402A', '140414A', '140516A', '140606A', '140619B',
    '140622A', '140903A', '140930B', '141202A', '141205A', '141212A', '150101A',
    '150101B', '150120A', '150301A',
] # Fong, et al.

published_rastinejad = [
    '130313A', '130822A', '130912A', '140903A', '141212A', '150120A', '150423A',
    '150831A', '160303A', '160408A', '160410A', '160411A', '160601A', '160612A',
    '160624A', '161001A', '170112A', '170127B', '170428A', '170524A', '180715A',
    '180718A', '180727A', '180805B', '181126A', '190427A', '191031D', '200623A',
    '201221D',
] # Rastinejad, et al.

published = published_fong + published_rastinejad

In [16]:
# Build the short-GRB sample: a burst is kept if ANY of the following holds.
bat_t90 = swift["BAT T90[sec]"].apply(pd.to_numeric, errors="coerce") # unparseable entries become NaN
is_short = bat_t90 <= 2                         # valid BAT T90 of at most 2 s (NaN compares False)
is_flagged = swift["GRB"].isin(GCN_flagged)     # flagged in GCNs
is_published = swift["GRB"].isin(published)     # listed in the literature samples
# NOTE(review): this uses the catalog's "BAT T90[sec]" column, not the "T90" column
# assembled earlier in the notebook -- confirm that is intended.
sGRBs = swift[is_short | is_flagged | is_published].copy() # copy so later edits do not touch `swift`

In [17]:
# format columns
# Columns removed from the short-GRB table (already in master catalog).
redundant_columns = [
    'Time[UT]',
    'BAT RA(J2000)',
    'BAT Dec(J2000)',
    'BAT 90%Error Radius[arcmin]',
    'BAT Fluence(15-150 keV)[10-7 erg/cm2]',
    'BAT Fluence90% Error(15-150 keV)[10-7 erg/cm2]',
    'BAT 1-sec PeakPhoton Flux(15-150 keV)[ph/cm2/sec]',
    'BAT 1-sec PeakPhoton Flux90% Error(15-150 keV)[ph/cm2/sec]',
    'BAT Photon Index(15-150 keV)(PL = simple power-law,CPL = cutoff power-law)',
    'BAT Photon Index90% Error(15-150 keV)',
    'XRT 90%Error Radius[arcsec]',
    'XRT Column Density(NH)[1021 cm-2]',
    'XRT Early Flux(0.3-10 keV)[10-11 erg/cm2/s]',
    'XRT 11 Hour Flux(0.3-10 keV)[10-11 erg/cm2/s]',
    'XRT 24 Hour Flux(0.3-10 keV)[10-11 erg/cm2/s]',
    'UVOT RA(J2000)',
    'UVOT Dec(J2000)',
    'UVOT 90%Error Radius[arcsec]',
    'Host Galaxy',
    'Comments',
    'References',
    'Burst Advocate',
]
sGRBs.drop(columns=redundant_columns, inplace=True)

# Shorter names for the columns that are kept.
column_aliases = {
    'XRT Time to FirstObservation[sec]': 'XRT dt [sec]',
    'XRT InitialTemporalIndex': "α",
    'XRTSpectral Index(Gamma)': 'Γ',
    'UVOT Time toFirst Observation[sec]': 'UVOT dt [sec]',
    'UVOT Other FilterMagnitudes': 'Other UVOT Filters',
    "XRT RA(J2000)": "RA",
    "XRT Dec(J2000)": "Dec",
}
sGRBs.rename(columns=column_aliases, inplace=True)

In [18]:
# Parse each "Other UVOT Filters" string into a {filter: value} dict and store it back
# in the table. Rows that cannot be parsed are reported and left unchanged.
optical_obs = []
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
for idx,entry in sGRBs["Other UVOT Filters"].items():
    entries = [] if pd.isna(entry) else split_filters(entry)
    try:
        # Each item is split on ">" if it has one, otherwise on "=". An item with neither
        # contributes None, which makes dict() raise, so the whole row is reported as an
        # error (same outcome as before).
        pairs = [item.split(">") if ">" in item else item.split("=") if "=" in item else None
                 for item in entries]
        filterdict = dict(pairs)
        filterdict["idx"] = idx # remember which row this dict belongs to
        optical_obs.append(filterdict)
    except Exception: # keep going on malformed rows, but let KeyboardInterrupt through
        print("error",idx,end="; ")

for obs in optical_obs:
    # the stored dict still carries its "idx" key, as before
    sGRBs.loc[obs["idx"], "Other UVOT Filters"] = [obs.copy()]

error 48; error 62; error 98; error 180; error 247; error 337; error 391; error 415; error 465; error 644; error 685; error 762; error 844; error 1170; error 1172; error 1239; error 1253; error 1406; error 1598; 

In [19]:
def _first_number(text):
    """Return the first number-like token in `text` as a string ("" if there is none).

    A token is a contiguous run of characters from `numeric` (digits and ".") that
    contains at least one digit. Only the first token is kept: joining every
    digit/period in the string would merge separate measurements into one bogus
    value (e.g. "1.949 (...), 1.95 (...)" would become "1.9491.95").
    """
    run = ""
    for char in text:
        if char in numeric:
            run += char
        elif any(c.isdigit() for c in run):
            break # the first number is complete; ignore everything after it
        else:
            run = "" # stray period(s) with no digit, e.g. from an abbreviation
    run = run.rstrip(".") # drop sentence-style trailing periods
    return run if any(c.isdigit() for c in run) else ""

# String entries are reduced to their first number (still stored as a string);
# non-string entries (e.g. NaN) are passed through unchanged.
sGRBs["Redshift"] = [_first_number(entry) if type(entry) is str else entry for entry in sGRBs["Redshift"]]

## *Swift*-XRT lightcurves

In [None]:
# For every burst in the short-GRB sample: record the X-ray spectral index in sGRBs
# and collect its XRT light curve. Both lookups are best effort; failures are
# reported on the progress line and the loop moves on.
xrt_columns = ['GRB', 'Time', 'Tpos', 'Tneg', 'Flux', 'Fluxpos', 'Fluxneg']
lightcurves = [] # one frame per burst; concatenated once after the loop
for i,row in sGRBs.iterrows():
    GRB_ID = row["GRB"]
    print(GRB_ID+" "*(7-len(GRB_ID)),end=": ") # pad the name to 7 characters
    mode = None
    try:
        Gamma,mode = get_photonIndex(GRB_ID,grb_list)
        sGRBs.loc[i,"Beta_X"] = Gamma.value - 1 # stored as photon index minus one
        sGRBs.loc[i,"Beta_X_neg"] = Gamma.minus
        sGRBs.loc[i,"Beta_X_pos"] = Gamma.plus
        print("index ✓",end=", ")
    except Exception: # let KeyboardInterrupt stop the loop
        print("index ✗",end=", ")
    try:
        fluxdata = XRT_lightcurve(GRB_ID,grb_list)
        # assumes XRT_lightcurve returns a DataFrame (or DataFrame-like) -- TODO confirm.
        # DataFrame.append was removed in pandas 2.0; inside this try block its
        # failure would have been silently reported as "lightcurve ✗" for every burst.
        lightcurves.append(pd.DataFrame(fluxdata))
        print("lightcurve ✓",end=" ")
    except Exception:
        print("lightcurve ✗",end=" ")
    if mode=="WT":
        print("(used WT spectrum)")
    else:
        print()

if lightcurves:
    xrt_data = pd.concat(lightcurves,ignore_index=True)
    # expected columns first (added as NaN if absent), then any extras the frames carried
    extra_columns = [col for col in xrt_data.columns if col not in xrt_columns]
    xrt_data = xrt_data.reindex(columns=xrt_columns+extra_columns)
else:
    xrt_data = pd.DataFrame(columns=xrt_columns)

# account for upper limits
xrt_data.loc[xrt_data["Fluxneg"]==0, "Fluxneg"] = np.inf

In [None]:
# Write both products to ./products/ without the DataFrame index column.
catalog_path = "./products/Swift_sGRB_catalog.csv"
sGRBs.to_csv(catalog_path, index=False)

lightcurve_path = "./products/Swift_XRT_lightcurves.csv"
xrt_data.to_csv(lightcurve_path, index=False)