# Create Swift Catalog


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# add the parent directory to the path so that the grbtools package can be imported
import sys

sys.path.append("../")

In [3]:
import os
import numpy as np
import pandas as pd
from grbtools.env import DIR_CATALOGS

## Methods

In [4]:
def update_cols(frame):
    """Normalise the column labels of ``frame`` in place.

    Each label is lower-cased and then stripped of surrounding whitespace.
    The very same (mutated) frame is returned so the call can be chained.
    """
    normalised = []
    for label in frame.columns:
        normalised.append(label.lower().strip())
    frame.columns = normalised
    return frame


def strip_cols(frame, cols=None):
    """Strip surrounding whitespace from the string values of some columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to clean; it is modified in place.
    cols : iterable of column labels, optional
        Columns to process. When omitted (or empty) every column is processed.

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in.
    """
    targets = cols if cols else frame.columns
    for name in targets:
        stripped = frame.loc[:, name].str.strip()
        frame.loc[:, name] = stripped
    return frame


def set_col_type(frame, col, type):
    """Convert the values of one column with ``type``, mapping "N/A" to NaN.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the column; it is modified in place.
    col : column label
        Column to convert.
    type : callable
        Conversion applied to every value that is not the string "N/A"
        (e.g. ``np.float32``). The name shadows the builtin ``type`` inside
        this function; it is kept because callers may pass it by keyword.

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in.
    """
    # "N/A" marks a missing value; everything else goes through the converter
    converted = frame[col].apply(lambda _x: np.nan if _x == "N/A" else type(_x))
    # Assign with frame[col] so the column is replaced as a whole and takes the
    # dtype of the converted values. `frame.loc[:, col] = ...` writes into the
    # existing column in newer pandas versions, which leaves it as object dtype.
    frame[col] = converted
    return frame


def read_file(file_name, skiprows):
    """Read one "|"-delimited Swift text table into a DataFrame.

    Parameters
    ----------
    file_name : str
        Name of the file inside ``<DIR_CATALOGS>/swift_data``.
    skiprows : int or list-like
        Forwarded to ``pandas.read_csv`` to skip the leading lines of the file.

    Returns
    -------
    pandas.DataFrame
        Table with lower-cased, stripped column names. If a "comment" column
        exists, its missing values are replaced by empty strings.
    """
    source = os.path.join(DIR_CATALOGS, "swift_data", file_name)
    table = update_cols(
        pd.read_csv(source, skiprows=skiprows, delimiter="|", skipinitialspace=True)
    )
    # nothing more to do when the table carries no free-text comment column
    if "comment" not in table.columns:
        return table
    # keep the comment column purely textual: NaN -> ""
    table.loc[:, "comment"] = table.loc[:, "comment"].replace(np.nan, "")
    return table


def save_frame(frame, file_name):
    """Write ``frame`` to ``<DIR_CATALOGS>/<file_name>.xlsx`` without the index.

    ``file_name`` is given without extension; ".xlsx" is appended here.
    """
    target = os.path.join(DIR_CATALOGS, file_name)
    frame.to_excel(f"{target}.xlsx", index=False)


def check_dups(frame, ref_column="grbname"):
    """Report duplicated values of ``ref_column`` and return their row labels.

    Prints the number of rows and the number of distinct values of
    ``ref_column``. When the two differ, the index labels of all rows whose
    value occurs more than once are printed and returned.

    Returns
    -------
    pandas.Index or None
        Index labels of the duplicated rows, or None when there are none.
    """
    total = len(frame)
    distinct = len(frame[ref_column].unique())
    print("Frame obs={}  unique={}".format(total, distinct))

    # every value occurs once: nothing to report
    if total == distinct:
        return None

    # keep=False flags every member of a duplicate group, not only the repeats
    mask = frame.duplicated(subset=[ref_column], keep=False)
    indices = frame[mask].index
    print("Duplicate entries: ", indices)
    return indices

## Read Text Files 

### summary_general.txt 

In [5]:
# Load the general summary table; only the burst name and T90 are needed.
summary = read_file("summary_general.txt", skiprows=22)[["grbname", "t90"]]
# Trim whitespace in both columns, then turn T90 into float32 values
# ("N/A" entries become NaN).
summary = set_col_type(strip_cols(summary), "t90", np.float32)

# preview a few random rows
summary.sample(n=5)

  frame.loc[:, col] = frame.loc[:, col].apply(


Unnamed: 0,grbname,t90
1481,GRB050713B,121.695999
1492,GRB050505,58.852001
1386,GRB060510B,262.936005
226,GRB190331A,4.336
311,GRB180404B,111.708


Check duplicate entries

In [6]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(summary)
# Show the duplicated rows. check_dups returns None when every name is unique
# and `.loc[None]` would raise, so fall back to an empty selection in that case.
summary.loc[dup_indices] if dup_indices is not None else summary.iloc[:0]

Frame obs=1527  unique=1525
Duplicate entries:  Int64Index([345, 346, 347, 348], dtype='int64')


Unnamed: 0,grbname,t90
345,GRB171027A,96.772003
346,GRB171027A,96.772003
347,GRB171020A,41.855999
348,GRB171020A,41.855999


Drop duplicates

In [7]:
# Remove rows that are exact copies of an earlier row. The result is rebound
# to the name instead of mutating the existing frame with inplace=True.
summary = summary.drop_duplicates()
# Every GRB name must now occur exactly once; a name that survives with
# differing values would need manual inspection.
assert summary["grbname"].is_unique, "summary still contains duplicate GRB names"

# preview a few random rows
summary.sample(5)

Unnamed: 0,grbname,t90
867,GRB120213A,41.028
857,GRB120311B,29.688
1187,GRB080723A,17.336
1053,GRB091104,107.136002
96,GRB210209A,139.408005


### GRBlist_redshift_BAT.txt

In [8]:
# Load the redshift table and keep only the burst name and the redshift entry.
redshift = read_file("GRBlist_redshift_BAT.txt", skiprows=18)[["grbname", "z"]]
# Trim surrounding whitespace from both columns.
redshift = strip_cols(redshift)

# preview a few random rows
redshift.sample(n=5)

Unnamed: 0,grbname,z
283,GRB090618,0.54
240,GRB101225A,0.847
121,GRB150314A,1.758
461,GRB050822,1.434
357,GRB071010B,0.947


Check duplicate entries

In [9]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(redshift)

# The redshift table must list every GRB exactly once.
assert redshift["grbname"].is_unique, "redshift contains duplicate GRB names"

Frame obs=488  unique=488


Check redshift values

In [10]:
# Turn the free-form redshift entries into numbers, row by row, and record in
# a new "z_comment" column how each value was obtained.
for idx in redshift.index:
    # burst identifier; needed for the manually curated entries below
    grb_name = redshift.loc[idx, "grbname"]
    # raw catalogue entry as text
    z_str = str(redshift.loc[idx, "z"])

    # defaults: no usable redshift, no remark
    z = np.nan
    z_comment = ""

    # first of all, remove question marks ("(?)" or a bare "?")
    if "?" in z_str:
        z_str = z_str.replace("(?)", "").replace("?", "")
        z_comment = "Question mark removed. "

    # From here on remarks are appended (+=), so the question-mark note above
    # is kept in front of the branch-specific remark instead of being lost.

    # "<value": the redshift is only bounded from above, i.e. an upper limit
    if "<" in z_str:
        z_str = z_str.replace("<", "")
        z_comment += "Upper Limit"

        # "~" marks the limit as approximate
        if "~" in z_str:
            z_str = z_str.replace("~", "")
            z_comment += " (approximate)"

        z = np.float32(z_str)

    # "a-b": a range of values; use the mean of its two ends
    elif "-" in z_str:
        z1, z2 = z_str.split("-")
        z1 = np.float32(z1.strip())
        z2 = np.float32(z2.strip())
        z = (z1 + z2) / 2
        # record the original range (not the computed mean) in the remark
        z_comment += "Range: {}.  Averaged.".format(z_str)

    # "a or b": several candidate values; each burst is resolved by hand
    elif "or" in z_str:
        if grb_name == "GRB211211A":
            z_comment += "Waxman et al. (2022) https://arxiv.org/pdf/2206.10710.pdf"
            z = 0.459

        elif grb_name == "GRB160104A":
            z_comment += "Two values: {}.  Averaged.".format(z_str)
            z1, z2 = z_str.split("or")
            z1 = np.float32(z1.strip())
            z2 = np.float32(z2.strip())
            z = (z1 + z2) / 2

        elif grb_name == "GRB150424A":
            z_comment += "Multiple values: {}. ".format(z_str)
            z_comment += "The most decent one (Ref3: Knust et al. (2017)) is selected."
            z = 1.0

        elif grb_name == "GRB111117A":
            z_comment += "Multiple values: {}. ".format(z_str)
            z_comment += "The most decent one (Selsing et al. 2017) is selected."
            z = 2.211

        elif grb_name == "GRB061201":
            z_comment += "Multiple values: {}. ".format(z_str)
            z_comment += "Selected Ref: Ref 2: Berger et al. GCN Circ. 5952"
            z = 0.111

        elif grb_name == "GRB060123":
            z_comment += "Multiple values: {}. ".format(z_str)
            z_comment += (
                "Selected Ref: https://sites.astro.caltech.edu/grbhosts/redshifts.html"
            )
            z = 0.56

        else:
            # an unknown multi-valued entry must not be guessed silently
            raise ValueError(
                f"Multiple values for redshift for {grb_name}. Please handle it manually."
            )

    # if none of the above, it is a single value
    else:
        z = np.float32(z_str)

    # store the parsed value and its remark on the current row
    redshift.loc[idx, "z"] = z
    redshift.loc[idx, "z_comment"] = z_comment

In [11]:
# preview ten random rows of the parsed redshift table
redshift.sample(n=10)

Unnamed: 0,grbname,z,z_comment
215,GRB120211A,2.4,
89,GRB160804A,0.736,
306,GRB081211B,0.216,
114,GRB150818A,0.282,
134,GRB140907A,1.21,
420,GRB060604,2.1357,
443,GRB060111A,2.32,Question mark removed.
99,GRB160131A,0.972,
304,GRB081222,2.77,
414,GRB060714,2.7108,


## Spectrum: 1-s Peak 

### t1s_best_model.txt

In [12]:
# Load the best-fit model table of the 1-s peak spectra.
t1_best = read_file("t1s_best_model.txt", skiprows=7)
# Give the three columns explicit names.
t1_best.columns = ["grbname", "trig_id", "t1s_best_model"]
# Trim whitespace in every column, then discard the trigger id, which is not
# needed further on.
t1_best = strip_cols(t1_best).drop(columns=["trig_id"])

# preview a few random rows
t1_best.sample(n=5)

Unnamed: 0,grbname,t1s_best_model
82,GRB210318A,
1424,GRB060108,PL
1434,GRB051114,PL
267,GRB180924A,PL
1372,GRB060614,PL


Check duplicate entries

In [13]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(t1_best)

# The best-model table must list every GRB exactly once.
assert t1_best["grbname"].is_unique, "t1_best contains duplicate GRB names"

Frame obs=1524  unique=1524


### t1s_summary_pow_parameters.txt

In [14]:
# Load the power-law fit parameters of the 1-s peak spectra and keep the burst
# name together with the two fit parameters.
t1s_pow = read_file("t1s_summary_pow_parameters.txt", skiprows=20)[
    ["grbname", "alpha", "norm"]
]
# Prefix the parameter names so they stay unambiguous after merging.
t1s_pow.columns = ["grbname", "t1s_pl_alpha", "t1s_pl_norm"]
# Trim surrounding whitespace in every column.
t1s_pow = strip_cols(t1s_pow)

# Parse both parameters as float32 ("N/A" entries become NaN).
for _col in ("t1s_pl_alpha", "t1s_pl_norm"):
    t1s_pow = set_col_type(t1s_pow, _col, np.float32)

# preview a few random rows
t1s_pow.sample(n=5)

  frame.loc[:, col] = frame.loc[:, col].apply(


Unnamed: 0,grbname,t1s_pl_alpha,t1s_pl_norm
511,GRB151122A,-1.94844,0.007062
1295,GRB070520A,-2.39475,0.001293
1322,GRB070129,-1.91253,0.003857
1156,GRB081029,-1.739,0.003318
440,GRB160912A,-1.26758,0.013562


Check duplicate entries

In [15]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(t1s_pow)
# Show the duplicated rows. check_dups returns None when every name is unique
# and `.loc[None]` would raise, so fall back to an empty selection in that case.
t1s_pow.loc[dup_indices] if dup_indices is not None else t1s_pow.iloc[:0]

Frame obs=1527  unique=1525
Duplicate entries:  Int64Index([345, 346, 347, 348], dtype='int64')


Unnamed: 0,grbname,t1s_pl_alpha,t1s_pl_norm
345,GRB171027A,-1.1758,0.0259
346,GRB171027A,-1.1758,0.0259
347,GRB171020A,-1.30789,0.006114
348,GRB171020A,-1.30789,0.006114


Drop duplicates

In [16]:
# Remove rows that are exact copies of an earlier row. The result is rebound
# to the name instead of mutating the existing frame with inplace=True.
t1s_pow = t1s_pow.drop_duplicates()
# Every GRB name must now occur exactly once.
assert t1s_pow["grbname"].is_unique, "t1s_pow still contains duplicate GRB names"

### t1s_summary_cutpow_parameters.txt

In [17]:
# Load the cutoff power-law fit parameters of the 1-s peak spectra and keep
# the burst name together with the three fit parameters.
t1s_cutpow = read_file("t1s_summary_cutpow_parameters.txt", skiprows=24)[
    ["grbname", "alpha", "norm", "epeak"]
]
# Prefix the parameter names so they stay unambiguous after merging.
t1s_cutpow.columns = ["grbname", "t1s_cpl_alpha", "t1s_cpl_norm", "t1s_cpl_epeak"]
# Trim surrounding whitespace in every column.
t1s_cutpow = strip_cols(t1s_cutpow)

# Parse the three parameters as float32 ("N/A" entries become NaN).
for _col in ("t1s_cpl_alpha", "t1s_cpl_norm", "t1s_cpl_epeak"):
    t1s_cutpow = set_col_type(t1s_cutpow, _col, np.float32)

# preview a few random rows
t1s_cutpow.sample(n=5)

  frame.loc[:, col] = frame.loc[:, col].apply(


Unnamed: 0,grbname,t1s_cpl_alpha,t1s_cpl_norm,t1s_cpl_epeak
141,GRB200716C,-0.816047,0.110176,428.291992
244,GRB190106A,-0.892926,0.089226,74.771202
851,GRB120328A,-1.18969,0.007641,204.311996
360,GRB170903A,-1.85523,0.027753,9999.360352
539,GRB150817A,-0.8487,0.091467,758.909973


Check duplicate entries

In [18]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(t1s_cutpow)
# Show the duplicated rows. check_dups returns None when every name is unique
# and `.loc[None]` would raise, so fall back to an empty selection in that case.
t1s_cutpow.loc[dup_indices] if dup_indices is not None else t1s_cutpow.iloc[:0]

Frame obs=1527  unique=1525
Duplicate entries:  Int64Index([345, 346, 347, 348], dtype='int64')


Unnamed: 0,grbname,t1s_cpl_alpha,t1s_cpl_norm,t1s_cpl_epeak
345,GRB171027A,-0.506636,0.053876,120.228996
346,GRB171027A,-0.506636,0.053876,120.228996
347,GRB171020A,-0.116008,0.02294,82.728996
348,GRB171020A,-0.116008,0.02294,82.728996


Drop duplicates

In [19]:
# Remove rows that are exact copies of an earlier row. The result is rebound
# to the name instead of mutating the existing frame with inplace=True.
t1s_cutpow = t1s_cutpow.drop_duplicates()
# Every GRB name must now occur exactly once.
assert t1s_cutpow["grbname"].is_unique, "t1s_cutpow still contains duplicate GRB names"

### Merge Dataframes

In [20]:
# Start from the list of burst names found in the summary table.
merged_t1 = summary[["grbname"]].copy(deep=True)

# Left-join every table on the burst name, in this order: redshift, T90
# (summary), best model, power-law parameters, cutoff power-law parameters.
for _table in (redshift, summary, t1_best, t1s_pow, t1s_cutpow):
    merged_t1 = pd.merge(merged_t1, _table, how="left", on="grbname")

# optional: write the merged 1-s table to disk
# save_frame(merged_t1, "swift_t1s")

# show the merged table
merged_t1

Unnamed: 0,grbname,z,z_comment,t90,t1s_best_model,t1s_pl_alpha,t1s_pl_norm,t1s_cpl_alpha,t1s_cpl_norm,t1s_cpl_epeak
0,GRB220715B,,,40.408001,,-0.78456,0.011339,-0.794540,0.011458,9999.360352
1,GRB220714B,,,49.040001,PL,-1.64781,0.029445,-1.448870,0.037503,131.455994
2,GRB220711B,,,87.056000,,-1.43626,0.024784,0.162297,0.137160,76.114601
3,GRB220708A,,,4.000000,PL,-1.91564,0.004836,-1.925260,0.004807,9995.040039
4,GRB220706A,,,85.996002,PL,-1.63229,0.006975,-1.643620,0.006933,9999.360352
...,...,...,...,...,...,...,...,...,...,...
1520,GRB041220,,,5.584000,PL,-1.31802,0.014645,-1.106840,0.018479,227.095993
1521,GRB041219C,,,10.000000,PL,-1.70236,0.017536,-1.275860,0.028918,85.261497
1522,GRB041219B,,,9.856000,PL,-1.34096,0.011391,-0.633827,0.025028,101.986000
1523,GRB041219A,,,,,,,,,


## Spectrum: T-100

### t100s_best_model.txt

In [21]:
# Load the best-fit model table of the T-100 spectra.
t100_best = read_file("t100s_best_model.txt", skiprows=7)
# Give the three columns explicit names.
t100_best.columns = ["grbname", "trig_id", "t100s_best_model"]
# Trim whitespace in every column, then discard the trigger id, which is not
# needed further on.
t100_best = strip_cols(t100_best).drop(columns=["trig_id"])

# preview a few random rows
t100_best.sample(n=5)

Unnamed: 0,grbname,t100s_best_model
9,GRB220518A,PL
1401,GRB060313,PL
516,GRB151027B,PL
866,GRB120211A,PL
938,GRB110312A,PL


Check duplicate entries

In [22]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(t100_best)

# The best-model table must list every GRB exactly once.
assert t100_best["grbname"].is_unique, "t100_best contains duplicate GRB names"

Frame obs=1524  unique=1524


### t100s_summary_pow_energy_fluence.txt

In [23]:
# Load the power-law energy fluences of the T-100 spectra and keep the burst
# name together with the 25-50 keV and 100-150 keV columns.
t100_pow_ef = read_file("t100s_summary_pow_energy_fluence.txt", skiprows=13)[
    ["grbname", "25_50kev", "100_150kev"]
]
# Prefix the fluence names so they stay unambiguous after merging.
t100_pow_ef.columns = [
    "grbname",
    "t100s_pl_fluence_25_50_kev",
    "t100s_pl_fluence_100_150_kev",
]
# Trim surrounding whitespace in every column.
t100_pow_ef = strip_cols(t100_pow_ef)

# Parse both fluences as float32 ("N/A" entries become NaN).
for _col in ("t100s_pl_fluence_25_50_kev", "t100s_pl_fluence_100_150_kev"):
    t100_pow_ef = set_col_type(t100_pow_ef, _col, np.float32)

# preview a few random rows
t100_pow_ef.sample(n=5)

  frame.loc[:, col] = frame.loc[:, col].apply(


Unnamed: 0,grbname,t100s_pl_fluence_25_50_kev,t100s_pl_fluence_100_150_kev
1302,GRB070420,3.587024e-06,3.859174e-06
1163,GRB081008,1.114319e-06,1.038842e-06
422,GRB161117B,1.876762e-06,9.580958e-07
769,GRB130327A,7.130107e-08,3.396222e-08
345,GRB171027A,1.877425e-06,2.142658e-06


Check duplicate entries

In [24]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(t100_pow_ef)

# The power-law fluence table must list every GRB exactly once.
assert t100_pow_ef["grbname"].is_unique, "t100_pow_ef contains duplicate GRB names"

Frame obs=1525  unique=1525


### t100s_summary_cutpow_energy_fluence.txt

In [25]:
# Load the cutoff power-law energy fluences of the T-100 spectra and keep the
# burst name together with the 25-50 keV and 100-150 keV columns.
t100_cutpow_ef = read_file("t100s_summary_cutpow_energy_fluence.txt", skiprows=13)[
    ["grbname", "25_50kev", "100_150kev"]
]
# Prefix the fluence names so they stay unambiguous after merging.
t100_cutpow_ef.columns = [
    "grbname",
    "t100s_cpl_fluence_25_50_kev",
    "t100s_cpl_fluence_100_150_kev",
]
# Trim surrounding whitespace in every column.
t100_cutpow_ef = strip_cols(t100_cutpow_ef)

# Parse both fluences as float32 ("N/A" entries become NaN).
for _col in ("t100s_cpl_fluence_25_50_kev", "t100s_cpl_fluence_100_150_kev"):
    t100_cutpow_ef = set_col_type(t100_cutpow_ef, _col, np.float32)

# preview a few random rows
t100_cutpow_ef.sample(n=5)

  frame.loc[:, col] = frame.loc[:, col].apply(


Unnamed: 0,grbname,t100s_cpl_fluence_25_50_kev,t100s_cpl_fluence_100_150_kev
582,GRB150204A,2.292627e-07,2.165628e-07
934,GRB110319B,3.108481e-07,3.968883e-07
1319,GRB070208,1.607126e-07,8.008109e-08
1171,GRB080913,1.277029e-07,1.530596e-07
1406,GRB060223A,1.948969e-07,1.132518e-07


Check duplicate entries

In [26]:
# Compare the number of rows with the number of distinct GRB names.
dup_indices = check_dups(t100_cutpow_ef)

# The cutoff power-law fluence table must list every GRB exactly once.
assert t100_cutpow_ef[
    "grbname"
].is_unique, "t100_cutpow_ef contains duplicate GRB names"

Frame obs=1525  unique=1525


### Merge Dataframes

In [27]:
# Start from the list of burst names found in the summary table.
merged_t100 = summary[["grbname"]].copy(deep=True)

# Left-join every table on the burst name, in this order: redshift, T90
# (summary), best model, power-law fluences, cutoff power-law fluences.
for _table in (redshift, summary, t100_best, t100_pow_ef, t100_cutpow_ef):
    merged_t100 = pd.merge(merged_t100, _table, how="left", on="grbname")

# Hardness = fluence in 100-150 keV divided by fluence in 25-50 keV,
# computed separately for the power-law and the cutoff power-law fits.
_pl_hard_band = merged_t100["t100s_pl_fluence_100_150_kev"]
_pl_soft_band = merged_t100["t100s_pl_fluence_25_50_kev"]
merged_t100["t100s_pl_hardness"] = _pl_hard_band / _pl_soft_band

_cpl_hard_band = merged_t100["t100s_cpl_fluence_100_150_kev"]
_cpl_soft_band = merged_t100["t100s_cpl_fluence_25_50_kev"]
merged_t100["t100s_cpl_hardness"] = _cpl_hard_band / _cpl_soft_band

# optional: write the merged T-100 table to disk
# save_frame(merged_t100, "swift_t100s")

# show the merged table
merged_t100

Unnamed: 0,grbname,z,z_comment,t90,t100s_best_model,t100s_pl_fluence_25_50_kev,t100s_pl_fluence_100_150_kev,t100s_cpl_fluence_25_50_kev,t100s_cpl_fluence_100_150_kev,t100s_pl_hardness,t100s_cpl_hardness
0,GRB220715B,,,40.408001,,2.745653e-07,3.548589e-07,2.841552e-07,3.089055e-07,1.292439,1.087101
1,GRB220714B,,,49.040001,PL,1.304739e-06,8.542476e-07,1.301649e-06,8.842270e-07,0.654727,0.679313
2,GRB220711B,,,87.056000,PL,1.585870e-06,1.284126e-06,1.624006e-06,1.146457e-06,0.809730,0.705944
3,GRB220708A,,,4.000000,PL,3.786866e-08,1.671822e-08,4.478472e-08,4.349708e-09,0.441479,0.097125
4,GRB220706A,,,85.996002,PL,3.715433e-07,2.936091e-07,3.685780e-07,3.140360e-07,0.790242,0.852020
...,...,...,...,...,...,...,...,...,...,...,...
1520,GRB041220,,,5.584000,PL,9.797765e-08,8.605123e-08,9.951924e-08,8.014509e-08,0.878274,0.805323
1521,GRB041219C,,,10.000000,PL,3.452719e-07,2.003278e-07,,,0.580203,
1522,GRB041219B,,,9.856000,PL,8.485209e-08,4.795263e-08,8.094358e-08,3.807786e-08,0.565132,0.470425
1523,GRB041219A,,,,,,,,,,


## Create SWIFT Catalog

In [28]:
from grbtools.swift.hardness import calculate_hardness
from grbtools.swift.luminosity import calculate_luminosity

# calculate hardness from the merged T-100 table
df_swift_t100 = calculate_hardness(merged_t100)

# calculate luminosity from the merged 1-s table
df_swift_t1 = calculate_luminosity(merged_t1)

# z, z_comment and t90 are already part of merged_t1, so remove them from the
# T-100 side before merging. A non-inplace drop is used on purpose:
# calculate_hardness may hand back the frame it was given (not visible here),
# and an inplace drop would then strip these columns from merged_t100 and make
# this cell fail when it is run a second time.
df_swift_t100 = df_swift_t100.drop(columns=["z", "z_comment", "t90"])

# merge the two tables on the burst name, keeping every row of the 1-s table
df_swift = pd.merge(df_swift_t1, df_swift_t100, on="grbname", how="left")

# the catalog uses "name" as its key column
df_swift = df_swift.rename(columns={"grbname": "name"})

# write the final catalog to disk
save_frame(df_swift, "swift_catalog")

# display the catalog
df_swift

Unnamed: 0,name,z,z_comment,t90,t1s_best_model,t1s_pl_alpha,t1s_pl_norm,t1s_cpl_alpha,t1s_cpl_norm,t1s_cpl_epeak,...,flux,model_used,t100s_best_model,t100s_pl_fluence_25_50_kev,t100s_pl_fluence_100_150_kev,t100s_cpl_fluence_25_50_kev,t100s_cpl_fluence_100_150_kev,t100s_pl_hardness,t100s_cpl_hardness,hardness_ratio
0,GRB220715B,,,40.408001,,-0.78456,0.011339,-0.794540,0.011458,9999.360352,...,,,,2.745653e-07,3.548589e-07,2.841552e-07,3.089055e-07,1.292439,1.087101,1.292439
1,GRB220714B,,,49.040001,PL,-1.64781,0.029445,-1.448870,0.037503,131.455994,...,,,PL,1.304739e-06,8.542476e-07,1.301649e-06,8.842270e-07,0.654727,0.679313,0.654727
2,GRB220711B,,,87.056000,,-1.43626,0.024784,0.162297,0.137160,76.114601,...,,,PL,1.585870e-06,1.284126e-06,1.624006e-06,1.146457e-06,0.809730,0.705944,0.809730
3,GRB220708A,,,4.000000,PL,-1.91564,0.004836,-1.925260,0.004807,9995.040039,...,,,PL,3.786866e-08,1.671822e-08,4.478472e-08,4.349708e-09,0.441479,0.097125,0.441479
4,GRB220706A,,,85.996002,PL,-1.63229,0.006975,-1.643620,0.006933,9999.360352,...,,,PL,3.715433e-07,2.936091e-07,3.685780e-07,3.140360e-07,0.790242,0.852020,0.790242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1520,GRB041220,,,5.584000,PL,-1.31802,0.014645,-1.106840,0.018479,227.095993,...,,,PL,9.797765e-08,8.605123e-08,9.951924e-08,8.014509e-08,0.878274,0.805323,0.878274
1521,GRB041219C,,,10.000000,PL,-1.70236,0.017536,-1.275860,0.028918,85.261497,...,,,PL,3.452719e-07,2.003278e-07,,,0.580203,,0.580203
1522,GRB041219B,,,9.856000,PL,-1.34096,0.011391,-0.633827,0.025028,101.986000,...,,,PL,8.485209e-08,4.795263e-08,8.094358e-08,3.807786e-08,0.565132,0.470425,0.565132
1523,GRB041219A,,,,,,,,,,...,,,,,,,,,,
