In [1]:
from burstextractor.burstlist import download_burst_list, process_burst_list, download_burst_data
from burstextractor.timeutils import extract_time, fix_typos_in_time, fix_24_hour_time, create_datetime, check_valid_date, adjust_year_month
from burstextractor.data_utils import explode_instruments_long_clean_instruments, keep_only_type_I_to_VI
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt 

## Create Dataframe with the Burstlists

In [2]:
burst_list = download_burst_data([2021, 2022, 2023], months=range(1, 13), folder="ecallisto_files")

  data = pd.read_csv(
  data = pd.read_csv(


In [3]:
burst_list

Unnamed: 0,date,time,type,instruments
0,20210119,02:42-02:42,III,Australia-ASSA
1,20210120,12:37-12:37,III,"AUSTRIA-UNIGRAZ, [HUMAIN], MRT1, SOUTHAFRICA-S..."
2,20210127,04:32-04:32,III,"Australia-ASSA, INDIA-GAURI, SOUTHAFRICA-SANSA"
3,20210127,09:27-09:27,III,"AUSTRIA-UNIGRAZ, INDIA-GAURI, INDIA-OOTY, MRT1..."
4,20210218,18:04-18:04,III,"GREENLAND, MEXART, ROSWELL-NM"
...,...,...,...,...
5278,20230219,16:50-16:59,III,"Arecibo-Observatory, MEXICO-LANCE"
5279,20230219,17:31-17:32,III,"Arecibo-Observatory, MEXICO-LANCE"
5280,20230219,17:56-17:56,III,"Arecibo-Observatory, MEXICO-LANCE, PARAGUAY, R..."
5281,20230219,19:23-19:24,III,"Arecibo-Observatory, MEXICO-LANCE"


In [6]:
burst_list.sample(5)

Unnamed: 0,date,time,type,instruments
1901,20220312,15:54-15:55,III,"Arecibo-Observatory, (BIR), GREENLAND"
67,20210424,13:03-13:04,III,GLASGOW
3052,20220628,01:18-01:24,III,"ALASKA-COHOE, ALASKA-HAARP, Australia-ASSA"
309,20210531,04:03-04:03,III,"ALASKA-COHOE, ALMATY, Australia-ASSA, INDIA-GA..."
2429,20220503,11:26-11:26,III,"Arecibo-Observatory, (BIR), GLASGOW, SWISS-Lan..."


In [8]:
burst_list.shape

(5283, 4)

## Fix typos

In [9]:
extracted_digits = burst_list['time'].str.extract(r'(\d+).(\d+).(\d+).(\d+)', expand=True).astype(int)

In [10]:
impossible_times_bool = (extracted_digits[1] > 59) | (extracted_digits[3] > 59) | (extracted_digits[0] > 23) | (extracted_digits[2] > 23)
extracted_digits[impossible_times_bool]

Unnamed: 0,0,1,2,3
1693,6,6,6,88
2238,24,32,14,33
3876,23,59,24,0
3890,0,0,24,0
3920,3,50,24,0
4179,23,59,24,0
4467,23,58,24,0
4557,0,0,24,0
4576,0,0,24,0


In [11]:
burst_list[impossible_times_bool]

Unnamed: 0,date,time,type,instruments
1693,20220210,06:06-06:88,V,"ALMATY, Australia-ASSA, INDIA-OOTY, INDIA-UDAI..."
2238,20220421,24:32-14:33,III,"AUSTRIA-MICHELBACH, Arecibo-Observatory, GLASG..."
3876,20220926,23:59:24:00,III,"ALASKA-COHOE, ALASKA-HAARP, Australia-ASSA"
3890,20220929,00:00-24:00,,CTM
3920,20221001,03:50-24:00,CTM,*
4179,20221110,23:59-24:00,III,"ALASKA-COHOE, Australia-ASSA"
4467,20221215,23:58-24:00,III,Arecibo-Observatory
4557,20221221,00:00-24:00,CTM,*
4576,20221222,00:00-24:00,CTM,*


In [12]:
burst_list.loc[4179]

date                               20221110
time                            23:59-24:00
type                                    III
instruments    ALASKA-COHOE, Australia-ASSA
Name: 4179, dtype: object

In [13]:
burst_list.loc[4179+1]

date                 20221111
time              00:02-00:14
type                       VI
instruments    Australia-ASSA
Name: 4180, dtype: object

In [14]:
burst_list = fix_typos_in_time(burst_list)
burst_list = extract_time(burst_list)
burst_list = fix_24_hour_time(burst_list)
burst_list = create_datetime(burst_list)
burst_list = explode_instruments_long_clean_instruments(burst_list)
burst_list = keep_only_type_I_to_VI(burst_list)

In [15]:
burst_list.sample(5)

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
6592,20220205,13:16-13:18,III,Arecibo-Observatory,13:16,13:18,20220205,20220205,2022-02-05 13:16:00,2022-02-05 13:18:00
19626,20221025,18:58-19:02,V,MEXICO-LANCE,18:58,19:02,20221025,20221025,2022-10-25 18:58:00,2022-10-25 19:02:00
15728,20220722,13:04-13:06,III,SWISS-HEITERSWIL,13:04,13:06,20220722,20220722,2022-07-22 13:04:00,2022-07-22 13:06:00
1654,20210603,01:17-01:18,III,Australia-ASSA,01:17,01:18,20210603,20210603,2021-06-03 01:17:00,2021-06-03 01:18:00
16962,20220827,02:12-02:14,II,Australia-ASSA,02:12,02:14,20220827,20220827,2022-08-27 02:12:00,2022-08-27 02:14:00


## Fix wrong names

In [16]:
MIN_BURST_PER_INSTRUMENT = 30

In [17]:
low_appearance_instruments = burst_list.groupby('instruments').filter(lambda x: len(x) <= MIN_BURST_PER_INSTRUMENT).instruments.unique()
low_appearance_instruments

array(['INDIA-UAIPUR', 'SPAIN-ALCALA', 'ROSWELL-NW', 'Australia-LMRO', '',
       'INDIA-NASHIK', 'MRT3', 'DENMARK. GLASGOW', 'SWISS-BLEN5M',
       'SWISS-BLEN7M', 'URUGUAY', 'INPE?', 'MRT', 'SWISS-BLEN7M-E',
       'NORWAY-RANDABERG', 'INDIA-GAURI?', 'MRO?', 'AUSTRTIA-MICHELBACH',
       '/INDIA-UDAIPUR', 'l MONGOLIA-UB', 'SWISS-Landschlach',
       'HUMAIN. SWISS-Landschlacht', 'GASGOW', 'UNAM', 'GLSAGOW',
       'INDOENSIA', 'DENMAARK', 'Humain',
       'SWISS-LandschlachtEGYPT-Alexandria', 'INDIA-UDAIPUR MRT1',
       'AUSTRIA-Krumbach', 'NORWAY-NY-AALESUND', 'SP', 'IAIN-PERALEJOS',
       'ALASKA-ANCHORAGE', 'HUAMAIN', 'NDIA-GAURI', 'ROMANIA', 'HUMAIn',
       'MRT1?', 'HUMAI', 'NDIA-UDAIPUR', 'LASKA-COHOE', 'NDIA-OOTY',
       'MEXARFT', 'POLAND', 'USTRIA-UNIGRAZ', 'MRT21',
       'THAILAND-Pathumthan', 'INDIAMONGOLIA-UB', 'SSRT-UDAIPUR', '*',
       'Australia-ASSAArecibo-Observatory',
       'SSRT {more like drifting chain of type I}', 'INDIAALMATY',
       'SSRT-GAURI', 'INDO

In [18]:
import difflib

In [19]:
def find_closest_instrument(instrument):
    try:
        instruments = burst_list.instruments.unique().tolist()
        instruments = np.setdiff1d(instruments, low_appearance_instruments)
        close_instrument = difflib.get_close_matches(instrument, instruments, n=1, cutoff=0.7)[0]
        print(f"Looking for a close match for {instrument}. Found {close_instrument}")
        return close_instrument
    except IndexError:
        print(f"Could not find a close match for {instrument}. Returning NaN.")
        return pd.NA

In [20]:
burst_list['instruments'] = burst_list['instruments'].apply(lambda x: find_closest_instrument(x) if x in low_appearance_instruments else x)

Looking for a close match for INDIA-UAIPUR. Found INDIA-UDAIPUR
Looking for a close match for INDIA-UAIPUR. Found INDIA-UDAIPUR
Could not find a close match for SPAIN-ALCALA. Returning NaN.
Looking for a close match for ROSWELL-NW. Found ROSWELL-NM
Could not find a close match for SPAIN-ALCALA. Returning NaN.
Looking for a close match for ROSWELL-NW. Found ROSWELL-NM
Looking for a close match for Australia-LMRO. Found Australia-ASSA
Looking for a close match for Australia-LMRO. Found Australia-ASSA
Could not find a close match for . Returning NaN.
Could not find a close match for SPAIN-ALCALA. Returning NaN.
Could not find a close match for SPAIN-ALCALA. Returning NaN.
Could not find a close match for INDIA-NASHIK. Returning NaN.
Looking for a close match for MRT3. Found MRT2
Could not find a close match for SPAIN-ALCALA. Returning NaN.
Could not find a close match for SPAIN-ALCALA. Returning NaN.
Could not find a close match for DENMARK. GLASGOW. Returning NaN.
Could not find a close 

In [21]:
burst_list[burst_list['instruments'].isna()]

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
267,20210426,13:56-13:56,III,,13:56,13:56,20210426,20210426,2021-04-26 13:56:00,2021-04-26 13:56:00
451,20210509,13:54-13:56,III,,13:54,13:56,20210509,20210509,2021-05-09 13:54:00,2021-05-09 13:56:00
811,20210522,01:00-02:48,VI,,01:00,02:48,20210522,20210522,2021-05-22 01:00:00,2021-05-22 02:48:00
868,20210522,06:05-06:05,III,,06:05,06:05,20210522,20210522,2021-05-22 06:05:00,2021-05-22 06:05:00
889,20210522,06:16-06:17,III,,06:16,06:17,20210522,20210522,2021-05-22 06:16:00,2021-05-22 06:17:00
...,...,...,...,...,...,...,...,...,...,...
23394,20230116,11:14-11:14,III,,11:14,11:14,20230116,20230116,2023-01-16 11:14:00,2023-01-16 11:14:00
23891,20230130,00:24-00:24,III,,00:24,00:24,20230130,20230130,2023-01-30 00:24:00,2023-01-30 00:24:00
23892,20230130,00:24-00:24,III,,00:24,00:24,20230130,20230130,2023-01-30 00:24:00,2023-01-30 00:24:00
24248,20230209,07:16-07:18,III,,07:16,07:18,20230209,20230209,2023-02-09 07:16:00,2023-02-09 07:18:00


In [22]:
burst_list = burst_list.dropna(subset=['instruments'])

In [23]:
burst_list[burst_list.instruments.str.contains('HB9SCT')]

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
66,20210301,08:06-08:09,VI,SWISS-HB9SCT,08:06,08:09,20210301,20210301,2021-03-01 08:06:00,2021-03-01 08:09:00
925,20210522,06:47-06:54,VI,SWISS-HB9SCT,06:47,06:54,20210522,20210522,2021-05-22 06:47:00,2021-05-22 06:54:00
995,20210522,10:22-10:22,III,SWISS-HB9SCT,10:22,10:22,20210522,20210522,2021-05-22 10:22:00,2021-05-22 10:22:00
1032,20210522,11:18-11:26,VI,SWISS-HB9SCT,11:18,11:26,20210522,20210522,2021-05-22 11:18:00,2021-05-22 11:26:00
1377,20210523,10:53-11:12,VI,SWISS-HB9SCT,10:53,11:12,20210523,20210523,2021-05-23 10:53:00,2021-05-23 11:12:00
1705,20210609,12:02-12:07,II,SWISS-HB9SCT,12:02,12:07,20210609,20210609,2021-06-09 12:02:00,2021-06-09 12:07:00
1993,20210716,05:52-05:53,III,SWISS-HB9SCT,05:52,05:53,20210716,20210716,2021-07-16 05:52:00,2021-07-16 05:53:00
2038,20210716,08:34-08:36,III,SWISS-HB9SCT,08:34,08:36,20210716,20210716,2021-07-16 08:34:00,2021-07-16 08:36:00
2190,20210723,10:52-11:00,III,SWISS-HB9SCT,10:52,11:00,20210723,20210723,2021-07-23 10:52:00,2021-07-23 11:00:00
2293,20210821,11:43-11:45,III,SWISS-HB9SCT,11:43,11:45,20210821,20210821,2021-08-21 11:43:00,2021-08-21 11:45:00


In [25]:
burst_list.to_excel('burst_list.xlsx', index=False)