In [42]:
from burstextractor.burstlist import download_burst_list, process_burst_list
from burstextractor.timeutils import extract_time, fix_typos_in_time, fix_24_hour_time, create_datetime, check_valid_date, adjust_year_month
from burstextractor.data_utils import explode_instruments_long_clean_instruments, keep_only_type_I_to_VI
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt 

## Create a DataFrame from the monthly burst lists

In [43]:
# Fetch every monthly burst list for the listed years, up to and including the
# month in which the notebook is run, and combine them into a single frame.
years = [2021, 2022, 2023]
months = range(1, 12+1)

# Read the clock once so every iteration compares against the same (year, month).
now = datetime.datetime.now()

# Collected per-month results; kept under its own name so `burst_list` is only
# ever bound to the final DataFrame.
monthly_frames = []
for year in years:
    for month in months:
        # Tuple comparison stops at the first month after the current one and
        # also skips a year that lies entirely in the future.
        if (year, month) > (now.year, now.month):
            break
        download_burst_list(year, month)
        # NOTE(review): presumably this is the file download_burst_list just
        # wrote - confirm against burstextractor.burstlist.
        monthly_frames.append(process_burst_list(f"e-CALLISTO_{year}_{month:02}.txt"))

# One frame for the whole period, re-indexed from 0.
burst_list = pd.concat(monthly_frames).reset_index(drop=True)

  data = pd.read_csv(filename, sep="\t", index_col=False, encoding=ENCODING, names=col_names, engine="python", skiprows=skip_row_idxs, dtype=str)
  data = pd.read_csv(filename, sep="\t", index_col=False, encoding=ENCODING, names=col_names, engine="python", skiprows=skip_row_idxs, dtype=str)


In [44]:
# Spot-check five randomly chosen rows of the combined frame.
burst_list.sample(n=5)

Unnamed: 0,date,time,type,instruments
2079,20220402,09:45-09:48,III,"AUSTRIA-OE3FLB, AUSTRIA-UNIGRAZ, BIR, DENMARK,..."
1610,20220204,21:54-21:56,III,"ALASKA-COHOE, ALASKA-HAARP, Australia-ASSA"
1422,20220115,07:39-07:40,CTM,AUSTRIA-UNIGRAZ
2132,20220411,05:41-05:42,III,Australia-ASSA
1064,20211031,15:26-15:27,III,"AUSTRIA-UNIGRAZ, Arecibo-Observatory, BIR, GLA..."


In [45]:
# (number of rows, number of columns) of the combined frame.
burst_list.shape

(4895, 4)

## Fix typos in the time column

In [46]:
# Pull the four digit groups out of every "time" string. The separators are
# matched with "." (any single character), so entries that use an unexpected
# separator are still captured.
time_digit_groups = burst_list['time'].str.extract(r'(\d+).(\d+).(\d+).(\d+)', expand=True)
# Integer columns 0..3; the cast raises if any row failed to match the pattern.
extracted_digits = time_digit_groups.astype(int)

In [47]:
# Columns 0 and 2 are checked against the largest valid hour (23),
# columns 1 and 3 against the largest valid minute (59).
hour_out_of_range = extracted_digits[[0, 2]].gt(23).any(axis=1)
minute_out_of_range = extracted_digits[[1, 3]].gt(59).any(axis=1)
impossible_times_bool = minute_out_of_range | hour_out_of_range
# Show the offending digit groups.
extracted_digits.loc[impossible_times_bool]

Unnamed: 0,0,1,2,3
1693,6,6,6,88
2238,24,32,14,33
3876,23,59,24,0
3890,0,0,24,0
3920,3,50,24,0
4179,23,59,24,0
4467,23,58,24,0
4557,0,0,24,0
4576,0,0,24,0


In [48]:
# Original rows behind the out-of-range hour/minute values.
burst_list.loc[impossible_times_bool]

Unnamed: 0,date,time,type,instruments
1693,20220210,06:06-06:88,V,"ALMATY, Australia-ASSA, INDIA-OOTY, INDIA-UDAI..."
2238,20220421,24:32-14:33,III,"AUSTRIA-MICHELBACH, Arecibo-Observatory, GLASG..."
3876,20220926,23:59:24:00,III,"ALASKA-COHOE, ALASKA-HAARP, Australia-ASSA"
3890,20220929,00:00-24:00,,CTM
3920,20221001,03:50-24:00,CTM,*
4179,20221110,23:59-24:00,III,"ALASKA-COHOE, Australia-ASSA"
4467,20221215,23:58-24:00,III,Arecibo-Observatory
4557,20221221,00:00-24:00,CTM,*
4576,20221222,00:00-24:00,CTM,*


In [49]:
# Look at one of the flagged rows (index label 4179) in full.
burst_list.loc[4179, :]

date                               20221110
time                            23:59-24:00
type                                    III
instruments    ALASKA-COHOE, Australia-ASSA
Name: 4179, dtype: object

In [50]:
# The row that directly follows 4179, for comparison.
burst_list.loc[4180]

date                 20221111
time              00:02-00:14
type                       VI
instruments    Australia-ASSA
Name: 4180, dtype: object

In [51]:
# Run the burstextractor cleaning helpers in order; each one receives the frame
# returned by the previous step.
# NOTE(review): what each helper does is defined in burstextractor, not here -
# confirm there before reordering steps.
burst_list = (
    burst_list
    .pipe(fix_typos_in_time)
    .pipe(extract_time)
    .pipe(fix_24_hour_time)
    .pipe(create_datetime)
    .pipe(explode_instruments_long_clean_instruments)
    .pipe(keep_only_type_I_to_VI)
)

In [52]:
# Spot-check five randomly chosen rows after the cleaning steps.
burst_list.sample(n=5)

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
10450,20220504,10:48-10:56,VI,BIR,10:48,10:56,20220504,20220504,2022-05-04 10:48:00,2022-05-04 10:56:00
3480,20210921,09:42-09:50,VI,SPAIN-PERALEJOS,09:42,09:50,20210921,20210921,2021-09-21 09:42:00,2021-09-21 09:50:00
4103,20211005,07:30-07:31,III,MRO?,07:30,07:31,20211005,20211005,2021-10-05 07:30:00,2021-10-05 07:31:00
7907,20220329,06:42-06:47,III,INDIA-UDAIPUR,06:42,06:47,20220329,20220329,2022-03-29 06:42:00,2022-03-29 06:47:00
10241,20220502,01:45-01:46,III,INDIA-UDAIPUR,01:45,01:46,20220502,20220502,2022-05-02 01:45:00,2022-05-02 01:46:00


## Fix misspelled instrument names

In [53]:
# Row-count threshold for flagging suspicious instrument names.
# NOTE: despite the "MIN" in the name, the selection that uses this constant
# compares with `<=`, so an instrument with exactly this many rows is still
# treated as low-appearance.
MIN_BURST_PER_INSTRUMENT = 2

In [54]:
def _is_low_appearance(group):
    """Return True when an instrument group has at most MIN_BURST_PER_INSTRUMENT rows."""
    return len(group) <= MIN_BURST_PER_INSTRUMENT


# Keep only the rows that belong to rarely seen instrument names, then collect
# those names (in order of first appearance).
low_appearance_rows = burst_list.groupby('instruments').filter(_is_low_appearance)
low_appearance_instruments = low_appearance_rows['instruments'].unique()
low_appearance_instruments

array(['INDIA-UAIPUR', 'ROSWELL-NW', 'DENMARK. GLASGOW', 'INPE?', 'MRT',
       'INDIA-GAURI?', 'AUSTRTIA-MICHELBACH', '/INDIA-UDAIPUR',
       'l MONGOLIA-UB', 'SWISS-Landschlach', 'HUMAIN. SWISS-Landschlacht',
       'GASGOW', 'INDOENSIA', 'DENMAARK',
       'SWISS-LandschlachtEGYPT-Alexandria', 'INDIA-UDAIPUR MRT1',
       'NORWAY-NY-AALESUND', 'SP', 'IAIN-PERALEJOS', 'HUAMAIN',
       'NDIA-GAURI', 'HUMAIn', 'MRT1?', 'HUMAI', 'NDIA-UDAIPUR',
       'LASKA-COHOE', 'NDIA-OOTY', 'MEXARFT', 'POLAND', 'USTRIA-UNIGRAZ',
       'MRT21', 'THAILAND-Pathumthan', 'INDIAMONGOLIA-UB', 'SSRT-UDAIPUR',
       '*', 'Australia-ASSAArecibo-Observatory',
       'SSRT {more like drifting chain of type I}', 'INDIAALMATY',
       'SSRT-GAURI', 'INDOALASKA-COHOE', 'ROSWELL-NMNESIA',
       'Australia-ASSA {followed by blackout}', 'SSRTMalaysia-Banting',
       'c'], dtype=object)

In [55]:
import difflib

In [56]:
def find_closest_instrument(instrument, candidates=None, cutoff=0.7):
    """Return the known instrument name most similar to ``instrument``.

    Parameters
    ----------
    instrument : str
        Possibly misspelled instrument name to look up.
    candidates : iterable of str, optional
        Names that are acceptable as a match. When omitted, the candidates are
        every unique value of ``burst_list.instruments`` that is not listed in
        ``low_appearance_instruments`` (both are notebook-level globals). Pass
        a precomputed collection to avoid rebuilding that list on every call.
    cutoff : float, optional
        Minimum ``difflib`` similarity ratio, between 0 and 1, that a candidate
        must reach to count as a match. Defaults to 0.7.

    Returns
    -------
    str or pd.NA
        The single best match, or ``pd.NA`` when no candidate reaches
        ``cutoff``. The outcome is also printed.
    """
    if candidates is None:
        known_instruments = burst_list.instruments.unique().tolist()
        candidates = np.setdiff1d(known_instruments, low_appearance_instruments)

    matches = difflib.get_close_matches(instrument, candidates, n=1, cutoff=cutoff)

    # Check for "no match" explicitly instead of catching IndexError around the
    # whole body, so an unrelated IndexError is not silently reported as
    # "no match".
    if not matches:
        print(f"Could not find a close match for {instrument}. Returning NaN.")
        return pd.NA

    close_instrument = matches[0]
    print(f"Looking for a close match for {instrument}. Found {close_instrument}")
    return close_instrument

In [57]:
def _repair_instrument_name(name):
    """Swap a low-appearance instrument name for its closest match; pass other names through."""
    if name in low_appearance_instruments:
        return find_closest_instrument(name)
    return name


# Rewrite the instrument column row by row; names without a close match become pd.NA.
burst_list['instruments'] = burst_list['instruments'].apply(_repair_instrument_name)

Looking for a close match for INDIA-UAIPUR. Found INDIA-UDAIPUR
Looking for a close match for INDIA-UAIPUR. Found INDIA-UDAIPUR
Looking for a close match for ROSWELL-NW. Found ROSWELL-NM
Looking for a close match for ROSWELL-NW. Found ROSWELL-NM
Could not find a close match for DENMARK. GLASGOW. Returning NaN.
Looking for a close match for INPE?. Found INPE
Looking for a close match for MRT. Found MRT3
Looking for a close match for INDIA-GAURI?. Found INDIA-GAURI
Looking for a close match for AUSTRTIA-MICHELBACH. Found AUSTRIA-MICHELBACH
Looking for a close match for AUSTRTIA-MICHELBACH. Found AUSTRIA-MICHELBACH
Looking for a close match for /INDIA-UDAIPUR. Found INDIA-UDAIPUR
Looking for a close match for l MONGOLIA-UB. Found MONGOLIA-UB
Looking for a close match for l MONGOLIA-UB. Found MONGOLIA-UB
Looking for a close match for SWISS-Landschlach. Found SWISS-Landschlacht
Looking for a close match for HUMAIN. SWISS-Landschlacht. Found SWISS-Landschlacht
Looking for a close match for G

In [58]:
# Rows with a missing instrument name (find_closest_instrument returns pd.NA
# when it finds no close match).
instrument_missing = burst_list['instruments'].isna()
burst_list.loc[instrument_missing]

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
982,20210522,10:22-10:22,III,,10:22,10:22,20210522,20210522,2021-05-22 10:22:00,2021-05-22 10:22:00
7347,20220309,13:19-13:19,III,,13:19,13:19,20220309,20220309,2022-03-09 13:19:00,2022-03-09 13:19:00
7855,20220328,12:49-12:50,III,,12:49,12:50,20220328,20220328,2022-03-28 12:49:00,2022-03-28 12:50:00
15846,20220723,20:33-20:35,III,,20:33,20:35,20220723,20220723,2022-07-23 20:33:00,2022-07-23 20:35:00
18015,20220920,11:20-11:24,III,,11:20,11:24,20220920,20220920,2022-09-20 11:20:00,2022-09-20 11:24:00
18155,20220921,10:06-10:08,III,,10:06,10:08,20220921,20220921,2022-09-21 10:06:00,2022-09-21 10:08:00
20078,20221111,03:15-03:19,III,,03:15,03:19,20221111,20221111,2022-11-11 03:15:00,2022-11-11 03:19:00
21179,20221212,07:20-07:37,VI,,07:20,07:37,20221212,20221212,2022-12-12 07:20:00,2022-12-12 07:37:00
21493,20221217,00:02-23:59,VI,,00:02,23:59,20221217,20221217,2022-12-17 00:02:00,2022-12-17 23:59:00
21609,20221218,16:22-16:23,III,,16:22,16:23,20221218,20221218,2022-12-18 16:22:00,2022-12-18 16:23:00


In [59]:
# Discard the rows whose instrument name is missing.
burst_list = burst_list.dropna(axis='index', subset=['instruments'])

In [60]:
# Write the cleaned burst list to an Excel workbook without the index column.
burst_list.to_excel(excel_writer='burst_list.xlsx', index=False)