In [2]:
from burstextractor.burstlist import download_burst_data
from burstextractor.timeutils import extract_time, fix_typos_in_time, fix_24_hour_time, create_datetime
from burstextractor.data_utils import explode_instruments_long_clean_instruments, keep_only_type_I_to_VI
from ecallisto_ng.data_fetching.get_data import get_data
from ecallisto_ng.plotting.utils import plot_spectogram
import pandas as pd
import numpy as np
import difflib

## Create Dataframe with the Burstlists

In [42]:
burst_list = download_burst_data([2021, 2022, 2023], months=range(1, 8), folder="ecallisto_files")

In [4]:
burst_list

Unnamed: 0,date,time,type,instruments
0,20210119,02:42-02:42,III,Australia-ASSA
1,20210120,12:37-12:37,III,"AUSTRIA-UNIGRAZ, [HUMAIN], MRT1, SOUTHAFRICA-S..."
2,20210127,04:32-04:32,III,"Australia-ASSA, INDIA-GAURI, SOUTHAFRICA-SANSA"
3,20210127,09:27-09:27,III,"AUSTRIA-UNIGRAZ, INDIA-GAURI, INDIA-OOTY, MRT1..."
4,20210218,18:04-18:04,III,"GREENLAND, MEXART, ROSWELL-NM"
...,...,...,...,...
4253,20230629,17:00-23:59,CTM,e-Callisto
4254,20230629,17:20-17:20,III,e-Callisto
4255,20230630,00:00-04:30,CTM,e-Callisto
4256,20230630,08:42-08:42,III,e-Callisto


In [5]:
burst_list.sample(5)

Unnamed: 0,date,time,type,instruments
2330,20230113,16:54-16:56,II?,Arecibo-Observatory
1020,20220327,21:26-21:26,III,ALASKA-HAARP
4071,20230616,22:25-22:25,III,"ALASKA-COHOE, ALASKA-HAARP, MEXART, MEXICO-LANCE"
1315,20220426,08:27-08:28,III,"ALMATY, AUSTRIA-MICHELBACH, INDIA-GAURI, INDIA..."
3959,20230604,17:34-17:34,III,"ALASKA-COHOE, ALASKA-HAARP, Arecibo-Observator..."


In [6]:
burst_list.shape

(4258, 4)

## Fix typos

In [7]:
extracted_digits = burst_list['time'].str.extract(r'(\d+).(\d+).(\d+).(\d+)', expand=True).astype(int)

In [8]:
impossible_times_bool = (extracted_digits[1] > 59) | (extracted_digits[3] > 59) | (extracted_digits[0] > 23) | (extracted_digits[2] > 23)
extracted_digits[impossible_times_bool]

Unnamed: 0,0,1,2,3
728,6,6,6,88
1273,24,32,14,33
3660,21,18,212,19


In [9]:
burst_list[impossible_times_bool]

Unnamed: 0,date,time,type,instruments
728,20220210,06:06-06:88,V,"ALMATY, Australia-ASSA, INDIA-OOTY, INDIA-UDAI..."
1273,20220421,24:32-14:33,III,"AUSTRIA-MICHELBACH, Arecibo-Observatory, GLASG..."
3660,20230510,21:18-212:19,III,"(ALASKA-ANCHORAGE), ALASKA-COHOE, ALASKA-HAARP..."


In [10]:
burst_list.loc[4179]

date                                                    20230623
time                                                 17:15-17:41
type                                                          VI
instruments    ALASKA-COHOE, ALASKA-HAARP, Arecibo-Observator...
Name: 4179, dtype: object

In [11]:
burst_list.loc[4179+1]

date                                                  20230623
time                                               17:52-17:53
type                                                       III
instruments    ALASKA-HAARP, Arecibo-Observatory, MEXICO-LANCE
Name: 4180, dtype: object

In [12]:
burst_list = fix_typos_in_time(burst_list)
burst_list = extract_time(burst_list)
burst_list = fix_24_hour_time(burst_list)
burst_list = create_datetime(burst_list)

In [13]:
burst_list[impossible_times_bool]

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
728,20220210,06:06-06:08,V,"ALMATY, Australia-ASSA, INDIA-OOTY, INDIA-UDAI...",06:06,06:08,20220210,20220210,2022-02-10 06:06:00,2022-02-10 06:08:00
1273,20220421,14:32-14:33,III,"AUSTRIA-MICHELBACH, Arecibo-Observatory, GLASG...",14:32,14:33,20220421,20220421,2022-04-21 14:32:00,2022-04-21 14:33:00
3660,20230510,21:18-21:19,III,"(ALASKA-ANCHORAGE), ALASKA-COHOE, ALASKA-HAARP...",21:18,21:19,20230510,20230510,2023-05-10 21:18:00,2023-05-10 21:19:00


In [14]:
burst_list = explode_instruments_long_clean_instruments(burst_list)
burst_list = keep_only_type_I_to_VI(burst_list)

## Fix wrong names

In [None]:
MIN_BURST_PER_INSTRUMENT = 5

In [None]:
low_appearance_instruments = burst_list.groupby('instruments').filter(lambda x: len(x) <= MIN_BURST_PER_INSTRUMENT).instruments.unique().copy()
low_appearance_instruments

array(['INDIA-UAIPUR', 'ROSWELL-NW', 'DENMARK. GLASGOW', 'INPE?', 'MRT',
       'INDIA-GAURI?', 'MRO?', 'AUSTRTIA-MICHELBACH', '/INDIA-UDAIPUR',
       'l MONGOLIA-UB', 'SWISS-Landschlach', 'HUMAIN. SWISS-Landschlacht',
       'GASGOW', 'GLSAGOW', 'INDOENSIA', 'DENMAARK', 'Humain',
       'SWISS-LandschlachtEGYPT-Alexandria', 'INDIA-UDAIPUR MRT1',
       'NORWAY-NY-AALESUND', 'SP', 'IAIN-PERALEJOS', 'HUAMAIN',
       'NDIA-GAURI', 'HUMAIn', 'MRT1?', 'HUMAI', 'NDIA-UDAIPUR',
       'LASKA-COHOE', 'NDIA-OOTY', 'MEXARFT', 'POLAND', 'USTRIA-UNIGRAZ',
       'MRT21', 'THAILAND-Pathumthan', 'INDIAMONGOLIA-UB', 'SSRT-UDAIPUR',
       '*', 'Australia-ASSAArecibo-Observatory',
       'SSRT {more like drifting chain of type I}', 'INDIAALMATY',
       'SSRT-GAURI', 'INDOALASKA-COHOE', 'ROSWELL-NMNESIA',
       'Australia-ASSA {followed by blackout}', 'SSRTMalaysia-Banting',
       'INDONESIAINDIA-OOTY', 'ALMYTY', 'INDIA-OOTY?', 'SSRTFIN',
       'LAND-Siuntio', 'USA-ARIZONA-ERA', 'FINLAND-Siunti'

In [None]:
burst_list.instruments.unique()

array(['Australia-ASSA', 'AUSTRIA-UNIGRAZ', 'HUMAIN', 'MRT1',
       'SOUTHAFRICA-SANSA', 'SWISS-Landschlacht', 'TRIEST', 'INDIA-GAURI',
       'INDIA-OOTY', 'GREENLAND', 'MEXART', 'ROSWELL-NM', 'KRIM',
       'GLASGOW', 'ALASKA-HAARP', 'ALMATY', 'INDIA-UDAIPUR', 'MRO',
       'INDIA-UAIPUR', 'INDONESIA', 'AUSTRIA-OE3FLB', 'HURBANOVO',
       'MONGOLIA-UB', 'MRT2', 'SWISS-HB9SCT', 'SWISS-IRSOL',
       'SWISS-MUHEN', 'SPAIN-PERALEJOS', 'BIR', 'DENMARK',
       'SWISS-HEITERSWIL', 'KASI', 'ALGERIA-CRAAG', 'SPAIN-ALCALA',
       'ROSWELL-NW', 'ALASKA-COHOE', 'AUSTRIA-MICHELBACH',
       'Australia-LMRO', '', 'INDIA-NASHIK', 'MRT3', 'DENMARK. GLASGOW',
       'SWISS-BLEN5M', 'SWISS-BLEN7M', 'URUGUAY', 'INPE', 'INPE?',
       'SPAIN-SIGUENZA', 'POLAND-Grotniki', 'MRT', 'GERMANY-DLR',
       'SWISS-BLEN7M-E', 'EGYPT-Alexandria', 'NORWAY-RANDABERG',
       'INDIA-GAURI?', 'SRI-Lanka', 'MRO?', 'AUSTRTIA-MICHELBACH',
       '/INDIA-UDAIPUR', 'l MONGOLIA-UB', 'SWISS-Landschlach',
       'Malays

In [None]:
def find_closest_instrument(instrument):
    try:
        instruments = burst_list.instruments.unique().tolist()
        # Get list of instrument which are NOT in the low_appearance_instruments list
        instruments = np.array([i for i in instruments if i not in low_appearance_instruments])
        close_instrument = difflib.get_close_matches(instrument, instruments, n=1, cutoff=0.8)[0]
        print(f"Looking for a close match for {instrument}. Found {close_instrument}")
        return close_instrument
    except IndexError:
        print(f"Could not find a close match for {instrument}. Returning NaN.")
        return pd.NA

In [None]:
burst_list['instruments'] = burst_list['instruments'].apply(lambda x: find_closest_instrument(x) if x in low_appearance_instruments else x)

Looking for a close match for INDIA-UAIPUR. Found INDIA-UDAIPUR
Looking for a close match for INDIA-UAIPUR. Found INDIA-UDAIPUR
Looking for a close match for ROSWELL-NW. Found ROSWELL-NM
Looking for a close match for ROSWELL-NW. Found ROSWELL-NM
Could not find a close match for DENMARK. GLASGOW. Returning NaN.
Looking for a close match for INPE?. Found INPE
Looking for a close match for MRT. Found MRT3
Looking for a close match for INDIA-GAURI?. Found INDIA-GAURI
Looking for a close match for MRO?. Found MRO
Looking for a close match for MRO?. Found MRO
Looking for a close match for MRO?. Found MRO
Looking for a close match for AUSTRTIA-MICHELBACH. Found AUSTRIA-MICHELBACH
Looking for a close match for AUSTRTIA-MICHELBACH. Found AUSTRIA-MICHELBACH
Looking for a close match for /INDIA-UDAIPUR. Found INDIA-UDAIPUR
Looking for a close match for l MONGOLIA-UB. Found MONGOLIA-UB
Looking for a close match for l MONGOLIA-UB. Found MONGOLIA-UB
Looking for a close match for SWISS-Landschlach. F

In [None]:
burst_list[burst_list['instruments'].isna()]

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
982,20210522,10:22-10:22,III,,10:22,10:22,20210522,20210522,2021-05-22 10:22:00,2021-05-22 10:22:00
7213,20220306,15:46-15:56,VI,,15:46,15:56,20220306,20220306,2022-03-06 15:46:00,2022-03-06 15:56:00
7234,20220307,13:36-13:36,III,,13:36,13:36,20220307,20220307,2022-03-07 13:36:00,2022-03-07 13:36:00
7239,20220307,14:50-14:50,III,,14:50,14:50,20220307,20220307,2022-03-07 14:50:00,2022-03-07 14:50:00
7347,20220309,13:19-13:19,III,,13:19,13:19,20220309,20220309,2022-03-09 13:19:00,2022-03-09 13:19:00
7818,20220328,11:23-11:38,II,,11:23,11:38,20220328,20220328,2022-03-28 11:23:00,2022-03-28 11:38:00
7855,20220328,12:49-12:50,III,,12:49,12:50,20220328,20220328,2022-03-28 12:49:00,2022-03-28 12:50:00
15846,20220723,20:33-20:35,III,,20:33,20:35,20220723,20220723,2022-07-23 20:33:00,2022-07-23 20:35:00
18015,20220920,11:20-11:24,III,,11:20,11:24,20220920,20220920,2022-09-20 11:20:00,2022-09-20 11:24:00
18155,20220921,10:06-10:08,III,,10:06,10:08,20220921,20220921,2022-09-21 10:06:00,2022-09-21 10:08:00


In [None]:
burst_list = burst_list.dropna(subset=['instruments'])

In [None]:
burst_list['type'] = burst_list['type'].replace({'I': 1, 'II': 2, 'III': 3, 'IV': 4, 'V': 5, 'VI': 6}).astype(int)

In [None]:
burst_list[burst_list.instruments.str.contains('Australia-ASSA') & burst_list.type.isin([5, 6])]

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
113,20210420,03:19-03:21,6,Australia-ASSA,03:19,03:21,20210420,20210420,2021-04-20 03:19:00,2021-04-20 03:21:00
168,20210423,07:16-07:16,6,Australia-ASSA,07:16,07:16,20210423,20210423,2021-04-23 07:16:00,2021-04-23 07:16:00
332,20210506,03:00-03:04,6,Australia-ASSA,03:00,03:04,20210506,20210506,2021-05-06 03:00:00,2021-05-06 03:04:00
492,20210512,05:43-05:46,6,Australia-ASSA,05:43,05:46,20210512,20210512,2021-05-12 05:43:00,2021-05-12 05:46:00
534,20210513,06:28-06:30,6,Australia-ASSA,06:28,06:30,20210513,20210513,2021-05-13 06:28:00,2021-05-13 06:30:00
...,...,...,...,...,...,...,...,...,...,...
33134,20230622,06:01-06:15,6,Australia-ASSA,06:01,06:15,20230622,20230622,2023-06-22 06:01:00,2023-06-22 06:15:00
33578,20230624,01:57-03:20,6,Australia-ASSA,01:57,03:20,20230624,20230624,2023-06-24 01:57:00,2023-06-24 03:20:00
33877,20230704,00:25-00:36,6,Australia-ASSA,00:25,00:36,20230704,20230704,2023-07-04 00:25:00,2023-07-04 00:36:00
34146,20230706,23:19-23:31,6,Australia-ASSA,23:19,23:31,20230706,20230706,2023-07-06 23:19:00,2023-07-06 23:31:00


# Manually fix the rest. 

## Negative burst duration

In [None]:
burst_list.loc[:, 'duration'] = burst_list.datetime_end - burst_list.datetime_start
# Get negative duration
burst_list[burst_list['duration'] < pd.Timedelta(0)]


In [None]:

mapping_to_instrument_name = {'Australia-ASSA': 'australia_assa_02'}

###

burst_list_filtered = 

# TODO: Fix negative time deltas. Maybe with something below?

In [None]:

# Iterate over each row of the dataframe 'burst_list_filtered'.
for index, row in burst_list_filtered.iterrows():
    
    # Check if 'end_datetime' is before 'start_datetime' for the current row.
    if row['datetime_end'] < row['datetime_start']:
        # If true, set 'end_datetime' to 'start_datetime' plus 60 minutes.
        df.loc[index, 'end_datetime'] = row['start_datetime'] + timedelta(minutes=60)

    # Construct the parameters dictionary for the current row.
    params = {
        "instrument_name": row['instruments'],
        "start_datetime": row['datetime_start'].strftime('%Y-%m-%d %H:%M:%S'),
        "end_datetime": row['datetime_end'].strftime('%Y-%m-%d %H:%M:%S'),
        "timebucket": "1s",
        "agg_function": "MAX",
    }
    try:
        # Get the data for plotting based on the constructed parameters.
        data = get_data(**params)
    except Exception as e:
        continue
        
    # Plot the obtained data.
    plot_spectogram(data, params['instrument_name'], params['start_datetime'], params['end_datetime'])

    # Continuously prompt the user to provide new datetime values until they give a valid input.
    while True:
        # Ask user for new 'start_datetime'.
        new_start = input("Enter new start datetime: ")
        
        # Ask user for new 'end_datetime'.
        new_end = input("Enter new end datetime: ")
        
        try:
            # Try to parse the input datetime strings to ensure they're in the correct format.
            datetime.strptime(new_start, '%Y-%m-%d %H:%M:%S')
            datetime.strptime(new_end, '%Y-%m-%d %H:%M:%S')
            
            # If parsing succeeds without any errors, update the dataframe with the new values.
            burst_list_filtered.loc[index, 'datetime_start'] = new_start
            burst_list_filtered.loc[index, 'datetime_end'] = new_end
            
            # Break out of the loop since the input is valid.
            break  
        except ValueError:
            # If the datetime format is incorrect, notify the user and prompt again.
            print("Invalid datetime format. Use %Y-%m-%d %H:%M:%S. Please reenter.")


In [None]:
burst_list.to_excel('burst_list.xlsx', index=False)