In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
from ecallisto_ng.data_fetching.get_data import extract_instrument_name, get_data
from ecallisto_ng.data_fetching.get_information import get_tables

# Data Generation Radio Sunburst Detector
## Create images with bursts

In [2]:
# Load the burst catalogue, then discard rows that have no instrument entry.
burst_list = pd.read_excel('burst_list.xlsx')
burst_list = burst_list.dropna(subset=['instruments'])

In [3]:
# Replace every raw `instruments` entry with the value extract_instrument_name derives from it.
burst_list.loc[:, 'instruments'] = burst_list['instruments'].apply(
    extract_instrument_name
)

In [4]:
# Show the burst list as this cell's output (last expression of the cell).
burst_list

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
0,20210119,02:42-02:42,3,australia_assa,02:42,02:42,20210119,20210119,2021-01-19 02:42:00,2021-01-19 02:42:00
1,20210120,12:37-12:37,3,austria_unigraz,12:37,12:37,20210120,20210120,2021-01-20 12:37:00,2021-01-20 12:37:00
2,20210120,12:37-12:37,3,humain,12:37,12:37,20210120,20210120,2021-01-20 12:37:00,2021-01-20 12:37:00
3,20210120,12:37-12:37,3,mrt1,12:37,12:37,20210120,20210120,2021-01-20 12:37:00,2021-01-20 12:37:00
4,20210120,12:37-12:37,3,southafrica_sansa,12:37,12:37,20210120,20210120,2021-01-20 12:37:00,2021-01-20 12:37:00
...,...,...,...,...,...,...,...,...,...,...
32464,20230707,10:15-10:16,4,germany_dlr,10:15,10:16,20230707,20230707,2023-07-07 10:15:00,2023-07-07 10:16:00
32465,20230707,10:15-10:16,4,norway_egersund,10:15,10:16,20230707,20230707,2023-07-07 10:15:00,2023-07-07 10:16:00
32466,20230707,10:15-10:16,4,swiss_heiterswil,10:15,10:16,20230707,20230707,2023-07-07 10:15:00,2023-07-07 10:16:00
32467,20230707,10:15-10:16,4,swiss_landschlacht,10:15,10:16,20230707,20230707,2023-07-07 10:15:00,2023-07-07 10:16:00


In [8]:
### PARAMETERS ###
# Time span covered by one generated image; also used as the step of the date range further down.
IMAGE_LENGTH = timedelta(minutes=1)
# Target image resolution along the time and the frequency axis.
# NOTE(review): neither value is read by the cells visible here - confirm they are still needed.
PIXEL_PER_IMAGE_OVER_TIME = 200
PIXEL_PER_IMAGE_OVER_FREQUENCY = 200
# Instrument names, as they appear in burst_list.instruments, to generate images for.
INSTRUMENTS_TO_EXTRACT_BURSTS_FROM = ['australia_assa']
###
# total_seconds is a method and has to be called; without the parentheses
# time_bucket would hold the bound method object instead of the length in seconds.
time_bucket = IMAGE_LENGTH.total_seconds()
# Keep only the bursts reported for one of the selected instruments.
burst_list_filtered = burst_list[burst_list.instruments.isin(INSTRUMENTS_TO_EXTRACT_BURSTS_FROM)]
burst_list_filtered

Unnamed: 0,date,time,type,instruments,time_start,time_end,date_start,date_end,datetime_start,datetime_end
0,20210119,02:42-02:42,3,australia_assa,02:42,02:42,20210119,20210119,2021-01-19 02:42:00,2021-01-19 02:42:00
7,20210127,04:32-04:32,3,australia_assa,04:32,04:32,20210127,20210127,2021-01-27 04:32:00,2021-01-27 04:32:00
89,20210419,06:55-06:57,3,australia_assa,06:55,06:57,20210419,20210419,2021-04-19 06:55:00,2021-04-19 06:57:00
108,20210419,23:39-23:42,2,australia_assa,23:39,23:42,20210419,20210419,2021-04-19 23:39:00,2021-04-19 23:42:00
109,20210419,23:39-23:43,3,australia_assa,23:39,23:43,20210419,20210419,2021-04-19 23:39:00,2021-04-19 23:43:00
...,...,...,...,...,...,...,...,...,...,...
32421,20230707,00:53-00:54,3,australia_assa,00:53,00:54,20230707,20230707,2023-07-07 00:53:00,2023-07-07 00:54:00
32424,20230707,01:33-01:34,5,australia_assa,01:33,01:34,20230707,20230707,2023-07-07 01:33:00,2023-07-07 01:34:00
32427,20230707,01:36-01:36,3,australia_assa,01:36,01:36,20230707,20230707,2023-07-07 01:36:00,2023-07-07 01:36:00
32431,20230707,04:43-04:46,3,australia_assa,04:43,04:46,20230707,20230707,2023-07-07 04:43:00,2023-07-07 04:46:00


In [9]:
# The instrument names in burst_list are usually only a substring of the full
# table name, so collect every table whose name contains one of them.
# The table list does not depend on the instrument, so it is fetched only once.
available_tables = get_tables()
instruments_to_extract_from = [
    table
    for instrument in INSTRUMENTS_TO_EXTRACT_BURSTS_FROM
    for table in available_tables
    if instrument in table
]
instruments_to_extract_from

['australia_assa_02',
 'australia_assa_63',
 'australia_assa_01',
 'australia_assa_56',
 'australia_assa_57',
 'australia_assa_62',
 'australia_assa_60']

In [13]:
def get_data_save_as_img(instrument, start_datetime, end_datetime, time_bucket, agg_function='MAX', burst_type="no_burst", data_folder="data"):
    """
    Fetch the data of one instrument for a time range and save it as a grayscale PNG.

    The frame returned by ``get_data`` is converted to an int16 array, transposed and
    written with ``plt.imsave``. No explicit normalization is applied here:
    ``plt.imsave`` is called without ``vmin``/``vmax``, so the colour scaling is left
    to matplotlib.

    The image is written to
    ``<data_folder>/<burst_type>/<start>_<end>_<instrument>_<time_bucket>.png``
    where start and end are formatted as ``%Y-%m-%d %H:%M:%S``. The target folder is
    created if it does not exist yet.

    Args:
        instrument (str): Name of the instrument for which data is to be retrieved.
        start_datetime (datetime.datetime): Start date and time of the data range.
        end_datetime (datetime.datetime): End date and time of the data range.
        time_bucket: Forwarded unchanged to ``get_data`` as ``timebucket`` and embedded
            in the file name via ``str()``. The accepted values are defined by
            ``get_data``; ``None`` is what the generation loop in this notebook passes.
        agg_function (str, optional): Forwarded unchanged to ``get_data``. Defaults to 'MAX'.
        burst_type (str, optional): Label used as the sub-folder name. Defaults to 'no_burst'.
        data_folder (str, optional): Root folder under which the images are saved. Defaults to 'data'.

    Returns:
        None

    Raises:
        Nothing is raised or caught here; errors from ``get_data``, the int16
        conversion or the file system propagate to the caller.

    Examples:
        # Retrieve data for instrument 'instrument_name' from 'start_datetime' to 'end_datetime' and save it as an image
        get_data_save_as_img('instrument_name', start_datetime, end_datetime, '1H', 'MAX', 'no_burst', 'data')

    """
    # The same formatted timestamps are used for the query and for the file name.
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    sd_str = start_datetime.strftime(timestamp_format)
    ed_str = end_datetime.strftime(timestamp_format)
    df = get_data(instrument_name=instrument,
                  start_datetime=sd_str,
                  end_datetime=ed_str,
                  timebucket=time_bucket,
                  agg_function=agg_function)

    img_data = df.to_numpy().astype(np.int16)

    # One sub-folder per label. exist_ok=True replaces the separate existence check
    # and does not fail if the folder appears between the check and the creation.
    path = os.path.join(data_folder, burst_type)
    os.makedirs(path, exist_ok=True)

    # NOTE(review): the timestamps put spaces and ':' into the file name, which is not
    # a valid file name on Windows - confirm the target platforms before changing it.
    file_name = "_".join([sd_str, ed_str, instrument, str(time_bucket)]) + ".png"
    file_path = os.path.join(path, file_name)
    # Transposed, so the columns of the fetched frame become the rows of the image.
    plt.imsave(file_path, img_data.T, cmap="gray")

In [14]:
# The list of tables is the same for every burst, so it is fetched once up front
# instead of once per row of the burst list.
available_tables = get_tables()

for _, row in burst_list_filtered.iterrows():
    # Start times of the images for this burst, spaced IMAGE_LENGTH apart.
    date_range = pd.date_range(row.datetime_start, row.datetime_end, freq=IMAGE_LENGTH, inclusive='left')
    # The burst type is the label and becomes the sub-folder name of the image.
    burst_type = str(row.type)
    for table in available_tables:
        # The burst list holds a substring of the table name (e.g. without the numeric suffix).
        if row.instruments not in table:
            continue
        for date in date_range:
            # Same constant as the date_range step, so window and step cannot drift apart.
            image_end = date + IMAGE_LENGTH
            try:
                get_data_save_as_img(table, date, image_end, None, None, burst_type, 'data')
            except ValueError as e:
                # Reported and skipped so one failed window does not stop the whole run.
                print(e)
                print(f"Skipping {table} from {date} to {image_end}")

No data found. Check your request?
Skipping australia_assa_02 from 2021-01-19 02:42:00 to 2021-01-19 02:42:00
No data found. Check your request?
Skipping australia_assa_63 from 2021-01-19 02:42:00 to 2021-01-19 02:42:00
No data found. Check your request?
Skipping australia_assa_01 from 2021-01-19 02:42:00 to 2021-01-19 02:42:00
