In [1]:
from ecallisto_ng.data_download.downloader import get_ecallisto_data
from ecallisto_ng.burst_list.utils import load_burst_list
from datetime import timedelta
from PIL import Image
import random
import os

In [2]:
def random_duration(min_start, min_end):
    """
    Draw a random whole-minute duration from an inclusive range.

    Parameters:
    min_start (int): Smallest number of minutes that can be drawn.
    min_end (int): Largest number of minutes that can be drawn.

    Returns:
    datetime.timedelta: The drawn number of minutes as a timedelta.
    """
    # random.randint includes both endpoints and only yields integers,
    # so the result is always a whole number of minutes.
    return timedelta(minutes=random.randint(min_start, min_end))

def save_image(df, path):
    """
    Write the values of a dataframe to disk as an image.

    Parameters:
    df (pandas.DataFrame): The dataframe whose values form the pixel data.
    path (str): Destination file path of the image.
    """
    # Raw value array -> PIL image -> file, in one chain.
    Image.fromarray(df.values).save(path)

# Example usage: both bounds are inclusive, so this yields 3, 4, 5, 6 or 7 minutes.
duration = random_duration(3, 7)
# The message states the range that is actually requested above.
print("Random Duration between 3 and 7 minutes:", duration)


Random Duration between 0 and 7 minutes: 0:05:00


In [3]:
# Load the burst list; later cells read its 'instruments', 'type',
# 'datetime_start' and 'datetime_end' columns.
burst_list = load_burst_list()

# Some Filtering for specific instruments

In [4]:
# Full instrument names (antenna location plus ID suffix) to generate images for.
instruments = ['Australia-ASSA_02']
# Counter of burst images; incremented by the burst loop after each saved image.
burst_generated = 0

In [5]:
# The burst list only records the antenna location, not the instrument ID,
# so keep just the part before the first underscore when matching.
instrument_locations = [name.split('_')[0] for name in instruments]
burst_list = burst_list[burst_list['instruments'].isin(instrument_locations)]

In [6]:
# Root directory under which the generated images are written.
FOLDER = 'data'
# Target image size; in the cells shown here only the first entry is used,
# to derive the resampling step.
RESOLUTION = (256, 256) 

In [7]:
# Time step that divides a 15-minute window into RESOLUTION[0] equal bins.
resample_delta = timedelta(minutes=15) / RESOLUTION[0] # Not perfect, but it works
resample_delta

datetime.timedelta(seconds=3, microseconds=515625)

In [None]:
# Generate one image per listed burst for every instrument of interest.
# Start counting from zero so that re-running this cell does not double count.
burst_generated = 0
for i, row in burst_list.iterrows():
    # Move the window start 0-11 minutes before the burst start, so the burst
    # does not always begin at the left edge of the 15-minute window.
    datetime_start = row['datetime_start'] - random_duration(0, 11)
    end_time = datetime_start + timedelta(minutes=15)
    dfs = get_ecallisto_data(datetime_start, end_time, instrument_name=row['instruments'])
    for _, df in dfs.items():
        # Bind the name before the try block: the error report below must never
        # hit an unbound (or stale) `instrument` when the metadata is missing.
        instrument = df.attrs.get('FULLNAME')
        try:
            if instrument not in instruments:
                # Report which instrument was skipped, not the constant allow-list.
                print(f"Skipping {instrument}: not in {instruments}")
                continue
            # Downsample by keeping the maximum of every resample_delta bin.
            df = df.resample(resample_delta).max()
            # Maybe keep only good frequencies?
            # Background sub?
            ## Path to save the image to
            # It's FOLDER / instrument / burst type / datetime_start.png
            path = os.path.join(FOLDER, instrument, str(row['type']), row['datetime_start'].strftime('%Y-%m-%d_%H-%M-%S') + '.png')
            os.makedirs(os.path.dirname(path), exist_ok=True)
            # Transposed so that the resampled (row) axis runs along the image width.
            save_image(df.T, path)
            burst_generated += 1
        except Exception as e:
            # Best effort: report the failing burst and continue with the next one.
            print(e)
            print(row['datetime_start'])
            print(row['datetime_end'])
            print(row['instruments'])
            print(instrument)

## Non-Bursts
We do this similarly, just the other way around: we sample random time windows that contain no listed burst.

In [None]:
# Counter of non-burst images; the sampling loop compares it against the burst count.
non_burst_generated = 0
burst_non_burst_ratio = 5 # 5: There are 5x more non-burst than burst images.

In [11]:
def return_random_datetime_between(start_datetime, end_datetime):
    """
    Generate a random datetime between two specified datetimes.

    The offset is drawn in whole seconds and the result is then truncated to
    the minute. Because of that truncation the result can lie up to 59 seconds
    before start_datetime when start_datetime is not on a whole minute.

    Parameters:
    start_datetime (datetime.datetime): The start of the range.
    end_datetime (datetime.datetime): The end of the range.

    Returns:
    datetime.datetime: A random datetime between the two specified datetimes,
    with seconds and microseconds set to zero.

    Raises:
    ValueError: If end_datetime lies before start_datetime (empty range).
    """
    # total_seconds() returns a float, but random.randint only accepts integers:
    # fractional values raise ValueError and Python 3.12+ rejects floats outright.
    total_seconds = int((end_datetime - start_datetime).total_seconds())

    # Generate a random number of seconds within the specified range (inclusive)
    random_seconds = random.randint(0, total_seconds)

    # Return the start datetime plus the random number of seconds,
    # truncated to the minute
    return (start_datetime + timedelta(seconds=random_seconds)).replace(second=0, microsecond=0)

In [12]:
# Earliest and latest burst start; random non-burst windows are sampled in between.
burst_starts = burst_list['datetime_start']
min_datetime = burst_starts.min()
max_datetime = burst_starts.max()
print("Start Datetime:", min_datetime)
print("End Datetime:", max_datetime)

Start Datetime: 2021-01-19 02:42:00
End Datetime: 2023-10-31 23:36:00


In [16]:
# Sample random 15-minute windows without a listed burst until there are
# burst_non_burst_ratio times as many non-burst images as burst images.
while non_burst_generated < burst_generated * burst_non_burst_ratio:
    start_datetime = return_random_datetime_between(min_datetime, max_datetime)
    end_datetime = start_datetime + timedelta(minutes=15)
    # Reject the window if any listed burst overlaps it. Checking only the
    # window start would still let a burst that begins inside the window
    # end up in a "non-burst" image.
    non_burst_in_burst_df = burst_list[
        (burst_list.datetime_start <= end_datetime)
        & (start_datetime <= burst_list.datetime_end)
    ]
    if not non_burst_in_burst_df.empty:
        print("Datetime is in a burst, trying again...")
        continue
    for requested_instrument in instruments:
        dfs = get_ecallisto_data(start_datetime, end_datetime, instrument_name=requested_instrument)
        for _, df in dfs.items():
            # Bound before the try block so the error report can always print it;
            # kept separate from the loop variable so the request name is not lost.
            instrument = df.attrs.get('FULLNAME')
            try:
                if instrument not in instruments:
                    continue
                # Downsample by keeping the maximum of every resample_delta bin.
                df = df.resample(resample_delta).max()
                # Maybe keep only good frequencies?
                # Background sub?
                ## Path to save the image to
                # It's FOLDER / instrument / burst type / start_datetime.png
                path = os.path.join(FOLDER, instrument, "0", start_datetime.strftime('%Y-%m-%d_%H-%M-%S') + '.png')
                os.makedirs(os.path.dirname(path), exist_ok=True)
                save_image(df.T, path)
                # Without this increment the while condition can never become false.
                non_burst_generated += 1
                break
            except Exception as e:
                # Best effort: report the failing window (not the stale `row`
                # of the burst loop) and keep sampling.
                print(e)
                print(start_datetime)
                print(end_datetime)
                print(requested_instrument)
                print(instrument)

No files found for Australia-ASSA_02 between 2022-06-23 15:02:00 and 2022-06-23 15:17:00.
No files found for Australia-ASSA_02 between 2021-11-27 08:52:00 and 2021-11-27 09:07:00.
No files found for Australia-ASSA_02 between 2021-10-12 14:35:00 and 2021-10-12 14:50:00.
No files found for Australia-ASSA_02 between 2022-09-11 13:57:00 and 2022-09-11 14:12:00.


Downloading and processing files: 100%|██████████| 3/3 [00:00<00:00,  8.90it/s]


No files found for Australia-ASSA_02 between 2023-01-09 21:49:00 and 2023-01-09 22:04:00.
No files found for Australia-ASSA_02 between 2022-09-02 20:14:00 and 2022-09-02 20:29:00.
No files found for Australia-ASSA_02 between 2022-01-25 09:54:00 and 2022-01-25 10:09:00.
No files found for Australia-ASSA_02 between 2023-07-12 03:20:00 and 2023-07-12 03:35:00.
No files found for Australia-ASSA_02 between 2022-12-12 23:14:00 and 2022-12-12 23:29:00.
No files found for Australia-ASSA_02 between 2022-06-21 18:30:00 and 2022-06-21 18:45:00.
No files found for Australia-ASSA_02 between 2022-10-27 17:11:00 and 2022-10-27 17:26:00.
No files found for Australia-ASSA_02 between 2022-04-13 01:37:00 and 2022-04-13 01:52:00.
No files found for Australia-ASSA_02 between 2023-07-27 21:31:00 and 2023-07-27 21:46:00.
No files found for Australia-ASSA_02 between 2021-06-07 12:14:00 and 2021-06-07 12:29:00.
No files found for Australia-ASSA_02 between 2023-03-18 05:51:00 and 2023-03-18 06:06:00.


Downloading and processing files: 100%|██████████| 3/3 [00:00<00:00,  8.65it/s]


No files found for Australia-ASSA_02 between 2023-04-24 12:46:00 and 2023-04-24 13:01:00.
No files found for Australia-ASSA_02 between 2023-08-27 06:18:00 and 2023-08-27 06:33:00.
No files found for Australia-ASSA_02 between 2022-07-24 14:51:00 and 2022-07-24 15:06:00.
No files found for Australia-ASSA_02 between 2022-12-08 21:19:00 and 2022-12-08 21:34:00.


Downloading and processing files: 100%|██████████| 3/3 [00:00<00:00,  8.20it/s]


No files found for Australia-ASSA_02 between 2022-10-01 07:36:00 and 2022-10-01 07:51:00.
No files found for Australia-ASSA_02 between 2022-08-18 01:26:00 and 2022-08-18 01:41:00.


KeyboardInterrupt: 