In [6]:
# globals

# import standard libraries
import pandas as pd
import sys

# import custom libraries
if "./src" not in sys.path:
    sys.path.append("./src")
from goes_file_parser import GOESFileParser
from sdo_image_fetcher import SDOImageFetcher


In [5]:
# Load the GOES events for the 2015 interval into a DataFrame and display it.
# - the event files are read from `directory_path`
# - interval bounds are passed as YYYYMMDD strings

directory_path = "/Users/aishsk6/My Drive/ML_project/2015_events"
interval_start, interval_end = "20150101", "20151231"
df = GOESFileParser.get_goes_events_df_for_interval(
    directory_path,
    interval_start,
    interval_end,
)
df

Unnamed: 0,Event,Obs,Q,Type,Loc/Freq,Particulars_a,begin_datetime,max_datetime,end_datetime
0,3690,LEA,3,FLA,S08E54,SF,2015-01-01 01:38:00,2015-01-01 01:39:00,2015-01-01 01:41:00
1,3700,LEA,3,FLA,S08E54,SF,2015-01-01 02:25:00,2015-01-01 02:27:00,2015-01-01 02:29:00
2,3710,LEA,3,FLA,S08E54,SF,2015-01-01 03:16:00,2015-01-01 03:16:00,2015-01-01 03:20:00
3,3720,LEA,3,FLA,S08E54,SF,2015-01-01 03:40:00,2015-01-01 03:41:00,2015-01-01 03:44:00
4,3730,LEA,C,RSP,109-171,CTM/1,2015-01-01 03:51:00,NaT,2015-01-01 04:17:00
...,...,...,...,...,...,...,...,...,...
9409,9930,LEA,1,FLA,N08E57,SF,NaT,NaT,NaT
9410,9940,HOL,2,DSF,N17W24,4,NaT,NaT,NaT
9411,9980,LEA,2,EPL,////,0.26,NaT,NaT,NaT
9412,9980,SVI,3,FLA,S23W52,1F,NaT,NaT,NaT


In [8]:
# fetch the solar images
# - can take long depending on connection
# - code is fault tolerant; if connection is lost, it will resume from where it left off

# common variables
# - S3 location of the SDOML v2 zarr store for 2015, and the local folder images are saved to
s3_root_for_sdoml_year_zarr = (
    "s3://gov-nasa-hdrl-data1/contrib/fdl-sdoml/fdl-sdoml-v2/sdomlv2.zarr/2015/"
)
sav_folder_path = "/Users/aishsk6/gd_archive/2024_flare_finder/sdo_images"

# fetch solar images for year 2015 for every hour-start
# - "60min" is the same hourly step as the legacy "60T" alias; the "T" alias is
#   deprecated in recent pandas releases, so the explicit spelling is used
desired_times = pd.date_range(
    start="2015-01-01 00:00:00", end="2015-12-31 23:59:59", freq="60min", tz="UTC"
)
SDOImageFetcher.get_sdo_solar_images_from_aws(
    s3_root_for_sdoml_year_zarr,
    desired_times,
    sav_folder_path,
    # NOTE(review): 1000 days is effectively an unbounded tolerance - presumably so
    # every hourly slot resolves to the nearest available image; confirm intent
    tolerance=pd.Timedelta(days=1000),
    is_verbose=False,
)

# fetch solar images for goes big flare events for year 2015
# - Big flare means M/X class flare
# - We specify a 10 min tolerance for the flare event time
goes_events_data = pd.read_csv("goes_events_clean_2015.csv")
# - na=False: rows with a missing "Particulars_a" are treated as non-matches;
#   without it the mask would contain NaN and the boolean indexing would raise
goes_MX_events = goes_events_data[
    goes_events_data["Particulars_a"].str.lower().str.startswith(("m", "x"), na=False)
]
# - use the time of maximum when present, otherwise fall back to the event begin time
goes_MX_event_times = pd.to_datetime(
    goes_MX_events["max_datetime"].fillna(goes_MX_events["begin_datetime"]), utc=True
).tolist()
SDOImageFetcher.get_sdo_solar_images_from_aws(
    s3_root_for_sdoml_year_zarr,
    goes_MX_event_times,
    sav_folder_path,
    tolerance=pd.Timedelta(minutes=10),
    is_verbose=False,
)



['/Users/aishsk6/gd_archive/2024_flare_finder/sdo_images/2015-03-02 09:48:12.340000+00:00.png']