In [None]:
#Import libraries
import drms
from drms import DrmsQueryError
import time
import json
from datetime import datetime as dt_obj
import urllib
from astropy.io import fits
from sunpy.visualization.colormaps import color_tables as ct
from matplotlib.dates import *
from sunpy.time import TimeRange
from sunpy.net import Fido
from sunpy.net import attrs as a
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import csv
from os.path import exists
import re
import os
from glob import glob


Download SHARP data

In [None]:
#Initialise the drms client and display the available data series whose names match the pattern hmi\.sharp_
c = drms.Client()
c.series(r'hmi\.sharp_')

In [None]:
# Fetch the series description for hmi.sharp_720s; si.keywords is used further down to describe the requested fields.
si = c.info('hmi.sharp_720s')

In [None]:
#SHARP metadata keywords to request. The order here is the order in which
#they are joined into query_string below.
fields = [
    "T_REC", "HARPNUM", "NOAA_NUM", "NOAA_ARS", "NOAA_AR", "QUALITY",
    "TOTUSJH", "TOTUSJZ", "SAVNCPP", "USFLUX", "ABSNJZH", "TOTPOT",
    "SIZE_ACR", "NACR", "MEANPOT", "SIZE", "MEANJZH", "SHRGT45", "MEANSHR",
    "MEANJZD", "MEANALP", "MEANGBT", "MEANGBL", "MEANGAM", "MEANGBZ", "MEANGBH",
    "NPIX",
]

#Comma-separated keyword list, passed as key= when querying the series.
query_string = ",".join(fields)

In [None]:
#SHARP metadata overview: the rows of si.keywords whose index labels appear in fields
sharp_subset = si.keywords.loc[si.keywords.index.intersection(fields)]
sharp_subset

In [None]:
#Define log and error files for the downloads.
#Both are CSV files that log_success / log_error append to; the paths are relative to the working directory.

log_file = "query_log.csv"
error_file = "query_error.csv"


def log_success(log_file, t1_str, t2_str, n_rows):
    """Append one row describing a completed query window to the success log.

    Parameters
    ----------
    log_file : str
        Path of the CSV log. It is created if it does not exist.
    t1_str, t2_str
        Start and end of the queried window, written to the CSV as given.
    n_rows : int
        Number of rows written for the window (0 when nothing was available).

    The header row ``start_time,end_time,rows_written`` is written when the
    file is missing *or* empty, so a truncated/touched log still ends up with
    a header.
    """
    write_header = not exists(log_file) or os.path.getsize(log_file) == 0
    with open(log_file, 'a', newline='') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(['start_time', 'end_time', 'rows_written'])
        writer.writerow([t1_str, t2_str, n_rows])

def log_error(error_file, t1_str, t2_str, error):
    """Append one row describing a failed query window to the error log.

    Parameters
    ----------
    error_file : str
        Path of the CSV error log. It is created if it does not exist.
    t1_str, t2_str
        Start and end of the queried window, written to the CSV as given.
    error
        The exception (or any object); its ``str()`` form is stored.

    The header row ``start_time,end_time,error_message`` is written when the
    file is missing *or* empty, so a truncated/touched log still ends up with
    a header.
    """
    write_header = not exists(error_file) or os.path.getsize(error_file) == 0
    with open(error_file, 'a', newline='') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(['start_time', 'end_time', 'error_message'])
        writer.writerow([t1_str, t2_str, str(error)])

In [None]:
#Download SHARP metadata to csv incrementally: one query per day, newest day first.

start = datetime(2010, 5, 1)
end = datetime(2025, 8, 21)

sharp_csv = "sharp_metadata_dump_daily.csv"

# Number of one-day windows the loop below visits. Derived from the dates so
# the progress message stays correct when start/end are edited.
n_days = (end - start).days

# Rows are appended (mode='a'), so the header is only needed when the output
# file does not exist yet or is empty. Re-running this cell must not inject a
# second header line into the middle of the data.
first_write = not exists(sharp_csv) or os.path.getsize(sharp_csv) == 0

t_1 = end - timedelta(days=1)
t_2 = end
i = 1
while t_1 >= start:
    print(f"Downloading day {i} of {n_days}")
    t_1_str = t_1.strftime("%Y.%m.%d_%H:%M:%S_TAI")
    t_2_str = t_2.strftime("%Y.%m.%d_%H:%M:%S_TAI")
    try:
        # Use the duration form "<start>/1d" rather than "<start>-<end>":
        # consecutive "<start>-<end>" windows share their midnight endpoint,
        # so that record would be requested by two neighbouring days.
        # NOTE(review): relies on the duration form excluding the end of the
        # window, per the JSOC record-set syntax -- confirm against the docs.
        # The first selector, [1-13278], is a hard-coded range kept from the
        # original query.
        extract = c.query(f'hmi.sharp_720s[1-13278][{t_1_str}/1d]', key=query_string)
    except (DrmsQueryError, OSError) as e:
        # OSError covers TimeoutError as well as urllib's URLError and
        # connection resets, so one transient network failure is logged and
        # skipped instead of aborting the whole multi-thousand-day run.
        print(f"JSOC query failed for {t_1_str}-{t_2_str}: {e}")
        log_error(error_file, t_1, t_2, e)
    else:
        # The window bounds are logged as datetime objects (csv writes their
        # str() form), unchanged from the existing log format.
        if not extract.empty:
            extract.to_csv(sharp_csv, mode='a', index=False, header=first_write)
            first_write = False
            print(f"Wrote {len(extract)} rows for {t_1_str} - {t_2_str}")
            log_success(log_file, t_1, t_2, len(extract))
        else:
            print(f"No records available for {t_1_str} - {t_2_str}")
            log_success(log_file, t_1, t_2, 0)

    # Step one day further back in time.
    t_1 = t_1 - timedelta(days=1)
    t_2 = t_2 - timedelta(days=1)
    i += 1

Download GOES flare data

In [None]:
#Define download functions (Sun et al. 2022)

def download_goes_per_year(t_start, t_end):
    """Search HEK through Fido for GOES flare events between t_start and t_end.

    Returns a pandas DataFrame with the columns start_time, peak_time,
    end_time, goes_class and noaa_active_region, keeping only rows whose
    noaa_active_region is not 0. Returns None when the search result has no
    columns or when no rows remain after that filter.
    """
    # HEK column name -> output column name; the key order is also the
    # column order selected from the HEK table.
    column_names = {
        'event_starttime': 'start_time',
        'event_peaktime': 'peak_time',
        'event_endtime': 'end_time',
        'fl_goescls': 'goes_class',
        #'hgc_coord': 'goes_location',
        'ar_noaanum': 'noaa_active_region',
    }

    print("query started")
    results = Fido.search(
        a.Time(t_start, t_end),
        a.hek.EventType("FL"),
        # a.hek.FL.GOESCls > "M1.0",
        a.hek.OBS.Observatory == "GOES"
    )
    print("query complete")

    # no columns / no results
    if not results.all_colnames:
        return None

    hek_table = results['hek'][tuple(column_names)]
    flares = hek_table.to_pandas().rename(columns=column_names)

    # Drop events with a NOAA active region number of 0.
    with_region = flares[flares['noaa_active_region'] != 0]
    return with_region if len(with_region) != 0 else None


def download_goes(t_start, t_end, first_write):
    """Fetch GOES flares for one window and append them to goes_dump.csv.

    Parameters
    ----------
    t_start, t_end
        Bounds of the window, passed on to download_goes_per_year.
    first_write : bool
        Whether to write the CSV header together with the rows.

    Rows with an empty goes_class are dropped before writing. Every call
    records the window and the number of rows written via log_success
    (0 when the query returned nothing). Returns None.
    """
    flares = download_goes_per_year(t_start, t_end)

    # Nothing came back for this window: log it and stop.
    if flares is None or flares.empty:
        print(f"No records available for {t_start} - {t_end}")
        log_success(log_file, t_start, t_end, 0)
        return

    classified = flares[flares['goes_class'] != '']
    classified.to_csv("goes_dump.csv", mode='a', index=False, header=first_write)
    n_written = len(classified)
    print(f"Wrote {n_written} rows for {t_start} - {t_end}")
    log_success(log_file, t_start, t_end, n_written)

In [None]:
#Download GOES data month by month from batch_start up to batch_end.

batch_start = datetime(2010, 5, 1)
batch_end = datetime(2025, 5, 22)

goes_csv = "goes_dump.csv"

t_start = batch_start
while t_start < batch_end:
    # Clamp the last window so the query never runs past batch_end.
    t_end = min(t_start + relativedelta(months=1), batch_end)

    # download_goes appends to goes_csv, so the header is needed exactly when
    # the file has no content yet. Checking the file on each iteration keeps
    # the header when the first month(s) write nothing, and avoids a duplicate
    # header when this cell is re-run.
    first_write = not exists(goes_csv) or os.path.getsize(goes_csv) == 0

    print(f"Downloading {t_start} - {t_end}")
    download_goes(t_start, t_end, first_write)
    t_start = t_start + relativedelta(months=1)