In [10]:
import os
import pandas as pd
import numpy as np
import re

import matplotlib.pyplot as plt

from datetime import datetime
from astropy.time import Time
from astropy.time import TimeDelta

from sunpy import timeseries as ts
from sunpy.net import attrs as a
from sunpy.net import hek
from sunpy.net import Fido
from sunpy.time import parse_time, find_time

In [11]:
# Root folder that the file-listing cell walks to find the downloaded `.nc`
# files.  The original hard-coded location is kept as the default; set the
# GOES15_DATA_DIR environment variable to point the notebook at another copy
# of the data without editing this cell.
data_dir = os.environ.get("GOES15_DATA_DIR", r"D:\MastersProj\Data\goes15")

### Get a list of the files that are available (they have all been downloaded already), then search the HEK database for flares in the date range those files cover. This cell does not need to be rerun.

In [12]:
# Build the list of dates for which a data file is present under `data_dir`.
# A file counts when its name contains both '.nc' and the
# 'sci_gxrs-l2-irrad_g15_d' prefix; the date is the first 8-digit run that
# directly follows a 'd' in the name, read as YYYYMMDD.
dates = []
for _root, _dirs, files in os.walk(data_dir):
    for f in files:
        if '.nc' not in f or 'sci_gxrs-l2-irrad_g15_d' not in f:
            continue
        match = re.search(r'd(\d{8})', f)
        if match is None:
            # Right prefix but no 8-digit stamp: skip the file instead of
            # failing with "'NoneType' object has no attribute 'group'".
            continue
        dates.append(datetime.strptime(match.group(1), '%Y%m%d'))

if not dates:
    # Same exception type that min() on an empty array would raise, but with
    # a message that says what is actually wrong.
    raise ValueError(f"No 'sci_gxrs-l2-irrad_g15_d*.nc' files found under {data_dir!r}")

datetime_array = np.array(dates, dtype='datetime64[s]')

# The search window runs from 00:00:00 on the earliest file date to one second
# before midnight at the end of the latest file date (+1 day, -1 second).
first_date = datetime_array.min()
last_date = Time(datetime_array.max()) + TimeDelta(1, format='jd') - TimeDelta(1, format='sec')
fl_search = a.Time(first_date, last_date)

In [13]:
#### Query the HEK for all flare ("FL") events in the window whose FRM name is "SSW Latest Events"
res = Fido.search(
    fl_search,
    a.hek.EventType("FL"),
    a.hek.FRM.Name == "SSW Latest Events",
)
fullres = res["hek"]

#### Keep only the columns of interest ("frm_name" is another candidate if the source is ever needed)
ssw_columns = (
    "event_starttime", "event_peaktime", "event_endtime",
    "fl_goescls",
    "hpc_x", "hpc_y", "hgc_x", "hgc_y", "hgs_x", "hgs_y",
    "event_score", "sum_overlap_scores", "ar_noaanum",
)
srch_res = fullres[ssw_columns]

n_flares = len(srch_res)
print(f"Total Number of flares in period: {n_flares}")
print("")
srch_res.write("GOES15_HEK_Data_SSW.csv", format="csv", overwrite=True)

Total Number of flares in period: 16099



In [76]:
#### Query the HEK for flare ("FL") events in the window whose FRM name is NOT "SSW Latest Events"
# NOTE(review): this cell previously repeated the "SSW Latest Events" query
# verbatim while writing to a file named "...NOTSSW.csv".  The `!=` below is
# inferred from that output name -- confirm this is the intended selection.
res = Fido.search(fl_search, a.hek.EventType("FL"), a.hek.FRM.Name != "SSW Latest Events")
fullres = res["hek"]

#### Reduce the table down to just the columns we want
# "frm_name" could also be kept here to record which method reported each event.
srch_res = fullres["event_starttime", "event_peaktime",
                   "event_endtime", "fl_goescls", "hpc_x", "hpc_y", "hgc_x", "hgc_y",
                   "hgs_x", "hgs_y", "event_score", "sum_overlap_scores", "ar_noaanum"]

print(f"Total Number of flares in period: {len(srch_res)}")
print("")
srch_res.write("GOES15_HEK_Data_NOTSSW.csv", overwrite=True, format="csv")

KeyboardInterrupt: 

In [14]:
#### Query the HEK for all flare ("FL") events in the window whose observatory is "GOES"
res = Fido.search(
    fl_search,
    a.hek.EventType("FL"),
    a.hek.OBS.Observatory == "GOES",
)
fullres = res["hek"]

#### Keep only the timing, class and active-region columns
# Other columns that might be worth adding later: "hgc_x", "hgc_y", "frm_name".
goes_columns = (
    "event_starttime", "event_peaktime", "event_endtime",
    "fl_goescls", "ar_noaanum",
)
srch_res = fullres[goes_columns]

n_flares = len(srch_res)
print(f"Total Number of flares in period: {n_flares}")
print("")
srch_res.write("GOES15_HEK_Data_GOES.csv", format="csv", overwrite=True)

Total Number of flares in period: 14751



In [15]:
# Write whichever table `srch_res` currently holds to GOES15_HEK_Data.csv,
# replacing any existing file (overwrite = True).  The same file name is
# written again further down by the `to_csv` calls, so this copy does not
# survive a full top-to-bottom run.
srch_res.write("GOES15_HEK_Data.csv", overwrite = True, format="csv")

#### Have a look at how many of each class type there were in this period.

In [27]:
# Load the SSW export and collapse rows that repeat the same peak time and class.
data = pd.read_csv("GOES15_HEK_Data_SSW.csv").drop_duplicates(['event_peaktime', 'fl_goescls'])

# Tally one count per row.  A label is credited to the first of B, C, M, X
# (checked in that order) that appears anywhere in it; labels containing none
# of these letters are not counted.
class_totals = dict.fromkeys("BCMX", 0)
for goes_label in data['fl_goescls'].astype(str):
    first_hit = next((letter for letter in "BCMX" if letter in goes_label), None)
    if first_hit is not None:
        class_totals[first_hit] += 1

b_count = class_totals["B"]
c_count = class_totals["C"]
m_count = class_totals["M"]
x_count = class_totals["X"]

# Report from the strongest class down to the weakest.
for letter in "XMCB":
    print(f"{letter}-class: {class_totals[letter]}")

X-class: 49
M-class: 734
C-class: 7716
B-class: 6321


In [48]:
# Read both saved query results, drop rows that repeat the same
# (peak time, class) pair within each file, and tag every row with the query
# it came from so the two sets can be told apart after they are combined.
data_SSW = (
    pd.read_csv("GOES15_HEK_Data_SSW.csv")
    .drop_duplicates(['event_peaktime', 'fl_goescls'])
    .assign(Origin="SSW")
)

data_GOES = (
    pd.read_csv("GOES15_HEK_Data_GOES.csv")
    .drop_duplicates(['event_peaktime', 'fl_goescls'])
    .assign(Origin="GOES")
)

In [71]:
# Stack the two sources, then keep one row per (peak time, class) pair.
# Rows are ordered by start time and then by Origin before de-duplicating with
# keep="last", so among rows with equal start times the "SSW" row (which sorts
# after "GOES") is the one retained.  The result is re-sorted by start time,
# given a fresh 0..n-1 index, and saved.
data_full = (
    pd.concat([data_SSW, data_GOES], axis=0)
    .sort_values(by=['event_starttime', 'Origin'])
    .drop_duplicates(['event_peaktime', 'fl_goescls'], keep="last")
    .sort_values(by=['event_starttime'])
    .reset_index(drop=True)
)
data_full.to_csv("GOES15_HEK_Data.csv")


In [72]:
# `data` is a second name for the same DataFrame object as `data_full`, not a
# copy: columns added to `data` in later cells also appear on `data_full`.
data = data_full

### Next, process this data:
- Dropping duplicates
- Flagging Overlapped flares
- Flagging nan flares


#### Dropping duplicates

#### Setting up a proximity flag for overlapped flares

In [73]:
# Flag every flare that starts no more than `max_gap_minutes` after the end
# time of the row immediately before it (rows are compared in their current
# order).  Negative gaps, i.e. a start before the previous end, are flagged too.
#
# The comparison is done on whole columns: it is purely positional, so it does
# not depend on the index labels being 0..n-1, and it avoids building a row
# object and re-parsing two timestamps on every loop iteration.
max_gap_minutes = 30

flare_start = pd.to_datetime(data['event_starttime'])
previous_end = pd.to_datetime(data['event_endtime']).shift(1)  # first row has no predecessor -> NaT
gap_minutes = (flare_start - previous_end).dt.total_seconds() / 60

# A missing gap (first row, or an unparsable/missing time) compares as False,
# so such rows are left unflagged.
data['Proximity Flag'] = gap_minutes <= max_gap_minutes

In [74]:
# Final safety pass: one row per (peak time, class) pair, re-indexed from 0,
# with a report of how many rows that removed.
data1 = (
    data
    .drop_duplicates(subset=['event_peaktime', 'fl_goescls'])
    .reset_index(drop=True)
)
n_removed = len(data) - len(data1)
print(f"There were: {n_removed} duplicates removed")

There were: 0 duplicates removed


In [75]:
# Save `data1` to GOES15_HEK_Data.csv, replacing the file of the same name
# written by earlier cells.  With pandas' default settings the index is
# written as the first column.
data1.to_csv("GOES15_HEK_Data.csv")

In [20]:
# Scatter `event_score` against `sum_overlap_scores`, drawing rows whose class
# label contains "B" as a separate series from all other rows.
score_columns = ['event_score', 'sum_overlap_scores', 'fl_goescls']
missing_columns = [col for col in score_columns if col not in data1.columns]
if missing_columns:
    # The two score columns are only selected by the "SSW Latest Events" query
    # cell, so a `data1` built from the GOES-observatory table alone lacks them.
    raise KeyError(
        f"data1 is missing {missing_columns}; rebuild it from a table that "
        "includes the SSW query columns before plotting"
    )

# Computed once and reused for both series; missing labels become the string
# "nan" and therefore land in the non-B series.
is_b_class = data1['fl_goescls'].astype(str).str.contains("B")

fig, ax = plt.subplots()
ax.plot(data1.loc[~is_b_class, 'event_score'],
        data1.loc[~is_b_class, 'sum_overlap_scores'], '.', label="other classes")
ax.plot(data1.loc[is_b_class, 'event_score'],
        data1.loc[is_b_class, 'sum_overlap_scores'], '.', label="B-class")
ax.set_xlabel("event_score")
ax.set_ylabel("sum_overlap_scores")
ax.legend()

plt.show()

KeyError: 'event_score'