In [1]:
import urllib
import pandas as pd 
from sunpy.time import TimeRange
from flarelist_utils import read_swpc_reports, read_ngdc_goes_reports
from dateutil.relativedelta import relativedelta
import pandas as pd 
import numpy as np
import datetime
import glob
import h5netcdf
import matplotlib.pyplot as plt
from scipy.io import readsav
import warnings
# NOTE(review): this silences every warning category for the whole session, so any
# deprecation or pandas warning raised by later cells will not be shown.
# Consider narrowing the filter to specific categories once the notebook is stable.
warnings.filterwarnings("ignore")

## GEV flare list

Let's look at the flares from the GOES IDL workbench. The IDL commands below were used to export them:

```
a = ogoes()
gev = a->get_gev('01-Jan-2010', '31-Dec-2018', /struct)
save, gev, filename="goes_flares_from_idl.sav"
```

In [127]:
# Read the `gev` structure from the IDL save file and convert each field to a string column.
gev_data = readsav("goes_flares_from_idl.sav")["gev"]
gev_dict = dict(
    (field, gev_data[field].astype(str)) for field in gev_data.dtype.names
)
gev_df = pd.DataFrame(gev_dict)

# First character of CLASS is the class letter; GSTART is parsed as the event start time.
gev_df["goes_class_ind"] = gev_df["CLASS"].map(lambda goes_class: goes_class[0])
gev_df["datetime"] = pd.to_datetime(gev_df["GSTART"])

# Keep only C, M and X class events and report the time span they cover.
is_cmx_gev = gev_df["goes_class_ind"].isin(["C", "M", "X"])
gev_df_c = gev_df[is_cmx_gev]
print(gev_df_c["datetime"].min(), gev_df_c["datetime"].max())

# Rows sharing the same start time are treated as duplicates; the first one is kept.
gev_df_c = gev_df_c.drop_duplicates(subset="datetime")

2010-01-02 07:09:00 2018-07-06 19:41:00


## SWPC flare list 

Flare list from daily reports

In [128]:
# strftime/glob pattern for the yearly directories of SWPC event reports.
filedir = "./goes_files/%Y_events/%Y*events*"
timerange = TimeRange("2010-01-01", "2018-12-31")

# One datetime per calendar year covered by `timerange` (derived from the range
# itself rather than a hard-coded count).
n_years = timerange.end.datetime.year - timerange.start.datetime.year + 1
years = [timerange.start.datetime + relativedelta(years=i) for i in range(n_years)]

all_files = sorted(
    path for year in years for path in glob.glob(year.strftime(filedir))
)
if not all_files:
    # Fail with a clear message instead of an IndexError / empty-concat error below.
    raise FileNotFoundError(
        "No SWPC event report files matched the pattern {!r}".format(filedir)
    )

# Read every report once and concatenate in a single call. Growing a frame with
# DataFrame.append inside a loop is quadratic, and `append` was removed in pandas 2.0.
df_flares = pd.concat(
    [read_swpc_reports(path) for path in all_files], ignore_index=True
)

# `ts` is the date and start time joined together; it is used as the de-duplication key.
df_flares["ts"] = df_flares.date + df_flares.start_time

# Keep only C, M and X class events, dropping rows that repeat the same `ts`.
# (The original also called df_flares.drop_duplicates(subset="ts") without using
# the result, which had no effect; df_flares itself is left un-deduplicated as before.)
swpc_c = df_flares[df_flares["goes_class_ind"].isin(["C", "X", "M"])]
swpc_c = swpc_c.drop_duplicates(subset="ts")

## HEK flares

Flare list by querying the HEK for flares from `a.hek.OBS.Observatory == "GOES"`

In [129]:
# Load the HEK query results that were saved to CSV.
hek_flares_all = pd.read_csv(filepath_or_buffer="solar_cycle24_flares.csv")

In [130]:
# Class letter is the first character of `fl_goescls`; start time is parsed to datetime.
hek_flares_all["goes_class_ind"] = hek_flares_all["fl_goescls"].map(
    lambda goes_class: goes_class[0]
)
hek_flares_all["datetime"] = pd.to_datetime(hek_flares_all["event_starttime"])

# C/M/X events between 2010-01-01 and 2018-12-31 (both bounds inclusive as written),
# de-duplicated on the start-time string and re-indexed from zero.
hek_flares = (
    hek_flares_all[hek_flares_all["goes_class_ind"].isin(["X", "M", "C"])]
    .loc[lambda d: (d.datetime >= "2010-01-01") & (d.datetime <= "2018-12-31")]
    .drop_duplicates(subset="event_starttime")
    .reset_index(drop=True)
)

In [131]:
# Load the second HEK CSV and add the parsed start time and the class letter.
hek_flares_swpc = pd.read_csv("solar_cycle24_flares_swpc.csv").assign(
    datetime=lambda d: pd.to_datetime(d["event_starttime"]),
    goes_class_ind=lambda d: d["fl_goescls"].map(lambda goes_class: goes_class[0]),
)

## Let's print the number of events of each class in each flare list

In [133]:
def print_flares(df, name):
    """Print the number of X-, M- and C-class rows in `df`, labelled with `name`.

    Defined here, ahead of its first use, so the notebook runs top-to-bottom on a
    fresh kernel. The notebook also defines the same helper in a later cell; that
    later definition is redundant and can be removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "goes_class_ind" column holding the class letter.
    name : str
        Label printed in front of the counts.
    """
    x = np.sum(df["goes_class_ind"].isin(["X"]))
    m = np.sum(df["goes_class_ind"].isin(["M"]))
    c = np.sum(df["goes_class_ind"].isin(["C"]))
    print("{:s}: X: {:d}, M: {:d}, C: {:d}".format(name, x, m, c))


print_flares(gev_df_c, "gev")
print_flares(swpc_c, "swpc")
print_flares(hek_flares, "hek")

gev: X: 49, M: 740, C: 7733
swpc: X: 49, M: 748, C: 7798
hek: X: 49, M: 756, C: 7814


In [134]:
# Compare each frame's row count with the sum of the X + M + C counts printed above.
for flare_frame, class_total in ((gev_df_c, 49+740+7733), (swpc_c, 49+748+7798)):
    print(len(flare_frame), class_total)

8522 8522
8595 8595


In [135]:
# Distinct class letters remaining in the filtered SWPC list.
swpc_c.goes_class_ind.unique()

array(['C', 'M', 'X'], dtype=object)

## Let's match on start time

In [136]:
# Minute-resolution string of the start time, used as the join key between lists.
gev_df_c["match_time"] = gev_df_c["datetime"].dt.strftime("%Y-%m-%d %H:%M")

In [137]:
# Parse date + start_time once, then store both the timestamp and its
# minute-resolution string (the join key).
swpc_start = pd.to_datetime(swpc_c["date"] + swpc_c["start_time"])
swpc_c["datetime"] = swpc_start
swpc_c["match_time"] = swpc_start.dt.strftime("%Y-%m-%d %H:%M")

In [138]:
# Sizes of the two lists, then the number of rows in their join on `match_time`.
n_swpc, n_gev = len(swpc_c), len(gev_df_c)
print(n_swpc, n_gev)
n_matched = len(swpc_c.merge(gev_df_c, on="match_time"))
print(n_matched)

8595 8522
8505


In [139]:
# Inner join of the SWPC and GEV lists on the minute-resolution start time.
# Columns present in both frames get the `_x` (SWPC) and `_y` (GEV) suffixes.
common = pd.merge(swpc_c, gev_df_c, on="match_time")

# Check whether both lists assign the same class letter to every matched event.
# (The original referenced an undefined name `testy` here, which raises NameError
# on a fresh kernel, and discarded the result.)
classes_agree = np.all(common["goes_class_ind_x"] == common["goes_class_ind_y"])

# Use the SWPC class letter as the class of the merged event.
common["goes_class_ind"] = common["goes_class_ind_x"]

In [149]:
# GEV row count minus merged row count, using the values printed by the earlier cell.
n_gev_rows, n_merged_rows = 8522, 8505
n_gev_rows - n_merged_rows

17

In [141]:
# This is looking all right!

In [142]:
# Class breakdown of the events found in both lists.
print_flares(df=common, name="merged")

merged: X: 49, M: 739, C: 7717


In [146]:
# Number of SWPC rows beyond the merged row count.
swpc_c.shape[0] - common.shape[0]

90

In [143]:
# Number of GEV rows beyond the merged row count.
gev_df_c.shape[0] - common.shape[0]

17

In [150]:
# GEV events whose start minute does not appear in the merged list.
gev_is_matched = gev_df_c["match_time"].isin(common["match_time"])
gev_df_c[~gev_is_matched]

Unnamed: 0,GSTART,GEND,GPEAK,CLASS,LOC,NOAA_AR,goes_class_ind,datetime,match_time
7626,31-Dec-2013 02:29:00,31-Dec-2013 02:46:00,31-Dec-2013 02:40:00,C5.6,,11936.0,C,2013-12-31 02:29:00,2013-12-31 02:29
7627,31-Dec-2013 05:21:00,31-Dec-2013 05:38:00,31-Dec-2013 05:32:00,C2.0,,11938.0,C,2013-12-31 05:21:00,2013-12-31 05:21
7628,31-Dec-2013 07:33:00,31-Dec-2013 07:47:00,31-Dec-2013 07:40:00,C2.2,,11936.0,C,2013-12-31 07:33:00,2013-12-31 07:33
7629,31-Dec-2013 10:23:00,31-Dec-2013 12:32:00,31-Dec-2013 11:50:00,C8.8,,,C,2013-12-31 10:23:00,2013-12-31 10:23
7630,31-Dec-2013 16:42:00,31-Dec-2013 16:55:00,31-Dec-2013 16:49:00,C2.0,S13E16,11938.0,C,2013-12-31 16:42:00,2013-12-31 16:42
7631,31-Dec-2013 18:20:00,31-Dec-2013 18:31:00,31-Dec-2013 18:25:00,C1.8,S16W16,11936.0,C,2013-12-31 18:20:00,2013-12-31 18:20
7632,31-Dec-2013 18:53:00,31-Dec-2013 19:03:00,31-Dec-2013 18:59:00,C4.0,S13W41,11940.0,C,2013-12-31 18:53:00,2013-12-31 18:53
7633,31-Dec-2013 19:49:00,31-Dec-2013 19:56:00,31-Dec-2013 19:53:00,C2.9,S13W41,11940.0,C,2013-12-31 19:49:00,2013-12-31 19:49
7634,31-Dec-2013 21:45:00,31-Dec-2013 22:20:00,31-Dec-2013 21:58:00,M6.4,S16W35,11936.0,M,2013-12-31 21:45:00,2013-12-31 21:45
9835,31-Dec-2014 01:50:00,31-Dec-2014 02:01:00,31-Dec-2014 01:55:00,C1.6,S12E19,12251.0,C,2014-12-31 01:50:00,2014-12-31 01:50


In [153]:
# SWPC events whose start minute does not appear in the merged list (key and class only).
swpc_is_matched = swpc_c["match_time"].isin(common["match_time"])
swpc_c.loc[~swpc_is_matched, ["match_time", "goes_class_ind"]]

Unnamed: 0,match_time,goes_class_ind
159,2010-02-06 06:59,C
162,2010-02-06 10:34,C
164,2010-02-06 15:25,C
166,2010-02-06 18:47,M
167,2010-02-06 21:31,M
...,...,...
10015,2015-01-15 14:49,C
10016,2015-01-15 16:39,C
10017,2015-01-15 21:49,C
10018,2015-01-16 03:18,C


In [158]:
# SWPC-only events (start minute absent from the merged list), then just the M-class ones.
swpc_excess = swpc_c.loc[
    ~swpc_c["match_time"].isin(common["match_time"]),
    ["match_time", "goes_class_ind"],
]
swpc_excess[swpc_excess["goes_class_ind"] == "M"]

Unnamed: 0,match_time,goes_class_ind
166,2010-02-06 18:47,M
167,2010-02-06 21:31,M
8667,2014-06-03 03:58,M
8684,2014-06-06 19:26,M
9277,2014-09-23 23:03,M
9293,2014-09-27 08:32,M
9993,2015-01-13 04:13,M
9994,2015-01-13 04:46,M
10008,2015-01-14 12:30,M


## Read in the HELIO flare list

In [154]:
# Load the flare list from CSV and keep only the C, M and X class events.
helio_flarelist = pd.read_csv("full_sswlatest.csv")

# Take an explicit copy of the filtered rows: assigning a new column to a bare
# boolean-filtered slice triggers pandas' SettingWithCopyWarning.
ssw = helio_flarelist[helio_flarelist["goes_class_ind"].isin(["X", "M", "C"])].copy()
ssw["datetime"] = pd.to_datetime(ssw["time_start"])

In [157]:
# Class breakdown of the HELIO list next to the SWPC list.
for flare_list, label in ((ssw, "helio"), (swpc_c, "swpc")):
    print_flares(flare_list, label)

helio: X: 49, M: 735, C: 7989
swpc: X: 49, M: 748, C: 7798


In [None]:
# NOTE(review): `df1` and `df2` are not defined anywhere in the visible notebook and this
# cell has no execution count, so it looks like a leftover scratch expression that will
# raise NameError on Run All — confirm, then remove it or substitute the intended frames.
df1[~df1.isin(df2)].dropna()

In [102]:
# Distinct observatory names present in the second HEK list.
hek_flares_swpc.obs_observatory.unique()

array(['GOES'], dtype=object)

In [103]:
def print_flares(df, name):
    """Print how many X-, M- and C-class rows `df` holds, prefixed by `name`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "goes_class_ind" column holding the class letter.
    name : str
        Label printed in front of the counts.
    """
    class_letters = df["goes_class_ind"]
    x, m, c = (np.sum(class_letters.isin([letter])) for letter in ("X", "M", "C"))
    print("{:s}: X: {:d}, M: {:d}, C: {:d}".format(name, x, m, c))

In [104]:
# Peek at the first two SWPC rows.
swpc_c.iloc[:2]

Unnamed: 0,date,event_no,start_time,max_time,end_time,goes_sat,goes_channel,goes_class_ind,goes_class,integrated_flux,noaa_ar,ts,datetime,match_time
4,20100102,4300,709,724,740,G14,1-8A,C,C1.0,0.0012,11039.0,201001020709,2010-01-02 07:09:00,2010-01-02 07:09
8,20100102,4370,1408,1416,1424,G14,1-8A,C,C2.6,0.0014,,201001021408,2010-01-02 14:08:00,2010-01-02 14:08


In [70]:
# Peek at the first two GEV rows.
gev_df_c.iloc[:2]

Unnamed: 0,GSTART,GEND,GPEAK,CLASS,LOC,NOAA_AR,goes_class_ind,datetime,match_time
4,2-Jan-2010 07:09:00,2-Jan-2010 07:40:00,2-Jan-2010 07:24:00,C1.0,S29W37,11039.0,C,2010-01-02 07:09:00,2010-01-02 07:09
8,2-Jan-2010 14:08:00,2-Jan-2010 14:24:00,2-Jan-2010 14:16:00,C2.6,,,C,2010-01-02 14:08:00,2010-01-02 14:08
