In [255]:
import pandas as pd 
import matplotlib.pyplot as plt 
from matplotlib import dates
from matplotlib.colors import LogNorm
import numpy as np
import scipy.stats
import warnings
warnings.filterwarnings("ignore")

%matplotlib widget

## Read in the SWPC flare data from 1996-2018

In [144]:
swpc_flares = pd.read_csv("/Users/laurahayes/ml_project_flares/flare_analysis/goes_flare_list/swpc_flarelist_19960731_20181231.csv")
swpc_flares["start_time"] = pd.to_datetime(swpc_flares["start_time"], format="%Y-%m-%d %H:%M:%S")
swpc_flares["matchtime"] = swpc_flares.start_time.dt.strftime("%Y-%m-%d 00:30")
swpc_flares["match_noaa_ar"] = swpc_flares["noaa_ar"].astype(str)

def get_goes_class(x):
    try:
        if x[0]=="C":
            return float(x[1:])*1e-6
        elif x[0] == "M":
            return float(x[1:])*(1e-5)
        elif x[0] == "X":
            return float(x[1:])*(1e-4)
        else:
            return
    except:
        return np.nan
    
swpc_flares["goes_class_val"] = swpc_flares["goes_class"].map(get_goes_class)

In [229]:
swpc_flares.head()

Unnamed: 0,date,event_no,start_time,max_time,end_time,goes_sat,goes_channel,goes_class_ind,goes_class,integrated_flux,noaa_ar,ts,matchtime,match_noaa_ar,goes_class_val
0,19960806,4370,1996-08-06 19:10:00,2201,1996-08-06 22:43:00,GO9,1-8A,C,C1.5,8.23,0,199608061910,1996-08-06 00:30,0,2e-06
1,19960807,4420,1996-08-07 23:16:00,2320,1996-08-07 23:24:00,GO9,1-8A,C,C1.0,2.9,0,199608072316,1996-08-07 00:30,0,1e-06
2,19960822,4580,1996-08-22 07:34:00,750,1996-08-22 07:56:00,GO8,1-8A,C,C1.5,1.18,0,199608220734,1996-08-22 00:30,0,2e-06
3,19960822,4590,1996-08-22 08:05:00,845,1996-08-22 08:52:00,GO9,1-8A,C,C3.6,6.11,0,199608220805,1996-08-22 00:30,0,4e-06
4,19960822,4620,1996-08-22 08:50:00,1003,1996-08-22 11:06:00,GO9,1-8A,C,C4.5,3.32,0,199608220850,1996-08-22 00:30,0,5e-06


## Read in the AR data from 1996-2018

In [230]:
ar_data = pd.read_csv("/Users/laurahayes/ml_project_flares/flare_analysis/AR_analysis/SRS_data_new_19960101_20181231.csv")
ar_data["matchtime"] = pd.to_datetime(ar_data["date"]).dt.strftime("%Y-%m-%d 00:30")
ar_data["ar_noaanum"] = ar_data["ar_noaanum"].map(lambda x: x + 10000 if x<4000 else x)
ar_data["match_noaa_ar"] = ar_data["ar_noaanum"].astype(int).astype(str)
ar_data.drop(columns=["NM", "NONE", "BETA"], inplace=True)



In [232]:
ar_data.head()

Unnamed: 0,ar_noaanum,LOCATION,Carrington_long,AREA,Z,Longitude_extent,No_sunspots,MAGTYPE,date,matchtime,match_noaa_ar
0,7935,N11W06,205,10,BXO,8,4.0,BETA,1996-01-02 00:30,1996-01-02 00:30,7935
1,7937,S09E49,150,0,AXX,1,1.0,ALPHA,1996-01-02 00:30,1996-01-02 00:30,7937
2,7934,S08W60,245,0,AXX,0,1.0,ALPHA,1996-01-03 00:30,1996-01-03 00:30,7934
3,7935,N11W15,200,0,AXX,1,2.0,ALPHA,1996-01-03 00:30,1996-01-03 00:30,7935
4,7937,S08E35,150,0,AXX,0,1.0,ALPHA,1996-01-03 00:30,1996-01-03 00:30,7937


In [234]:
fig, ax = plt.subplots()

plt.plot(pd.to_datetime(ar_data["matchtime"]), ar_data["ar_noaanum"], marker='.', ls='')
plt.plot(pd.to_datetime(swpc_flares["matchtime"]), swpc_flares["noaa_ar"], marker='.', ls='', ms=0.5)

ax.xaxis.set_major_formatter(dates.DateFormatter("%Y-%m-%d"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Lets now merge and play around with these

In [235]:
swpc_flares2 = swpc_flares[swpc_flares["noaa_ar"]!=0]

In [236]:
flare_ar_df = pd.merge(swpc_flares2, ar_data, how="left", on=["matchtime", "match_noaa_ar"])

In [238]:
flare_ar_df.head()

Unnamed: 0,date_x,event_no,start_time,max_time,end_time,goes_sat,goes_channel,goes_class_ind,goes_class,integrated_flux,...,goes_class_val,ar_noaanum,LOCATION,Carrington_long,AREA,Z,Longitude_extent,No_sunspots,MAGTYPE,date_y
0,19960825,4730,1996-08-25 12:10:00,1236,1996-08-25 12:39:00,GO9,1-8A,C,C1.0,8.36,...,1e-06,7986.0,S12E64,258.0,90.0,HSX,2.0,1.0,ALPHA,1996-08-25 00:30
1,19961124,6350,1996-11-24 17:31:00,1747,1996-11-24 17:59:00,GO9,1-8A,C,C2.8,4.03,...,3e-06,7999.0,S04E33,169.0,170.0,DAO,8.0,21.0,BETA,1996-11-24 00:30
2,19961124,6370,1996-11-24 19:25:00,1934,1996-11-24 19:49:00,GO9,1-8A,C,C1.3,1.52,...,1e-06,7999.0,S04E33,169.0,170.0,DAO,8.0,21.0,BETA,1996-11-24 00:30
3,19961124,6380,1996-11-24 20:00:00,2004,1996-11-24 20:06:00,GO9,1-8A,C,C1.8,3.96,...,2e-06,7999.0,S04E33,169.0,170.0,DAO,8.0,21.0,BETA,1996-11-24 00:30
4,19961124,6400,1996-11-24 21:05:00,2113,1996-11-24 21:19:00,GO8,1-8A,C,C1.0,5.8,...,1e-06,7999.0,S04E33,169.0,170.0,DAO,8.0,21.0,BETA,1996-11-24 00:30


In [239]:
len(flare_ar_df[~flare_ar_df["date_y"].isnull()])/len(flare_ar_df)

0.9036406448107227

In [240]:
flare_w_ar = flare_ar_df[~flare_ar_df["date_y"].isnull()]
flare_no_ar = flare_ar_df[flare_ar_df["date_y"].isnull()]

In [241]:
fig, ax = plt.subplots()
# plt.plot(flare_w_ar.start_time, flare_w_ar.goes_class_val, marker='.', ls='', alpha=0.2)
plt.plot(flare_no_ar.start_time, flare_no_ar.goes_class_val, marker='.', ls='', alpha=0.2)
plt.yscale("log")
ax.xaxis.set_major_formatter(dates.DateFormatter("%Y-%m-%d"))



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [242]:
fig, ax = plt.subplots()
plt.plot(flare_w_ar.start_time, flare_w_ar.goes_class_val, marker='.', ls='', alpha=0.2)
plt.yscale("log")
ax.xaxis.set_major_formatter(dates.DateFormatter("%Y-%m-%d"))



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [256]:
fig, ax = plt.subplots()
ax.scatter(flare_w_ar["No_sunspots"], flare_w_ar["goes_class_val"], marker='.', c=flare_w_ar["goes_class_val"], norm=LogNorm())
ax.set_yscale("log")
# ax.set_xscale("log")
print(scipy.stats.spearmanr(flare_w_ar["No_sunspots"], flare_w_ar["goes_class_val"]))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

SpearmanrResult(correlation=nan, pvalue=nan)


In [263]:
flare_w_ar[flare_w_ar["goes_class_val"].isnull()]

Unnamed: 0,date_x,event_no,start_time,max_time,end_time,goes_sat,goes_channel,goes_class_ind,goes_class,integrated_flux,...,goes_class_val,ar_noaanum,LOCATION,Carrington_long,AREA,Z,Longitude_extent,No_sunspots,MAGTYPE,date_y
1792,19991221,3310,1999-12-21 02:36:00,255,1999-12-21 03:00:00,GO8,1-8A,C,C,0.0018,...,,8807.0,N10E44,108.0,40.0,CAO,6.0,6.0,BETA,1999-12-21 00:30
1793,19991221,3400,1999-12-21 08:22:00,830,1999-12-21 08:37:00,GO8,1-8A,C,C,0.0012,...,,8806.0,N19E37,115.0,1040.0,FKI,19.0,28.0,BETA-GAMMA-DELTA,1999-12-21 00:30
4749,20020113,8620,2002-01-13 15:12:00,1533,2002-01-13 15:55:00,GO8,1-8A,C,"C7,4",0.016,...,,9773.0,N15W46,327.0,500.0,FKI,16.0,16.0,BETA-GAMMA-DELTA,2002-01-13 00:30


In [268]:
is_NaN = flare_w_ar.isnull()
row_has_NaN = is_NaN.any(axis=1)
rows_with_NaN = flare_w_ar[row_has_NaN]

In [269]:
rows_with_NaN

Unnamed: 0,date_x,event_no,start_time,max_time,end_time,goes_sat,goes_channel,goes_class_ind,goes_class,integrated_flux,...,goes_class_val,ar_noaanum,LOCATION,Carrington_long,AREA,Z,Longitude_extent,No_sunspots,MAGTYPE,date_y
1792,19991221,3310,1999-12-21 02:36:00,255,1999-12-21 03:00:00,GO8,1-8A,C,C,0.0018,...,,8807.0,N10E44,108.0,40.0,CAO,6.0,6.0,BETA,1999-12-21 00:30
1793,19991221,3400,1999-12-21 08:22:00,830,1999-12-21 08:37:00,GO8,1-8A,C,C,0.0012,...,,8806.0,N19E37,115.0,1040.0,FKI,19.0,28.0,BETA-GAMMA-DELTA,1999-12-21 00:30
4749,20020113,8620,2002-01-13 15:12:00,1533,2002-01-13 15:55:00,GO8,1-8A,C,"C7,4",0.016,...,,9773.0,N15W46,327.0,500.0,FKI,16.0,16.0,BETA-GAMMA-DELTA,2002-01-13 00:30


In [270]:
row_has_NaN

0        False
1        False
2        False
3        False
4        False
         ...  
16555    False
16556    False
16557    False
16560    False
16562    False
Length: 14967, dtype: bool