In [3]:
import sunpy

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#from sunpy.instr.goes import get_goes_event_list
#from sunpy.time import parse_time, TimeRange, is_time_in_given_format
#from sunpy.instr.goes import flareclass_to_flux
#from sunpy.lightcurve import GOESLightCurve

import sunpy.lightcurve as lc
from sunpy.instr.goes import calculate_temperature_em

Read in data from the original balch list and from Doug's students SEP list

In [5]:
#read in the shuffled dataframe of original sep+ctrl events from 1986 - 2004
#orig_data = pd.read_csv("AllEvtsShuffled_1986_2004.csv")

#read in the original Balch SEP event list with added CME speed and GOES T and EM
orig_data = pd.read_excel("ctrlevents.v8p.xls")

#adding y label column indicating positive and negative SEP events - SEP events have Association = ProtonFlare
orig_data['sep'] = orig_data.Association.str.contains('^Proton').astype(int)

In [6]:
#read in corresponding student data
stud_data0 = pd.read_excel("ControlEvents_student.xls")
stud_data0['sep'] = stud_data0.Association.str.contains('^Proton').astype(int)

stud_data0 = stud_data0.drop_duplicates()

In [75]:
print(len(orig_data))
print(len(stud_data0))

3783
4787


In [7]:
mask = np.asarray(stud_data0.FlrOnset.isin(orig_data.FlrOnset))
stud_data = stud_data0.iloc[mask]
stud_data.reset_index(inplace=True)

In [8]:
print(len(orig_data))
print(len(stud_data))

3783
3783


In [9]:
print(len(orig_data.query('sep == 0')))
print(len(orig_data.query('sep == 1')))

3656
127


In [10]:
print(len(stud_data.query('sep == 0')))
print(len(stud_data.query('sep == 1')))

3655
128


In [11]:
orig_sep = orig_data.query('sep == 1')
orig_ctrl = orig_data.query('sep == 0')

stud_sep = stud_data.query('sep == 1')
stud_ctrl = stud_data.query('sep == 0')

Some events are labeled differently in the balch vs student SEP list - investigate here

In [12]:
#mask2 = (orig_data[['FlrOnset','sep','TypeII','TypeIV']] != stud_data[['FlrOnset','sep','TypeII','TypeIV']]).any(1)

In [19]:
mask2 = orig_data.FlrOnset.isin(stud_data.FlrOnset)

In [21]:
np.where(mask2 == False)

(array([2983]),)

In [24]:
orig_data[['FlrOnset','sep','TypeII','TypeIV']].iloc[2983]

FlrOnset    2001-08-09T10:15:00.000
sep                               1
TypeII                           No
TypeIV                           No
Name: 2983, dtype: object

----------

Events that are in orig balch data set but not listed as SEPs in the students list

In [29]:
mask2 = orig_sep.FlrOnset.isin(stud_sep.FlrOnset)

In [30]:
np.where(mask2 == False)

(array([ 82,  91, 104, 106]),)

In [35]:
orig_sep[['FlrOnset','sep','TypeII','TypeIV']].iloc[np.where(mask2 == False)[0]]

Unnamed: 0,FlrOnset,sep,TypeII,TypeIV
2824,2000-11-25T00:32:00.000,1,Yes,Yes
2983,2001-08-09T10:15:00.000,1,No,No
3286,2002-04-17T07:36:00.000,1,Yes,Yes
3307,2002-05-22T03:17:00.000,1,No,No


-----------

Events that are listed SEP in the students data set but not listed as SEPs in the original balch list

In [36]:
mask2 = stud_sep.FlrOnset.isin(orig_sep.FlrOnset)

In [37]:
np.where(mask2 == False)

(array([ 74,  83,  92,  96, 107]),)

In [38]:
stud_sep[['FlrOnset','sep','TypeII','TypeIV']].iloc[np.where(mask2 == False)[0]]

Unnamed: 0,FlrOnset,sep,TypeII,TypeIV
2591,2000-02-17T20:15:00.000,1,Yes,No
2827,2000-11-25T18:33:00.000,1,Yes,No
2970,2001-06-15T10:00:00.000,1,Yes,Yes
3073,2001-10-19T00:44:00.000,1,Yes,Yes
3305,2002-05-21T21:11:00.000,1,Yes,No


In [73]:
orig_data.iloc[3305]

FlrOnset        2002-05-21T21:11:00.000
Flrmaxtime      2002-05-21T21:39:00.000
Flrendtime      2002-05-21T23:13:00.000
FlrPeakFlux                     1.5e-05
xrscls                             M1.4
ocls                                 2F
optlocation                      N17E38
region                             9960
Rpatrol                             Yes
TypeII                              Yes
TII_duration                         28
TypeIV                               No
TIV_duration                         -1
CME_Patrol                          Yes
CME_onset       2002-05-21T21:50:05.000
CME_speed                           853
Satellite                            G8
FlrHpTime       2002-05-21T22:00:00.000
FlrIntFlux                       0.0243
FlrIntFlux2                      0.0217
Association                        None
S1onset                             NaN
S2onset                             NaN
S3onset                             NaN
S4onset                             NaN


-------------

Pull the Temp and EM values from the students dataset into the original dataset

In [95]:
 orig_data_new = pd.merge(orig_data, stud_data[['FlrOnset','tchianti','emchianti']], how='left', left_on='FlrOnset', right_on='FlrOnset')

In [108]:
#shuffle the events so they are not organized 
orig_data_new = shuffle(orig_data_new)

#save the shuffled dataframe -- commented out to prevent resaving
orig_data_new.to_csv("AllEvtsShuffled_1986_2004_t_em.csv")

NameError: name 'shuffle' is not defined

In [105]:
#s1[['FlrOnset','sep','tchianti','emchianti']].iloc[0:10]

In [107]:
#stud_data[['FlrOnset','sep','tchianti','emchianti']].iloc[0:20]

---------

Pulling Temp and EM from sunpy GOES module - currently has issues with some bad data points

In [None]:

i = 1
t1 = orig_data.FlrOnset.iloc[i]
t2 = orig_data.Flrendtime.iloc[i]
goeslc = lc.GOESLightCurve.create(t1, t2)  
      
plt.plot(goeslc.data.xrsa)
plt.plot(goeslc.data.xrsb)
plt.show()

#calculate the temperature and emission measure
goeslc_new = calculate_temperature_em(goeslc)  

#flare max temp and em
temp.append(goeslclc_new.data.temperature.max())
em.append(goeslc_new.data.em.max())



In [None]:
temp = []
em = []
for i in range(len(orig_data)):


    
    try:
        t1 = orig_data.FlrOnset.iloc[i]
        t2 = orig_data.Flrendtime.iloc[i]
        goeslc = lc.GOESLightCurve.create(t1, t2)  
        
        #calculate the temperature and emission measure
        goeslc_new = calculate_temperature_em(goeslc)  

        #flare max temp and em
        temp.append(goeslclc_new.data.temperature.max())
        em.append(goeslc_new.data.em.max())

    except:
        
        print("error %i", i)



In [None]:
plt.plot(goeslc.data.xrsa)
plt.show()

In [None]:
lc_new = calculate_temperature_em(goeslc) 