# The thinking behind this code
Here, I'm just looking to combine the JEDI catalog with the DEMON dimming catalog. I want to see if the big and small dimmings match up in both.

In [1]:
# Standard modules
import numpy as np
import pandas as pd
from astropy.time import Time
import matplotlib.pyplot as plt
from matplotlib import dates
import seaborn as sns

# Custom modules
from jpm_time_conversions import *
from jpm_logger import JpmLogger
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Apply seaborn's default theme first, then layer the 'jpm-dark' matplotlib style on top of it.
# NOTE(review): 'jpm-dark' is not one of matplotlib's bundled styles — presumably a custom
# style sheet that has to be installed locally for this cell to run; confirm.
sns.set()
plt.style.use('jpm-dark')

## First things first: I've got to read in the catalogs and do a bit of cleaning
Then I'll take a look at the resultant dataframes.

In [46]:
# Read in the JEDI and DEMON catalogs
jedi_csv_path = '/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/jedi_v1.csv'
demon_csv_path = '/Users/jmason86/Dropbox/Research/Data/DEMON/DEMON_20180709.csv'
demon_datetime_parts = ['Year', 'Month', 'Day', 'Peak Time']

jedi = pd.read_csv(jedi_csv_path, low_memory=False)
# The nested list asks pandas to combine the four columns into a single parsed
# datetime column; the cleaning step reads it back as 'Year_Month_Day_Peak Time'.
demon = pd.read_csv(demon_csv_path, parse_dates=[demon_datetime_parts])

In [47]:
# Clean the DEMON catalog: index it by the parsed peak datetime, drop the now-redundant
# source column, sort chronologically, and convert to numeric data type wherever appropriate
def _coerce_numeric_columns(frame):
    """Return a copy of `frame` with text columns converted to numbers where possible.

    Stands in for `DataFrame.convert_objects(convert_numeric=True)`, which pandas
    deprecated and later removed in favour of the type-specific converters.

    Each object/string column is passed through `pd.to_numeric(errors='coerce')`.
    If at least one value converts, the numeric version is kept (unconvertible
    entries become NaN). If nothing converts, the column is left untouched so that
    purely textual columns are not wiped out.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose text columns should be converted. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and index.
    """
    converted = frame.copy()
    for column in converted.columns:
        series = converted[column]
        is_text = pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series)
        if not is_text:
            continue  # Already has a concrete (numeric, datetime, bool, ...) dtype
        as_numeric = pd.to_numeric(series, errors='coerce')
        if as_numeric.notna().any():
            converted[column] = as_numeric
    return converted


# Guarded so that re-running this cell after the column has been consumed is harmless
if 'Year_Month_Day_Peak Time' in demon.columns:
    peak_datetime = pd.DatetimeIndex(demon['Year_Month_Day_Peak Time'], name='Peak Datetime')
    demon = demon.drop(columns=['Year_Month_Day_Peak Time']).set_index(peak_datetime)

demon = _coerce_numeric_columns(demon.sort_index())

For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.


In [57]:
# More cleaning: restricting the time range of DEMON to that of JEDI
# The window runs from the flare start time in JEDI's first row to the one in its last row
# (NOTE(review): this presumes JEDI is ordered chronologically — confirm).
jedi_flare_starts = jedi['GOES Flare Start Time']
first_flare_start = jedi_flare_starts[0]
last_flare_start = jedi_flare_starts[len(jedi) - 1]

demon = demon[first_flare_start:last_flare_start]
demon.head()

Unnamed: 0_level_0,Intensity,Start Time,End Time,#,Max Drop,Latitude [º],Longitude [º],Dist R_Sun,NOAA AR #,Count,Flare Class
Peak Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010-05-23 16:46:00,-252.0,16:46:00,17:48:00,4321,44.0,28.0,-3.0,0.5,,40,
2010-05-24 13:24:00,-94.0,13:22:00,13:52:00,4322,62.0,17.0,33.0,0.61,,24,
2010-05-31 19:54:00,-281.0,19:38:00,20:34:00,4323,41.0,24.0,29.0,0.62,,37,
2010-06-12 01:08:00,-98.0,01:04:00,01:50:00,4324,84.0,29.0,52.0,0.85,AR 1081,32,M4
2010-06-12 09:28:00,-61.0,09:28:00,09:52:00,4325,104.0,25.0,58.0,0.88,AR 1081,21,M2


In [59]:
# Preview the first rows of the JEDI catalog
jedi.head()

Unnamed: 0,Event #,GOES Flare Start Time,GOES Flare Peak Time,GOES Flare Class,Pre-Flare Start Time,Pre-Flare End Time,Flare Interrupt,9.4 Pre-Flare Irradiance [W/m2],13.1 Pre-Flare Irradiance [W/m2],13.3 Pre-Flare Irradiance [W/m2],...,103.2 by 63.0 Fitting Score,103.2 by 71.9 Fitting Score,103.2 by 72.2 Fitting Score,103.2 by 77.0 Fitting Score,103.2 by 79.0 Fitting Score,103.2 by 83.6 Fitting Score,103.2 by 95.0 Fitting Score,103.2 by 97.3 Fitting Score,103.2 by 97.7 Fitting Score,103.2 by 102.6 Fitting Score
0,1.0,2010-05-04 16:15:00.000,2010-05-04 16:29:00.000,C3.6,2010-05-04 08:29:00.000,2010-05-04 16:29:00.000,True,,,,...,,,,,,,,,,
1,2.0,2010-05-05 07:09:00.000,2010-05-05 07:16:00.000,C2.3,2010-05-04 23:16:00.000,2010-05-05 07:16:00.000,True,4e-06,2e-06,,...,,,,,,,,,,
2,3.0,2010-05-05 11:37:00.000,2010-05-05 11:52:00.000,C8.8,2010-05-04 23:16:00.000,2010-05-05 07:16:00.000,True,4e-06,2e-06,,...,,,,,,,,,,
3,4.0,2010-05-05 17:13:00.000,2010-05-05 17:19:00.000,M1.2,2010-05-04 23:16:00.000,2010-05-05 07:16:00.000,False,4e-06,2e-06,,...,,,,,,,,,,
4,5.0,2010-05-07 07:29:00.000,2010-05-07 07:42:00.000,C2.0,2010-05-06 23:42:00.000,2010-05-07 07:42:00.000,True,,,,...,,,,,,,,,,


## Make a merged catalog (DataFrame)
I am using JEDI as the baseline and will fill in what I can from DEMON. This merged set will of course contain columns in addition to what's in JEDI.

In [58]:
# Start the merged catalog as a copy of JEDI, with placeholder columns for the DEMON fields
# (`assign` returns a new frame, so `jedi` itself is left untouched)
jedi_demon = jedi.assign(**{
    'Has DEMON Dimming': False,
    'Intensity': np.nan,
    'Max Drop': np.nan,
})

## Matching up rows in JEDI and DEMON
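To match up the rows in the two catalogs, I only need a peak time from each: the GOES flare peak time in JEDI and the peak time that indexes DEMON. It is extremely unlikely that two different flares will have exactly the same peak time down to the second. Caveat: exact matching only works if both catalogs record the same instant for a given event; the spot check below (2010-06-13: 05:46 in DEMON vs. a 05:39 flare peak in JEDI) suggests they may not, in which case a time window would be needed instead of exact equality.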

In [62]:
# Turn both sets of peak times into strings, then into astropy Time arrays so they can be compared
jedi_peak_strings = jedi['GOES Flare Peak Time'].values.astype(str)
demon_peak_strings = demon.index.values.astype(str)

jedi_time = Time(jedi_peak_strings)
demon_time = Time(demon_peak_strings)

In [74]:
# Spot check: ISO-formatted peak time of the DEMON entry at position 5
demon_time[5].iso

'2010-06-13 05:46:00.000'

In [75]:
# Spot check: full JEDI record at row position 10, to compare its flare times by eye
jedi.iloc[10]

Event #                                                  12
GOES Flare Start Time               2010-06-13 05:30:00.000
GOES Flare Peak Time                2010-06-13 05:39:00.000
GOES Flare Class                                       M1.0
Pre-Flare Start Time                2010-06-12 21:39:00.000
Pre-Flare End Time                  2010-06-13 05:39:00.000
Flare Interrupt                                        True
9.4 Pre-Flare Irradiance [W/m2]                         NaN
13.1 Pre-Flare Irradiance [W/m2]                        NaN
13.3 Pre-Flare Irradiance [W/m2]                        NaN
17.1 Pre-Flare Irradiance [W/m2]                5.35636e-05
17.7 Pre-Flare Irradiance [W/m2]                        NaN
18.0 Pre-Flare Irradiance [W/m2]                        NaN
19.5 Pre-Flare Irradiance [W/m2]                        NaN
20.2 Pre-Flare Irradiance [W/m2]                        NaN
21.1 Pre-Flare Irradiance [W/m2]                        NaN
25.6 Pre-Flare Irradiance [W/m2]        

In [63]:
# For each JEDI flare, find the DEMON row(s) whose peak time coincides with the flare peak time.
# The Julian dates do not change between iterations, so compute them once up front.
demon_jd = np.atleast_1d(demon_time.jd)
jedi_jd = np.atleast_1d(jedi_time.jd)

# Julian dates are floats, so compare with a tolerance instead of `==`:
# half a second, expressed in days (the unit of JD)
MATCH_TOLERANCE_DAYS = 0.5 / 86400.0

for jedi_row_index in range(min(10, jedi_jd.size)): #range(len(jedi)):
    # Compare every DEMON time against this ONE JEDI time. Comparing the two full arrays
    # (as was done before, without ever using jedi_row_index) is not a row-by-row match:
    # with arrays of different lengths older NumPy returns a scalar False plus a warning
    # and newer NumPy raises, so no match could ever be found.
    is_match = np.isclose(demon_jd, jedi_jd[jedi_row_index], rtol=0, atol=MATCH_TOLERANCE_DAYS)
    ind = np.where(is_match)  # Same shape as before: a 1-tuple holding the array of matching positions
    print(ind)

(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)


  from ipykernel import kernelapp as app


In [None]:
# NOTE(review): this cell has no execution count and refers to `cdaw`, `jedicdaw` and `cdaw_time`,
# none of which are defined in the visible part of this notebook. It looks like a template carried
# over from a CME-matching analysis that still needs adapting to `demon` / `jedi_demon` — confirm.
#
# When exactly one catalog row matched (`ind` as produced by the matching loop), copy that CME's
# properties into the merged frame, provided it passes the position-angle, speed and width cuts.
#
# NOTE(review): the assignments below use chained indexing (`frame[col].iloc[i] = value`), which
# pandas may apply to a temporary copy instead of the frame (SettingWithCopyWarning). The usual
# single-step form is `frame.loc[frame.index[i], col] = value` — verify before relying on this.
if ind[0].size == 1:
        # Position angles of the matched CME and of the flare, both from columns labelled in degrees
        cme_pa = cdaw['Position Angle [º]'].iloc[ind[0]].values[0]
        flare_pa = jedicdaw['GOES Converted Position Angle [º]'].iloc[jedi_row_index]
        if np.abs(cme_pa - flare_pa) < 45:  # Events should be close to each other
            if cdaw['Linear Speed [km/s]'].iloc[ind[0]].values[0] > 200:  # Events shouldn't be super slow (Barbara Thompson suggests these are really jets)
                if cdaw['Width'].iloc[ind[0]].values[0] > 30:  # Events shouldn't be super narrow (Barbara Thompson suggests these are really jets)
                    # All cuts passed: flag the JEDI row and copy over the CME time, width, speed and mass
                    jedicdaw['Has CME'].iloc[jedi_row_index] = True
                    jedicdaw['CME Time'].iloc[jedi_row_index] = cdaw_time[ind[0]].iso[0]
                    jedicdaw['CME Width [º]'].iloc[jedi_row_index] = cdaw['Width'].iloc[ind[0]].values[0]
                    jedicdaw['CME Speed [km/s]'].iloc[jedi_row_index] = cdaw['Linear Speed [km/s]'].iloc[ind[0]].values[0]
                    jedicdaw['CME Mass [g]'].iloc[jedi_row_index] = cdaw['Mass [g]'].iloc[ind[0]].values[0]