# Create CDAW CME Merge To LASCO Catalog
This notebook is for creating a merged CME/LASCO catalogs of both CME events and time intervals when there are no CMEs (depends on telescope). Requires content from 'build_cme_catalog' notebook.

In [1]:
import requests
import pandas as pd

Need_To_Redownload = False

#inputs
LASCO_Catalog_Filename = '../lasco_img_catalog_lvl_1.txt'
CME_Catalog_Filename = '../cme_event_catalog.csv'
Raw_CME_Catalog_Filename = '../raw_cme_event_catalog.csv'

# the outputs
Merged_Catalog_Filename = '../merged_cdaw_lasco_catalog.csv'

# sampling of non-events
C2_non_CME_Catalog_Filename = '../c2_non_events.csv'
C3_non_CME_Catalog_Filename = '../c3_non_events.csv'

In [2]:
# pull the lasco catalog to local disk
if Need_To_Redownload:
    
    # download LASCO catalog now
    lasco_hdr_url = 'https://lasco-www.nrl.navy.mil/lz/level_1/img_hdr.txt'
    response = requests.get(lasco_hdr_url, allow_redirects=True)
    
    # cache locally
    open(LASCO_Catalog_Filename, 'wb').write(response.content)


In [3]:
cme_data = pd.read_csv(CME_Catalog_Filename)
#cme_data = cme_data.rename(columns={'Unnamed: 0':'CDAW_Id'})
#cme_data
raw_cme_data = pd.read_csv(Raw_CME_Catalog_Filename)

len(cme_data), len(raw_cme_data)

(11098, 31628)

In [4]:
# now work to merge in information from LASCO img catalog
lasco_catalog = pd.read_fwf(LASCO_Catalog_Filename, index=False,
                            names=['filename', 'date-obs', 'time-obs','telescope', 'expTime',
                                   'NumCols', 'NumRows', 'StartCol', 'StartRow', 'Filter', 
                                   'Polarizer','Observ.Seq', 'LEB Program', 'junk'])
#lasco_catalog

In [5]:
# Drop C1, EIT observations (this speeds up datetime calc below)
lasco_catalog = lasco_catalog[lasco_catalog['telescope'] != 'C1']
lasco_catalog = lasco_catalog[lasco_catalog['telescope'] != 'EIT']

#date_filt = lasco_catalog[lasco_catalog['date-obs']=='2020/05/28']
#date_filt[date_filt['time-obs']=='21:12:11']
len(lasco_catalog)

913208

In [6]:
from datetime import datetime, date, time, timezone, timedelta

def add_datetime_to_catalog(catalog:pd.DataFrame)->pd.DataFrame:
    
    # add column of python datetime for easier handling
    datetime_col = []
    for row in catalog.iterrows():
        date_obs = row[1]['date-obs']
        time_obs = row[1]['time-obs']

        date_obs = [int(t) for t in row[1]['date-obs'].split('/')]
        time_obs = [int(t) for t in row[1]['time-obs'].split(':')]

        datetime_col.append(datetime(year=date_obs[0], month=date_obs[1], day=date_obs[2], hour=time_obs[0], minute=time_obs[1], second=time_obs[2]))

    catalog['datetime'] = datetime_col
    catalog.index = lasco_catalog.datetime
    return catalog

lasco_catalog = add_datetime_to_catalog(lasco_catalog)

In [7]:
# Do the merge of catalogs, using CME events for lookup of filename in LASCO.
# We are not just getting CME images, howerver. For purposes of background subtraction we need to 
# capture the image before first CME event image

def merge (lasco_catalog:pd.DataFrame, cme_data:pd.DataFrame)->pd.DataFrame:
    
    cme_ids=[]
    filenames=[]
    filters = []
    dates=[]
    times=[]

    lasco_keepCols = ['filename', 'telescope', 'NumCols', 'NumRows', 'Filter', 'Polarizer', 'datetime', 'expTime', 'date-obs']
    cdaw_keepCols = ['Remarks', 'Central', 'Width', 'Linear', 'CDAW_Id']
    bkgnd_keepCols = ['filename', 'NumCols', 'NumRows', 'Filter', 'datetime', 'expTime', 'Polarizer']
    bckgnd_fileinfo = [ 'bkgnd_'+colname for colname in bkgnd_keepCols ]

    merged_data = {  col:[] for col in lasco_keepCols + cdaw_keepCols + bckgnd_fileinfo }
    
    # split up catalog by C2 and C3
    c2_lasco = lasco_catalog[lasco_catalog['telescope']=='C2']
    c3_lasco = lasco_catalog[lasco_catalog['telescope']=='C3']

    for event in cme_data.iterrows():

        # grab date-time-duration info for our event
        cme_start_date = [int(t) for t in event[1]['Date'].split('/')]
        cme_start_time = [int(t) for t in event[1]['Time'].split(':')]
        cme_c3_delta = timedelta(seconds=event[1]['StartTime_C3_Delta'])
        cme_c2_duration = timedelta(seconds=float(event[1]['Duration_C2']))
        cme_c3_duration = timedelta(seconds=float(event[1]['Duration_C3']))

        # convert to Python datetime for easier slicing
        cme_date = date (year=cme_start_date[0], month=cme_start_date[1], day=cme_start_date[2])
        cme_time = time(hour=cme_start_time[0],minute=cme_start_time[1],second=cme_start_time[2])

        c2_start = datetime.combine(cme_date, cme_time)
        c2_end   = c2_start + cme_c2_duration

        c3_start = c2_start + cme_c3_delta
        c3_end   = c3_start + cme_c3_duration

        # get C2 entries

        # capture prior image, store with cme image id
        previous_c2_image = c2_lasco[c2_lasco['datetime']<c2_start].iloc[-1]

        # and a slice of all of the images which follow
        c2_slice = c2_lasco[c2_lasco['datetime']>c2_start]
        for lasco_row in c2_slice[c2_slice['datetime']<=c2_end].iterrows():

            for col in lasco_keepCols:
                merged_data[col].append(lasco_row[1][col])

            for col in cdaw_keepCols:
                merged_data[col].append(event[1][col])

            #  add in background file to use
            for col in bkgnd_keepCols: 
                merged_data['bkgnd_'+col].append(previous_c2_image[col])


        # get C3 entries

        # capture prior image, store with cme image id
        previous_c3_image = c3_lasco[c3_lasco['datetime']<c3_start].iloc[-1]

        c3_slice = c3_lasco[c3_lasco['datetime']>c3_start]
        for lasco_row in c3_slice[c3_slice['datetime']<=c3_end].iterrows():

            for col in lasco_keepCols:
                merged_data[col].append(lasco_row[1][col])

            for col in cdaw_keepCols:
                merged_data[col].append(event[1][col])

            #  add in background file to use
            for col in bkgnd_keepCols: 
                merged_data['bkgnd_'+col].append(previous_c3_image[col])

    # convert results to DataFrame
    result = pd.DataFrame(merged_data)
    return result


In [8]:
# calculation for positive events
result = merge(lasco_catalog, cme_data)
result

Unnamed: 0,filename,telescope,NumCols,NumRows,Filter,Polarizer,datetime,expTime,date-obs,Remarks,...,Width,Linear,CDAW_Id,bkgnd_filename,bkgnd_NumCols,bkgnd_NumRows,bkgnd_Filter,bkgnd_datetime,bkgnd_expTime,bkgnd_Polarizer
0,25000294.fts,C2,1024,1024,Orang,Clear,1996-01-31 08:01:38,25.4,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
1,25000295.fts,C2,1024,1024,Orang,Clear,1996-01-31 09:11:14,25.0,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
2,25000296.fts,C2,1024,1024,Orang,Clear,1996-01-31 10:20:39,25.0,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
3,25000297.fts,C2,1024,1024,Orang,Clear,1996-01-31 11:30:19,25.9,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
4,25000298.fts,C2,1024,1024,Orang,Clear,1996-01-31 12:40:17,25.1,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593204,35513585.fts,C3,1024,1024,Clear,Clear,2017-08-31 23:04:45,17.6,2017/08/31,Poor Event\n,...,54,274,28858,35513550.fts,1024,1024,Clear,2017-08-31 16:17:54,17.6,Clear
593205,35513586.fts,C3,1024,1024,Clear,Clear,2017-08-31 23:16:36,17.5,2017/08/31,Poor Event\n,...,54,274,28858,35513550.fts,1024,1024,Clear,2017-08-31 16:17:54,17.6,Clear
593206,35513587.fts,C3,1024,1024,Clear,Clear,2017-08-31 23:28:54,17.9,2017/08/31,Poor Event\n,...,54,274,28858,35513550.fts,1024,1024,Clear,2017-08-31 16:17:54,17.6,Clear
593207,35513588.fts,C3,1024,1024,Clear,Clear,2017-08-31 23:40:50,17.6,2017/08/31,Poor Event\n,...,54,274,28858,35513550.fts,1024,1024,Clear,2017-08-31 16:17:54,17.6,Clear


In [9]:
# save the merged catalog (positive events)
result.to_csv(Merged_Catalog_Filename)

In [10]:
# test it out, try to pull a few intervals for events
C2_data = result[result['telescope']=='C2']
C2_data[:10]

Unnamed: 0,filename,telescope,NumCols,NumRows,Filter,Polarizer,datetime,expTime,date-obs,Remarks,...,Width,Linear,CDAW_Id,bkgnd_filename,bkgnd_NumCols,bkgnd_NumRows,bkgnd_Filter,bkgnd_datetime,bkgnd_expTime,bkgnd_Polarizer
0,25000294.fts,C2,1024,1024,Orang,Clear,1996-01-31 08:01:38,25.4,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
1,25000295.fts,C2,1024,1024,Orang,Clear,1996-01-31 09:11:14,25.0,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
2,25000296.fts,C2,1024,1024,Orang,Clear,1996-01-31 10:20:39,25.0,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
3,25000297.fts,C2,1024,1024,Orang,Clear,1996-01-31 11:30:19,25.9,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
4,25000298.fts,C2,1024,1024,Orang,Clear,1996-01-31 12:40:17,25.1,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
5,25000299.fts,C2,1024,1024,Orang,Clear,1996-01-31 13:50:01,25.1,1996/01/31,Poor Event; Only 3 points\n,...,47,158,5,25000292.fts,1024,1024,Orang,1996-01-31 05:42:43,25.1,Clear
25,25000487.fts,C2,1024,1024,Orang,Clear,1996-02-12 07:01:57,25.1,1996/02/12,Poor Event\n,...,53,160,8,25000485.fts,1024,1024,Orang,1996-02-12 04:32:50,25.1,Clear
26,25000488.fts,C2,1024,1024,Orang,Clear,1996-02-12 08:16:27,25.0,1996/02/12,Poor Event\n,...,53,160,8,25000485.fts,1024,1024,Orang,1996-02-12 04:32:50,25.1,Clear
27,25000489.fts,C2,1024,1024,Orang,Clear,1996-02-12 09:30:48,25.1,1996/02/12,Poor Event\n,...,53,160,8,25000485.fts,1024,1024,Orang,1996-02-12 04:32:50,25.1,Clear
28,25000490.fts,C2,1024,1024,Orang,Clear,1996-02-12 10:45:46,25.1,1996/02/12,Poor Event\n,...,53,160,8,25000485.fts,1024,1024,Orang,1996-02-12 04:32:50,25.1,Clear


In [11]:
#c2_lasco

# now build out the catalog of NON-CME times

In [12]:
# now build out the catalog of NON-CME times
keepCols = ['filename', 'datetime', 'date-obs', 'NumCols', 'NumRows', 'Filter', 'Polarizer', 'expTime', 'telescope']

# split up catalog by C2 and C3
c2_lasco = lasco_catalog[lasco_catalog['telescope']=='C2']
c3_lasco = lasco_catalog[lasco_catalog['telescope']=='C3']
    
c2_intervals = c2_lasco[keepCols]
c2_intervals.set_index('datetime')

c3_intervals = c3_lasco[keepCols]
c3_intervals.set_index('datetime')

c2_intervals

Unnamed: 0_level_0,filename,datetime,date-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1996-01-23 17:47:06,25000161.fts,1996-01-23 17:47:06,1996/01/23,512,512,Orang,Clear,74.0,C2
1996-01-23 20:01:08,25000174.fts,1996-01-23 20:01:08,1996/01/23,512,512,Orang,Clear,3.8,C2
1996-01-23 20:06:58,25000175.fts,1996-01-23 20:06:58,1996/01/23,512,512,Orang,Clear,3.8,C2
1996-01-23 21:36:17,25000185.fts,1996-01-23 21:36:17,1996/01/23,512,512,Orang,Clear,3.1,C2
1996-01-24 15:43:29,25000186.fts,1996-01-24 15:43:29,1996/01/24,1024,1024,Orang,Clear,17.1,C2
...,...,...,...,...,...,...,...,...,...
2017-08-31 22:34:39,25649100.fts,2017-08-31 22:34:39,2017/08/31,1024,1024,Orang,Clear,25.1,C2
2017-08-31 23:10:42,25649101.fts,2017-08-31 23:10:42,2017/08/31,1024,1024,Orang,Clear,25.1,C2
2017-08-31 23:22:37,25649102.fts,2017-08-31 23:22:37,2017/08/31,1024,1024,Orang,Clear,25.5,C2
2017-08-31 23:34:50,25649103.fts,2017-08-31 23:34:50,2017/08/31,1024,1024,Orang,Clear,25.1,C2


In [28]:
#Check data on 7-10-2000 (should not be any after ~04 UT)
foo =  raw_cme_data
foo = [foo[foo['Date']== '2000/07/10']]
foo

[      CDAW_Id        Date      Time Central  Width Linear   2nd order speed  \
 3230     3230  2000/07/10  02:26:05     285     12    610   597   624   703   
 3231     3231  2000/07/10  04:50:05      99     59    623   496   756   923   
 3232     3232  2000/07/10  13:27:29     314     43    327   228   425   718   
 3233     3233  2000/07/10  17:50:05      16     31    616   798   422     0   
 3234     3234  2000/07/10  20:26:05      24     72    354   211   492   996   
 3235     3235  2000/07/10  20:50:05     264     49    426   379   472   472   
 3236     3236  2000/07/10  21:50:06      67    289   1352  1168  1538  1456   
 3237     3237  2000/07/10  23:50:05     236     18    390   371   409   434   
 
       Accel  ...                           Remarks width  linear  mpa  \
 3230   5.9*  ...                               NaN    12   610.0  283   
 3231   25.5  ...  Difficult to measure the width\n    59   623.0   99   
 3232  19.6*  ...                               NaN    4

In [14]:
cme_data[:1]

Unnamed: 0,CDAW_Id,Date,Time,Central,Width,Linear,2nd,order,speed,Accel,...,Remarks,width,linear,mpa,mass,kinetic,accel,Duration_C2,Duration_C3,StartTime_C3_Delta
0,5,1996/01/31,06:52:13,274,47,158,219,99,0,-12.3*,...,Poor Event; Only 3 points\n,47,158,272,250000000000000.0,3.2e+28,-12.3,26582.278481,116518.987342,9746.835443


In [15]:
# step through CDAW catalog and calculate when there is NOT an event

# Because calculated durations are only ballpark, multiply by a factor to create more duration to CME events
# to better insure we dont pick up the beginning of a CME
extra_duration_factor = 1.5
    
c2_non_events = { k:[] for k in c2_intervals.columns }
c3_non_events = { k:[] for k in c3_intervals.columns }

sformat = "%Y-%m-%d %H:%M:%S"
    
last_c2_end = None
last_c3_end = None

count = 0
# we need to use the *raw* cme data (e.g. data where we didnt filter out poor events so that we
# insure that we dont mark intervals which actually have cme)
for event in raw_cme_data.iterrows():

    # grab date-time-duration info for our event
    cme_start_date = [int(t) for t in event[1]['Date'].split('/')]
    cme_start_time = [int(t) for t in event[1]['Time'].split(':')]
    
    # check if we have poor data, usually shows up as a very small linear speed
    # if its extremely low, lets skip this event
    speed = event[1]['linear']
    if speed > 0.000000001:
        cme_c3_delta = timedelta(seconds=float(event[1]['StartTime_C3_Delta']))
    
        cme_c2_duration = timedelta(seconds=float(event[1]['Duration_C2']) * extra_duration_factor)
        cme_c3_duration = timedelta(seconds=float(event[1]['Duration_C3']) * extra_duration_factor)

        # convert to Python datetime for easier slicing
        cme_date = date (year=cme_start_date[0], month=cme_start_date[1], day=cme_start_date[2])
        cme_time = time(hour=cme_start_time[0],minute=cme_start_time[1],second=cme_start_time[2])

        c2_start = datetime.combine(cme_date,cme_time)
        c2_end   = c2_start + cme_c2_duration

        c3_start = c2_start + cme_c3_delta
        c3_end   = c3_start + cme_c3_duration

        # we have free interval for C2, C3 based on prior event end and 
        # the start of the new event
        if last_c2_end != None:
            for c2_ne in c2_intervals.loc[last_c2_end.strftime(sformat):c2_start.strftime(sformat)].iterrows():
                for col in c2_intervals.columns:
                    c2_non_events[col].append(c2_ne[1][col])

        if last_c3_end != None:
            for c3_ne in c3_intervals.loc[last_c3_end.strftime(sformat):c3_start.strftime(sformat)].iterrows():
                for col in c3_intervals.columns:
                    c3_non_events[col].append(c3_ne[1][col])

        last_c2_end = c2_end
        last_c3_end = c3_end
        
    else:
        # we had a poor event. Its better to just 
        # restart trying to find a new interval, beginning with the next event
        last_c2_end = None
        last_c3_end = None


In [35]:
# check for data on July 12, 2000 (there should be none)
foo = c2_intervals[c2_intervals['datetime']>'2000-07-06']
bar = foo[foo['datetime']<'2000-07-15']
bar

Unnamed: 0_level_0,filename,datetime,date-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000-07-06 00:05:20,25071924.fts,2000-07-06 00:05:20,2000/07/06,1024,1024,Orang,Clear,25.5,C2
2000-07-06 00:25:28,25071925.fts,2000-07-06 00:25:28,2000/07/06,1024,1024,Orang,Clear,25.1,C2
2000-07-06 00:49:20,25071926.fts,2000-07-06 00:49:20,2000/07/06,1024,1024,Orang,Clear,25.1,C2
2000-07-06 01:26:40,25071927.fts,2000-07-06 01:26:40,2000/07/06,1024,1024,Orang,Clear,25.1,C2
2000-07-06 01:49:29,25071928.fts,2000-07-06 01:49:29,2000/07/06,1024,1024,Orang,Clear,25.1,C2
...,...,...,...,...,...,...,...,...,...
2000-07-14 21:33:26,25072522.fts,2000-07-14 21:33:26,2000/07/14,1024,1024,Orang,Clear,25.6,C2
2000-07-14 22:00:57,25072523.fts,2000-07-14 22:00:57,2000/07/14,1024,1024,Orang,Clear,12.9,C2
2000-07-14 22:51:38,25072524.fts,2000-07-14 22:51:38,2000/07/14,1024,1024,Orang,Clear,12.2,C2
2000-07-14 23:05:55,25072525.fts,2000-07-14 23:05:55,2000/07/14,1024,1024,Orang,Clear,24.3,C2


In [17]:
# check for data on July 14
foo = c2_intervals[c2_intervals['datetime']>'2000-07-04']
bar = foo[foo['datetime'] < '2000-07-31']
bar[:30]

Unnamed: 0_level_0,filename,datetime,date-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000-07-04 00:05:53,25071787.fts,2000-07-04 00:05:53,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 00:25:59,25071788.fts,2000-07-04 00:25:59,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 00:49:52,25071789.fts,2000-07-04 00:49:52,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 01:27:10,25071790.fts,2000-07-04 01:27:10,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 01:49:57,25071791.fts,2000-07-04 01:49:57,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 02:05:51,25071792.fts,2000-07-04 02:05:51,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 02:25:58,25071793.fts,2000-07-04 02:25:58,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 02:49:52,25071794.fts,2000-07-04 02:49:52,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 03:05:57,25071795.fts,2000-07-04 03:05:57,2000/07/04,1024,1024,Orang,Clear,25.1,C2
2000-07-04 03:25:50,25071796.fts,2000-07-04 03:25:50,2000/07/04,1024,1024,Orang,Clear,25.1,C2


In [18]:
# c2_intervals.loc['1996-01-31 00:00:00':'2000-02-12 00:00:00']

In [19]:
c2_non_events = pd.DataFrame(c2_non_events)
c2_non_events

Unnamed: 0,filename,datetime,date-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
0,25000161.fts,1996-01-23 17:47:06,1996/01/23,512,512,Orang,Clear,74.0,C2
1,25000174.fts,1996-01-23 20:01:08,1996/01/23,512,512,Orang,Clear,3.8,C2
2,25000175.fts,1996-01-23 20:06:58,1996/01/23,512,512,Orang,Clear,3.8,C2
3,25000185.fts,1996-01-23 21:36:17,1996/01/23,512,512,Orang,Clear,3.1,C2
4,25000186.fts,1996-01-24 15:43:29,1996/01/24,1024,1024,Orang,Clear,17.1,C2
...,...,...,...,...,...,...,...,...,...
218254,25649100.fts,2017-08-31 22:34:39,2017/08/31,1024,1024,Orang,Clear,25.1,C2
218255,25649101.fts,2017-08-31 23:10:42,2017/08/31,1024,1024,Orang,Clear,25.1,C2
218256,25649102.fts,2017-08-31 23:22:37,2017/08/31,1024,1024,Orang,Clear,25.5,C2
218257,25649103.fts,2017-08-31 23:34:50,2017/08/31,1024,1024,Orang,Clear,25.1,C2


In [37]:
# check for july-10, july-12
foo = c2_non_events
foo = foo[foo['datetime']>'2000-07-09']
bar = foo[foo['datetime']<'2000-07-11']
bar

Unnamed: 0,filename,datetime,date-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
35117,25072163.fts,2000-07-09 13:48:45,2000/07/09,1024,1024,Orang,Clear,25.1,C2
35118,25072164.fts,2000-07-09 14:04:54,2000/07/09,1024,1024,Orang,Clear,25.2,C2
35119,25072165.fts,2000-07-09 14:24:46,2000/07/09,1024,1024,Orang,Clear,25.0,C2
35120,25072166.fts,2000-07-09 14:48:53,2000/07/09,1024,1024,Orang,Clear,25.1,C2
35121,25072167.fts,2000-07-09 15:04:45,2000/07/09,1024,1024,Orang,Clear,26.6,C2
35122,25072168.fts,2000-07-09 15:24:52,2000/07/09,1024,1024,Orang,Clear,25.7,C2
35123,25072169.fts,2000-07-09 15:48:44,2000/07/09,1024,1024,Orang,Clear,25.3,C2
35124,25072170.fts,2000-07-09 16:04:50,2000/07/09,1024,1024,Orang,Clear,25.4,C2
35125,25072171.fts,2000-07-09 16:24:54,2000/07/09,1024,1024,Orang,Clear,25.1,C2
35126,25072172.fts,2000-07-09 16:48:48,2000/07/09,1024,1024,Orang,Clear,25.0,C2


In [20]:
c3_non_events = pd.DataFrame(c3_non_events)
c3_non_events

Unnamed: 0,filename,datetime,date-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
0,35000706.fts,1996-01-11 22:54:31,1996/01/11,128,128,Clear,Clear,3.8,C3
1,35000707.fts,1996-01-11 23:00:29,1996/01/11,512,512,Clear,Clear,3.1,C3
2,35000708.fts,1996-01-11 23:15:34,1996/01/11,512,256,Clear,Clear,15.1,C3
3,35002640.fts,1996-01-16 00:58:18,1996/01/16,1024,1024,Clear,Clear,19.1,C3
4,35002645.fts,1996-01-16 02:10:12,1996/01/16,1024,1024,Clear,Clear,19.1,C3
...,...,...,...,...,...,...,...,...,...
26044,35512902.fts,2017-08-25 22:17:48,2017/08/25,1024,1024,Clear,Clear,17.6,C3
26045,35512903.fts,2017-08-25 22:29:47,2017/08/25,1024,1024,Clear,Clear,17.6,C3
26046,35512908.fts,2017-08-25 23:05:57,2017/08/25,1024,1024,Clear,Clear,17.6,C3
26047,35512909.fts,2017-08-25 23:17:47,2017/08/25,1024,1024,Clear,Clear,17.8,C3


In [21]:
# save it 
c2_non_events.to_csv(C2_non_CME_Catalog_Filename, index=False)
c3_non_events.to_csv(C3_non_CME_Catalog_Filename, index=False)

In [22]:
event_catalog  = result
test_time = event_catalog['datetime'][1]
test_time

Timestamp('1996-01-31 09:11:14')

In [23]:
len(event_catalog), len(c2_intervals)

(593209, 554838)

In [24]:
# build out a filtered catlog so that we match only stuff we can accept
# for differencing later on (e.g. same params as before)
c2_a = c2_intervals[c2_intervals['expTime']>24.]
c2_b = c2_a[c2_a['expTime']<27.]
c2_c = c2_b[c2_b['Filter'].str.match('Orang')]
c2_d = c2_c[c2_c['NumCols'] == 1024]
c2_filt_cat = c2_d[c2_d['NumRows'] == 1024]

c2_filt_cat

Unnamed: 0_level_0,filename,datetime,date-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1996-01-28 17:18:10,25000254.fts,1996-01-28 17:18:10,1996/01/28,1024,1024,Orang,Clear,25.0,C2
1996-01-30 03:54:18,25000278.fts,1996-01-30 03:54:18,1996/01/30,1024,1024,Orang,Clear,25.2,C2
1996-01-30 04:38:27,25000279.fts,1996-01-30 04:38:27,1996/01/30,1024,1024,Orang,Clear,25.0,C2
1996-01-30 05:56:07,25000280.fts,1996-01-30 05:56:07,1996/01/30,1024,1024,Orang,Clear,25.1,C2
1996-01-30 07:13:53,25000281.fts,1996-01-30 07:13:53,1996/01/30,1024,1024,Orang,Clear,25.1,C2
...,...,...,...,...,...,...,...,...,...
2017-08-31 22:34:39,25649100.fts,2017-08-31 22:34:39,2017/08/31,1024,1024,Orang,Clear,25.1,C2
2017-08-31 23:10:42,25649101.fts,2017-08-31 23:10:42,2017/08/31,1024,1024,Orang,Clear,25.1,C2
2017-08-31 23:22:37,25649102.fts,2017-08-31 23:22:37,2017/08/31,1024,1024,Orang,Clear,25.5,C2
2017-08-31 23:34:50,25649103.fts,2017-08-31 23:34:50,2017/08/31,1024,1024,Orang,Clear,25.1,C2


In [25]:
d = c2_filt_cat[c2_filt_cat['datetime']<test_time]
foo = d.iloc[0]
type(foo), foo

(pandas.core.series.Series,
 filename            25000254.fts
 datetime     1996-01-28 17:18:10
 date-obs              1996/01/28
 NumCols                     1024
 NumRows                     1024
 Filter                     Orang
 Polarizer                  Clear
 expTime                     25.0
 telescope                     C2
 Name: 1996-01-28 17:18:10, dtype: object)