# Extract & merge various interesting CME times from the
# SBO Catalog (Vourlidas and Wood 2018) of CMEs with our LASCO C2 times

Then compare with the LASCO C2 catalog times to get a catalog of the images we want to use.

In [1]:
import pandas as pd

# --- inputs ---------------------------------------------------------------

# CSV file holding the Vourlidas & Wood SBO catalog
SBO_catalog_filename = "../sb_cmes_1996_2015.csv"

# local copy of the LASCO image header listing
LASCO_Catalog_Filename = "../lasco_img_catalog_lvl_1.txt"

# set to True to fetch the LASCO listing again instead of using the local copy
Need_To_Redownload = False

# --- processing parameters ------------------------------------------------

# how many consecutive files make up one sequence to be differenced
File_Seq_Size = 10

# --- outputs --------------------------------------------------------------

# output catalog
# NOTE(review): not referenced by any of the cells visible here -- confirm use
CME_Catalog_Filename = "../sbo_cme_event_catalog.csv"

## Load Data

In [2]:
# read the SBO event table from its CSV file, then preview the first three rows
sb_data = pd.read_csv(filepath_or_buffer=SBO_catalog_filename)
sb_data.head(3)

Unnamed: 0,#,Date_Start,Time_Start,Date_End,Time_End,Date_C2,Time_C2,CPA,WIDTH,LSPEED,...,ACCEL,MASS,KENERGY,REMARKS,EVENT_NAME,FR,FE,QUAD,Unnamed: 21,Unnamed: 22
0,0.0,2/11/96,20:04,2/13/96,14:06,2/12/96,5:47,91,53,160.0,...,2.0,1300000000000000.0,1.6e+29,Poor Event,19960212.054726.p092g,,,E,,
1,1.0,4/2/96,23:08,4/6/96,8:45,4/3/96,14:30,257,36,-9999.0,...,-9999.0,-9999.0,-9999.0,Very Poor Event; Only C2; Unable to measure,19960403.143040.p255s,,,W,,
2,2.0,4/23/96,1:30,4/28/96,14:40,4/25/96,11:30,272,68,186.0,...,1.1,2100000000000000.0,3.7e+29,Very Poor Event; Only C3,19960425.113016.p284g,FR,,W,,


In [3]:
# pull the lasco catalog to local disk
if Need_To_Redownload:

    # Fetched with the standard library so this cell does not depend on
    # `requests`, which the notebook never imports. urlopen follows redirects
    # by default and raises urllib.error.HTTPError on a 4xx/5xx reply, so an
    # error page is never cached as if it were the catalog.
    from urllib.request import urlopen

    # download LASCO catalog now
    lasco_hdr_url = 'https://lasco-www.nrl.navy.mil/lz/level_1/img_hdr.txt'
    with urlopen(lasco_hdr_url) as response:
        lasco_hdr_bytes = response.read()

    # cache locally; the download has fully completed before the existing
    # file is opened for writing, so a failed fetch leaves the old copy intact
    with open(LASCO_Catalog_Filename, 'wb') as cache_file:
        cache_file.write(lasco_hdr_bytes)

In [4]:
# Parse the fixed-width LASCO header listing, naming its columns by position.
# No `index=` keyword is passed: it is not a documented read_fwf option.
lasco_data = pd.read_fwf(LASCO_Catalog_Filename,
                            names=['filename', 'date-obs', 'time-obs','telescope', 'expTime',
                                   'NumCols', 'NumRows', 'StartCol', 'StartRow', 'Filter', 
                                   'Polarizer','Observ.Seq', 'LEB Program', 'junk'])

# keep only the columns the later cells filter or report on
keepCols = ['filename', 'date-obs', 'time-obs', 'NumCols', 'NumRows', 'Filter', 'Polarizer', 'expTime', 'telescope']
lasco_catalog = lasco_data[keepCols]

# preview the first three rows
lasco_catalog[:3]

Unnamed: 0,filename,date-obs,time-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope
0,35000706.fts,1996/01/11,22:54:31,128,128,Clear,Clear,3.8,C3
1,35000707.fts,1996/01/11,23:00:29,512,512,Clear,Clear,3.1,C3
2,35000708.fts,1996/01/11,23:15:34,512,256,Clear,Clear,15.1,C3


In [5]:
# LASCO clean-up: discard the rows whose telescope is C1 or EIT
# (fewer rows makes the datetime calculation below faster)
lasco_catalog = lasco_catalog[~lasco_catalog['telescope'].isin(['C1', 'EIT'])]

In [6]:
from datetime import datetime, date, time, timezone, timedelta

# add a datetime column (also used as the index) for easier handling
#
# The date and time strings are parsed in a single vectorized call instead of
# constructing one datetime per row via iterrows(), which is slow on a catalog
# of this size. The explicit format matches the 'YYYY/MM/DD' and 'HH:MM:SS'
# layout of the two source columns.
datetime_col = pd.to_datetime(
    lasco_catalog['date-obs'] + ' ' + lasco_catalog['time-obs'],
    format='%Y/%m/%d %H:%M:%S',
)

# lasco_catalog was produced by filtering another frame; take an explicit copy
# so that adding a column does not trigger SettingWithCopyWarning.
lasco_catalog = lasco_catalog.copy()
lasco_catalog['datetime'] = datetime_col
lasco_catalog.index = lasco_catalog.datetime
lasco_catalog[:3]

Unnamed: 0_level_0,filename,date-obs,time-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope,datetime
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1996-01-11 22:54:31,35000706.fts,1996/01/11,22:54:31,128,128,Clear,Clear,3.8,C3,1996-01-11 22:54:31
1996-01-11 23:00:29,35000707.fts,1996/01/11,23:00:29,512,512,Clear,Clear,3.1,C3,1996-01-11 23:00:29
1996-01-11 23:15:34,35000708.fts,1996/01/11,23:15:34,512,256,Clear,Clear,15.1,C3,1996-01-11 23:15:34


In [7]:
# write the processed catalog to disk; with to_csv's defaults the frame's
# index is written alongside the regular columns
lasco_catalog.to_csv(path_or_buf='../lasco_c2_c3_catalog.csv')

## Find matching rows between catalogs - C2 Telescope

In [8]:
# Narrow the catalog to C2 frames only (for now), applying one filter after
# another: telescope, filter wheel, polarizer, image dimensions and an
# exposure time strictly between 24 and 27.
C2_lasco_catalog = (
    lasco_catalog
    .loc[lambda frames: frames['telescope'] == 'C2']
    .loc[lambda frames: frames['Filter'].str.match('Orang')]
    .loc[lambda frames: frames['Polarizer'].str.match('Clear')]
    .loc[lambda frames: frames['NumRows'] == 1024]
    .loc[lambda frames: frames['NumCols'] == 1024]
    .loc[lambda frames: frames['expTime'] > 24.]
    .loc[lambda frames: frames['expTime'] < 27.]
)

C2_lasco_catalog

Unnamed: 0_level_0,filename,date-obs,time-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope,datetime
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1996-01-28 17:18:10,25000254.fts,1996/01/28,17:18:10,1024,1024,Orang,Clear,25.0,C2,1996-01-28 17:18:10
1996-01-30 03:54:18,25000278.fts,1996/01/30,03:54:18,1024,1024,Orang,Clear,25.2,C2,1996-01-30 03:54:18
1996-01-30 04:38:27,25000279.fts,1996/01/30,04:38:27,1024,1024,Orang,Clear,25.0,C2,1996-01-30 04:38:27
1996-01-30 05:56:07,25000280.fts,1996/01/30,05:56:07,1024,1024,Orang,Clear,25.1,C2,1996-01-30 05:56:07
1996-01-30 07:13:53,25000281.fts,1996/01/30,07:13:53,1024,1024,Orang,Clear,25.1,C2,1996-01-30 07:13:53
...,...,...,...,...,...,...,...,...,...,...
2017-08-31 22:34:39,25649100.fts,2017/08/31,22:34:39,1024,1024,Orang,Clear,25.1,C2,2017-08-31 22:34:39
2017-08-31 23:10:42,25649101.fts,2017/08/31,23:10:42,1024,1024,Orang,Clear,25.1,C2,2017-08-31 23:10:42
2017-08-31 23:22:37,25649102.fts,2017/08/31,23:22:37,1024,1024,Orang,Clear,25.5,C2,2017-08-31 23:22:37
2017-08-31 23:34:50,25649103.fts,2017/08/31,23:34:50,1024,1024,Orang,Clear,25.1,C2,2017-08-31 23:34:50


In [9]:
def datetime_for_row(date_str, time_str):
    """Build a datetime from an SBO-catalog date string and time string.

    Args:
        date_str: Date as 'M/D/Y' separated by '/'. A two-digit year is
            expanded with the pivot used throughout this notebook: values
            above 90 become 19xx, everything else 20xx. A year of 100 or
            more is taken as already complete and used unchanged.
        time_str: Time as 'H:M' or 'H:M:S' separated by ':'. Seconds default
            to 0 when absent.

    Returns:
        A naive ``datetime`` for the given date and time.

    Raises:
        ValueError: If either string has the wrong number of fields, a field
            is not an integer, or the values do not form a valid date/time.
    """
    month, day, year = [int(part) for part in date_str.split('/')]

    time_parts = [int(part) for part in time_str.split(':')]
    if len(time_parts) not in (2, 3):
        raise ValueError(f'expected H:M or H:M:S, got {time_str!r}')
    hr, mn = time_parts[:2]
    sec = time_parts[2] if len(time_parts) == 3 else 0

    # Only expand genuinely abbreviated years; without this guard a
    # four-digit year such as 1996 would be turned into 3896.
    if year < 100:
        year += 1900 if year > 90 else 2000

    return datetime(year=year, month=month, day=day, hour=hr, minute=mn, second=sec)

# Step through the SB catalog and try to find matching intervals in the LASCO
# C2 catalog.
#
# For every SBO event the first File_Seq_Size C2 frames between the event's
# start and end are collected (type 'cme'), preceded by the last C2 frame found
# in the three hours up to the event start (type 'bkgnd'), if there is one.
# Events with fewer than File_Seq_Size frames are skipped. Each collected frame
# carries the event's CPA and WIDTH values.
catalog_columns = list(lasco_catalog.columns)
c2_data = { col:[] for col in catalog_columns }
c2_data['CPA'] = []
c2_data['width'] = []
c2_data['type'] = []

for idx, sb_row in sb_data.iterrows():

    try:
        # find sb start/end datetime
        sb_start = datetime_for_row(sb_row['Date_Start'], sb_row['Time_Start'])
        sb_end = datetime_for_row(sb_row['Date_End'], sb_row['Time_End'])

        sb_start_str = str(sb_start)
        sb_end_str = str(sb_end)

        # find matching files
        matches = C2_lasco_catalog.loc[sb_start_str:sb_end_str][:File_Seq_Size]
        if len(matches) < File_Seq_Size:
            print ("skipping, too few files in sequence (need %s)" % File_Seq_Size)
            continue

        # background: the most recent frame within 3 hours before the start
        sb_bg_start = str(sb_start - timedelta(hours=3.))
        bgnd_matches = C2_lasco_catalog.loc[sb_bg_start:sb_start_str][-1:]

        # Convert the event metadata once, before anything is recorded.
        cpa = int(sb_row['CPA'])
        width = int(sb_row['WIDTH'])

        # Stage this event's rows locally. Appending straight to c2_data
        # would leave its lists with unequal lengths if anything failed part
        # way through, which makes the final DataFrame construction fail.
        event_rows = []
        for frames, frame_type in ((bgnd_matches, 'bkgnd'), (matches, 'cme')):
            for _, row in frames.iterrows():
                event_row = {col: row[col] for col in catalog_columns}
                event_row.update(CPA=cpa, width=width, type=frame_type)
                event_rows.append(event_row)

    except Exception as ex:
        # Best effort: a bad catalog row must not abort the whole run, but
        # report why it was dropped so the problem can be traced.
        print (f'Failed to parse/use row:{idx} ({type(ex).__name__}: {ex})')
        continue

    # the event was processed cleanly -- commit all of its rows together
    for event_row in event_rows:
        for col, value in event_row.items():
            c2_data[col].append(value)

sb_cme_c2_data = pd.DataFrame(c2_data)
sb_cme_c2_data

skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequence (need 10)
skipping, too few files in sequenc

Unnamed: 0,filename,date-obs,time-obs,NumCols,NumRows,Filter,Polarizer,expTime,telescope,datetime,CPA,width,type
0,25000477.fts,1996/02/11,17:20:42,1024,1024,Orang,Clear,25.1,C2,1996-02-11 17:20:42,91,53,bkgnd
1,25000478.fts,1996/02/11,20:04:38,1024,1024,Orang,Clear,25.1,C2,1996-02-11 20:04:38,91,53,cme
2,25000479.fts,1996/02/11,21:06:07,1024,1024,Orang,Clear,25.0,C2,1996-02-11 21:06:07,91,53,cme
3,25000480.fts,1996/02/11,22:20:57,1024,1024,Orang,Clear,25.1,C2,1996-02-11 22:20:57,91,53,cme
4,25000481.fts,1996/02/11,23:35:27,1024,1024,Orang,Clear,25.3,C2,1996-02-11 23:35:27,91,53,cme
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9693,25574989.fts,2015/12/22,23:57:43,1024,1024,Orang,Clear,26.2,C2,2015-12-22 23:57:43,181,130,cme
9694,25574990.fts,2015/12/23,00:09:43,1024,1024,Orang,Clear,25.1,C2,2015-12-23 00:09:43,181,130,cme
9695,25574991.fts,2015/12/23,00:21:42,1024,1024,Orang,Clear,26.2,C2,2015-12-23 00:21:42,181,130,cme
9696,25574992.fts,2015/12/23,00:33:43,1024,1024,Orang,Clear,25.1,C2,2015-12-23 00:33:43,181,130,cme


In [10]:
# write the matched event frames to CSV without the row index
# NOTE(review): CME_Catalog_Filename is defined in the first cell but this
# write uses a different hard-coded path -- confirm which one is intended
sb_cme_c2_data.to_csv(path_or_buf='../sb_cme_c2_events.csv', index=False)

In [11]:
#sb_cme_c2_data[10:30]