# CDAW CME Catalog Merge To LASCO Catalog
This notebook merges the CDAW CME event catalog with the LASCO image catalog, producing a merged CME catalog that lists the LASCO image filenames associated with each event.

In [1]:
import requests
import pandas as pd

# When True, the LASCO image-header catalog is downloaded again and written to
# LASCO_Catalog_Filename before it is read; when False the download is skipped
# and the file already on disk is used.
Need_To_Redownload = True
# NOTE(review): not referenced by any of the cells visible here -- confirm it is still needed.
CDAW_Catalog_Filename = '../cdaw_catalog.txt'
# Local cache of the LASCO image-header catalog (read with pd.read_fwf).
LASCO_Catalog_Filename = '../lasco_img_catalog_lvl_1.txt'
# CSV of CME events read with pd.read_csv.
# NOTE(review): the filename is spelled 'catlog' -- confirm it matches the file on disk.
CME_Catalog_Filename = '../cme_event_catlog.txt'

In [2]:
# Pull the LASCO image-header catalog to local disk (only when a refresh is requested).
if Need_To_Redownload:

    # download LASCO catalog now
    lasco_hdr_url = 'https://lasco-www.nrl.navy.mil/lz/img_hdr.txt'
    response = requests.get(lasco_hdr_url, allow_redirects=True)

    # Fail loudly on an HTTP error instead of silently caching an error page
    # as if it were the catalog.
    response.raise_for_status()

    # Cache locally; the context manager guarantees the handle is flushed and
    # closed even if the write fails part-way.
    with open(LASCO_Catalog_Filename, 'wb') as catalog_file:
        catalog_file.write(response.content)


In [3]:
# Load the CME event table and give the CSV's unnamed leading column a
# meaningful name ('CDAW_Id').
unnamed_to_id = {'Unnamed: 0': 'CDAW_Id'}
cme_data = pd.read_csv(CME_Catalog_Filename).rename(columns=unnamed_to_id)
cme_data

Unnamed: 0,CDAW_Id,Date,Time,Central,Width,Linear,2nd,order,speed,Accel,Mass,Kinetic,MPA,Remarks,Duration_C2,Duration_C3,StartTime_C3_Delta
0,5,1996/01/31,06:52:13,274,47,158,219,99,0,-12.3*,2.5e+14*,3.2e+28*,272,Poor Event; Only 3 points,26582.278481,116518.987342,9746.835443
1,8,1996/02/12,05:47:26,91,53,160,100,211,236,2.0*,1.3e+15,1.6e+29,92,Poor Event,26250.000000,115062.500000,9625.000000
2,12,1996/03/02,04:11:53,88,36,108,95,120,177,1.0*,1.5e+14,8.7e+27,86,Poor Event,38888.888889,170462.962963,14259.259259
3,16,1996/03/07,07:51:56,91,43,60,60,61,75,0.1*,-------,-------,87,Poor Event,70000.000000,306833.333333,25666.666667
4,18,1996/03/09,04:27:49,84,39,93,102,82,0,-1.1*,-------,-------,82,Only 3 points,45161.290323,197956.989247,16559.139785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7570,30300,2020/05/26,00:36:06,255,95,142,75,216,289,3.2*,-------,-------,251,Poor Event,29577.464789,129647.887324,10845.070423
7571,30301,2020/05/26,10:36:05,242,36,172,159,185,271,2.0*,-------,-------,262,Poor Event,24418.604651,107034.883721,8953.488372
7572,30303,2020/05/27,06:00:06,83,17,108,89,129,222,1.8*,-------,-------,89,Poor Event,38888.888889,170462.962963,14259.259259
7573,30309,2020/05/28,03:12:09,87,19,173,132,217,406,6.6*,-------,-------,87,Poor Event,24277.456647,106416.184971,8901.734104


In [4]:
# Now work to merge in information from the LASCO image catalog.
# The file is fixed-width text without a usable header row, so the column
# names are supplied explicitly.
# (The former `index=False` argument was dropped: it is a `to_csv` option, not
# a documented `read_fwf` parameter.)
lasco_catalog = pd.read_fwf(LASCO_Catalog_Filename,
                            names=['filename', 'date-obs', 'time-obs','telescope', 'expTime',
                                   'NumCols', 'NumRows', 'StartCol', 'StartRow', 'Filter', 
                                   'Polarizer','Observ.Seq', 'LEB Program', 'junk'])

In [5]:
# Display the parsed catalog to eyeball the inferred columns (the notebook
# output shows a truncated preview of the frame).
lasco_catalog

Unnamed: 0,filename,date-obs,time-obs,telescope,expTime,NumCols,NumRows,StartCol,StartRow,Filter,Polarizer,Observ.Seq,LEB Program,junk
0,12000001.fts,1995/12/08,02:05:12,C1,0.0,512,512,20,1,Ca XV,Clear,Dark,0.0,1.0
1,12000002.fts,1995/12/08,02:12:28,C1,1.9,512,512,20,1,Ca XV,Clear,Dark,0.0,1.0
2,22000001.fts,1995/12/08,02:15:46,C2,0.0,512,512,20,1,Blue,Clear,Dark,0.0,1.0
3,22000002.fts,1995/12/08,02:19:24,C2,1.9,512,512,20,1,Blue,Clear,Dark,0.0,1.0
4,32000001.fts,1995/12/08,02:21:56,C3,0.0,512,512,20,1,Clear,Clear,Dark,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2147500,32633210.fts,2020/08/31,23:18:05,C3,17.6,1024,1024,20,1,Clear,Clear,Normal,0.0,0.0
2147501,22782448.fts,2020/08/31,23:24:05,C2,25.1,1024,1024,20,1,Orange,Clear,Normal,0.0,9.0
2147502,32633211.fts,2020/08/31,23:30:05,C3,17.6,1024,1024,20,1,Clear,Clear,Normal,0.0,0.0
2147503,22782449.fts,2020/08/31,23:36:05,C2,25.1,1024,1024,20,1,Orange,Clear,Normal,0.0,9.0


In [6]:
# Drop C1 and EIT observations (this speeds up the datetime calc below).
# A single `isin` mask replaces two successive filter passes. `.copy()` makes
# the result an independent frame, so adding a column to it in a later cell
# does not raise SettingWithCopyWarning.
excluded_telescopes = ['C1', 'EIT']
lasco_catalog = lasco_catalog[~lasco_catalog['telescope'].isin(excluded_telescopes)].copy()

# number of catalog rows that remain
len(lasco_catalog)

1416922

In [7]:
from datetime import datetime, date, time, timezone, timedelta

# Add a column of parsed timestamps for easier handling.
# The 'date-obs' (YYYY/MM/DD) and 'time-obs' (HH:MM:SS) strings are joined and
# parsed in one vectorised call rather than row by row with iterrows(), which
# is very slow on a catalog of this size. The explicit format keeps parsing
# strict: a malformed entry raises instead of being guessed at.
obs_timestamps = pd.to_datetime(
    lasco_catalog['date-obs'] + ' ' + lasco_catalog['time-obs'],
    format='%Y/%m/%d %H:%M:%S',
)

# assign() builds a new frame instead of writing into what may be a filtered
# slice of the original catalog (avoids SettingWithCopyWarning).
lasco_catalog = lasco_catalog.assign(datetime=obs_timestamps)
lasco_catalog

Unnamed: 0,filename,date-obs,time-obs,telescope,expTime,NumCols,NumRows,StartCol,StartRow,Filter,Polarizer,Observ.Seq,LEB Program,junk,datetime
2,22000001.fts,1995/12/08,02:15:46,C2,0.0,512,512,20,1,Blue,Clear,Dark,0.0,1.0,1995-12-08 02:15:46
3,22000002.fts,1995/12/08,02:19:24,C2,1.9,512,512,20,1,Blue,Clear,Dark,0.0,1.0,1995-12-08 02:19:24
4,32000001.fts,1995/12/08,02:21:56,C3,0.0,512,512,20,1,Clear,Clear,Dark,0.0,1.0,1995-12-08 02:21:56
5,32000002.fts,1995/12/08,02:25:57,C3,1.9,512,512,20,1,Clear,Clear,Dark,0.0,1.0,1995-12-08 02:25:57
7,22000003.fts,1995/12/08,21:43:48,C2,2.9,1024,1024,20,1,Lens,Clear,Lamp,0.0,0.0,1995-12-08 21:43:48
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2147500,32633210.fts,2020/08/31,23:18:05,C3,17.6,1024,1024,20,1,Clear,Clear,Normal,0.0,0.0,2020-08-31 23:18:05
2147501,22782448.fts,2020/08/31,23:24:05,C2,25.1,1024,1024,20,1,Orange,Clear,Normal,0.0,9.0,2020-08-31 23:24:05
2147502,32633211.fts,2020/08/31,23:30:05,C3,17.6,1024,1024,20,1,Clear,Clear,Normal,0.0,0.0,2020-08-31 23:30:05
2147503,22782449.fts,2020/08/31,23:36:05,C2,25.1,1024,1024,20,1,Orange,Clear,Normal,0.0,9.0,2020-08-31 23:36:05


In [8]:
# Merge data, using the CME events to look up LASCO filenames.
# For every CME event, collect the C2 and C3 catalog rows whose observation
# time falls inside the event's C2 / C3 time window.

lasco_keepCols = ['filename', 'telescope', 'NumCols', 'NumRows', 'Filter', 'Polarizer', 'datetime', 'expTime']
cdaw_keepCols = ['Remarks', 'Central', 'Width', 'Linear', 'CDAW_Id']

# column name -> list of values; one entry per matched (event, image) pair
merged_data = {col: [] for col in lasco_keepCols + cdaw_keepCols}

# split up catalog by C2 and C3
c2_lasco = lasco_catalog[lasco_catalog['telescope']=='C2']
c3_lasco = lasco_catalog[lasco_catalog['telescope']=='C3']


def _append_frames_in_window(telescope_catalog, window_start, window_end, cme_event):
    """Append the catalog rows inside one time window to ``merged_data``.

    Parameters
    ----------
    telescope_catalog : DataFrame with a 'datetime' column and the columns
        listed in ``lasco_keepCols``.
    window_start, window_end : datetime bounds; a row matches when
        ``window_start < datetime <= window_end``.
    cme_event : row of ``cme_data``; its ``cdaw_keepCols`` values are repeated
        once per matching catalog row.
    """
    obs_times = telescope_catalog['datetime']
    in_window = telescope_catalog[(obs_times > window_start) & (obs_times <= window_end)]

    # Extend column-wise instead of iterating over the matching rows one by one.
    for col in lasco_keepCols:
        merged_data[col].extend(in_window[col].tolist())
    for col in cdaw_keepCols:
        merged_data[col].extend([cme_event[col]] * len(in_window))


for _, event in cme_data.iterrows():

    # grab date-time-duration info for our event
    cme_start_date = [int(t) for t in event['Date'].split('/')]
    cme_start_time = [int(t) for t in event['Time'].split(':')]
    # float() on all three so timedelta always receives a plain Python float
    cme_c3_delta = timedelta(seconds=float(event['StartTime_C3_Delta']))
    cme_c2_duration = timedelta(seconds=float(event['Duration_C2']))
    cme_c3_duration = timedelta(seconds=float(event['Duration_C3']))

    # convert to Python datetime for easier slicing
    cme_date = date(year=cme_start_date[0], month=cme_start_date[1], day=cme_start_date[2])
    cme_time = time(hour=cme_start_time[0], minute=cme_start_time[1], second=cme_start_time[2])

    # C2 window starts at the catalogued CME time
    c2_start = datetime.combine(cme_date, cme_time)
    c2_end   = c2_start + cme_c2_duration

    # C3 window is offset from the C2 start by the event's StartTime_C3_Delta
    c3_start = c2_start + cme_c3_delta
    c3_end   = c3_start + cme_c3_duration

    # C2 entries first, then C3 entries, for each event
    _append_frames_in_window(c2_lasco, c2_start, c2_end, event)
    _append_frames_in_window(c3_lasco, c3_start, c3_end, event)


# convert results to DataFrame
result = pd.DataFrame(merged_data)
result

Unnamed: 0,filename,telescope,NumCols,NumRows,Filter,Polarizer,datetime,expTime,Remarks,Central,Width,Linear,CDAW_Id
0,22000294.fts,C2,1024,1024,Orange,Clear,1996-01-31 08:01:38,25.4,Poor Event; Only 3 points,274,47,158,5
1,22000295.fts,C2,1024,1024,Orange,Clear,1996-01-31 09:11:14,25.1,Poor Event; Only 3 points,274,47,158,5
2,22000296.fts,C2,1024,1024,Orange,Clear,1996-01-31 10:20:39,25.1,Poor Event; Only 3 points,274,47,158,5
3,22000297.fts,C2,1024,1024,Orange,Clear,1996-01-31 11:30:19,25.9,Poor Event; Only 3 points,274,47,158,5
4,22000298.fts,C2,1024,1024,Orange,Clear,1996-01-31 12:40:17,25.1,Poor Event; Only 3 points,274,47,158,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
568527,32623406.fts,C3,1024,1024,Clear,Clear,2020-05-30 01:54:06,17.6,Poor Event,260,23,187,30311
568528,32623407.fts,C3,1024,1024,Clear,Clear,2020-05-30 02:06:05,18.7,Poor Event,260,23,187,30311
568529,32623408.fts,C3,1024,1024,Clear,Clear,2020-05-30 02:18:05,17.5,Poor Event,260,23,187,30311
568530,32623409.fts,C3,1024,1024,Clear,Clear,2020-05-30 02:30:06,17.6,Poor Event,260,23,187,30311


In [9]:
# Persist the merged catalog as CSV. With to_csv's defaults the row index is
# written too, as a leading column with an empty header.
result.to_csv('../merged_cdaw_lasco_catalog.csv')

In [10]:
# Quick sanity check of the merge: isolate the rows that came from the C2
# telescope and inspect them.
is_c2_frame = result['telescope'] == 'C2'
C2_data = result.loc[is_c2_frame]
C2_data

Unnamed: 0,filename,telescope,NumCols,NumRows,Filter,Polarizer,datetime,expTime,Remarks,Central,Width,Linear,CDAW_Id
0,22000294.fts,C2,1024,1024,Orange,Clear,1996-01-31 08:01:38,25.4,Poor Event; Only 3 points,274,47,158,5
1,22000295.fts,C2,1024,1024,Orange,Clear,1996-01-31 09:11:14,25.1,Poor Event; Only 3 points,274,47,158,5
2,22000296.fts,C2,1024,1024,Orange,Clear,1996-01-31 10:20:39,25.1,Poor Event; Only 3 points,274,47,158,5
3,22000297.fts,C2,1024,1024,Orange,Clear,1996-01-31 11:30:19,25.9,Poor Event; Only 3 points,274,47,158,5
4,22000298.fts,C2,1024,1024,Orange,Clear,1996-01-31 12:40:17,25.1,Poor Event; Only 3 points,274,47,158,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
568405,22770427.fts,C2,512,512,Orange,-60 D,2020-05-29 02:54:09,0.1,Poor Event,260,23,187,30311
568406,22770428.fts,C2,512,512,Orange,0 Deg,2020-05-29 02:58:01,0.1,Poor Event,260,23,187,30311
568407,22770429.fts,C2,512,512,Orange,+60 D,2020-05-29 03:01:52,0.1,Poor Event,260,23,187,30311
568408,22770430.fts,C2,1024,1024,Orange,Clear,2020-05-29 03:12:09,25.2,Poor Event,260,23,187,30311
