# Prepare Montserrat CSV catalog for AAA
---

**Last update:** 12/2021 - Glenn THOMPSON  
**Contact:** thompsong@usf.edu   
---

In [6]:
import os
import pandas as pd
import pickle
import numpy as np


# Change if you want your screen to keep quiet
# 0 = quiet
# 1 = in between
# 2 = detailed information
verbatim = 2

### PREPARE THE CATALOG DataFrame ###
SEISAN_DATA = os.path.join( os.getenv('HOME'),'DATA','MVO') # e.g. /home/user/seismo
pandaSeisDir = os.path.join(SEISAN_DATA, 'miniseed_c') # e.g. /home/user/seismo/pandaSeis
SEISAN_DB = 'MVOE_' # e.g. the seisan database name (e.g. MVOE_)
PROJECTDIR = os.path.join(os.getenv('HOME'),'src', 'kitchensinkGT', 'PROJECTS', 'MontserratML') # this dir
#csvfile_external = os.path.join(PROJECTDIR, 'MVO_labelled_events.csv')
csvfile_external = os.path.join(SEISAN_DATA, 'MachineLearning', SEISAN_DB, 'runAAA', 'MVOE_11_labelled_events.csv')
#csvfile_internal = './catalog/MVO_labelled_events_filtered.csv'
csvfile_internal = 'catalog/30_MVO_labelled_events_filtered.csv' # has to match that in AAA-master/config/general/newsettings_10.json
csvfile_internal = './AAA-master/MONTSERRAT/' + csvfile_internal
output_path_cat = csvfile_internal.replace('.csv', '.pd')
alltraces_file = '30_alltraceDFs.csv'

# copy the catalog CSV file from external to internal directory
os.system('cp %s %s' % (csvfile_external, csvfile_internal))

# read the catalog
cat = pd.read_csv(csvfile_external)

# count by (sub)class
print('%d events before filtering' % len(cat.index))
cat['class'].value_counts()

# fix f0 and f1 columns - not really sure what this does
#cat['f0'] = cat.apply(lambda x:eval(x['f0']), axis=1)
#cat['f1'] = cat.apply(lambda x:eval(x['f1']), axis=1)
cat['f0']=None
cat['f1']=None

#print(cat)

# List of traceID occurrences
frames = []
for i,row in cat.iterrows():

    # must have a symlink miniseed_c to correct directory in the current directory
    #if 'corrected_DSN_mseed' in cat.columns:
    #    mseedpath = 'miniseed_c' + row['corrected_DSN_mseed'].split('miniseed_c')[1]
    #else:
    #    mseedpath = os.path.join('miniseed_c', SEISAN_DB, "%d" % row['year'], "%02d" % int(row['month']), row['path'] + '.mseed')
    mseedpath = os.path.join('miniseed_c', SEISAN_DB, "%d" % row['year'], "%02d" % int(row['month']), row['path'] + '.mseed')
    print(mseedpath)
    if not os.path.exists(mseedpath):
        print('file not found :', mseedpath)
        print('Do you need to mount newton shares?')
        break
    cat.loc[i, 'path'] = mseedpath
    
    # load trace CSV file
    tracecsv = mseedpath.replace('.mseed','.csv')
    tracedf = pd.read_csv(tracecsv)
    tracedf['filetime'] = row['filetime']
    frames.append(tracedf)

# stitch all the trace CSV files together    
alltraces = pd.concat(frames, sort=True)
alltraces.to_csv(alltraces_file)

#alltraces.set_index('filetime', inplace=True) # we will need this later to remerge
#alltraces.sort_index(inplace=True)
print(alltraces['id'].value_counts())

# save the catalog to CSV and pickle file
cat.to_csv(csvfile_internal)

pickle.dump(cat, open(output_path_cat,'wb'))

522 events before filtering
miniseed_c/MVOE_/1996/10/9610-23-2235-49S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-24-0038-04S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-24-1647-18S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-25-0028-06S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-25-0753-29S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-25-1009-35S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-25-1406-09S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-26-0255-47S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-26-0533-53S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-26-0609-14S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-26-1647-36S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-27-0303-38S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-27-0523-04S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-27-0808-38S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-27-1024-38S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-28-0423-16S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-28-0805-50S.MVO_21_1.mseed
min

miniseed_c/MVOE_/1997/04/9704-03-0132-05S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-0155-44S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-0223-43S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-0305-53S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-05-1114-46S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-05-1156-54S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-07-0942-10S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-07-1352-44S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-07-1554-53S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-07-1834-13S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-08-0142-38S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-08-1247-36S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-08-1959-27S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-09-1734-49S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-09-1746-17S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-11-0319-47S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-11-0841-16S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-1

miniseed_c/MVOE_/1997/04/9704-29-1347-20S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-29-1449-48S.MVO_18_1.mseed
miniseed_c/MVOE_/1998/10/9810-10-0843-37S.MVO_13_1.mseed
miniseed_c/MVOE_/1998/10/9810-14-0328-19S.MVO_13_1.mseed
miniseed_c/MVOE_/1998/10/9810-23-1828-29S.MVO_13_1.mseed
miniseed_c/MVOE_/1998/10/9810-23-2038-23S.MVO_13_1.mseed
miniseed_c/MVOE_/2000/11/2000-11-20-0543-20S.MVO___019.mseed
miniseed_c/MVOE_/2000/11/2000-11-22-1445-31S.MVO___019.mseed
miniseed_c/MVOE_/2001/03/2001-03-06-2335-19S.MVO___019.mseed
miniseed_c/MVOE_/2001/03/2001-03-07-0220-45S.MVO___019.mseed
miniseed_c/MVOE_/2001/03/2001-03-07-0324-45S.MVO___019.mseed
miniseed_c/MVOE_/2001/03/2001-03-07-1231-42S.MVO___019.mseed
miniseed_c/MVOE_/2001/08/2001-08-01-0145-28S.MVO___018.mseed
miniseed_c/MVOE_/2001/08/2001-08-03-2134-36S.MVO___018.mseed
miniseed_c/MVOE_/2001/08/2001-08-04-0225-10S.MVO___018.mseed
miniseed_c/MVOE_/2001/08/2001-08-04-0634-46S.MVO___018.mseed
miniseed_c/MVOE_/2001/08/2001-08-06-0435-32S.MVO

miniseed_c/MVOE_/1996/10/9610-30-0239-56S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-30-2338-06S.MVO_21_1.mseed
miniseed_c/MVOE_/1996/10/9610-31-2029-24S.MVO_21_1.mseed
miniseed_c/MVOE_/1997/04/9704-01-0412-32S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-02-1433-40S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-02-2307-00S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-02-2308-37S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-02-2335-13S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-0508-48S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-1429-12S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-1934-25S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-1936-30S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-1947-17S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-2110-25S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-2114-32S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-2128-15S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-03-2129-20S.MVO_18_1.mseed
miniseed_c/MVOE_/1997/04/9704-0