# Prepare Montserrat CSV catalog for AAA
---

**Last update:** 12/2021 - Glenn THOMPSON  
**Contact:** thompsong@usf.edu   
---

In [None]:
import os
import pickle
import shutil

import numpy as np
import pandas as pd


# Verbosity of the screen output. Change if you want your screen to keep quiet
# 0 = quiet
# 1 = in between
# 2 = detailed information
verbatim = 2

### PREPARE THE CATALOG DataFrame ###
# Resolve the home directory with expanduser() rather than os.getenv('HOME'):
# getenv returns None when HOME is unset, and os.path.join(None, ...) raises TypeError.
home_dir = os.path.expanduser('~')
SEISAN_DATA = os.path.join(home_dir, 'DATA', 'MVO') # e.g. /home/user/seismo
pandaSeisDir = os.path.join(SEISAN_DATA, 'miniseed_c') # e.g. /home/user/seismo/pandaSeis
SEISAN_DB = 'MVOE_' # e.g. the seisan database name (e.g. MVOE_)
PROJECTDIR = os.path.join(home_dir, 'src', 'kitchensinkGT', 'PROJECTS', 'MontserratML') # this dir

# Labelled-events catalog that is read as input (lives outside this project)
#csvfile_external = os.path.join(PROJECTDIR, 'MVO_labelled_events.csv')
csvfile_external = os.path.join(SEISAN_DATA, 'MachineLearning', SEISAN_DB, 'runAAA', 'MVOE_11_labelled_events.csv')

# Catalog path inside the AAA tree. The relative part is kept on its own line
# because it has to match the value configured for AAA (see comment below).
#csvfile_internal = './catalog/MVO_labelled_events_filtered.csv'
csvfile_internal = 'catalog/30_MVO_labelled_events_filtered.csv' # has to match that in AAA-master/config/general/newsettings_10.json
csvfile_internal = './AAA-master/MONTSERRAT/' + csvfile_internal

# Pickled copy of the catalog: same path as the internal CSV with a .pd extension
output_path_cat = csvfile_internal.replace('.csv', '.pd')

# Output file for the concatenated per-event trace tables
alltraces_file = '30_alltraceDFs.csv'

# Copy the catalog CSV file from the external to the internal directory.
# shutil.copy raises if the source or the destination directory is missing,
# whereas os.system('cp ...') only returned an exit status that was ignored
# and passed the paths through a shell (breaking on spaces in a path).
shutil.copy(csvfile_external, csvfile_internal)

# read the catalog
cat = pd.read_csv(csvfile_external)

# count by (sub)class
print('%d events before filtering' % len(cat.index))
# A bare expression in the middle of a cell displays nothing, so print the counts explicitly
print(cat['class'].value_counts())

# f0 and f1 columns: the earlier approach (kept below for reference) evaluated the
# stored strings; the columns are now simply reset to None
#cat['f0'] = cat.apply(lambda x:eval(x['f0']), axis=1)
#cat['f1'] = cat.apply(lambda x:eval(x['f1']), axis=1)
cat['f0'] = None
cat['f1'] = None

# Build one trace DataFrame per catalog event and record the miniseed path
# of each event in the catalog's 'path' column.
frames = []
n_missing = 0  # events whose miniseed file could not be found
for i, row in cat.iterrows():

    # must have a symlink miniseed_c to correct directory in the current directory;
    # the stored path is rewritten to be relative to that symlink
    mseedpath = 'miniseed_c' + row['corrected_DSN_mseed'].split('miniseed_c')[1]
    if verbatim > 1:
        print(mseedpath)
    if not os.path.exists(mseedpath):
        # Skip only this event. Aborting the loop here would silently drop every
        # later event from the trace table and leave it without a path.
        print('file not found :', mseedpath)
        n_missing += 1
        continue
    cat.loc[i, 'path'] = mseedpath

    # load the trace CSV file that sits next to the miniseed file
    tracecsv = mseedpath.replace('.mseed', '.csv')
    tracedf = pd.read_csv(tracecsv)
    # tag every trace row with the event's filetime
    tracedf['filetime'] = row['filetime']
    frames.append(tracedf)

if n_missing:
    print('%d of %d events skipped: miniseed file not found' % (n_missing, len(cat.index)))

# stitch all the trace CSV files together
if frames:
    alltraces = pd.concat(frames, sort=True)
    alltraces.to_csv(alltraces_file)

    #alltraces.set_index('filetime', inplace=True) # we will need this later to remerge
    #alltraces.sort_index(inplace=True)
    print(alltraces['id'].value_counts())
else:
    # pd.concat raises ValueError on an empty list; keep going so the catalog is still saved
    alltraces = pd.DataFrame()
    print('no trace CSV files were loaded; %s not written' % alltraces_file)

# save the catalog to CSV and pickle file
cat.to_csv(csvfile_internal)

# use a context manager so the pickle file is flushed and closed deterministically
with open(output_path_cat, 'wb') as fh:
    pickle.dump(cat, fh)