In [4]:
#!/usr/bin/env python
################################
# General setup                #
################################

import os
import sys
import pandas as pd
import pickle
import numpy as np
sys.path.insert(0, './AAA-master/automatic_processing')
from config import Config
from features import FeatureVector
from tools import extract_features
import obspy
import datetime

### PREPARE THE CATALOG DataFrame ###
# This section only assembles path strings and constants; nothing is read from disk yet.
_home = os.getenv('HOME')

SEISAN_DB = 'MVOE_'  # the seisan database name (e.g. MVOE_)
SEISAN_DATA = os.path.join(_home, 'DATA', 'MVO')  # e.g. /home/user/seismo
pandaSeisDir = os.path.join(SEISAN_DATA, 'miniseed_c')  # e.g. /home/user/seismo/pandaSeis
PROJECTDIR = os.path.join(_home, 'src', 'kitchensinkGT', 'PROJECTS', 'MontserratML')  # this dir

# Labelled-event catalog as it lives inside the Seisan data tree
csvfile_external = os.path.join(
    SEISAN_DATA, 'MachineLearning', SEISAN_DB, 'runAAA', 'MVOE_11_labelled_events.csv'
)

# Catalog copy inside the AAA project tree. The relative part has to match the one in
# AAA-master/config/general/newsettings_10.json
csvfile_internal = './AAA-master/MONTSERRAT/' + 'catalog/30_MVO_labelled_events_filtered.csv'

# Same location as the internal catalog, with a .pd suffix instead of .csv
output_path_cat = csvfile_internal.replace('.csv', '.pd')
alltraces_file = '30_alltraceDFs.csv'

# Columns of the per-event trace CSV that get appended to every feature vector
metrics_to_add = [
    'bandratio_[0.8_4.0_16.0]',
    'bandratio_[1.0_6.0_11.0]',
    'bw_max',
    'bw_min',
    'kurtosis',
    'skewness',
    'peakF',
    'medianF',
]

# Console verbosity handed to Config and FeatureVector below.
# Lower it if you want your screen to keep quiet:
#   0 = quiet
#   1 = in between
#   2 = detailed information
verbatim = 1

# Initialise the project from its configuration file, then load the event catalog
config = Config('./AAA-master/config/general/newsettings_10.json', verbatim=verbatim)
config.readAndCheck()
cat = pd.read_csv(csvfile_internal)

# Feature-vector object built from the configuration
features = FeatureVector(config, verbatim=verbatim)

# Make sure the 'features' directory exists before anything is written into it
os.makedirs('features', exist_ok=True)

# Glenn. path has miniseed_c hardcoded at start. I want to change this to whatever the config says
WAVTOPDIR = config.data_to_analyze['path_to_data']

# Walk the catalog one event at a time. For every event whose MiniSEED file exists,
# stage a local copy of it and, unless COPY_ONLY is set, compute one feature vector
# per trace and pickle it under 'features/'.
import shutil  # needed only by this loop, to stage files without going through a shell

# True reproduces the original "SCAFFOLD, JUST TO GET DATA" short-circuit: files are
# copied and nothing else happens. Set to False to run spike checks and feature extraction.
COPY_ONLY = True

catalog_length = len(cat.index)  # was referenced in the progress message but never defined

for i, (_, event) in enumerate(cat.iterrows()):
    if verbatim > 1:
        print('Processing waveform %d of %d' % (i, catalog_length))

    # Build the event start time. Adding the seconds as a timedelta rounds to the nearest
    # microsecond (int() truncation turned 12.29 s into 12.289999 s) and also copes with a
    # seconds value of 60.0, which datetime() itself would reject.
    secondFloat = float(event['second'])
    tStartSignature = datetime.datetime(int(event['year']),
                                        int(event['month']),
                                        int(event['day']),
                                        int(event['hour']),
                                        int(event['minute'])) \
        + datetime.timedelta(seconds=secondFloat)
    duration = event['length']

    # Catalog paths start with a hard-coded 'miniseed_c'; point them at the configured tree
    mseedpath = event['path'].replace('miniseed_c', WAVTOPDIR)
    mseedbase = os.path.basename(mseedpath)
    mseedlocal = mseedpath.replace('miniseed_c', 'miniseed_c_local')

    # Check the MSEED file exists; nothing can be done for this event without it
    if not os.path.isfile(mseedpath):
        print("File not found: ",mseedpath)
        continue

    # Copy it to local only if we do not have it already. This guard also covers the case
    # where mseedlocal and mseedpath are the same path (no 'miniseed_c' left to replace).
    if not os.path.isfile(mseedlocal):
        mseedlocaldir = os.path.dirname(mseedlocal)
        os.makedirs(mseedlocaldir, exist_ok=True)
        # shutil.copy instead of os.system('cp ...'): no shell quoting problems with
        # unusual path names, and a failed copy raises instead of passing silently
        shutil.copy(mseedpath, mseedlocal)

    if COPY_ONLY:
        continue

    print('Reading ',mseedpath)
    st = obspy.read(mseedpath)

    ###### SPIKES CHECK - won't be needed when we reprocess all data #####
    # The reason we do the spike check on the raw WAV file is
    # because filters run to produce the MSEED file distort the spike
    # NOTE(review): check_for_spikes and fix_trace_id are not defined in this cell; they
    # must already be in scope (e.g. from an earlier cell) when COPY_ONLY is False.
    wavpath = mseedpath.replace('miniseed_c', 'WAV').replace('.mseed','')
    rawst = obspy.read(wavpath)  # only 'import obspy' is visible here, so qualify the call
    for tr in rawst:
        check_for_spikes(tr)

    good_traces = 0
    trace_ids_to_eliminate = []
    fix_trace_id(rawst)
    # NOTE(review): check_for_spikes runs a second time here, after fix_trace_id.
    # Kept as in the original because its side effects are not visible from this cell.
    for tr in rawst:
        check_for_spikes(tr)
        if tr.stats.quality_factor > 1.0:
            good_traces += 1
        else:
            trace_ids_to_eliminate.append(tr.id)

    # Iterate over a snapshot so that removing from st cannot disturb the iteration
    for tr in list(st):
        if tr.id in trace_ids_to_eliminate:
            st.remove(tr)
    ################ END OF SPIKES CHECK ########################

    if len(st)==0:
        continue

    # Per-event CSV holding the precomputed trace metrics, stored next to the MSEED file
    tracecsv = mseedpath.replace('.mseed','.csv')
    if not os.path.isfile(tracecsv):
        print(tracecsv, ' not found')
        continue
    print('Reading ',tracecsv)
    tracedf = pd.read_csv(tracecsv)

    for tr in st:
        print('Processing ', tr.id)
        featurespkl = os.path.join('features',mseedbase.replace('.mseed', '.%s.pkl' % tr.id))
        if os.path.exists(featurespkl):
            print(featurespkl, ' exists. Skipping')
            continue

        # Get information about recording
        fs = tr.stats['sampling_rate']
        length_n = tr.stats['npts'] # only change from read_ubinas

        # Skip traces sampled below 70 Hz or with fewer than 1000 samples
        if fs < 70.0 or length_n < 1000:
            continue

        # Rows of the metrics CSV that belong to this trace. Without any, the pickled
        # vector would silently come out shorter than the others, so skip the trace.
        # NOTE(review): more than one matching row still appends every matching value,
        # exactly as before - confirm ids are unique within a trace CSV.
        thistracedf = tracedf[tracedf['id']==tr.id]
        if thistracedf.empty:
            print('No precomputed metrics for ', tr.id, ' in ', tracecsv)
            continue

        # Preprocessing & features extraction
        y = [tr.data]
        featuresList = extract_features(config, y, features, fs)

        # Append the precomputed metrics, then the bandwidth derived from them
        for col in metrics_to_add:
            featuresList = np.append(featuresList, thistracedf[col])
        bandwidth = thistracedf['bw_max'] - thistracedf['bw_min']
        featuresList = np.append(featuresList, bandwidth)

        print('Writing ',featurespkl)
        with open(featurespkl, 'wb') as f:
            pickle.dump(featuresList, f)

./AAA-master/config/specific/usecase1_continuous_classification/usecase1_EXAMPLE.json
Welcome to this automatic analysis architecture
Copyright: Marielle MALFANTE - GIPSA-Lab
Univ. Grenoble Alpes, CNRS, Grenoble INP, GIPSA-lab, 38000 Grenoble, France

 *** PROJECT CONFIGURATION 10 ***  
Configuration object from <path> ./AAA-master/config/general/newsettings_10.json, <configuration_number> 10,
 <general> {'project_root': './AAA-master/', 'analysis_type': 'continuous', 'path_to_specific_settings_file': 'config/specific/usecase1_continuous_classification/usecase1_EXAMPLE.json', 'path_to_res': 'res/', 'path_to_visuals': 'fig/', 'path_to_res_to_review': 'res_to_review/'},
 <application> {'name': 'montserrat'},
 <preprocessing> {'energy_norm': True},
 <learning> {'algo': RandomForestClassifier(criterion='entropy'), 'cv': StratifiedShuffleSplit(n_splits=50, random_state=None, test_size=0.3,
            train_size=0.7), 'path_to_catalogue': 'catalog/30_MVO_labelled_events_filtered.pd'},
 <fea