In [None]:
import os, sys, glob
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from obspy.core import read, Stream
LIBpath = os.path.join( os.getenv('HOME'),'src','kitchensinkGT', 'LIB')
sys.path.append(LIBpath)
from metrics import process_trace
from libMVO import fix_trace_id, inventory_fix_id_mvo, load_mvo_inventory
import random

# Name of the Seisan database whose WAV directory tree is scanned.
SEISAN_DB = 'MVOE_'
# Root of the local Seisan data tree: $HOME/DATA/MVO (e.g. /home/user/DATA/MVO).
SEISAN_DATA = os.path.join(os.environ.get('HOME'), 'DATA', 'MVO')
# Calibration files are looked up in the CAL subdirectory of the data tree.
CALDIR = os.path.join(SEISAN_DATA, 'CAL')




def compare_accelerometer_calib(original_st, makeplot=False, f_lower=1.0, f_upper=10.0):
    """Estimate a relative calibration for each vertical accelerometer ('SNZ') trace.

    For every SNZ trace, all traces of the same station are cut to the window
    10-25 s after that trace's start time and band-passed in a sequence of
    narrow bands [f, 1.2*f], with f starting at f_lower and growing by 10% per
    step. In each band the station's first trace is differentiated, and the
    ratio of its maximum sample to the maximum sample of the station's second
    trace is recorded.

    NOTE(review): the comparison is positional. It presumes the station's first
    trace is the co-located velocity sensor and the second is the accelerometer;
    confirm against the trace order in the WAV files.

    Parameters
    ----------
    original_st : Stream
        Input stream; it is copied and never modified.
    makeplot : bool
        If True, draw one log-log panel per SNZ trace showing ratio vs frequency.
    f_lower : float
        Lower corner (Hz) of the first band.
    f_upper : float
        Bands are generated while their lower corner is below this value (Hz).
        For each trace the limit is additionally capped at 0.4 * sampling rate.

    Returns
    -------
    dict
        Station code -> mean ratio over all bands. The value is nan when no
        band fits below the cap. Stations with fewer than two traces are
        omitted because there is nothing to compare against.
    """
    st = original_st.copy()
    accel_traces = st.select(channel='SNZ')
    fh = plt.figure(figsize=(12, 8)) if makeplot else None
    av_calib = {}
    num_stations = len(accel_traces)
    for i, tr in enumerate(accel_traces):
        station = tr.stats.station
        # Cap the sweep for THIS trace only. Overwriting f_upper here would let
        # a low sampling rate on one station shrink the sweep for all later ones.
        band_limit = min(f_upper, tr.stats.sampling_rate * 0.4)

        # The time window does not depend on the band, so cut it once.
        window = st.select(station=station).copy().trim(
            starttime=tr.stats.starttime + 10, endtime=tr.stats.starttime + 25)
        if len(window) < 2:
            # No second trace at this station to compare against.
            continue

        freqs = []
        relative_amps = []
        low_cut = f_lower
        while low_cut < band_limit:
            # filter() works in place, so each band needs its own copy.
            band = window.copy()
            band.filter("bandpass", freqmin=low_cut, freqmax=low_cut * 1.2)
            diff_trace = band[0].copy().differentiate()
            relative_amps.append(np.max(diff_trace.data) / np.max(band[1].data))
            freqs.append(low_cut * 1.1)  # centre of the band [f, 1.2*f]
            low_cut *= 1.1

        # Avoid numpy's empty-mean warning when no band fits below the cap.
        av_calib[station] = np.mean(relative_amps) if relative_amps else float('nan')

        if makeplot:
            ax = fh.add_subplot(num_stations, 1, i + 1)
            ax.loglog(freqs, relative_amps)
            ax.set_title(station)
            ax.set_xlabel('Frequency (Hz)')
            ax.set_ylabel('Relative amplitude')
    if makeplot:
        plt.show()
    return av_calib



def test_correct_wavfile(st):
    """Fix trace IDs and apply calibration to every trace of `st`, in place.

    Steps, in order:
      1. fix_trace_id() is called on the whole stream with shortperiod=False.
      2. For each trace an inventory is loaded from CALDIR and handed to
         process_trace() with quality_threshold=-10.0.
      3. Channels whose code starts with 'SN' (accelerometers) additionally
         get a fixed calib of 27500000 which the samples are divided by.
      4. A per-trace summary and the stream itself are printed.

    NOTE(review): accelerometer traces are labelled 'm/s' although the calib
    was derived from differentiated waveforms - confirm the intended unit.
    NOTE(review): stats.quality_factor is presumably set by process_trace().
    """
    fix_trace_id(st, shortperiod=False)

    for trace in st:
        inventory = load_mvo_inventory(trace, CALDIR)
        process_trace(trace, inv=inventory, quality_threshold=-10.0)

        is_accelerometer = trace.stats.channel.startswith('SN')
        if not is_accelerometer:
            continue

        # approx calib from comparing with co-located differentiated LA100 waveforms
        trace.stats.calib = 27500000
        trace.data = trace.data / trace.stats.calib
        trace.stats.units = 'm/s'

    for trace in st:
        print(trace.id, trace.stats.calib, trace.stats.quality_factor)
    print(st)

###    
    
# Walk the Seisan WAV archive (YYYY/MM directories), correct a random sample of
# event files from each month, and accumulate two tables:
#   * original -> corrected trace IDs for every trace of every sampled file
#   * per-station accelerometer calibration estimates for files with an SNZ channel
# Both tables are rewritten to CSV after each month so partial results survive
# an interrupted run.
id_mapping_df = pd.DataFrame()
id_mapping_lod = []   # list of dicts, one per trace
acc_calibs_df = pd.DataFrame()
acc_calibs_lod = []   # list of dicts, one per WAV file with an SNZ channel
bad_wavs = []         # WAV files that could not be read or corrected
MAX_FILES_PER_MONTH = 5
MAX_FILES = 999
num_files = 0
# glob order is filesystem-dependent; sort so the archive is walked chronologically.
for yeardir in sorted(glob.glob(os.path.join(SEISAN_DATA, 'WAV', SEISAN_DB, '[12][0-9][0-9][0-9]'))):
    for monthdir in sorted(glob.glob(os.path.join(yeardir, '[01][0-9]'))):
        if num_files >= MAX_FILES:
            continue
        print(monthdir)
        wavfiles = glob.glob(os.path.join(monthdir, '*MVO*'))
        # choose up to MAX_FILES_PER_MONTH wavs randomly per month
        for wavfile in random.sample(wavfiles, min(MAX_FILES_PER_MONTH, len(wavfiles))):
            print(wavfile)
            try:
                # Reading is inside the try so one unreadable file cannot abort the scan.
                st = read(wavfile)
                st0 = st.copy()  # keep the untouched IDs for the mapping table
                test_correct_wavfile(st)
            except Exception as err:
                # Exception (not a bare except) lets Ctrl-C still stop the run.
                print('Failed to process %s: %r' % (wavfile, err))
                bad_wavs.append(wavfile)
            else:
                if 'SNZ' in [tr.stats.channel for tr in st]:
                    av_calib = compare_accelerometer_calib(st, makeplot=False, f_lower=2.0, f_upper=10.0)
                    if av_calib:
                        av_calib['wavfile'] = wavfile
                        acc_calibs_lod.append(av_calib)
                for i, tr in enumerate(st):
                    id_mapping_lod.append({'wav': wavfile, 'original_id': st0[i].id, 'corrected_id': tr.id})

            num_files += 1
            print('\n')
        # drop_duplicates() returns a new frame; keep the result.
        id_mapping_df = pd.DataFrame(id_mapping_lod).drop_duplicates()
        #id_mapping_df = id_mapping_df[['original_id','corrected_id']]
        id_mapping_df.to_csv('02_index_wavfile_original_traceID.csv', index=False)
        if acc_calibs_lod:
            acc_calibs_df = pd.DataFrame(acc_calibs_lod)
            acc_calibs_df.to_csv('accelerometer_calibs.csv', index=False)
print('WAV files that failed:')
for badwav in bad_wavs:
    print(badwav)

/Users/gt/DATA/MVO/WAV/MVOE_/1996/10
/Users/gt/DATA/MVO/WAV/MVOE_/1996/10/9610-30-1626-00S.MVO_21_1
Correcting MV.MBGA..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHZ.xml
Correcting MV.MBGA..BHN with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHN.xml
Correcting MV.MBGA..BHE with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHE.xml
Correcting MV.MBLG..SHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBLG..SHZ.xml
Correcting MV.MBLG..SNZ with /Users/gt/DATA/MVO/CAL/station.MV.MBLG..SHZ.xml
No matching response info found
Correcting MV.MBRY..SHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBRY..SHZ.xml
Correcting MV.MBRY..SNZ with /Users/gt/DATA/MVO/CAL/station.MV.MBRY..SHZ.xml
No matching response info found
Correcting MV.MBGE..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHZ.xml
Correcting MV.MBGE..BHN with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHN.xml
Correcting MV.MBGE..BHE with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHE.xml
Correcting MV.MBGH..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBG



/Users/gt/DATA/MVO/WAV/MVOE_/1996/10/9610-31-0922-51S.MVO_21_1
Correcting MV.MBGA..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHZ.xml
Correcting MV.MBGA..BHN with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHN.xml
Correcting MV.MBGA..BHE with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHE.xml
Correcting MV.MBLG..SHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBLG..SHZ.xml
Correcting MV.MBLG..SNZ with /Users/gt/DATA/MVO/CAL/station.MV.MBLG..SHZ.xml
No matching response info found
Correcting MV.MBRY..SHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBRY..SHZ.xml
Correcting MV.MBRY..SNZ with /Users/gt/DATA/MVO/CAL/station.MV.MBRY..SHZ.xml
No matching response info found
Correcting MV.MBGE..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHZ.xml
Correcting MV.MBGE..BHN with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHN.xml
Correcting MV.MBGE..BHE with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHE.xml
Correcting MV.MBGH..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGH..BHZ.xml
Correcting MV.MBGH..BHN 



/Users/gt/DATA/MVO/WAV/MVOE_/1996/11/9611-04-0847-10S.MVO_21_1
Correcting MV.MBGA..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHZ.xml
Correcting MV.MBGA..BHN with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHN.xml
Correcting MV.MBGA..BHE with /Users/gt/DATA/MVO/CAL/station.MV.MBGA..BHE.xml
Correcting MV.MBLG..SHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBLG..SHZ.xml
Correcting MV.MBLG..SNZ with /Users/gt/DATA/MVO/CAL/station.MV.MBLG..SHZ.xml
No matching response info found
Correcting MV.MBRY..SHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBRY..SHZ.xml
Correcting MV.MBRY..SNZ with /Users/gt/DATA/MVO/CAL/station.MV.MBRY..SHZ.xml
No matching response info found
Correcting MV.MBGE..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHZ.xml
Correcting MV.MBGE..BHN with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHN.xml
Correcting MV.MBGE..BHE with /Users/gt/DATA/MVO/CAL/station.MV.MBGE..BHE.xml
Correcting MV.MBGH..BHZ with /Users/gt/DATA/MVO/CAL/station.MV.MBGH..BHZ.xml
Correcting MV.MBGH..BHN 

In [2]:
# Load the accelerometer calibration table from 'accelerometer_calibs.csv' and
# show summary statistics of its columns as the cell output.
dfacc = pd.read_csv('accelerometer_calibs.csv')

dfacc.describe()

Unnamed: 0,wavfile
count,463
unique,463
top,/Users/gt/DATA/MVO/WAV/MVOE_/1996/10/9610-24-0...
freq,1
