In [1]:
import os
import glob
import obspy
from obspy.clients.filesystem.sds import Client as sdsclient
from SAM import RSAM
srcdir = '/data/Montserrat/LaharStudy'
SDS_DIR = '/data/SDS'
SAM_DIR = '/data/SAM2'
startTime = obspy.core.UTCDateTime(2018,3,17) # data start on 2018/03/17 but these are BLV stations before being renamed (not we do not seem to have the 2016-2018 data)
endTime = obspy.core.UTCDateTime(2020,3,21) # 2020/03/20 is end, but i believe there are still unconverted files in nanometrics format 
network = 'MV'
sampling_interval = 60


In [2]:
# Compute RSAM in 1-day chunks for multiple network-station-location-channel's
mySDSclient = sdsclient(SDS_DIR, sds_type='D', format='MSEED', fileborder_seconds=30, fileborder_samples=5000)

In [3]:
# Just some sanity checking that the SDSclient is connecting to data - but this cell does not seem to return valid results
print(mySDSclient)
try:
    print(mySDSclient.get_all_nslc(sds_type='D'), datetime=obspy.UTCDateTime(2018,4,3,2,25,0))# outputs a list of (net, sta, loc, chan) available
except:
    print(mySDSclient.get_all_stations(sds_type='D'))
percent_available, num_gaps = mySDSclient.get_availability_percentage(network, 'MTB1', '00', 'HH*', startTime, endTime, sds_type='D')
print(percent_available, num_gaps) # looks like 92.3% availability for combined ALEJ channels from 2012-04-01 to 2012-07-01

<obspy.clients.filesystem.sds.Client object at 0x7047587b8cd0>
[('1R', 'BHP1'), ('1R', 'BHP2'), ('1R', 'BHP3'), ('1R', 'BHP4'), ('1R', 'BHP5'), ('1R', 'BHP6'), ('1R', 'BHP7'), ('1R', 'BHP8'), ('1R', 'FIREP'), ('1R', 'TANKP'), ('2018', 'AIRS'), ('2018', 'ANWB'), ('2018', 'GERD'), ('2018', 'GRGR'), ('2018', 'MBBY'), ('2018', 'MBFL'), ('2018', 'MBFR'), ('2018', 'MBGB'), ('2018', 'MBGH'), ('2018', 'MBHA'), ('2018', 'MBLG'), ('2018', 'MBLY'), ('2018', 'MBRY'), ('2018', 'MBWH'), ('2018', 'MBWW'), ('2018', 'MSS1'), ('2018', 'MTGH'), ('2018', 'OLV1'), ('2018', 'OLV2'), ('2018', 'TRNT'), ('2019', 'ANWB'), ('2019', 'GERD'), ('2019', 'GRGR'), ('2019', 'MBBY'), ('2019', 'MBFL'), ('2019', 'MBGB'), ('2019', 'MBGH'), ('2019', 'MBHA'), ('2019', 'MBLY'), ('2019', 'MBRY'), ('2019', 'MBWH'), ('2019', 'MSS1'), ('2019', 'MTGH'), ('2019', 'OLV1'), ('2019', 'OLV2'), ('AK', 'ACH'), ('AK', 'AHB'), ('AK', 'AKS'), ('AK', 'AKV'), ('AK', 'ANCK'), ('AK', 'ANIA'), ('AK', 'ANM'), ('AK', 'ANNE'), ('AK', 'ANNW'), ('AK'

In [None]:
def isSdsDayAlreadyInRSAM(st, sdsclient, network, daytime, SAM_DIR, ext, sampling_interval): # not working. always returns Stream length 0. problem with len(rsamObj)? Also slow

    # we are not overwriting existing SAM data. get a list of SDS files for this day.
    filepattern = sdsclient._get_filename(network, "*", "*", "[HBESCDFG]*", daytime + 3600)
    filenames = sorted(glob.glob(filepattern))

    # for any SDS file time ranges we have where there are no RSAM data for that ID, we want to add the corresponding Trace data to the Stream for this day
    if len(filenames)>0:
        # we have SDS files. But we need to check for existing RSAM files
        for filename in filenames:
            # Read MiniSEED header.
            try:
                trheader = obspy.read(filename, format='MSEED', headeronly=True)[0]
            except:
                print(f'- Cannot read SDS file {filename}')
            else:
                rsamfile = RSAM.get_filename(SAM_DIR, trheader.id, yyyy, sampling_interval, ext, name='RSAM')
                rsamObj = RSAM.read(trheader.stats.starttime, trheader.stats.endtime, SAM_DIR=SAM_DIR, ext=ext)
                if len(rsamObj)==0:
                    print(f'- {filename}, {rsamfile}, {len(rsamObj)}')
                    print(f'- Reading {filename}')
                    tr = obspy.read(filename, format='MSEED')[0]
                    st.append(tr)

import numpy as np
def piecewise_detrend(st, null_value=0, fill_value=np.nan, detrend='linear', highpass=0.02): 
    # takes a Stream object nominally from an SDSclient that has gaps marked by zeros, and applies

    if not detrend and not highpass:
        return

    # split into contiguous segments
    isSplit = False
    for tr in st:
        if np.any(tr.data == null_value):
            tr.data = np.ma.masked_where(tr.data == null_value, tr.data)
            all_traces = tr.split()
            isSplit=True
            st.remove(tr)
            for newtr in all_traces:
                st.append(newtr)

    # detrend
    if detrend:
        st.detrend(detrend)        
    
    # taper and high filter filter each piece
    if highpass:
        st.taper(max_percentage=0.1, max_length=1/highpass)
        st.filter('highpass', freq=highpass, corners=2, zerophase=True)
    
    # recombine
    if isSplit:
        st.merge(method=0, fill_value=fill_value)

from obspy.clients.filesystem.sds import Client as sdsclient
def sds2detrendedstream(client_or_dir, startTime, endTime, network, station='*', location='*', channel='[BHESCDFG]*', sampling_interval=60, plot_stream=False):
    if isinstance(client_or_dir, str):
        mySDSclient = sdsclient(client_or_dir, sds_type='D', format='MSEED', fileborder_seconds=30, fileborder_samples=5000)
    else:
        mySDSclient = client_or_dir
    st = mySDSclient.get_waveforms(network, station, location, channel, startTime, endTime-0.004)
    piecewise_detrend(st, highpass=1/sampling_interval)
    return st

def sds2rsam(client_or_dir, startTime, endTime, network, station='*', location='*', channel='[BHESCDFG]*', sampling_interval=60, plot_stream=False):
    st = sds2detrendedstream(client_or_dir, startTime, endTime, network, station=station, location=location, channel=channel, sampling_interval=sampling_interval, plot_stream=plot_stream)
    if plot_stream:
        st.plot(equal_scale=False);
    rsamobj = RSAM(stream=st, sampling_interval=sampling_interval, filter=None)
    return rsamobj
 
overwrite=False
secondsPerDay = 60 * 60 * 24
numDays = (endTime-startTime)/secondsPerDay
daytime = startTime
ext='pickle'
while daytime < endTime:
    #print('\n', daytime, daytime.strftime('%Y.%j'))
    st = obspy.Stream()
    yyyy = daytime.year
    """ Not working, so abandoning.
    if not overwrite:
        isSdsDayAlreadyInRSAM(st, mySDSclient, network, daytime, SAM_DIR, ext, sampling_interval)           
    else:
        # we are forcing an overwrite
        print(f'- Loading Stream data for {daytime.strftime("%Y-%m-%d")}')
        st = mySDSclient.get_waveforms(network, "*", "*", "[HBESCDFG]*", daytime, daytime+secondsPerDay)
        for tr in st:
            if tr.stats.station[0:2]=='MT' or tr.stats.station[0:2]=='BL':
                st.remove(tr)
        print(f'  - got {len(st)} Trace ids')
    """
    # we are forcing an overwrite
    #print(f'- Loading Stream data for {daytime.strftime("%Y-%m-%d")}')

    """
    filepattern = mySDSclient._get_filename("MV", "MBGB", "", "HHZ", daytime + 100)
    filenames = sorted(glob.glob(filepattern))
    #print(filenames)
    st = mySDSclient.get_waveforms(network, "MBGB", "", "HHZ", daytime, daytime+secondsPerDay-0.001)
    if len(filenames)>0 and len(st)==0:
        st = obspy.read(filenames[0], format='MSEED')
    """
    st = sds2detrendedstream(mySDSclient, daytime, daytime+secondsPerDay-0.004, network, station='*', location='*', channel='[BHESCD]*', sampling_interval=sampling_interval, plot_stream=False)
    #st = mySDSclient.get_waveforms(network, "*", "", "[HBESCDFG]*", daytime, daytime+secondsPerDay-0.004) 

    st2 = st.copy()
    for tr in st2:
        """
        # SCAFFOLD: remove next 3 lines. just there to avoid recomputing MTB1,2,3,4 which is all days.
        sta = tr.stats.station
        if (sta[0:2]=='MT' and sta!='MTGH') or sta[0:2]=='BL':
            st.remove(tr)
        """
        if tr.stats.npts < int(tr.stats.sampling_rate * sampling_interval):
            st.remove(tr)

    if len(st)>0:
        print('\n', daytime, daytime.strftime('%Y.%j'))
        #print(st)
    
        unique_ids = list(set([tr.id for tr in st]))
        print(f'  - got {len(st)} Trace ids with {len(unique_ids)} unique IDs', unique_ids)
        if len(unique_ids) < len(st):
            try:
                st.merge(method=0, fill_value=0)
            except:
                pass
    
        # convert Stream to RSAM and write out
        #print(f'- Stream has length {len(st)} ')
    
        print(f'- Computing RSAM metrics for {daytime}, and saving to {ext} files')
        rsam24h = RSAM(stream=st, sampling_interval=sampling_interval, filter=None)
        rsam24h.write(SAM_DIR, ext=ext, overwrite=overwrite)
    
    
    daytime += secondsPerDay


 2018-03-17T00:00:00.000000Z 2018.076
  - got 6 Trace ids with 6 unique IDs ['MV.BLV01.00.HHE', 'MV.BLV01.00.HHZ', 'MV.BLV01.0.HDF', 'MV.BLV01.0.HD2', 'MV.BLV01.0.HD3', 'MV.BLV01.00.HHN']
- Computing RSAM metrics for 2018-03-17T00:00:00.000000Z, and saving to pickle files
write
Writing /data/SAM2/RSAM/MV/RSAM_MV.BLV01.0.HD2_2018_60s.pickle
Writing /data/SAM2/RSAM/MV/RSAM_MV.BLV01.0.HD3_2018_60s.pickle
Writing /data/SAM2/RSAM/MV/RSAM_MV.BLV01.0.HDF_2018_60s.pickle
Writing /data/SAM2/RSAM/MV/RSAM_MV.BLV01.00.HHE_2018_60s.pickle
Writing /data/SAM2/RSAM/MV/RSAM_MV.BLV01.00.HHN_2018_60s.pickle
Writing /data/SAM2/RSAM/MV/RSAM_MV.BLV01.00.HHZ_2018_60s.pickle

 2018-03-22T00:00:00.000000Z 2018.081
  - got 4 Trace ids with 4 unique IDs ['MV.BLV02.00.HHZ', 'MV.BLV02.00.HHE', 'MV.BLV02.00.HHN', 'MV.BLV02.10.HDF']
- Computing RSAM metrics for 2018-03-22T00:00:00.000000Z, and saving to pickle files
write
Writing /data/SAM2/RSAM/MV/RSAM_MV.BLV02.00.HHE_2018_60s.pickle
Writing /data/SAM2/RSAM/MV/RSA