In [73]:
# Currently only set up to download Ridgecrest M7 stations to match SW4

In [108]:
from obspy.core import Stream, UTCDateTime, read
from obspy.clients.fdsn import Client
import numpy as np
import pandas as pd
import os.path
import os
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d

In [124]:
# FDSN data center that serves the strainmeter waveforms. Googling "obspy fdsn"
# lists the other providers that can be named here.
client = Client('IRIS')

# Project directory that holds the catalog / metadata CSVs read below. The
# other cells build their strain_data/ paths from it as well.
path_to_files = '/Users/sydneydybing/StrainProject/2024/'

# earthquakes: the events to get data for (locations, origin times, ...), with
#              every column read as a string.
# stas:        station codes (BNUM) of the metadata rows with LONG < -110.
# chans:       the four strainmeter channels recorded by these instruments.
earthquakes = pd.read_csv(path_to_files + 'M6_catalog_2004-2024.csv', dtype = str)
bsm_mda = pd.read_csv(path_to_files + 'bsm_metadata.csv')
NA_only = bsm_mda[bsm_mda['LONG'] < -110]
stas = NA_only['BNUM'].values
chans = ['BS1', 'BS2', 'BS3', 'BS4']

# Run-mode switches. Each enabled switch narrows the event (and sometimes the
# station) selection below.
noise_events = True
fix_dups = False           # earthquakes with same day and magnitude
old_events = False         # no data for these ones available
weird_events_only = False
rc7_only = True
get_B918_only = False

# The filters run in this fixed order, and every .iloc position refers to the
# frame as left by the filters before it, not to the full catalog.
# NOTE(review): enabling more than one filter at a time therefore changes which
# rows the later positions point at - confirm before combining switches.
if fix_dups:
    earthquakes = earthquakes.iloc[[8, 9, 14, 16]]

if old_events:
    earthquakes = earthquakes[-4:]

if not noise_events:
    earthquakes = earthquakes.iloc[[0, 1, 11, 27]]

if weird_events_only:
    earthquakes = earthquakes.iloc[[4, 15]]

if rc7_only:
    earthquakes = earthquakes.iloc[[11]]
    stas = ['B072', 'B079', 'B082', 'B087', 'B916', 'B917', 'B918', 'B921']

if get_B918_only:
    earthquakes = earthquakes.iloc[[10]]
    stas = ['B918']

In [125]:
# Display the filtered catalog to check which event(s) the switches selected.
earthquakes

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
11,2019-07-06T03:19:53.040Z,35.7695,-117.5993333,8,7.1,mw,77,43,0.04616,0.22,...,2024-01-04T03:30:09.226Z,Ridgecrest Earthquake Sequence,earthquake,0.19,31.61,0.028,126,reviewed,ci,ci


In [126]:
# Reading data from IRIS needs origin times that obspy's UTCDateTime can parse.
# The catalog keeps each origin time as a single ISO 8601 string in its `time`
# column (e.g. 2019-07-06T03:19:53.040Z), so the column is used as-is; the
# magnitudes are pulled out alongside it (as strings, because the CSV was read
# with dtype = str).

origin_times = earthquakes['time'].values
mags = earthquakes['mag'].values

In [129]:
# Station codes whose metadata lists a gap ('GAP(m)') of 0.0002 m. The
# processing cell uses this list to pick the gap value for the strain conversion.
has_200_micron_gap = NA_only['GAP(m)'] == 0.0002
stas_200_microns = NA_only.loc[has_200_micron_gap, 'BNUM'].values
stas_200_microns

array(['B001', 'B003', 'B004', 'B005', 'B006', 'B007', 'B009', 'B010',
       'B011', 'B012', 'B018', 'B022', 'B024', 'B035', 'B081', 'B082',
       'B086', 'B087'], dtype=object)

In [130]:
# Process the noise data that was downloaded.
#
# For every event / station / channel this reads the raw miniSEED file from
# strain_data/noise/, converts it to linear strain, interpolates over spike
# samples, removes the starting offset, and writes the result to
# strain_data/noise/processed/.
#
# NOTE: the raw files have to exist on disk already, so the "Download noise
# data" cell must have been run at least once before this one.

# Labels (event.station.channel) of the traces in which a spike was detected.
bad_stachan = []
# (label, error) for every station/channel that could not be processed, e.g.
# because no raw file exists for it. Inspect this instead of failing silently.
failed_stachan = []
# Number of station/channel combinations attempted.
q = 0

for idx in range(len(earthquakes)):

    ot = origin_times[idx]
    print('Earthquake ' + str(idx+1) + '/' + str(len(mags)))

    # Stations for this event. A local name is used so that the global `stas`
    # is not overwritten for the cells that run after this one.
    event_stas = stas
    if weird_events_only:
        if idx == 0:
            event_stas = ['B072', 'B076']
        elif idx == 1:
            event_stas = ['B928']

    print(ot)

    # Event folder name: the date, or date + hour when fix_dups is set to tell
    # apart events that share a day and magnitude.
    cut_ot = ot[:13] if fix_dups else ot[:10]
    event_label = str(cut_ot) + '_M' + str(mags[idx])

    for sta in event_stas:

        # Equations from Barbour and Crowell to convert to strain
        # R = ratio of the gap between the fixed-capacitance plates and
        # instrument diameter (0.087 m); the gap is 200 microns for the
        # stations in stas_200_microns and 100 microns otherwise.
        if sta in stas_200_microns:
            R = 2*10**(-4) / 0.087
        else:
            R = 10**(-4) / 0.087

        C = 10**8

        for chan in chans:

            q += 1
            label = event_label + '.' + str(sta) + '.' + str(chan)

            try:

                eq_load_dir = path_to_files + 'strain_data/noise/' + event_label + '/' + str(sta) + '/'
                g = read(eq_load_dir + chan + '.mseed')

                # Calculating new linear extensional strains (turns from a
                # Stream into a numpy array)
                counts = g[0].data
                e = R * ((counts/C)/(1 - counts/C))
                times = g[0].times()

                ## Fixing the data ##

                # Identifying stations with issues using derivatives.
                # Doesn't work if there are gaps - just spikes.
                # The first sample is repeated so the derivative keeps the
                # same length as the data.
                deriv_e = np.diff(np.hstack((e[0], e)))

                if np.min(deriv_e) <= -0.00025 or np.max(deriv_e) >= 0.00025: # better to do this with averages or #s?

                    bad_stachan.append(label)

                    # The messed up samples are the ones sitting at the
                    # minimum value of the trace.
                    i = np.where(e <= np.amin(e))[0]

                    # Deleting the bad samples from the data and the times
                    # arrays, then filling the gaps by linear interpolation.
                    # interp1d raises if a bad sample is the first or last one
                    # (nothing to interpolate from); that channel then ends up
                    # in failed_stachan.
                    e_clean = np.delete(e, i)
                    times_clean = np.delete(times, i)
                    f = interp1d(times_clean, e_clean)
                    e = f(times)

                # Normalizing unfiltered data: subtract the mean of the first
                # 200 samples.
                e_norm = e - np.mean(e[:200])

                e_fixed = g.copy()
                e_fixed[0].data = e_norm

                # Create folder for the event / station and save the channel
                eq_sta_save_dir = path_to_files + 'strain_data/noise/processed/' + event_label + '/' + str(sta) + '/'
                os.makedirs(eq_sta_save_dir, exist_ok = True)

                e_fixed.write(eq_sta_save_dir + chan + '.mseed', format = 'MSEED')

            except Exception as err:
                # Best effort: keep going with the next channel, but remember
                # what was skipped and why.
                failed_stachan.append((label, repr(err)))

print(str(len(failed_stachan)) + ' of ' + str(q) + ' station-channels skipped (see failed_stachan)')


Earthquake 1/1
2019-07-06T03:19:53.040Z


A suitable encoding will be chosen.


In [131]:
# Compute RMS
#
# For every event / station, combine the processed gauge channels listed in
# `chans` into one root-mean-square strain trace and save it as channel 'BSR'
# under strain_data/noise/rms/.

# (event.station, error) for every station that could not be combined, e.g.
# because one of its processed channel files is missing.
rms_failed = []

for idx in range(len(earthquakes)):

    ot = origin_times[idx]
    print(ot)
    print('Earthquake ' + str(idx+1) + '/' + str(len(mags)))

    # Stations for this event. A local name is used so that the global `stas`
    # is not overwritten for the cells that run after this one.
    event_stas = stas
    if weird_events_only:
        if idx == 0:
            event_stas = ['B072', 'B076']
        elif idx == 1:
            event_stas = ['B928']

    # Event folder name: the date, or date + hour when fix_dups is set.
    cut_ot = ot[:13] if fix_dups else ot[:10]
    event_label = str(cut_ot) + '_M' + str(mags[idx])

    processed_dir = path_to_files + 'strain_data/noise/processed/' + event_label + '/'
    eq_sta_save_dir = path_to_files + 'strain_data/noise/rms/' + event_label + '/'

    for sta in event_stas:

        try:

            # One processed Stream per gauge channel, in the order of `chans`.
            gauges = [read(processed_dir + str(sta) + '/' + chan + '.mseed') for chan in chans]

            # Sample-by-sample root mean square across the gauge channels.
            sum_of_squares = sum(gauge[0].data**2 for gauge in gauges)
            RMS_strain = np.sqrt(sum_of_squares/len(gauges))

            # Reuse the first channel's Stream so the RMS trace keeps its
            # header (timing, station, ...), then relabel the channel.
            RMS_st = gauges[0].copy()
            RMS_st[0].stats.channel = 'BSR'
            RMS_st[0].data = RMS_strain

            # Create folder for the event
            os.makedirs(eq_sta_save_dir, exist_ok = True)

            RMS_st.write(eq_sta_save_dir + sta + '.mseed', format = 'MSEED')

        except Exception as err:
            # Best effort: keep going with the next station, but remember what
            # was skipped and why.
            rms_failed.append((event_label + '.' + str(sta), repr(err)))

print(str(len(rms_failed)) + ' stations skipped (see rms_failed)')


2019-07-06T03:19:53.040Z
Earthquake 1/1


In [127]:
# Display the selected event(s) again as a check before downloading.
earthquakes

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
11,2019-07-06T03:19:53.040Z,35.7695,-117.5993333,8,7.1,mw,77,43,0.04616,0.22,...,2024-01-04T03:30:09.226Z,Ridgecrest Earthquake Sequence,earthquake,0.19,31.61,0.028,126,reviewed,ci,ci


In [128]:
# Download noise data
#
# For every selected event, request the 120.05 s window that ends at the
# origin time for each station / channel and save it as
# strain_data/noise/<event>/<station>/<channel>.mseed.

# (event index, station, channel, error) for every request that returned no
# data or failed, so it is possible to tell afterwards what didn't work.
not_found = []

for idx in range(len(earthquakes)):
    print('Earthquake ' + str(idx+1) + '/' + str(len(mags)))
    ot = origin_times[idx]
    print(ot)

    # Requested time window: from 120.05 s before the origin time up to the
    # origin time itself.
    etime = UTCDateTime(ot)
    stime = etime - 120.05

    # Event folder name: the date, or date + hour when fix_dups is set. Built
    # from path_to_files so it always matches where the processing cell reads.
    cut_ot = ot[:13] if fix_dups else ot[:10]
    event_dir = path_to_files + 'strain_data/noise/' + str(cut_ot) + '_M' + str(mags[idx]) + '/'

    # I then looped through my stations. All of the stations I want are in the 'PB' network,
    # and the data has the location 'T0'. This information you'll probably have to look up on
    # the IRIS Metadata Aggregator website.
    net = 'PB'
    loc = 'T0'

    for sta in stas:

        eq_sta_save_dir = event_dir + str(sta) + '/'

        # My final loop was through my four channels so I could write individual miniSEED data
        # files for each channel.

        for chan in chans:

            # I used this try-except condition so I didn't get errors if a station didn't have
            # data for the time window I was looking at.

            try:

                # I use the function get_waveforms to get the data from the client (IRIS), and
                # read it into an obspy stream object using the network, stations, etc. info
                # that I collected earlier. Then I wrote the stream into a miniSEED data file
                # and saved it onto my laptop.

                st = client.get_waveforms(net, sta, loc, chan, stime, etime)

                # Create folder for the event / station (only once data came back)
                os.makedirs(eq_sta_save_dir, exist_ok = True)

                st.write(eq_sta_save_dir + chan + '.mseed', format = 'MSEED')

            # If that didn't work, the event index, station, channel and the error are
            # recorded so I can tell what didn't work, and the loop moves on.

            except Exception as err:
                not_found.append((idx, sta, chan, repr(err)))

print(str(len(not_found)) + ' station-channel requests returned no data or failed (see not_found)')

Earthquake 1/1
2019-07-06T03:19:53.040Z
