In [1]:
from obspy.core import Stream, read
from scipy.interpolate import interp1d
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path
import os

In [2]:
# Root folder holding the earthquake catalog, the station metadata, and the strain data.
# It must end with a path separator: file paths are built by plain string concatenation.
path_to_files = '/Users/sydneydybing/StrainProject/2024/'

# Every catalog column is read as text, so origin times and magnitudes keep their
# exact CSV spelling when they are turned into folder names.
earthquakes = pd.read_csv(path_to_files + 'M6_catalog_2004-2024.csv', dtype = str)

# When True, only the catalog rows listed below are kept. The processing loop then
# uses a longer slice of the origin-time string so their folder names stay distinct.
fix_dups = True
if fix_dups:
    earthquakes = earthquakes.iloc[[8,9,14,16]] # Earthquakes with same day and magnitude
    
origin_times = earthquakes.time.values
mags = earthquakes.mag.values

# Station metadata, restricted to stations with longitude below -110
bsm_mda = pd.read_csv(path_to_files + 'bsm_metadata.csv')
NA_only = bsm_mda.loc[bsm_mda['LONG'] < -110]
stas = NA_only['BNUM'].values
chans = ['BS1', 'BS2', 'BS3', 'BS4']

# Stations whose 'GAP(m)' entry is 0.0002 m (200 microns). The column is coerced to
# numbers and compared with a tolerance instead of '==', so a last-digit difference
# from CSV parsing (or a column read as text) cannot silently empty this list and
# give every station the other gap ratio. Unparseable entries become NaN and are
# never selected.
gap_m = pd.to_numeric(NA_only['GAP(m)'], errors = 'coerce')
is_200_microns = np.isclose(gap_m, 0.0002, rtol = 0.0, atol = 1e-9)
stas_200_microns = NA_only.loc[is_200_microns, 'BNUM'].values

# Small subsets for quick test runs
earthquakes_test = earthquakes[:3]
stas_test = ['B028']

In [3]:
# Convert every raw BSM gauge record to linear strain, repair glitched samples,
# remove the starting offset, and save the result as processed MiniSEED.
#
# Results left in the notebook namespace:
#   bad_stachan    - labels ('<event>.<station>.<channel>') flagged by the derivative check
#   failed_stachan - (label, error) pairs for files that exist but could not be processed
#   q              - number of event/station/channel combinations attempted

DERIV_LIMIT = 0.00025  # sample-to-sample strain jump that flags a channel as glitched
C = 10**8              # constant C in the counts-to-strain equation


def _counts_to_strain(counts, gap_ratio, scale = C):
    """Convert raw gauge counts to linear extensional strain.

    Equations from Barbour and Crowell. ``gap_ratio`` is R, the ratio of the gap
    between the fixed-capacitance plates to the instrument diameter, and
    ``scale`` is the constant C. Returns an array with one value per input sample.
    """
    scaled = counts / scale
    return gap_ratio * (scaled / (1 - scaled))


def _fill_minimum_samples(strain, times):
    """Linearly interpolate over every sample equal to the minimum of ``strain``.

    The samples at the series minimum are dropped and re-created from their
    neighbours. Raises ValueError (from interp1d) if a dropped sample is the
    first or last one, because there is no neighbour on one side.
    """
    # NOTE(review): only samples equal to the minimum are treated as bad, so positive
    # spikes or several distinct bad values are not repaired here.
    bad_idx = np.where(strain <= np.amin(strain))[0]
    kept_strain = np.delete(strain, bad_idx)
    kept_times = np.delete(times, bad_idx)
    return interp1d(kept_times, kept_strain)(times)


bad_stachan = []
failed_stachan = []
q = 0

wide_gap_stas = set(stas_200_microns)  # set membership instead of an array scan per channel

for idx in range(len(earthquakes)):
    
    ot = origin_times[idx]
    print('Earthquake ' + str(idx+1) + '/' + str(len(mags)))
    
    # With fix_dups the hour is kept in the folder name so same-day, same-magnitude
    # events do not share a folder; otherwise only the date is used.
    cut_ot = str(ot)[:13] if fix_dups else str(ot)[:10]
    event_name = str(cut_ot) + '_M' + str(mags[idx])
    
    for sta in stas:
        
        # R = ratio of the gap between the fixed-capacitance plates and instrument diameter (0.087 m)
        if sta in wide_gap_stas:
            R = 2*10**(-4) / 0.087
        else:
            R = 10**(-4) / 0.087
        
        eq_load_dir = path_to_files + 'strain_data/raw/' + event_name + '/' + str(sta) + '/'
        eq_sta_save_dir = path_to_files + 'strain_data/processed/' + event_name + '/' + str(sta) + '/'
        
        for chan in chans:
            
            q += 1
            
            label = event_name + '.' + str(sta) + '.' + str(chan)
            raw_file = eq_load_dir + chan + '.mseed'
            
            # Most stations have no data for a given event; that is expected, so skip quietly.
            if not os.path.isfile(raw_file):
                continue
            
            try:
                
                g = read(raw_file)
                
                # Calculating new linear extensional strains (turns from a Stream into a numpy array)
                e = _counts_to_strain(g[0].data, R)
                
                # Time in seconds from the first sample
                times = np.arange(g[0].stats.npts) / g[0].stats.sampling_rate
                
                # Identify glitched channels from the sample-to-sample derivative
                # (the first sample is prepended so the derivative has the same length as e)
                deriv_e = np.diff(e, prepend = e[0])
                
                if np.min(deriv_e) <= -DERIV_LIMIT or np.max(deriv_e) >= DERIV_LIMIT:
                    bad_stachan.append(label)
                    e = _fill_minimum_samples(e, times)
                
                # Normalizing unfiltered data so the series starts at zero
                e_norm = e - e[0]
                
                # NOTE(review): only the first trace is converted; if a file ever holds more
                # than one trace, the others are written back unchanged - confirm this cannot happen.
                e_fixed = g.copy()
                e_fixed[0].data = e_norm
                
                # Create folder for the event/station if needed, then save
                os.makedirs(eq_sta_save_dir, exist_ok = True)
                e_fixed.write(eq_sta_save_dir + chan + '.mseed', format = 'MSEED')
                
            except Exception as err:
                # Keep going (best effort), but record the failure instead of hiding it.
                # KeyboardInterrupt is not an Exception subclass, so the run can still be stopped.
                failed_stachan.append((label, repr(err)))

if failed_stachan:
    print(str(len(failed_stachan)) + ' channel(s) had raw data but could not be processed - see failed_stachan')


Earthquake 1/4


A suitable encoding will be chosen.
This might have a negative influence on the compatibility with other programs.


Earthquake 2/4
Earthquake 3/4
Earthquake 4/4


In [4]:
# Summary: number of channels flagged as bad, then number of combinations attempted
print(len(bad_stachan), q, sep = '\n')

144
1216
