In [1]:
from obspy import read, Stream, UTCDateTime
import numpy as np
from mudpy.hfsims import windowed_gaussian, apply_spectrum
from mudpy.forward import gnss_psd
import matplotlib.pyplot as plt
import h5py
from glob import glob

In [2]:
# --- Run configuration -------------------------------------------------------
project_name = 'newfault'
vel_mod = 'mojave'
GF_list = 'rc_gflist.gflist'  # Station file used in fq gen code
stas_name = GF_list.split('.')[0]  # station-file name up to the first '.'

# All inputs and outputs live under one root directory.
fq_root = '/hdd/rc_fq/fall24/'
fq_dir = f'{fq_root}{project_name}/'
arrival_save_dir = f'{fq_root}{project_name}_fq_parrivals/'

# Rupture names to process.
ruptures = np.load(f'{fq_root}{project_name}_ruptures.npy')

# Ruptures for which station P284 gets one extra second at the end of its
# trim window in the formatting loop.
extra_second_rupts = [
    'newfault.000474',
    'newfault.001647',
    'newfault.001745',
    'newfault.002078',
    'newfault.002165',
    'newfault.002607',
    'newfault.002911',
    'newfault.003012',
]

# Station names that go with the station file above.
stas = np.load(f'{fq_root}{stas_name}_station_names.npy')
test_stas = ['P595', 'P594', 'CCCC']

In [None]:
# Build fixed-length, arrival-aligned N/E/Z windows for every (rupture, station).
#
# For each rupture:
#   * the magnitude string is sliced from the rupture's .log file,
#   * the arrival-time table is read once from the arrivals CSV.
# For each station of that rupture:
#   * the three SAC components are read and zero-padded on both ends,
#   * each is trimmed to a window around the (padding-shifted) arrival time,
#   * the value at the reference sample is subtracted and everything before the
#     arrival is set to zero,
#   * the components are concatenated in N, E, Z order into one row.
#
# Results consumed by later cells:
#   new_data_array                : preallocated, one row per (rupture, station)
#   data_list                     : the same rows, collected in a list
#   rupt_list, sta_list, mag_list : per-row metadata, all stored as str

pad_len = 128    # zero samples added to each end of every trace
pre_pick = 128   # seconds kept before the arrival; also the number of leading samples zeroed
post_pick = 127  # seconds kept after the arrival ("needs to be 256 samples, not 256 seconds")
win_len = 256    # samples per component in the final window
channels = ('LYN', 'LYE', 'LYZ')  # row order: N, E, Z

n_ruptures = len(ruptures)
# Row layout follows the actual station count, so rows neither overlap nor
# leave gaps if the station list is not exactly 278 long.
n_stas = len(stas)

new_data_array = np.zeros((n_ruptures * n_stas, 3 * win_len))

mag_list = []
sta_list = []
rupt_list = []

data_list = []

for idx, rupt in enumerate(ruptures):

    # Progress message every 10th rupture.
    if (idx + 1) % 10 == 0:
        print('Rupture ' + rupt + ' (' + str(idx + 1) + '/' + str(n_ruptures) + ')')

    # The magnitude string sits at fixed character positions on the 16th log
    # line. The file is closed as soon as its lines have been read.
    log_paths = glob(fq_dir + 'output/ruptures/' + rupt + '.log')
    with open(log_paths[0], 'r') as log_file:
        log_lines = log_file.readlines()
    mag = str(log_lines[15][21:27])

    # The arrivals table depends only on the rupture: read it once per rupture
    # instead of once per station.
    arrivals = np.genfromtxt(arrival_save_dir + stas_name + '_arrival_times_' + rupt + '.csv', dtype = 'U')
    stas_arrival = arrivals[:,1]  # Station list from arrivals file

    wf_dir = fq_dir + 'output/waveforms/' + rupt + '/'

    for idx2, sta in enumerate(stas):

        ### Window limits around the arrival time ###

        # Column 2 of the arrivals table holds the arrival time for the station
        # named in column 1. Raises IndexError if the station is not listed.
        arrival = arrivals[np.where(stas_arrival == sta)[0], 2][0]

        # Padding the front of the data without moving the trace start time
        # shifts every sample later by pad_len samples (1 sample per second).
        arr_time = UTCDateTime(arrival) + pad_len
        starttime = arr_time - pre_pick

        # Station P284 needs one extra second for the listed ruptures so that
        # the trimmed window still comes out at the expected length.
        if sta == 'P284' and rupt in extra_second_rupts:
            endtime = arr_time + post_pick + 1
        else:
            endtime = arr_time + post_pick

        ### Read, zero-pad and trim each component ###

        windows = []
        for chan in channels:
            st = read(wf_dir + sta + '.' + chan + '.sac')
            st[0].data = np.pad(st[0].data, pad_len, mode = 'constant')
            st.trim(starttime, endtime)  # trims in place
            windows.append(st[0].data)

        # Reference sample: the (npts + 1)/2-th sample of the trimmed N trace.
        # NOTE(review): for npts == 256 this evaluates to index 127, while the
        # layout comments below place the pick at index 128 - confirm which
        # sample is intended before changing it.
        arrival_idx = int((len(windows[0]) + 1)/2 - 1)

        ### Remove the offset at the reference sample, zero the pre-arrival part ###

        zeroed = []
        for win in windows:
            norm = win - win[arrival_idx]  # new array; the trace data is untouched
            norm[0:pre_pick] = 0  # Remember that the stop index is excluded
            zeroed.append(norm)

        ### Combine N, E, and Z components into one array ###

        # N: indices 0 through 255 (pick at index 128)
        # E: indices 256 through 511 (pick at index 384)
        # Z: indices 512 through 767 (pick at index 640)
        comb_data = np.concatenate(zeroed)

        # Raises ValueError if the combined row is not 3 * win_len samples long.
        new_data_array[idx * n_stas + idx2] = comb_data

        ### Collect the row and its metadata ###

        data_list.append(comb_data)

        rupt_list.append(str(rupt))
        sta_list.append(str(sta))
        mag_list.append(str(mag))
        

Rupture newfault.000009 (10/3300)
Rupture newfault.000019 (20/3300)
Rupture newfault.000029 (30/3300)
Rupture newfault.000039 (40/3300)
Rupture newfault.000049 (50/3300)
Rupture newfault.000059 (60/3300)
Rupture newfault.000069 (70/3300)
Rupture newfault.000079 (80/3300)
Rupture newfault.000089 (90/3300)
Rupture newfault.000099 (100/3300)


In [None]:
# Turn the lists collected by the formatting loop into arrays and report shapes.
# Arrivals sit at samples 128, 384, 640 of each row.
data_array = np.asarray(data_list)
print('Data array shape:', data_array.shape, sep='\n')

print('New data array shape:', new_data_array.shape, sep='\n')

# Per-row metadata, one array per field.
rupt_array = np.asarray(rupt_list)
sta_array = np.asarray(sta_list)
mag_array = np.asarray(mag_list)

# One row per waveform: (rupture name, station name, magnitude string).
info_array = np.column_stack([rupt_array, sta_array, mag_array])
print('Info array shape:', info_array.shape, sep='\n')
print(info_array[-1])

In [None]:
# Visual sanity check of a single formatted row (N, E, Z windows back to back).
row_to_plot = 567  # arbitrary row; change to inspect another (rupture, station)

fig, ax = plt.subplots()
ax.plot(new_data_array[row_to_plot])
ax.set(
    title='Formatted waveform, row ' + str(row_to_plot),
    xlabel='Sample index (N: 0-255, E: 256-511, Z: 512-767)',
    ylabel='Amplitude (reference-sample value removed)',
)
plt.show()

In [None]:
# Write the formatted waveforms and their metadata to disk.
# The `with` blocks close each HDF5 file even if writing the dataset fails.
out_prefix = '/hdd/rc_fq/fall24/' + project_name

# Rows collected in the list during the formatting loop.
with h5py.File(out_prefix + '_fq_wvfm_data_formatted.hdf5', 'w') as h5f:
    h5f.create_dataset('data', data = data_array)

# Rows written into the preallocated array during the formatting loop.
with h5py.File(out_prefix + '_fq_wvfm_data_formatted_newversion.hdf5', 'w') as h5f:
    h5f.create_dataset('data', data = new_data_array)

# Matching (rupture, station, magnitude) row for every waveform.
np.save(out_prefix + '_fq_wvfm_info.npy', info_array)