In [1]:
import matplotlib.pyplot as plt
import numpy as np
import h5py

In [2]:
project_name = 'newfault' # Based on the name of the FakeQuakes project
fq_dir = '/hdd/rc_fq/fall24/' # Where are the FakeQuakes stored? (The final .hdf5 file)
noise_dir = '/home/sdybing/gnss-picker/data/noisedata/' # Where is the noise data stored?
realdata_dir = '/home/sdybing/gnss-picker/data/realdata/summer23/' # Where is the real data stored?

In [3]:
# FakeQuakes waveform data
print('Loading FakeQuakes...')
fq_data = h5py.File(fq_dir + 'newfault_fq_wvfm_data_formatted.hdf5', 'r')
fq_data = fq_data['data'][:,:]
# old_fq_data = h5py.File(fq_dir + 'july6_128samps_fq_wvfm_data_formatted.hdf5', 'r')
# old_fq_data = old_fq_data['data'][:,:]

# FakeQuakes metadata
print('Loading FakeQuakes metadata...')
fq_metadata = np.load(fq_dir + 'newfault_fq_wvfm_info.npy')

# Noise data
print('Loading noise...')
all_noise_data = h5py.File(noise_dir + 'summer23_128samps_all_noise_samples.hdf5', 'r')
all_noise_data = all_noise_data['all_noise_samples'][:,:]

# Demeaned real waveform data
print('Loading real data...')
real_data = h5py.File(realdata_dir + 'demean_realdata_rembad.hdf5', 'r')
real_data = real_data['demean_realdata_rembad'][:,:]

# Real metadata
print('Loading real metadata...')
real_metadata = np.load(realdata_dir + 'real_metadata_rembad_w_gauss_pos_mag.npy')

# Columns:
# 0: station name
# 1: date sample came from
# 2: sample start time
# 3: sample end time
# 4: random n counter
# 5: sample P-wave arrives at (gauss pos)
# 6: earthquake magnitude

# Trim noise data to match length of FakeQuakes data
noise_data = all_noise_data[:len(fq_data)]

# Array of NaNs to use to match added noise in concatenation later
nan_array = np.empty((len(fq_data), 3))
nan_array[:] = np.NaN

# Real data

# Check shapes
print('FakeQuakes shape: ' + str(fq_data.shape))
print('Noise data shape: ' + str(noise_data.shape))
print('Real data shape: ' + str(real_data.shape))


Loading FakeQuakes...
Loading FakeQuakes metadata...
Loading noise...
Loading real data...
Loading real metadata...
FakeQuakes shape: (917400, 768)
Noise data shape: (917400, 768)
Real data shape: (994155, 384)


In [4]:
real_rows_w_eqs = np.load(realdata_dir + 'real_metadata_rembad_rows_w_eqs.npy')

In [5]:
print(min(fq_metadata[:,2].astype(float)))
print(max(fq_metadata[:,2].astype(float)))

3.6167
7.7934


In [6]:
bins = np.arange(3.65,7.85,0.1)
bins

array([3.65, 3.75, 3.85, 3.95, 4.05, 4.15, 4.25, 4.35, 4.45, 4.55, 4.65,
       4.75, 4.85, 4.95, 5.05, 5.15, 5.25, 5.35, 5.45, 5.55, 5.65, 5.75,
       5.85, 5.95, 6.05, 6.15, 6.25, 6.35, 6.45, 6.55, 6.65, 6.75, 6.85,
       6.95, 7.05, 7.15, 7.25, 7.35, 7.45, 7.55, 7.65, 7.75])

In [7]:
real_metadata[:,6][real_rows_w_eqs].astype(float)

array([4.85, 4.74, 6.4 , ..., 4.47, 4.74, 5.51])

In [8]:
plt.figure(figsize = (6,6), dpi = 400)

plt.subplot(211)
plt.text(x = 2.2, y = 600, s = '(a)', fontsize = 22)
plt.grid(lw = 0.5, zorder = 0)
plt.hist(real_metadata[:,6][real_rows_w_eqs].astype(float), bins = bins, color = '#f01f42', edgecolor = 'black', lw = 0.5, zorder = 3)
plt.xlabel("Real earthquake waveforms' magnitudes", fontsize = 12)
plt.ylabel('Count', fontsize = 12)
plt.tick_params(labelsize = 11)
plt.xlim(3.2,8.2)

plt.subplot(212)
plt.text(x = 2.2, y = 33000, s = '(b)', fontsize = 22)
plt.grid(lw = 0.5, zorder = 0)
plt.hist(fq_metadata[:,2].astype(float), bins = bins, color = '#2DADB4', edgecolor = 'black', lw = 0.5, zorder = 3)
plt.xlabel("FakeQuakes waveforms' magnitudes", fontsize = 12)
plt.ylabel('Count', fontsize = 12)
plt.tick_params(labelsize = 11)
plt.xlim(3.2,8.2)

plt.subplots_adjust(hspace = 0.28, left = 0.2)

# plt.show()

# plt.savefig('/home/sdybing/gnss-picker/manuscript_figures/FigS1_fq_and_real_eq_mags_hist.jpg', format = 'JPG')
plt.savefig('/home/sdybing/gnss-picker/manuscript_figures/Figure_S1.png', format = 'PNG')
plt.close();