In [1]:
### ----- Imports ----- ###

import sys
sys.path.append('/home/sdybing/neic-mlaapde')

from mlaapde.access import MLAAPDE_Access
from mlaapde import UTC
import matplotlib.pyplot as plt
import numpy as np
import os.path
import os
import shutil
import glob
import h5py
import tensorflow as tf

# TensorFlow GPU allocator settings, applied in a single update call.
os.environ.update({
    'TF_GPU_ALLOCATOR': 'cuda_malloc_async',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
})

# Dataset selection. To use the 3-month subset instead, point data_dir at
# '/data/hank/mlaapde_subset/data' and set dataset = 'subset'.
dataset = 'v1b'
mlpa = MLAAPDE_Access(data_dir = '/data/hank/mlaapde_v1b/data', random_seed = 616)

# Last expression of the cell: echo the directory actually in use.
mlpa.data_dir

2022-09-08 13:37:38 - mlaapde.access.MLAAPDE_Access - INFO - MLAAPDE_Access.__init__() starting
2022-09-08 13:37:52 - mlaapde.access.MLAAPDE_Access - INFO - MLAAPDE_Access.__init__() complete


'/data/hank/mlaapde_v1b/data'

In [2]:
# Display the default arguments held by the MLAAPDE access object, for
# comparison with the overrides set in the parameter cell.
mlpa.default_args

{'nsamp': 10000,
 'split': [0.8, 0.2],
 'labels': ['phase_id',
  'phase_time',
  'phase_travel_sec',
  'source_distance_deg',
  'source_back_azimuth_deg'],
 'ids': False,
 'valid_phases': [['P', 'Pn', 'Pg'], ['S', 'Sn', 'Sg']],
 'only_oriented': True,
 'min_snr_db': 10,
 'max_snr_db': False,
 'time1': False,
 'time2': False,
 'dist1': False,
 'dist2': False,
 'mag_types': False,
 'mag1': False,
 'mag2': False,
 'bpf_freq_min': False,
 'bpf_freq_max': False,
 'trim_pre_sec': False,
 'trim_post_sec': False,
 'normalize': 'stream',
 'rotate': False,
 'channels': [True, True, True],
 'log_progress_fraction': False}

In [3]:
### ----- Parameters ----- ###

# Output location for models and figures
models_figs_path = '/home/sdybing/neic-mlaapde/allwaveforms/decimated/'

# MLAAPDE/data generation params
# Disabled overrides, kept for reference:
#   nsamp = n_train_samp + n_valid_samp  (n_train_samp = 1000000, n_valid_samp = 200000)
#   train_split = 0.8, valid_split = 0.2
sr = 40 # Sampling rate
trim_sec = 60 # Seconds requested from MLAAPDE on each side of the phase pick
trim_pre_sec = trim_post_sec = trim_sec
window_len = trim_pre_sec + trim_post_sec # Total window length
n_channels = 3 # Instrument channels

# Window lengths to evaluate: 1 s steps for 7-20, 5 s steps for 25-40,
# 10 s steps for 50-120
cut_lens = list(range(7, 21)) + list(range(25, 41, 5)) + list(range(50, 121, 10))
test_cut_lens = [7, 8]

desired_shift = 3
# The shifting method yields half of max_shift, hence the factor of two
max_shift = 2 * desired_shift
min_snr_db = max_snr_db = False
log_progress_fraction = 100
valid_phases = ['P', 'Pn', 'Pg']
cast_dtype = np.float32

# Training/model params
epochs_number = 200
batch_size = 256 # Reducing to help memory
monte_carlo_sampling = 50
drop_rate = 0.5
filters = [32, 64, 96, 128, 256]

# Only relevant when loading an already-trained model
training_samps = 100000
training_dataset = 'v1b'
shift_status = 'shifted'
model_folder_path = '/home/sdybing/neic-mlaapde/allwaveforms/float32/'

# Accumulators for the end-of-run error plots (two independent lists)
mean_errors, std_errors = [], []

In [4]:
### ----- Where are the HDF5 files getting saved? ----- ###

# Location of HDF5 data files. exist_ok=True creates the directory only when
# it is missing, replacing the separate isdir() check (no check-then-create
# race, no empty `pass` branch).
hdf5_save_dir = '/data/sdybing/allwaveforms/decimated/'
os.makedirs(hdf5_save_dir, exist_ok = True)

# Extra labels to return alongside the waveforms
return_labels = ['source_magnitude', 'source_magnitude_type', 'snr_db', 'phase_id']

# Keyword arguments for the data parameters, built from the parameter cell.
# (A variant without 'log_progress_fraction' was used previously.)
kwargs = {
    'valid_phases': valid_phases,
    'labels': return_labels,
    'trim_pre_sec': trim_pre_sec,
    'trim_post_sec': trim_post_sec,
    'min_snr_db': min_snr_db,
    'max_snr_db': max_snr_db,
    'log_progress_fraction': log_progress_fraction,
    'cast_dtype': cast_dtype,
}

In [5]:
### ----- Load a subset of the dataset from HDF5 files ----- ###

# Number of leading examples read from each file (was a repeated literal).
n_load = 5000

# Context managers close each file even if a read raises; os.path.join avoids
# the doubled slash produced by concatenating onto a directory ending in '/'.
with h5py.File(os.path.join(hdf5_save_dir, 'training_data.hdf5'), 'r') as training_data:
    dataset_names = list(training_data.keys())
    print(dataset_names)

    # Slicing an HDF5 dataset reads the selected rows into memory, so the
    # arrays remain usable after the file is closed.
    train_waves = training_data['waves'][:n_load]
    train_mags = training_data['magnitude'][:n_load]
    train_phase_id = training_data['phase_id'][:n_load]

with h5py.File(os.path.join(hdf5_save_dir, 'validation_data.hdf5'), 'r') as validation_data:
    valid_waves = validation_data['waves'][:n_load]
    valid_mags = validation_data['magnitude'][:n_load]

['magnitude', 'magnitude_type', 'phase_id', 'snr_db', 'waves']


In [6]:
# Sanity check: shapes of the loaded training and validation arrays
for _arr in (train_waves, train_mags, valid_waves, valid_mags):
    print(_arr.shape)

(5000, 3, 4800)
(5000,)
(5000, 3, 4800)
(5000,)


In [7]:
# Swap the last two axes: (example, channel, sample) -> (example, sample, channel)
train_waves_t = np.transpose(train_waves, (0, 2, 1))
valid_waves_t = np.transpose(valid_waves, (0, 2, 1))

# Confirm the new layout next to the (unchanged) magnitude arrays
for _arr in (train_waves_t, train_mags, valid_waves_t, valid_mags):
    print(_arr.shape)

(5000, 4800, 3)
(5000,)
(5000, 4800, 3)
(5000,)


In [26]:
# NOTE(review): this rebinds train_waves to the transposed array, so re-running
# the transpose cell afterwards would transpose it a second time. Kept because
# other cells may rely on it; the figure below only reads train_waves_t.
train_waves = train_waves_t

# Per-channel drop probabilities for the "Drop channel" panel. A rate of 1
# always triggers, since np.random.random() returns values in [0, 1).
dropE_rate = 0.05
dropN_rate = 1
dropZ_rate = 0.05

lengthpts = train_waves_t.shape[1]
window_len = trim_sec * 2

# Fixed example waveform. The previous random draw was overwritten on the next
# line, so it has been removed.
wvf_idx = 3115
print('Waveform index: ' + str(wvf_idx))

# Time axis derived from the sample count, so it always has exactly one entry
# per sample (a float-step arange does not guarantee its length).
times = np.arange(lengthpts) / sr

def plot_features(axis, xmax = None):
    """Apply the shared styling used by the augmentation panels.

    Adds the legend, sets the x-range to [0, xmax] and the y-range to
    [-1.2, 1.2], draws a dashed vertical line at xmax / 2 and hides the
    x ticks and tick labels.

    Parameters
    ----------
    axis : object
        Axes to style; only legend, set_xlim, set_ylim, axvline and
        tick_params are called on it.
    xmax : float, optional
        Upper x-limit. When omitted, the module-level ``shift_len`` is used
        (the original behaviour), so ``shift_len`` must then be defined
        before the call.
    """
    if xmax is None:
        # Backward-compatible fallback to the global set in the shifting step.
        xmax = shift_len
    axis.legend(loc = 'upper left', fontsize = 14)
    axis.set_xlim(0,xmax)
    axis.set_ylim(-1.2,1.2)
    axis.axvline(xmax/2, color = 'black', linestyle = '--', alpha = 0.7)
    axis.tick_params(axis = 'x', bottom = False, labelbottom = False)

# 11 x 2 grid of panels. The two shorter rows (height ratio 0.75) hold the
# axes that are hidden later to act as vertical spacers.
f, axes = plt.subplots(
    nrows = 11,
    ncols = 2,
    gridspec_kw = {'height_ratios': [1, 1, 1, 0.75, 1, 1, 1, 0.75, 1, 1, 1]},
    figsize = (16,18),
    dpi = 300,
    facecolor = 'white',
)
# Row-major unpacking: even names are the left column, odd names the right.
(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
 a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21) = axes.ravel()

### Original waveforms ###

# Full-length traces, one panel per component, with a dashed line at the
# window centre. Only the last panel of the group keeps its x ticks.
for _ax, _ch, _color, _name in ((a0, 0, '#001528', 'E'),
                                (a2, 1, '#2DADB4', 'N'),
                                (a4, 2, '#E9072D', 'Z')):
    _is_bottom = _ax is a4
    _ax.plot(times, train_waves_t[wvf_idx,:,_ch], color = _color, label = _name)
    _ax.legend(loc = 'upper left', fontsize = 14)
    _ax.set_xlim(0,window_len)
    _ax.set_ylim(-1.2,1.2)
    _ax.axvline(window_len/2, color = 'black', linestyle = '--', alpha = 0.7)
    _ax.tick_params(axis = 'x', bottom = _is_bottom, labelbottom = _is_bottom)

a0.set_title('Original waveforms', fontsize = 16)
a2.set_ylabel('Stream-normalized amplitude', fontsize = 14)
a4.set_xlabel('Time (s)', fontsize = 14)

### Trimming to window length ###

# Cut a cut_len-second window centred on the middle sample of the trace and
# renormalise it by its peak absolute amplitude across all three channels.
cut_len = 14
middle = int(train_waves_t.shape[1] / 2)
X = train_waves_t[wvf_idx, int(middle - (cut_len/2)*sr) : int(middle + (cut_len/2)*sr), 0:3]
X = X / np.max(np.abs(X))
cut_lengthpts = X.shape[0]

# Time axis built from the actual number of samples in the slice, so x and y
# always have matching lengths (a float-step arange does not guarantee that).
cut_times = np.arange(cut_lengthpts) / sr

# One panel per component; only the last panel keeps its x ticks.
for _ax, _ch, _color, _name in ((a1, 0, '#001528', 'E'),
                                (a3, 1, '#2DADB4', 'N'),
                                (a5, 2, '#E9072D', 'Z')):
    _is_bottom = _ax is a5
    _ax.plot(cut_times, X[:,_ch], color = _color, label = _name)
    _ax.legend(loc = 'upper left', fontsize = 14)
    _ax.set_xlim(0,cut_len)
    _ax.set_ylim(-1.2,1.2)
    _ax.axvline(cut_len/2, color = 'black', linestyle = '--', alpha = 0.7)
    _ax.tick_params(axis = 'x', bottom = _is_bottom, labelbottom = _is_bottom)

a1.set_title('Trimming to desired window length', fontsize = 16)
a3.set_ylabel('Stream-normalized amplitude', fontsize = 14)
a5.set_xlabel('Time (s)', fontsize = 14)

### Shifting up to 3 seconds ###

# The start of a (cut_len - shift)-second window is drawn from [0, shift).
# Relative to the new window's centre, the sample that was at the centre of
# the trimmed trace therefore moves by at most shift / 2 = desired_shift
# seconds, which is why shift is twice the desired value.
desired_shift = 3 # seconds
shift = desired_shift * 2 # to shift properly
time_offset = np.random.uniform(low = 0, high = shift) # seconds

shift_len = cut_len - shift
samps_offset = int(time_offset*sr)
start = samps_offset
end = int(start + shift_len*sr)
X = X[start:end, 0:3]
X = X / np.max(np.abs(X)) # Renormalise the shortened window by its own peak
shift_lengthpts = X.shape[0]

# Time axis built from the actual number of samples in the slice, so x and y
# always have matching lengths (a float-step arange does not guarantee that).
shift_times = np.arange(shift_lengthpts) / sr

for _ax, _ch, _color, _name in ((a8, 0, '#001528', 'E'),
                                (a10, 1, '#2DADB4', 'N'),
                                (a12, 2, '#E9072D', 'Z')):
    _ax.plot(shift_times, X[:,_ch], color = _color, label = _name)
    plot_features(_ax)

a8.set_title('Randomly shifted', fontsize = 16)
a10.set_ylabel('Stream-normalized amplitude', fontsize = 14)
a12.set_xlabel('Time (s)', fontsize = 14)
# plot_features hides the x ticks; restore them on the bottom panel
a12.tick_params(axis = 'x', bottom = True, labelbottom = True)

### Extra noise ###

# Each component receives zero-mean Gaussian noise whose standard deviation is
# drawn separately from U(0.01, 0.15). The generator is consumed in E, N, Z
# order, so the random draws happen in the same sequence as before.
noiseE, noiseN, noiseZ = (
    X[:,_ch] + np.random.normal(0, np.random.uniform(0.01, 0.15), shift_lengthpts)
    for _ch in range(3)
)

for _ax, _trace, _color, _name in ((a9, noiseE, '#001528', 'E'),
                                   (a11, noiseN, '#2DADB4', 'N'),
                                   (a13, noiseZ, '#E9072D', 'Z')):
    _ax.plot(shift_times, _trace, color = _color, label = _name)
    plot_features(_ax)

a9.set_title('Extra noise', fontsize = 16)
a11.set_ylabel('Stream-normalized amplitude', fontsize = 14)
a13.set_xlabel('Time (s)', fontsize = 14)
# plot_features hides the x ticks; restore them on the bottom panel
a13.tick_params(axis = 'x', bottom = True, labelbottom = True)

### Flip channels ###

# Swap the two horizontal components: the panel labelled E shows column 1 of X
# and the panel labelled N shows a copy of column 0. Z is left as is.
flip = X[:,0].copy()
newE, newN = X[:,1], flip

for _ax, _trace, _color, _name in ((a16, newE, '#001528', 'E'),
                                   (a18, newN, '#2DADB4', 'N'),
                                   (a20, X[:,2], '#E9072D', 'Z')):
    _ax.plot(shift_times, _trace, color = _color, label = _name)
    plot_features(_ax)

a16.set_title('Flip horizontal components', fontsize = 16)
a18.set_ylabel('Stream-normalized amplitude', fontsize = 14)
a20.set_xlabel('Time (s)', fontsize = 14)
# plot_features hides the x ticks; restore them on the bottom panel
a20.tick_params(axis = 'x', bottom = True, labelbottom = True)

### Drop channels ###

# For each component, in E, N, Z order, draw one uniform random number and
# replace the trace with zeros when it falls below that component's drop rate.
# `drop` ends up holding the label of the last component that was zeroed.
_traces = []
for _ch, _name, _rate in ((0, 'E', dropE_rate),
                          (1, 'N', dropN_rate),
                          (2, 'Z', dropZ_rate)):
    if np.random.random() < _rate:
        _traces.append(np.zeros(X.shape[0]))
        drop = _name
    else:
        _traces.append(X[:,_ch].copy())
dropE, dropN, dropZ = _traces

for _ax, _trace, _color, _name in ((a17, dropE, '#001528', 'E'),
                                   (a19, dropN, '#2DADB4', 'N'),
                                   (a21, dropZ, '#E9072D', 'Z')):
    _ax.plot(shift_times, _trace, color = _color, label = _name)
    plot_features(_ax)

a17.set_title('Drop channel', fontsize = 16)
a19.set_ylabel('Stream-normalized amplitude', fontsize = 14)
a21.set_xlabel('Time (s)', fontsize = 14)
# plot_features hides the x ticks; restore them on the bottom panel
a21.tick_params(axis = 'x', bottom = True, labelbottom = True)

### Invisible spacing plots ###

# The axes in the two short rows are hidden so they act as gaps between the
# three groups of panels once the vertical spacing is set to zero.
for _ax in (a6, a7, a14, a15):
    _ax.set_visible(False)

plt.subplots_adjust(hspace = 0)

### Letters ###

# Panel letters, placed in data coordinates just above and to the left of the
# top panel of each group (x offsets differ because the x-ranges differ).
for _ax, _x, _letter in ((a0, -16, 'a)'),
                         (a1, -1.9, 'b)'),
                         (a8, -1, 'c)'),
                         (a9, -1, 'd)'),
                         (a16, -1, 'e)'),
                         (a17, -1, 'f)')):
    _ax.text(x = _x, y = 1.4, s = _letter, fontsize = 22)

### Show or save ###

fig_path = '/home/sdybing/neic-mlaapde/paperfigs/sixpanel_augexamps.png'
# Make sure the output directory exists so the save cannot fail after the
# 300-dpi figure has already been built.
os.makedirs(os.path.dirname(fig_path), exist_ok = True)

#plt.show()
plt.savefig(fig_path, format = 'PNG')
plt.close();

# Waveform indices noted while choosing the example: 606, 4827, 2397, 3115 (best)

Waveform index: 3115
