In [1]:
import os
import obspy
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Flatten, LSTM, Dense, Dropout, Conv1D
from tensorflow.keras.layers import TimeDistributed, ConvLSTM2D, MaxPooling1D
from tensorflow.keras.utils import to_categorical
from pathlib import Path
%run helperfunctions.ipynb
#os.getcwd()

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
os.chdir('lse_challenge/data')

In [4]:
# Signal List
os.chdir('signal')
signals = []
signal_files = os.listdir()
signal_files = random.sample(signal_files, 100) #Need to expand for final model
for sigfile in signal_files: 
    stream = obspy.read(sigfile)
    cutlength = np.random.randint(16, 95) # Aligning with the [0.5 sec, 3 sec] range
    
    for tr in stream:
        tr.data = tr.data[cutlength:1000+cutlength]
        tr.normalize()
    
    trace_mat = [stream[0].data, stream[1].data, stream[2].data]
    signals.append([trace_mat[0], cutlength, 1])
    signals.append([trace_mat[1], cutlength, 1])
    signals.append([trace_mat[2], cutlength, 1])


# Noise List
os.chdir('../noise')
noise_files = os.listdir()
noise_files = random.sample(noise_files, 200) #Need to expand for final model
for noisefile in noise_files: 
    stream = obspy.read(noisefile)
    cutlength = np.random.randint(16, 95) # Aligning with the [0.5 sec, 3 sec] range
    
    for tr in stream:
        tr.data = tr.data[cutlength:1000+cutlength]
        tr.normalize()
    
    trace_mat = [stream[0].data, stream[1].data, stream[2].data]
    signals.append([trace_mat[0], cutlength, 0])
    signals.append([trace_mat[1], cutlength, 0])
    signals.append([trace_mat[2], cutlength, 0])
    
os.chdir('../')

sig_df = pd.DataFrame(signals, columns = ["trace", "cutlength", "signal"])
sig_df['p_arrival'] = 1000 - sig_df['cutlength']

## Training Neural Network for dectecting whether or not the trace has a signal

In [5]:
# For rolling averages
roll_short = 25
roll_long = 50

# For location segments
window_step = 20
window_size = 20

# Step size for CNN
n_steps_sig = 3
n_steps_loc = 5

In [6]:
# Features and Targets for Identifying the Entire Trace

sigfeatures = []
sigtargets = []

for index, slice_df in sig_df.iterrows():
    tr = slice_df["trace"]
    mag = abs(tr)
    signal = slice_df["signal"]
    p_arrival = slice_df["p_arrival"]
    d = {"trace": tr, "magnitude":mag}
    
    temp_df = pd.DataFrame(data = d)
    temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
    temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
    temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
    temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
    temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()
    
    temp_df.dropna(inplace = True)
    
    sigfeatures.append(temp_df.values)
    sigtargets.append(signal)

In [7]:
X_train_sig, X_test_sig, y_train_sig, y_test_sig = train_test_split(sigfeatures, sigtargets, test_size = 0.3)

In [8]:
X_train_sig = np.array(X_train_sig)
X_test_sig = np.array(X_test_sig)
y_train_sig = np.array(y_train_sig)
y_test_sig = np.array(y_test_sig)

In [9]:
n_timesteps, n_features, n_outputs = X_train_sig.shape[1], X_train_sig.shape[2], 2
n_length = int(n_timesteps/n_steps_sig)

In [10]:
signalmodel = Sequential()
signalmodel.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(n_steps_sig, n_length, n_features)))
signalmodel.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
signalmodel.add(TimeDistributed(Dropout(0.5)))
signalmodel.add(TimeDistributed(MaxPooling1D(pool_size=2)))
signalmodel.add(TimeDistributed(Flatten()))
signalmodel.add(LSTM(100))
signalmodel.add(Dropout(0.5))
signalmodel.add(Dense(100, activation='relu'))
signalmodel.add(Dense(n_outputs, activation='softmax'))
signalmodel.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

signalmodel.summary()

2022-03-13 11:05:04.056269: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-13 11:05:04.056363: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 3, 315, 64)       1408      
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 3, 313, 64)       12352     
 tributed)                                                       
                                                                 
 time_distributed_2 (TimeDis  (None, 3, 313, 64)       0         
 tributed)                                                       
                                                                 
 time_distributed_3 (TimeDis  (None, 3, 156, 64)       0         
 tributed)                                                       
                                                                 
 time_distributed_4 (TimeD

In [11]:
X_train_sig1 = X_train_sig.reshape((X_train_sig.shape[0], n_steps_sig, n_length, n_features))
X_test_sig1 = X_test_sig.reshape((X_test_sig.shape[0], n_steps_sig, n_length, n_features))
y_train_sig1 = to_categorical(y_train_sig)
y_test_sig1 = to_categorical(y_test_sig)

In [12]:
signalmodel.fit(X_train_sig1, y_train_sig1, epochs = 30)

Epoch 1/30


2022-03-13 11:05:04.374843: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-03-13 11:05:05.132348: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-13 11:05:05.298528: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


 3/20 [===>..........................] - ETA: 0s - loss: 0.8191 - accuracy: 0.7083 

2022-03-13 11:05:05.403735: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x28b8c9850>

In [13]:
model_loss_sig, model_accuracy_sig = signalmodel.evaluate(X_test_sig1, y_test_sig1, verbose = 0)
print(f"Loss: {model_loss_sig}, Accuracy: {model_accuracy_sig}")

predictions_test_sig = signalmodel.predict(X_test_sig1)
predictions_train_sig = signalmodel.predict(X_train_sig1)

2022-03-13 11:05:21.741988: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-13 11:05:21.810014: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Loss: 0.2972554862499237, Accuracy: 0.9629629254341125


2022-03-13 11:05:22.237237: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-13 11:05:22.282207: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [14]:
class_test_sig = predictions_test_sig > 0.2
f1_score_test_sig = sklearn.metrics.f1_score(y_test_sig1, class_test_sig, average = 'micro')
print(f"Test F1 Score: {f1_score_test_sig}")

class_train_sig = predictions_train_sig > 0.2
f1_score_train_sig = sklearn.metrics.f1_score(y_train_sig1, class_train_sig, average = 'micro')
print(f"Train F1 Score: {f1_score_train_sig}")

Test F1 Score: 0.9595588235294118
Train F1 Score: 1.0


In [15]:
# ##@ TODO: Make compatible
# tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_test_sig1, class_test_sig).ravel()
# (tn, fp, fn, tp)

In [16]:
# Exporting Model
os.getcwd()

'/Users/albertkong/grillochallenge/lse_challenge/data'

In [17]:
# Only run if we need to get to root directory
os.chdir('../../')

In [None]:
# Exporting signal model
signalmodel.save('signalmodelCNN_v3')

## Training Neural Network for dectecting where in the trace the p-wave starts

In [28]:
# Features and Targets for Identifying Windows of Trace
locfeatures = []
loctargets = []

for index, slice_df in sig_df.iterrows():
    
    signal = slice_df["signal"]
    
    if signal == 1:
        tr = slice_df["trace"]
        mag = abs(tr)

        p_arrival = slice_df["p_arrival"]
        d = {"trace":tr, "magnitude":mag}

        temp_df = pd.DataFrame(data = d)
        temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
        temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
        temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
        temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
        temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()

        temp_df.dropna(inplace = True)

        start_ind = 0
        end_ind = start_ind + window_size

        while end_ind < (1000 - roll_long):
            trwindow = temp_df["trace"].iloc[start_ind:end_ind]
            magwindow = temp_df["magnitude"].iloc[start_ind:end_ind]
            ravwindow = temp_df["RAV"].iloc[start_ind:end_ind]
            stvwindow = temp_df["STV"].iloc[start_ind:end_ind]
            ltvwindow = temp_df["LTV"].iloc[start_ind:end_ind]

            window_data = {"trace": trwindow, "magnitude": magwindow,
                        "RAV": ravwindow, "STV": stvwindow, "LTV": ltvwindow}
            window_df = pd.DataFrame(data = window_data)

            locfeatures.append(window_df.values)

            if ((p_arrival - roll_long + 1) >= start_ind) and ((p_arrival - roll_long + 1) <= end_ind):
                loctargets.append(1)
            else:
                loctargets.append(0)

            start_ind += window_step
            end_ind = start_ind + window_size

In [29]:
# For Windows of Trace
X_train_loc, X_test_loc, y_train_loc, y_test_loc = train_test_split(locfeatures, loctargets, test_size = 0.3)

In [30]:
X_train_loc = np.array(X_train_loc)
X_test_loc = np.array(X_test_loc)
y_train_loc = np.array(y_train_loc)
y_test_loc = np.array(y_test_loc)

In [31]:
n_timesteps, n_features, n_outputs = X_train_loc.shape[1], X_train_loc.shape[2], 2
n_length = int(n_timesteps/n_steps_loc)

In [37]:
locmodelcl = Sequential()
locmodelcl.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None, n_length, n_steps_loc)))
locmodelcl.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
locmodelcl.add(TimeDistributed(Dropout(0.5)))
locmodelcl.add(TimeDistributed(MaxPooling1D(pool_size=2)))
locmodelcl.add(TimeDistributed(Flatten()))
locmodelcl.add(LSTM(100))
locmodelcl.add(Dropout(0.5))
locmodelcl.add(Dense(100, activation='relu'))
locmodelcl.add(Dense(n_outputs, activation='softmax'))
locmodelcl.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

locmodelcl.summary()

ValueError: Exception encountered when calling layer "conv1d_11" (type Conv1D).

Negative dimension size caused by subtracting 3 from 2 for '{{node time_distributed_20/conv1d_11/Conv1D}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](time_distributed_20/conv1d_11/Conv1D/ExpandDims, time_distributed_20/conv1d_11/Conv1D/ExpandDims_1)' with input shapes: [?,1,2,64], [1,3,64,64].

Call arguments received:
  • inputs=tf.Tensor(shape=(None, 2, 64), dtype=float32)

In [38]:
X_train_loc1 = X_train_loc.reshape((X_train_loc.shape[0], n_steps_loc, n_length, n_features))
X_test_loc1 = X_test_loc.reshape((X_test_loc.shape[0], n_steps_loc, n_length, n_features))
y_train_loc1 = to_categorical(y_train_loc)
y_test_loc1 = to_categorical(y_test_loc)

In [39]:
locmodelcl.fit(X_train_loc1, y_train_loc1, epochs = 15)

RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.

In [None]:
model_loss_loc, model_accuracy_loc = locmodel.evaluate(X_test_loc1, y_test_loc1, verbose = 0)
print(f"Loss: {model_loss_loc}, Accuracy: {model_accuracy_loc}")

predictions_test_loc = locmodel.predict(X_test_loc1)
predictions_train_loc = locmodel.predict(X_train_loc1)

In [None]:
class_test_loc = predictions_test_loc > 0.25
f1_score_test_loc = sklearn.metrics.f1_score(y_test_loc1, class_test_loc, average='micro')
print(f"Test F1 Score: {f1_score_test_loc}")

class_train_loc = predictions_train_loc > 0.25
f1_score_train_loc = sklearn.metrics.f1_score(y_train_loc1, class_train_loc, average='micro')
print(f"Train F1 Score: {f1_score_train_loc}")

In [None]:
# ##@ TODO: Make compatible
# tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_test_loc[:,1], class_test_loc[:,1]).ravel()
# (tn, fp, fn, tp)

In [None]:
# Exporting location model
locmodel.save('locmodelCNN_v2')

## Spot checking results

In [None]:
loaded_signalmodel = load_model('signalmodelCNN_v3')

In [None]:
loaded_locmodel = load_model('locmodelCNN_v3')

In [None]:
slice_df = sig_df.iloc[28]

In [None]:
sigfeatures = []
sigtargets = []
locfeatures = []
loctargets = []

tr = slice_df["trace"]
mag = abs(tr)
signal = slice_df["signal"]
p_arrival = slice_df["p_arrival"]
d = {"trace": tr, "magnitude":mag}
temp_df = pd.DataFrame(data = d)
temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()
    
temp_df.dropna(inplace = True)
sigfeatures.append(temp_df.values)
sigtargets.append(signal)

sigfeatures = np.array(sigfeatures)
n_timesteps, n_features, n_outputs = sigfeatures.shape[1], sigfeatures.shape[2], 2
n_length = int(n_timesteps/n_steps_sig)

sigfeatures1 = sigfeatures.reshape((sigfeatures.shape[0], n_steps_sig, 1, n_length, n_features))

In [None]:
loaded_signalmodel.predict(sigfeatures1)[0][1]

In [None]:
sigtargets

In [None]:
start_ind = 0
end_ind = start_ind + window_size
    
while end_ind < (1000 - roll_long):
    magwindow = temp_df["magnitude"].iloc[start_ind:end_ind]
    trwindow = temp_df["trace"].iloc[start_ind:end_ind]
    ravwindow = temp_df["RAV"].iloc[start_ind:end_ind]
    stvwindow = temp_df["STV"].iloc[start_ind:end_ind]
    ltvwindow = temp_df["LTV"].iloc[start_ind:end_ind]

    window_data = {"trace":trwindow, "magnitude": magwindow,
                    "RAV": ravwindow, "STV": stvwindow, "LTV": ltvwindow}
        
    window_df = pd.DataFrame(data = window_data)

    locfeatures.append(window_df.values)

    if ((p_arrival-roll_long) >= start_ind) and ((p_arrival-roll_long) <= end_ind):
        loctargets.append(1)
    else:
        loctargets.append(0)

    start_ind += window_step
    end_ind = start_ind + window_size
    

locfeatures = np.array(locfeatures)
n_timesteps, n_features, n_outputs = locfeatures.shape[1], locfeatures.shape[2], 2
n_length = int(n_timesteps/n_steps_loc)

locfeatures1 = locfeatures.reshape((locfeatures.shape[0], n_steps_loc, 1, n_length, n_features))

In [None]:
loaded_locmodel.predict(locfeatures1)[:,1]

In [None]:
loctargets