In [1]:
import os
import obspy
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Flatten, LSTM, Dense, Dropout
from tensorflow.keras.layers import TimeDistributed, ConvLSTM2D
from tensorflow.keras.utils import to_categorical
from pathlib import Path
%run helperfunctions.ipynb
#os.getcwd()

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
os.chdir('lse_challenge/data')

In [4]:
# Signal List
os.chdir('signal')
signals = []
signal_files = os.listdir()
signal_files = random.sample(signal_files, 100) #Need to expand for final model
for sigfile in signal_files: 
    stream = obspy.read(sigfile)
    cutlength = np.random.randint(16, 95) # Aligning with the [0.5 sec, 3 sec] range
    
    for tr in stream:
        tr.data = tr.data[cutlength:1000+cutlength]
        tr.normalize()
    
    trace_mat = [stream[0].data, stream[1].data, stream[2].data]
    signals.append([trace_mat[0], cutlength, 1])
    signals.append([trace_mat[1], cutlength, 1])
    signals.append([trace_mat[2], cutlength, 1])


# Noise List
os.chdir('../noise')
noise_files = os.listdir()
noise_files = random.sample(noise_files, 300) #Need to expand for final model
for noisefile in noise_files: 
    stream = obspy.read(noisefile)
    cutlength = np.random.randint(16, 95) # Aligning with the [0.5 sec, 3 sec] range
    
    for tr in stream:
        tr.data = tr.data[cutlength:1000+cutlength]
        tr.normalize()
    
    trace_mat = [stream[0].data, stream[1].data, stream[2].data]
    signals.append([trace_mat[0], cutlength, 0])
    signals.append([trace_mat[1], cutlength, 0])
    signals.append([trace_mat[2], cutlength, 0])
    
os.chdir('../')

sig_df = pd.DataFrame(signals, columns = ["trace", "cutlength", "signal"])
sig_df['p_arrival'] = 1000 - sig_df['cutlength']

## Training Neural Network for dectecting whether or not the trace has a signal

In [138]:
# For rolling averages
roll_short = 25
roll_long = 50

# For location segments
window_step = 20
window_size = 20

# Step size for CNN
n_steps_sig = 3
n_steps_loc = 5

In [6]:
# Features and Targets for Identifying the Entire Trace

sigfeatures = []
sigtargets = []

for index, slice_df in sig_df.iterrows():
    tr = slice_df["trace"]
    mag = abs(tr)
    signal = slice_df["signal"]
    p_arrival = slice_df["p_arrival"]
    d = {"trace": tr, "magnitude":mag}
    
    temp_df = pd.DataFrame(data = d)
    temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
    temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
    temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
    temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
    temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()
    
    temp_df.dropna(inplace = True)
    
    sigfeatures.append(temp_df.values)
    sigtargets.append(signal)

In [7]:
X_train_sig, X_test_sig, y_train_sig, y_test_sig = train_test_split(sigfeatures, sigtargets, test_size = 0.3)

In [8]:
X_train_sig = np.array(X_train_sig)
X_test_sig = np.array(X_test_sig)
y_train_sig = np.array(y_train_sig)
y_test_sig = np.array(y_test_sig)

In [9]:
n_timesteps, n_features, n_outputs = X_train_sig.shape[1], X_train_sig.shape[2], 2
n_length = int(n_timesteps/n_steps_sig)

In [10]:
signalmodel = Sequential()
signalmodel.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='relu', input_shape=(n_steps_sig, 1, n_length, n_features)))
signalmodel.add(Dropout(0.5))
signalmodel.add(Flatten())
signalmodel.add(Dense(100, activation='relu'))
signalmodel.add(Dense(2, activation='softmax'))
signalmodel.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

signalmodel.summary()

Metal device set to: Apple M1
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   


2022-03-13 09:52:01.769524: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-13 09:52:01.769790: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


 conv_lstm2d (ConvLSTM2D)    (None, 1, 315, 64)        54784     
                                                                 
 dropout (Dropout)           (None, 1, 315, 64)        0         
                                                                 
 flatten (Flatten)           (None, 20160)             0         
                                                                 
 dense (Dense)               (None, 100)               2016100   
                                                                 
 dense_1 (Dense)             (None, 2)                 202       
                                                                 
Total params: 2,071,086
Trainable params: 2,071,086
Non-trainable params: 0
_________________________________________________________________


In [11]:
X_train_sig1 = X_train_sig.reshape((X_train_sig.shape[0], n_steps_sig, 1, n_length, n_features))
X_test_sig1 = X_test_sig.reshape((X_test_sig.shape[0], n_steps_sig, 1, n_length, n_features))
y_train_sig1 = to_categorical(y_train_sig)
y_test_sig1 = to_categorical(y_test_sig)

In [12]:
signalmodel.fit(X_train_sig1, y_train_sig1, epochs = 30)

Epoch 1/30


2022-03-13 09:52:02.115413: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-03-13 09:52:03.040660: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x16a66e9d0>

In [13]:
model_loss_sig, model_accuracy_sig = signalmodel.evaluate(X_test_sig1, y_test_sig1, verbose = 0)
print(f"Loss: {model_loss_sig}, Accuracy: {model_accuracy_sig}")

predictions_test_sig = signalmodel.predict(X_test_sig1)
predictions_train_sig = signalmodel.predict(X_train_sig1)

2022-03-13 09:53:10.677916: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Loss: 0.12459424883127213, Accuracy: 0.9722222685813904


2022-03-13 09:53:11.124001: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [14]:
class_test_sig = predictions_test_sig > 0.2
f1_score_test_sig = sklearn.metrics.f1_score(y_test_sig1, class_test_sig, average = 'micro')
print(f"Test F1 Score: {f1_score_test_sig}")

class_train_sig = predictions_train_sig > 0.2
f1_score_train_sig = sklearn.metrics.f1_score(y_train_sig1, class_train_sig, average = 'micro')
print(f"Train F1 Score: {f1_score_train_sig}")

Test F1 Score: 0.9500674763832658
Train F1 Score: 0.9567224466243508


In [15]:
# ##@ TODO: Make compatible
# tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_test_sig1, class_test_sig).ravel()
# (tn, fp, fn, tp)

In [16]:
# Exporting Model
os.getcwd()

'/Users/albertkong/grillochallenge/lse_challenge/data'

In [17]:
# Only run if we need to get to root directory
os.chdir('../../')

In [18]:
# Exporting signal model
signalmodel.save('signalmodelCNN_v2')

2022-03-13 09:53:12.476720: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: signalmodelCNN_v2/assets


## Training Neural Network for dectecting where in the trace the p-wave starts

In [139]:
# Features and Targets for Identifying Windows of Trace
locfeatures = []
loctargets = []

for index, slice_df in sig_df.iterrows():
    
    signal = slice_df["signal"]
    
    if signal == 1:
        tr = slice_df["trace"]
        mag = abs(tr)

        p_arrival = slice_df["p_arrival"]
        d = {"trace":tr, "magnitude":mag}

        temp_df = pd.DataFrame(data = d)
        temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
        temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
        temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
        temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
        temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()

        temp_df.dropna(inplace = True)

        start_ind = 0
        end_ind = start_ind + window_size

        while end_ind < (1000 - roll_long):
            trwindow = temp_df["trace"].iloc[start_ind:end_ind]
            magwindow = temp_df["magnitude"].iloc[start_ind:end_ind]
            ravwindow = temp_df["RAV"].iloc[start_ind:end_ind]
            stvwindow = temp_df["STV"].iloc[start_ind:end_ind]
            ltvwindow = temp_df["LTV"].iloc[start_ind:end_ind]

            window_data = {"trace": trwindow, "magnitude": magwindow,
                        "RAV": ravwindow, "STV": stvwindow, "LTV": ltvwindow}
            window_df = pd.DataFrame(data = window_data)

            locfeatures.append(window_df.values)

            if ((p_arrival - roll_long + 1) >= start_ind) and ((p_arrival - roll_long + 1) <= end_ind):
                loctargets.append(1)
            else:
                loctargets.append(0)

            start_ind += window_step
            end_ind = start_ind + window_size

In [140]:
# For Windows of Trace
X_train_loc, X_test_loc, y_train_loc, y_test_loc = train_test_split(locfeatures, loctargets, test_size = 0.3)

In [141]:
X_train_loc = np.array(X_train_loc)
X_test_loc = np.array(X_test_loc)
y_train_loc = np.array(y_train_loc)
y_test_loc = np.array(y_test_loc)

In [142]:
n_timesteps, n_features, n_outputs = X_train_loc.shape[1], X_train_loc.shape[2], 2
n_length = int(n_timesteps/n_steps_loc)

In [143]:
locmodel = Sequential()
locmodel.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='relu', input_shape=(n_steps_loc, 1, n_length, n_features)))
locmodel.add(Dropout(0.5))
locmodel.add(Flatten())
locmodel.add(Dense(100, activation='relu'))
locmodel.add(Dense(2, activation='softmax'))
locmodel.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

signalmodel.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d (ConvLSTM2D)    (None, 1, 315, 64)        54784     
                                                                 
 dropout (Dropout)           (None, 1, 315, 64)        0         
                                                                 
 flatten (Flatten)           (None, 20160)             0         
                                                                 
 dense (Dense)               (None, 100)               2016100   
                                                                 
 dense_1 (Dense)             (None, 2)                 202       
                                                                 
Total params: 2,071,086
Trainable params: 2,071,086
Non-trainable params: 0
_________________________________________________________________


In [144]:
X_train_loc1 = X_train_loc.reshape((X_train_loc.shape[0], n_steps_loc, 1, n_length, n_features))
X_test_loc1 = X_test_loc.reshape((X_test_loc.shape[0], n_steps_loc, 1, n_length, n_features))
y_train_loc1 = to_categorical(y_train_loc)
y_test_loc1 = to_categorical(y_test_loc)

In [146]:
locmodel.fit(X_train_loc1, y_train_loc1, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x29796e7f0>

In [147]:
model_loss_loc, model_accuracy_loc = locmodel.evaluate(X_test_loc1, y_test_loc1, verbose = 0)
print(f"Loss: {model_loss_loc}, Accuracy: {model_accuracy_loc}")

predictions_test_loc = locmodel.predict(X_test_loc1)
predictions_train_loc = locmodel.predict(X_train_loc1)

2022-03-13 10:23:47.833714: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Loss: 0.09330099821090698, Accuracy: 0.9796894192695618


2022-03-13 10:23:51.721112: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [148]:
class_test_loc = predictions_test_loc > 0.25
f1_score_test_loc = sklearn.metrics.f1_score(y_test_loc1, class_test_loc, average='micro')
print(f"Test F1 Score: {f1_score_test_loc}")

class_train_loc = predictions_train_loc > 0.25
f1_score_train_loc = sklearn.metrics.f1_score(y_train_loc1, class_train_loc, average='micro')
print(f"Train F1 Score: {f1_score_train_loc}")

In [None]:
# ##@ TODO: Make compatible
# tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_test_loc[:,1], class_test_loc[:,1]).ravel()
# (tn, fp, fn, tp)

In [166]:
# Exporting location model
locmodel.save('locmodelCNN_v2')

INFO:tensorflow:Assets written to: locmodelCNN_v2/assets


## Spot checking results

In [167]:
loaded_signalmodel = load_model('signalmodelCNN_v2')

In [168]:
loaded_locmodel = load_model('locmodelCNN_v2')

In [179]:
slice_df = sig_df.iloc[28]

In [180]:
sigfeatures = []
sigtargets = []
locfeatures = []
loctargets = []

tr = slice_df["trace"]
mag = abs(tr)
signal = slice_df["signal"]
p_arrival = slice_df["p_arrival"]
d = {"trace": tr, "magnitude":mag}
temp_df = pd.DataFrame(data = d)
temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()
    
temp_df.dropna(inplace = True)
sigfeatures.append(temp_df.values)
sigtargets.append(signal)

sigfeatures = np.array(sigfeatures)
n_timesteps, n_features, n_outputs = sigfeatures.shape[1], sigfeatures.shape[2], 2
n_length = int(n_timesteps/n_steps_sig)

sigfeatures1 = sigfeatures.reshape((sigfeatures.shape[0], n_steps_sig, 1, n_length, n_features))

In [181]:
loaded_signalmodel.predict(sigfeatures1)[0][1]

0.94776422

In [182]:
sigtargets

[1]

In [183]:
start_ind = 0
end_ind = start_ind + window_size
    
while end_ind < (1000 - roll_long):
    magwindow = temp_df["magnitude"].iloc[start_ind:end_ind]
    trwindow = temp_df["trace"].iloc[start_ind:end_ind]
    ravwindow = temp_df["RAV"].iloc[start_ind:end_ind]
    stvwindow = temp_df["STV"].iloc[start_ind:end_ind]
    ltvwindow = temp_df["LTV"].iloc[start_ind:end_ind]

    window_data = {"trace":trwindow, "magnitude": magwindow,
                    "RAV": ravwindow, "STV": stvwindow, "LTV": ltvwindow}
        
    window_df = pd.DataFrame(data = window_data)

    locfeatures.append(window_df.values)

    if ((p_arrival-roll_long) >= start_ind) and ((p_arrival-roll_long) <= end_ind):
        loctargets.append(1)
    else:
        loctargets.append(0)

    start_ind += window_step
    end_ind = start_ind + window_size
    

locfeatures = np.array(locfeatures)
n_timesteps, n_features, n_outputs = locfeatures.shape[1], locfeatures.shape[2], 2
n_length = int(n_timesteps/n_steps_loc)

locfeatures1 = locfeatures.reshape((locfeatures.shape[0], n_steps_loc, 1, n_length, n_features))

In [184]:
loaded_locmodel.predict(locfeatures1)[:,1]

array([ 0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02313247,
        0.02313247,  0.02313247,  0.02313247,  0.02313247,  0.02

In [177]:
loctargets

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0]