In [1]:
import os
import obspy
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Flatten, LSTM, Dense, Dropout, Conv1D
from tensorflow.keras.layers import TimeDistributed, ConvLSTM2D, MaxPooling1D
from tensorflow.keras.utils import to_categorical
from pathlib import Path
%run helperfunctions.ipynb
#os.getcwd()

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
os.chdir('lse_challenge/data')

In [4]:
# Signal List
os.chdir('signal')
signals = []
signal_files = os.listdir()
for sigfile in signal_files: 
    stream = obspy.read(sigfile)
    cutlength = np.random.randint(16, 95) # Aligning with the [0.5 sec, 3 sec] range
    
    for tr in stream:
        tr.data = tr.data[cutlength:1000+cutlength]
        tr.normalize()
    
    trace_mat = [stream[0].data, stream[1].data, stream[2].data]
    signals.append([trace_mat[0], cutlength, 1])
    signals.append([trace_mat[1], cutlength, 1])
    signals.append([trace_mat[2], cutlength, 1])


# Noise List
os.chdir('../noise')
noise_files = os.listdir()
noise_files = random.sample(noise_files, 2000) #Need to expand for final model
for noisefile in noise_files: 
    stream = obspy.read(noisefile)
    cutlength = np.random.randint(16, 95) # Aligning with the [0.5 sec, 3 sec] range
    
    for tr in stream:
        tr.data = tr.data[cutlength:1000+cutlength]
        tr.normalize()
    
    trace_mat = [stream[0].data, stream[1].data, stream[2].data]
    signals.append([trace_mat[0], cutlength, 0])
    signals.append([trace_mat[1], cutlength, 0])
    signals.append([trace_mat[2], cutlength, 0])
    
os.chdir('../')

sig_df = pd.DataFrame(signals, columns = ["trace", "cutlength", "signal"])
sig_df['p_arrival'] = 1000 - sig_df['cutlength']

## Training Neural Network for dectecting whether or not the trace has a signal

In [5]:
# For rolling averages
roll_short = 25
roll_long = 50

# For location segments
window_step = 20
window_size = 20

# Step size for CNN
n_steps_sig = 3
n_steps_loc = 2

In [6]:
# Features and Targets for Identifying the Entire Trace

sigfeatures = []
sigtargets = []

for index, slice_df in sig_df.iterrows():
    tr = slice_df["trace"]
    mag = abs(tr)
    signal = slice_df["signal"]
    p_arrival = slice_df["p_arrival"]
    d = {"trace": tr, "magnitude":mag}
    
    temp_df = pd.DataFrame(data = d)
    temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
    temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
    temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
    temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
    temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()
    
    temp_df.dropna(inplace = True)
    
    sigfeatures.append(temp_df.values)
    sigtargets.append(signal)

In [7]:
X_train_sig, X_test_sig, y_train_sig, y_test_sig = train_test_split(sigfeatures, sigtargets, test_size = 0.3)

In [8]:
X_train_sig = np.array(X_train_sig)
X_test_sig = np.array(X_test_sig)
y_train_sig = np.array(y_train_sig)
y_test_sig = np.array(y_test_sig)

In [9]:
n_timesteps, n_features, n_outputs = X_train_sig.shape[1], X_train_sig.shape[2], 2
n_length = int(n_timesteps/n_steps_sig)

In [10]:
signalmodel = Sequential()
signalmodel.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(n_steps_sig, n_length, n_features)))
signalmodel.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
signalmodel.add(TimeDistributed(Dropout(0.5)))
signalmodel.add(TimeDistributed(MaxPooling1D(pool_size=2)))
signalmodel.add(TimeDistributed(Flatten()))
signalmodel.add(LSTM(100))
signalmodel.add(Dropout(0.5))
signalmodel.add(Dense(100, activation='relu'))
signalmodel.add(Dense(n_outputs, activation='softmax'))
signalmodel.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

signalmodel.summary()

Metal device set to: Apple M1


2022-03-14 09:39:08.696888: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-14 09:39:08.697139: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 3, 315, 64)       1408      
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 3, 313, 64)       12352     
 tributed)                                                       
                                                                 
 time_distributed_2 (TimeDis  (None, 3, 313, 64)       0         
 tributed)                                                       
                                                                 
 time_distributed_3 (TimeDis  (None, 3, 156, 64)       0         
 tributed)                                                       
                                                                 
 time_distributed_4 (TimeDis  (None, 3, 9984)          0

In [11]:
X_train_sig1 = X_train_sig.reshape((X_train_sig.shape[0], n_steps_sig, n_length, n_features))
X_test_sig1 = X_test_sig.reshape((X_test_sig.shape[0], n_steps_sig, n_length, n_features))
y_train_sig1 = to_categorical(y_train_sig)
y_test_sig1 = to_categorical(y_test_sig)

In [12]:
signalmodel.fit(X_train_sig1, y_train_sig1, epochs = 25)

Epoch 1/25


2022-03-14 09:39:09.530723: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-03-14 09:39:10.306562: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 09:39:10.474843: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


  1/212 [..............................] - ETA: 4:13 - loss: 0.7160 - accuracy: 0.3750

2022-03-14 09:39:10.621782: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x16e6f4d00>

In [13]:
model_loss_sig, model_accuracy_sig = signalmodel.evaluate(X_test_sig1, y_test_sig1, verbose = 0)
print(f"Loss: {model_loss_sig}, Accuracy: {model_accuracy_sig}")

predictions_test_sig = signalmodel.predict(X_test_sig1)
predictions_train_sig = signalmodel.predict(X_train_sig1)

2022-03-14 09:41:22.551670: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 09:41:22.624658: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Loss: 0.11442726105451584, Accuracy: 0.9813857674598694


2022-03-14 09:41:23.807684: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 09:41:23.857491: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [14]:
class_test_sig = predictions_test_sig > 0.2
f1_score_test_sig = sklearn.metrics.f1_score(y_test_sig1, class_test_sig, average = 'micro')
print(f"Test F1 Score: {f1_score_test_sig}")

class_train_sig = predictions_train_sig > 0.2
f1_score_train_sig = sklearn.metrics.f1_score(y_train_sig1, class_train_sig, average = 'micro')
print(f"Train F1 Score: {f1_score_train_sig}")

Test F1 Score: 0.9797459663577068
Train F1 Score: 1.0


In [15]:
# ##@ TODO: Make compatible
# tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_test_sig1, class_test_sig).ravel()
# (tn, fp, fn, tp)

In [16]:
# Exporting Model
os.getcwd()

'/Users/albertkong/grillochallenge/lse_challenge/data'

In [17]:
# Only run if we need to get to root directory
os.chdir('../../')

In [18]:
# Exporting signal model
signalmodel.save('signalmodelCNN_v3')

2022-03-14 09:41:28.352137: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: signalmodelCNN_v3/assets


INFO:tensorflow:Assets written to: signalmodelCNN_v3/assets


## Training Neural Network for dectecting where in the trace the p-wave starts

In [53]:
# Features and Targets for Identifying Windows of Trace
locfeatures = []
loctargets = []

for index, slice_df in sig_df.iterrows():
    
    signal = slice_df["signal"]
    
    if signal == 1:
        tr = slice_df["trace"]
        mag = abs(tr)

        p_arrival = slice_df["p_arrival"]
        d = {"trace":tr, "magnitude":mag}

        temp_df = pd.DataFrame(data = d)
        temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
        temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
        temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
        temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
        temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()

        temp_df.dropna(inplace = True)

        start_ind = 0
        end_ind = start_ind + window_size

        while end_ind < (1000 - roll_long):
            trwindow = temp_df["trace"].iloc[start_ind:end_ind]
            magwindow = temp_df["magnitude"].iloc[start_ind:end_ind]
            ravwindow = temp_df["RAV"].iloc[start_ind:end_ind]
            stvwindow = temp_df["STV"].iloc[start_ind:end_ind]
            ltvwindow = temp_df["LTV"].iloc[start_ind:end_ind]

            window_data = {"trace": trwindow, "magnitude": magwindow,
                        "RAV": ravwindow, "STV": stvwindow, "LTV": ltvwindow}
            window_df = pd.DataFrame(data = window_data)

            locfeatures.append(window_df.values)

            if ((p_arrival - roll_long + 1) >= start_ind) and ((p_arrival - roll_long + 1) <= end_ind):
                loctargets.append(1)
            else:
                loctargets.append(0)

            start_ind += window_step
            end_ind = start_ind + window_size

In [54]:
# For Windows of Trace
X_train_loc, X_test_loc, y_train_loc, y_test_loc = train_test_split(locfeatures, loctargets, test_size = 0.3)

In [55]:
X_train_loc = np.array(X_train_loc)
X_test_loc = np.array(X_test_loc)
y_train_loc = np.array(y_train_loc)
y_test_loc = np.array(y_test_loc)

In [56]:
n_timesteps, n_features, n_outputs = X_train_loc.shape[1], X_train_loc.shape[2], 2
n_length = int(n_timesteps/n_steps_loc)

In [57]:
locmodelcl = Sequential()
locmodelcl.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None, n_length, n_features)))
locmodelcl.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
locmodelcl.add(TimeDistributed(Dropout(0.5)))
locmodelcl.add(TimeDistributed(MaxPooling1D(pool_size=2)))
locmodelcl.add(TimeDistributed(Flatten()))
locmodelcl.add(LSTM(100))
locmodelcl.add(Dropout(0.5))
locmodelcl.add(Dense(100, activation='relu'))
locmodelcl.add(Dense(n_outputs, activation='softmax'))
locmodelcl.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

locmodelcl.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_15 (TimeDi  (None, None, 8, 64)      1024      
 stributed)                                                      
                                                                 
 time_distributed_16 (TimeDi  (None, None, 6, 64)      12352     
 stributed)                                                      
                                                                 
 time_distributed_17 (TimeDi  (None, None, 6, 64)      0         
 stributed)                                                      
                                                                 
 time_distributed_18 (TimeDi  (None, None, 3, 64)      0         
 stributed)                                                      
                                                                 
 time_distributed_19 (TimeDi  (None, None, 192)       

In [58]:
X_train_loc1 = X_train_loc.reshape((X_train_loc.shape[0], n_steps_loc, n_length, n_features))
X_test_loc1 = X_test_loc.reshape((X_test_loc.shape[0], n_steps_loc, n_length, n_features))
y_train_loc1 = to_categorical(y_train_loc)
y_test_loc1 = to_categorical(y_test_loc)

In [59]:
locmodelcl.fit(X_train_loc1, y_train_loc1, epochs = 10)

Epoch 1/10


2022-03-14 10:14:36.850907: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 10:14:37.489345: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 10:14:37.633015: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2ee174b80>

In [60]:
model_loss_loc, model_accuracy_loc = locmodelcl.evaluate(X_test_loc1, y_test_loc1, verbose = 0)
print(f"Loss: {model_loss_loc}, Accuracy: {model_accuracy_loc}")

predictions_test_loc = locmodelcl.predict(X_test_loc1)
predictions_train_loc = locmodelcl.predict(X_train_loc1)

2022-03-14 10:17:06.380678: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 10:17:06.489447: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Loss: 0.06387130171060562, Accuracy: 0.9847679734230042


2022-03-14 10:17:16.193223: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 10:17:16.258494: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [61]:
class_test_loc = predictions_test_loc > 0.25
f1_score_test_loc = sklearn.metrics.f1_score(y_test_loc1, class_test_loc, average='micro')
print(f"Test F1 Score: {f1_score_test_loc}")

class_train_loc = predictions_train_loc > 0.25
f1_score_train_loc = sklearn.metrics.f1_score(y_train_loc1, class_train_loc, average='micro')
print(f"Train F1 Score: {f1_score_train_loc}")

Test F1 Score: 0.9840328203547835
Train F1 Score: 0.9838648443432042


In [62]:
# ##@ TODO: Make compatible
# tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_test_loc1[:,1], class_test_loc[:,1]).ravel()
# (tn, fp, fn, tp)

In [63]:
# Exporting location model
locmodelcl.save('locmodelCNN_v3')



INFO:tensorflow:Assets written to: locmodelCNN_v3_2/assets


INFO:tensorflow:Assets written to: locmodelCNN_v3_2/assets


## Spot checking results

In [30]:
loaded_signalmodel = load_model('signalmodelCNN_v3')

In [31]:
loaded_locmodel = load_model('locmodelCNN_v3')

In [32]:
slice_df = sig_df.iloc[28]

In [35]:
sigfeatures = []
sigtargets = []
locfeatures = []
loctargets = []

tr = slice_df["trace"]
mag = abs(tr)
signal = slice_df["signal"]
p_arrival = slice_df["p_arrival"]
d = {"trace": tr, "magnitude":mag}
temp_df = pd.DataFrame(data = d)
temp_df["STA"] = temp_df["magnitude"].rolling(roll_short).mean()
temp_df["LTA"] = temp_df["magnitude"].rolling(roll_long).mean()
temp_df["RAV"] = temp_df["STA"]/temp_df["LTA"]
temp_df["STV"] = temp_df["magnitude"].rolling(roll_short).var()
temp_df["LTV"] = temp_df["magnitude"].rolling(roll_long).var()
    
temp_df.dropna(inplace = True)
sigfeatures.append(temp_df.values)
sigtargets.append(signal)

sigfeatures = np.array(sigfeatures)
n_timesteps, n_features, n_outputs = sigfeatures.shape[1], sigfeatures.shape[2], 2
n_length = int(n_timesteps/n_steps_sig)

sigfeatures1 = sigfeatures.reshape((sigfeatures.shape[0], n_steps_sig, n_length, n_features))

In [36]:
loaded_signalmodel.predict(sigfeatures1)[0][1]

2022-03-14 09:54:17.458231: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 09:54:17.546155: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


0.99253815

In [37]:
sigtargets

[1]

In [38]:
start_ind = 0
end_ind = start_ind + window_size
    
while end_ind < (1000 - roll_long):
    magwindow = temp_df["magnitude"].iloc[start_ind:end_ind]
    trwindow = temp_df["trace"].iloc[start_ind:end_ind]
    ravwindow = temp_df["RAV"].iloc[start_ind:end_ind]
    stvwindow = temp_df["STV"].iloc[start_ind:end_ind]
    ltvwindow = temp_df["LTV"].iloc[start_ind:end_ind]

    window_data = {"trace":trwindow, "magnitude": magwindow,
                    "RAV": ravwindow, "STV": stvwindow, "LTV": ltvwindow}
        
    window_df = pd.DataFrame(data = window_data)

    locfeatures.append(window_df.values)

    if ((p_arrival-roll_long) >= start_ind) and ((p_arrival-roll_long) <= end_ind):
        loctargets.append(1)
    else:
        loctargets.append(0)

    start_ind += window_step
    end_ind = start_ind + window_size
    

locfeatures = np.array(locfeatures)
n_timesteps, n_features, n_outputs = locfeatures.shape[1], locfeatures.shape[2], 2
n_length = int(n_timesteps/n_steps_loc)

locfeatures1 = locfeatures.reshape((locfeatures.shape[0], n_steps_loc, n_length, n_features))

In [39]:
loaded_locmodel.predict(locfeatures1)[:,1]

2022-03-14 09:54:24.420603: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 09:54:24.471932: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


array([ 0.01167935,  0.01299282,  0.01216586,  0.01807489,  0.01110025,
        0.010433  ,  0.00977269,  0.01119774,  0.01333069,  0.01247729,
        0.01368018,  0.00893758,  0.01112482,  0.00910873,  0.00860235,
        0.01526659,  0.01021155,  0.00925542,  0.00966458,  0.01995534,
        0.01288745,  0.01324253,  0.01017845,  0.01262258,  0.01123476,
        0.01550565,  0.01340404,  0.01293549,  0.01382197,  0.01887472,
        0.01322759,  0.00754811,  0.01145441,  0.01104216,  0.00949281,
        0.01460842,  0.01191368,  0.01885998,  0.010083  ,  0.01090752,
        0.01114371,  0.01978127,  0.01489686,  0.00829664,  0.01323323,
        0.01397049,  0.05023283], dtype=float32)

In [40]:
loctargets

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0]