# **EQNeMix**

### EQNeMix is a mixed architecture that combines two widely-used neural networks in seismology: ConvNetQuake (Perol et al., 2018) and EQTransformer (Mousavi et al., 2020). Our algorithm employs a Gaussian mixture model for Bayesian Inference using the outputs generated by both neural networks. The ultimate outcome is a probabilistic location pinpointed using just a single seismic station.

##### A key aspect of its versatile design is adaptability: the algorithm is not confined to a single travel-time algorithm, and it accommodates a spectrum of options ranging from simpler to more intricate travel-time methods. Furthermore, various sampling techniques, such as variational inference and Hamiltonian sampling, can be seamlessly integrated.
##### This algorithm is applicable not only to individual seismic stations but can also be extended to entire seismic networks.

In [4]:
# Importing libraries
import ast
import json

import numpy as np
import pandas as pd
import pymc3 as pm
from obspy.core import UTCDateTime

#### **STEP 1:** Obtain events from EQTransformer prediction

In [5]:
# Load the EQTransformer prediction table
eqt_output = '/Users/jorge/EQTransformer/examples/detectionsCLC/CLC_outputs/X_prediction_results.csv'
df = pd.read_csv(eqt_output)

# Keep only rows whose detection, S-pick and P-pick probabilities all exceed
# their respective thresholds
confident = (
    (df['detection_probability'] > 0.95)
    & (df['s_probability'] > 0.88)
    & (df['p_probability'] > 0.80)
)
df_filtered = df[confident].copy()

# Convert both pick-time columns to obspy UTCDateTime objects
for col in ('p_arrival_time', 's_arrival_time'):
    df_filtered[col] = pd.to_datetime(df_filtered[col]).apply(UTCDateTime)

# S-minus-P time of every retained event
df_filtered['t_observed'] = df_filtered['s_arrival_time'] - df_filtered['p_arrival_time']

# S-minus-P time of the first retained event
t_observed = df_filtered['t_observed'].iloc[0]

# Show the filtered dataframe
df_filtered

Unnamed: 0,file_name,network,station,instrument_type,station_lat,station_lon,station_elv,event_start_time,event_end_time,detection_probability,detection_uncertainty,p_arrival_time,p_probability,p_uncertainty,p_snr,s_arrival_time,s_probability,s_uncertainty,s_snr,t_observed
2430,CLC_CI_HH_2019-07-05T13:48:48.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-05 13:49:42.648300,2019-07-05 13:49:45.238300,0.99,,2019-07-05T13:49:42.618300Z,0.82,,10.2,2019-07-05T13:49:43.468300Z,0.89,,6.4,0.85
2454,CLC_CI_HH_2019-07-05T14:01:24.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-05 14:01:48.688300,2019-07-05 14:01:50.498300,0.99,,2019-07-05T14:01:48.678300Z,0.84,,31.7,2019-07-05T14:01:49.448300Z,0.89,,11.1,0.77
3740,CLC_CI_HH_2019-07-06T01:14:54.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 01:15:22.528300,2019-07-06 01:15:24.568300,0.98,,2019-07-06T01:15:22.528300Z,0.83,,17.4,2019-07-06T01:15:23.158300Z,0.89,,5.2,0.63
6035,CLC_CI_HH_2019-07-06T21:26:36.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 21:27:28.128300,2019-07-06 21:27:30.888300,0.99,,2019-07-06T21:27:28.108300Z,0.85,,25.5,2019-07-06T21:27:29.268300Z,0.9,,5.4,1.16


#### **STEP 2:** Upload covariance ellipses information

In [None]:
# Select reference system: STA or TT
ref = 'TT'

# Choose dimensionality: 2D or 3D
dim = '3D'

# Load the ellipse-parameter JSON file of each of the 6 clusters.
# Every file is parsed into a Python object and stored, in cluster order,
# in `ellipse_data`.
ellipse_data = []
for i in range(6):
    file_path = f'/Users/cecilia/CONVN/data/6_clusters/csv_clusters/{dim}_{ref}/ellipse_parameters_{dim}_{ref}_{i}.json'
    with open(file_path, 'r') as file:
        data = json.load(file)
    # Append to the list initialised above (appending to any other name
    # would leave `ellipse_data` empty for the cells that index it).
    ellipse_data.append(data)

In [None]:
# Covariance matrix of each of the 6 clusters, taken from the 'Covariance'
# entry of the loaded ellipse parameters and converted to a NumPy array
cov_matrices = [np.array(ellipse_data[k]['Covariance']) for k in range(6)]

# Show covariance matrices
cov_matrices

In [None]:
# Mean vector of each of the 6 clusters, taken from the 'Mean' entry of the
# loaded ellipse parameters and converted to a NumPy array
mus = [np.array(ellipse_data[k]['Mean']) for k in range(6)]

# Show means
mus

#### **STEP 3:** Search events in ConvNetQuake results

In [None]:
# Read CNQ output file into a dataframe.
# The cells below read its 'start_time', 'end_time' and 'clusters_prob' columns.
cnq_output = '/Users/cecilia/CONVN/output/july_detections/from_stream/CI.CLC.2019-07-05.csv'
df_cnq = pd.read_csv(cnq_output)

In [None]:
# Extract P wave arrival time information from the EQT filtered catalog.
# NOTE(review): this takes row 1 of df_filtered, whereas STEP 1 takes
# t_observed from row 0 (iloc[0]) — confirm both refer to the intended event.
p_arrival_time = df_filtered['p_arrival_time'].iloc[1]
p_times = UTCDateTime(p_arrival_time)

# Filter CNQ Dataframe to find where p_times is in between start_time and end_time
# NOTE(review): start_time/end_time come straight from pd.read_csv with no
# date parsing, so they are presumably strings — confirm that comparing them
# with a UTCDateTime selects the intended window.
find_times = df_cnq[(df_cnq['start_time'] <= p_times) & (df_cnq['end_time'] >= p_times)]


In [None]:
# Cluster probabilities of the CNQ window selected in the previous cell
# (`find_times`); they are used as the mixture weights of the Bayesian model.
if find_times.empty:
    # Fail with an explicit message instead of an opaque IndexError below.
    raise ValueError("No ConvNetQuake window contains the selected P arrival time")

clusters_prob = find_times['clusters_prob']

# Only the first matching window is used.
clusters_weight = clusters_prob.tolist()[0]

# The value is parsed from its text form. literal_eval only accepts Python
# literals, so nothing stored in the CSV can be executed (unlike eval).
clusters_weight_i = ast.literal_eval(clusters_weight)

# One weight per cluster (6 clusters)
w0, w1, w2, w3, w4, w5 = clusters_weight_i[:6]

weights = [w0, w1, w2, w3, w4, w5]

#### **STEP 4:** Execute Bayesian Inference

In [None]:
%%time
# Observed S-P value [SECONDS], attributed to EQTransformer.
# The P and S times are hard-coded here, and t_observed is rebound to
# their difference.
tp_observed, ts_observed = 34.068300, 35.618300
t_observed = ts_observed - tp_observed
print(f"The t_observed value by EQT is: {t_observed}")

# Define the function S_P_t (Theoretical traveltime function) [SECONDS]
def S_P_t(x, y, st_loc=(1, 3), p_velocity=7100, s_velocity=2900):
    """Return the theoretical S-minus-P travel-time difference.

    The source-to-station distance in the (x, y) plane is multiplied by the
    slowness difference ``1 / s_velocity - 1 / p_velocity``, i.e. a
    straight path at constant velocities.

    Parameters
    ----------
    x, y : scalar or array-like
        Source coordinates. NOTE(review): presumably metres, to match the
        m/s velocities and give seconds — confirm against the cluster data.
    st_loc : sequence of two numbers, optional
        Station coordinates ``(x, y)``; defaults to ``(1, 3)``.
    p_velocity : float, optional
        P-wave velocity [METERS/SECOND]; defaults to 7100.
    s_velocity : float, optional
        S-wave velocity [METERS/SECOND]; defaults to 2900.

    Returns
    -------
    Same kind as the inputs
        Distance times the slowness difference.
    """
    slowness_diff = 1 / s_velocity - 1 / p_velocity
    dis = np.sqrt((x - st_loc[0]) ** 2 + (y - st_loc[1]) ** 2)
    return dis * slowness_diff

# Define the Bayesian model
with pm.Model() as model:
    # Latent cluster label, drawn with the cluster weights as probabilities
    category = pm.Categorical('category', p=weights)

    # One multivariate-normal candidate location per cluster, built from the
    # cluster mean and covariance. They are stored under a separate name so
    # the input list `mus` is not overwritten by random variables and this
    # cell can be re-run.
    # NOTE(review): shape=2 assumes two-component means and 2x2 covariances,
    # while STEP 2 sets dim = '3D' — confirm the loaded ellipse data match.
    mu_rvs = [
        pm.MvNormal(f'mu{i}', mu=mus[i], cov=cov_matrices[i], shape=2)
        for i in range(len(weights))
    ]

    # Select the coordinates of the candidate that corresponds to the sampled
    # category. Indexing a stacked vector works for any number of clusters.
    x = pm.Deterministic('x', pm.math.stack([mu[0] for mu in mu_rvs])[category])
    y = pm.Deterministic('y', pm.math.stack([mu[1] for mu in mu_rvs])[category])

    # Calculate t using the theoretical function
    t = S_P_t(x, y)

    # Likelihood of the observed S-P time around the theoretical one
    obs = pm.Normal('obs', mu=t, sigma=0.1, observed=t_observed)

with model:
    trace = pm.sample(300, tune=50, cores=1)

# Results summary
pm.summary(trace)

#pm.traceplot(trace)
#pm.autocorrplot(trace)