# **EQNmix**

### EQNmix is a mixed architecture that combines two widely used neural networks in seismology: ConvNetQuake (Perol et al., 2018) and EQTransformer (Mousavi et al., 2020). Our algorithm employs a Gaussian mixture model for Bayesian inference using the outputs generated by both neural networks. The ultimate outcome is a probabilistic location pinpointed using just a single seismic station.

##### A key aspect of the algorithm's versatile design is its adaptability: it is not confined to a single travel-time algorithm, but accommodates a spectrum of options ranging from simple to more intricate travel-time methods. Furthermore, various sampling techniques, such as variational inference and Hamiltonian sampling, can be seamlessly integrated.
##### This algorithm is applicable not only to individual seismic stations but can also be extended to entire seismic networks.

###### Information on the TEST events, obtained from the **Southern California Earthquake Data Center (SCEDC)**

##### **Event A** \| 2019/07/04 19\:21:32.09 eq  l 4.50 w   35.67150 -117.47883   5.2 A 38443871  120 3331
###### Mixing coefficients by CNQ: \[0.06642566, 0.13303314, 0.018152032, 0.2676338, 0.03821565, 0.27928686\]
           
##### **Event B** \| 2019/07/05 12\:38:30.02 eq  l 4.09 w   35.77167 -117.57067   6.8 A 38451079  107 3341
###### Mixing coefficients by CNQ: \[0.06656365, 0.13407934, 0.018142378, 0.2691841, 0.038651247, 0.2805622\]

##### **Event C** \| 2019/07/06 23\:50:41.99 eq  l 4.50 w   35.82350 -117.66300   6.5 A 38469375  210 2460
###### Mixing coefficients by CNQ: \[0.0666608, 0.13491394, 0.018160287, 0.27093622, 0.03867901, 0.28238913\]

In [1]:
# Importing libraries
import numpy as np
import pymc3 as pm
import pandas as pd
import theano
import theano.tensor as tt
import json
from obspy.core import UTCDateTime
import pyproj

In [2]:
# Load the EQTransformer (EQT) prediction catalogue
eqt_output = 'X_prediction_results.csv'
df = pd.read_csv(eqt_output)
# Keep only the detections whose detection, S and P probabilities all
# exceed their respective thresholds
confident = (
    (df['detection_probability'] > 0.95)
    & (df['s_probability'] > 0.88)
    & (df['p_probability'] > 0.80)
)
df_filtered = df.loc[confident].copy()  # independent copy, safe to modify
# Turn both pick-time columns into obspy UTCDateTime objects
for column in ('p_arrival_time', 's_arrival_time'):
    df_filtered[column] = pd.to_datetime(df_filtered[column]).apply(UTCDateTime)
# S-minus-P arrival time difference for every retained detection
df_filtered['t_observed'] = df_filtered['s_arrival_time'] - df_filtered['p_arrival_time']
# Observed S-P time of the fourth retained detection (positional index 3)
t_observed = df_filtered['t_observed'].iloc[3]

In [7]:
# Select reference system: STA or TT
ref = 'TT'
# Choose dimensionality: 2D or 3D
dim = '3D'
# Number of clusters; one ellipse-parameter file is read per cluster
n_clusters = 6

# Load the ellipse parameters (JSON) of every cluster
ellipse_data = []
for i in range(n_clusters):
    file_path = f'/Users/cecilia/CONVN/data/6_clusters/csv_clusters/{dim}_{ref}/ellipse_parameters_{dim}_{ref}_{i}'
    with open(file_path, 'r') as file:
        ellipse_data.append(json.load(file))

# Covariance matrix and mean vector of each cluster, in cluster order
cov_matrices = [np.array(params['Covariance']) for params in ellipse_data]
mus = [np.array(params['Mean']) for params in ellipse_data]

# Read CNQ output file
cnq_output = '/Users/cecilia/CONVN/output/july_detections/from_stream/CI.CLC.2019-07-05.csv'
df_cnq = pd.read_csv(cnq_output)

# Extract P-wave arrival time information from the EQT filtered catalog
p_arrival_time = df_filtered['p_arrival_time'].iloc[3]
p_times = UTCDateTime(p_arrival_time)

# Filter the CNQ dataframe to the rows where p_times lies between
# start_time and end_time
find_times = df_cnq[(df_cnq['start_time'] <= p_times) & (df_cnq['end_time'] >= p_times)]
clusters_prob = find_times['clusters_prob']

# Position, among the matching rows, of the row whose cluster
# probabilities are used as mixture weights.
# NOTE(review): this was a hard-coded 2; confirm it is the intended row.
window_index = 2
matching_probs = clusters_prob.tolist()
if len(matching_probs) <= window_index:
    # Same exception type as the bare list index, but with a message that
    # explains what is missing.
    raise IndexError(
        f'Only {len(matching_probs)} CNQ window(s) contain the P arrival '
        f'{p_times}; cannot select window_index={window_index}'
    )
clusters_weight = matching_probs[window_index]

# NOTE(review): eval() executes whatever text is stored in the CSV column.
# If the column always holds a plain list literal, ast.literal_eval would
# be a safer parser - confirm the stored format before switching.
clusters_weight_i = eval(clusters_weight)

# One mixing coefficient per cluster (IndexError if fewer are available)
weights = [clusters_weight_i[k] for k in range(n_clusters)]

IndexError: list index out of range

### TEST EVENT A

In [4]:
# Report the observed S-P time derived from the EQT picks
print(f"The t_observed value by EQT is: {t_observed}")
# The means of each confidence ellipse (one per category) were calculated in
# Building_Confidence_Ellipses_meters.ipynb [METERS]
# Define the function S_P_t (Theoretical traveltime function) [SECONDS]
def S_P_t(x, y, z,
          filename="/Users/roberto/LIATRAB/EQNeMix/PYEIFMM/tsp.npy",
          grid_step=500):
    """Look up the theoretical S-P travel time for a position (x, y, z).

    The travel-time table stored in ``filename`` is loaded, wrapped in a
    Theano shared variable, and indexed with the grid cell that contains
    the position, so the result can be used inside a PyMC3 model.

    Parameters
    ----------
    x, y, z : Theano tensors (or values Theano can convert)
        Position coordinates, in the same units as ``grid_step``
        (presumably metres - confirm against the table's construction).
    filename : str, optional
        Path of the ``.npy`` travel-time table, indexed as
        ``table[x_index, y_index, z_index]``.
    grid_step : int, optional
        Spacing of the table's grid; coordinates are floor-divided by it
        to obtain array indices. Defaults to 500.

    Returns
    -------
    Theano tensor
        The table entry of the selected cell (documented in the notebook
        as a travel time in seconds).

    Notes
    -----
    No bounds checking is performed. NOTE(review): negative coordinates
    yield negative indices, which would wrap around to the end of the
    table - verify that sampled positions stay inside the grid.
    """
    tsp = np.load(filename)
    tsp2 = theano.shared(tsp)
    # The cell index is floor(coordinate / grid_step). Rounding down to a
    # multiple of the step and dividing by the step again gives the same
    # integer, so the index is computed directly.
    x_index = tt.cast(tt.floor_div(x, grid_step), 'int64')
    y_index = tt.cast(tt.floor_div(y, grid_step), 'int64')
    z_index = tt.cast(tt.floor_div(z, grid_step), 'int64')
    return tsp2[x_index, y_index, z_index]

# Define the Bayesian model
with pm.Model() as model:
    # Discrete latent variable that picks one cluster; the CNQ cluster
    # probabilities act as its prior weights.
    category = pm.Categorical('category', p=weights)

    # One 3-component multivariate-normal location per cluster. Stored under
    # a new name so the numeric `mus` list is not overwritten and this cell
    # can be re-run.
    mu_rvs = [
        pm.MvNormal(f'mu{i}', mu=mus[i], cov=cov_matrices[i], shape=3)
        for i in range(len(weights))
    ]

    def _select_component(axis):
        """Return component `axis` of the location of the chosen category.

        Builds the nested switch(category == k, mu_k[axis], ...) expression,
        with the last cluster as the fall-through value.
        """
        selected = mu_rvs[-1][axis]
        for k in reversed(range(len(mu_rvs) - 1)):
            selected = pm.math.switch(
                pm.math.eq(category, k), mu_rvs[k][axis], selected
            )
        return selected

    # Coordinates of the selected cluster's location. The z coordinate uses
    # component 2 for every category (category 0 previously picked up the y
    # component by mistake).
    x = pm.Deterministic('x', _select_component(0))
    y = pm.Deterministic('y', _select_component(1))
    z = pm.Deterministic('z', _select_component(2))

    # Calculate t using the theoretical function
    t = S_P_t(x, y, z)

    # Likelihood of the observed S-P time
    obs = pm.Normal('obs', mu=t, sigma=0.1, observed=t_observed)

with model:
    trace = pm.sample(300, tune=50, cores=1)

# Trace summary
summary_df = pm.summary(trace)

# Convert summary to dataframe
summary_df = pd.DataFrame(summary_df)

# Show DataFrame
print(summary_df)

#pm.traceplot(trace)
#pm.autocorrplot(trace)

The t_observed value by EQT is: 1.16


NameError: name 'weights' is not defined

In [5]:
# Extract the posterior means of 'x', 'y', 'z' from the 'mean' column
x_utm = summary_df.at['x', 'mean']
y_utm = summary_df.at['y', 'mean']
z_utm = summary_df.at['z', 'mean']

# Transformer from EPSG:32611 (UTM zone 11N, WGS84) to EPSG:4326 (WGS84
# geographic). With always_xy=True the output order is (longitude, latitude).
latlon_proj = pyproj.Transformer.from_crs(32611, 4326, always_xy=True)

# Transformer from EPSG:4326 to EPSG:32611. With always_xy=True the input
# order is (longitude, latitude).
utm_proj = pyproj.Transformer.from_crs(4326, 32611, always_xy=True)

# Relative reference origin coordinates
ref_latitude = 35.2
ref_longitude = -118.2
ref_depth = 0

# Transform reference coordinates to UTM (easting, northing)
ref_longitude_utm, ref_latitude_utm = utm_proj.transform(ref_longitude, ref_latitude)

# Shift the relative coordinates by the reference origin
x_utm += ref_longitude_utm
y_utm += ref_latitude_utm

# Convert depth from meters [m] to kilometers [km]
z_depth = z_utm/1000

# Transform coordinates from UTM to longitude, latitude
x_longitude, y_latitude = latlon_proj.transform(x_utm,y_utm)

# Print transformed coordinates (each label matches the value it prints)
print(f'Latitude: {y_latitude}°')
print(f'Longitude: {x_longitude}°')
print(f'Depth: {z_depth} [km]')


NameError: name 'summary_df' is not defined

In [None]:
def metolatlon(x_utm, y_utm, z_utm, ref_latitude=35.2, ref_longitude=-118.2):
    """Convert origin-relative UTM coordinates to longitude, latitude, depth.

    Parameters
    ----------
    x_utm, y_utm : float or array-like
        Easting and northing offsets, in metres, relative to the reference
        origin. The inputs are not modified.
    z_utm : float or array-like
        Depth in metres.
    ref_latitude, ref_longitude : float, optional
        Geographic coordinates (degrees) of the relative reference origin.

    Returns
    -------
    tuple
        ``(longitude, latitude, depth_km)``. Note that longitude comes
        first, despite the function's name.
    """
    # EPSG:32611 (UTM zone 11N, WGS84) -> EPSG:4326 (WGS84 geographic);
    # always_xy=True fixes the axis order to (longitude, latitude).
    latlon_proj = pyproj.Transformer.from_crs(32611, 4326, always_xy=True)

    # EPSG:4326 -> EPSG:32611, taking (longitude, latitude) as input
    utm_proj = pyproj.Transformer.from_crs(4326, 32611, always_xy=True)

    # Reference origin expressed in UTM (easting, northing)
    ref_easting, ref_northing = utm_proj.transform(ref_longitude, ref_latitude)

    # Shift by the reference origin. New objects are created instead of using
    # `+=`, which would modify a caller-owned array in place.
    easting = x_utm + ref_easting
    northing = y_utm + ref_northing

    # Convert depth from meters [m] to kilometers [km]
    z_depth = z_utm / 1000

    # Transform coordinates from UTM to longitude, latitude
    x_longitude, y_latitude = latlon_proj.transform(easting, northing)

    return x_longitude, y_latitude, z_depth

# Example: convert the posterior mean location to geographic coordinates
x_utm_example = summary_df.at['x', 'mean']
y_utm_example = summary_df.at['y', 'mean']
z_utm_example = summary_df.at['z', 'mean']

# metolatlon returns (longitude, latitude, depth in km), in that order
result = metolatlon(x_utm_example, y_utm_example, z_utm_example)

print(f'Latitude: {result[1]}°')
print(f'Longitude: {result[0]}°')
print(f'Depth: {result[2]} [km]')
