# **EQNmix**

### EQNmix is a mixed architecture that combines two widely-used neural networks in seismology: ConvNetQuake (Perol et al., 2018) and EQTransformer (Mousavi et al., 2020). Our algorithm employs a Gaussian mixture model for Bayesian Inference using the outputs generated by both neural networks. The ultimate outcome is a probabilistic location pinpointed using just a single seismic station.ks.

##### An integral facet of its versatile design is the algorithm's adaptability, as it is not confined to a single travel-time algorithm. It accommodates a spectrum of options ranging from simpler to more intricate travel-time methods. Furthermore, various sampling techniques such as variational inference, Hamiltonian sampling, among others, can be seamlessly integrated. 
##### This algorithm is applicable not only to individual seismic stations but can also be extended to entire seismic networks.

###### Information of the TEST events obtained by the **Southern California Earthquake Data Center (SCEDC)**

In [1]:
# Importing libraries
import numpy as np
import pymc3 as pm
import pandas as pd
import theano
import theano.tensor as tt
import json
from obspy.core import UTCDateTime
import pyproj

In [2]:
# Read EQT output file
eqt_output = '/Users/jorge/EQTransformer/examples/detectionsCLC/CLC_outputs/X_prediction_results.csv'
df = pd.read_csv(eqt_output)
# Filter events in the dataframe
df_filtered = df[(df['detection_probability'] > 0.95) & 
                 (df['s_probability'] > 0.88) & 
                 (df['p_probability'] > 0.80)].copy()  # Create a copy
# Apply UTCDateTime transformation to the copy
df_filtered['p_arrival_time'] = pd.to_datetime(df_filtered['p_arrival_time']).apply(UTCDateTime)
df_filtered['s_arrival_time'] = pd.to_datetime(df_filtered['s_arrival_time']).apply(UTCDateTime)
# Calculate the difference between S and P arrival times
df_filtered['t_observed'] = df_filtered['s_arrival_time'] - df_filtered['p_arrival_time']
t_observed = df_filtered['t_observed'].iloc[0]
# Show df_filtered
print(df_filtered)


                                  file_name network station instrument_type  \
2430  CLC_CI_HH_2019-07-05T13:48:48.008300Z      CI    CLC               HH   
2454  CLC_CI_HH_2019-07-05T14:01:24.008300Z      CI    CLC               HH   
3740  CLC_CI_HH_2019-07-06T01:14:54.008300Z      CI    CLC               HH   
6035  CLC_CI_HH_2019-07-06T21:26:36.008300Z      CI    CLC               HH   

      station_lat  station_lon  station_elv            event_start_time  \
2430     35.81574   -117.59751          775  2019-07-05 13:49:42.648300   
2454     35.81574   -117.59751          775  2019-07-05 14:01:48.688300   
3740     35.81574   -117.59751          775  2019-07-06 01:15:22.528300   
6035     35.81574   -117.59751          775  2019-07-06 21:27:28.128300   

                  event_end_time  detection_probability  \
2430  2019-07-05 13:49:45.238300                   0.99   
2454  2019-07-05 14:01:50.498300                   0.99   
3740  2019-07-06 01:15:24.568300                   

In [3]:
# Select reference system: STA or TT
ref = 'TT'
# Choose dimensionality: 2D or 3D
dim = '3D'
# Upload json files
ellipse_data = []
datos=[]
for i in range(6):
    file_path = f'/Users/cecilia/CONVN/data/6_clusters/csv_clusters/{dim}_{ref}/ellipse_parameters_{dim}_{ref}_{i}'
    with open(file_path, 'r') as file:
        data = json.load(file)
    ellipse_data.append(data)
cov_matrices = []
for i in range(6):
    cov_matrices.append(np.array(ellipse_data[i]['Covariance']))
#clusters_prob = search_times['clusters_prob']
# Read CNQ output file
means = []
for i in range(6):
    means.append(np.array(ellipse_data[i]['Mean']))
cnq_output = '/Users/cecilia/CONVN/output/july_detections/from_stream/CI.CLC.2019-07-05.csv'
df_cnq = pd.read_csv(cnq_output)
# Extract P wave arrival times infromation from EQT filtered catalog
p_arrival_time = df_filtered['p_arrival_time'].iloc[1]
p_times = UTCDateTime(p_arrival_time)
# Filter CNQ Dataframe to find where p_times is in between start_time and end_time
find_times = df_cnq[(df_cnq['start_time'] <= p_times) & (df_cnq['end_time'] >= p_times)]
clusters_prob = find_times['clusters_prob']
clusters_weight = clusters_prob.tolist()[0]
clusters_weight_i = eval(clusters_weight)
w0 = clusters_weight_i[0]
w1 = clusters_weight_i[1]
w2 = clusters_weight_i[2]
w3 = clusters_weight_i[3]
w4 = clusters_weight_i[4]
w5 = clusters_weight_i[5]
weights = [w0, w1, w2, w3, w4, w5] 

### TEST EVENT A

In [4]:
print(f"The t_observed value by EQT is: {t_observed}")
# Define specific bidimensional means for each ellipse calculated in 
# Building_Confidence_Ellipses_meters.ipynb (category) [METERS]
# Define the function S_P_t (Theoretical traveltime function) [SECONDS]
def S_P_t(x, y,z):
    filename="/Users/roberto/LIATRAB/EQNeMix/PYEIFMM/tsp.npy"
    tsp = np.load(filename)
    tsp2 = theano.shared(tsp) 
    X_rounded = tt.cast(tt.floor_div(x, 500) * 500, 'int64')
    Y_rounded = tt.cast(tt.floor_div(y, 500) * 500, 'int64')
    Z_rounded = tt.cast(tt.floor_div(z, 500) * 500, 'int64')
    # Find the corresponding indices in the tsp array
    x_index = X_rounded // 500
    y_index = Y_rounded // 500
    z_index = Z_rounded // 500
    tval=tsp2[x_index,y_index,z_index]
    return tval

# Define the Bayesian model
with pm.Model() as model:
    # Define the categories to choose the means
    category = pm.Categorical('category', p=weights)

    # Define the means corresponding to the categories
    mus = [pm.MvNormal(f'mu{i}', mu=means[i], cov=cov_matrices[i], shape=3) for i in range(len(weights))]

    # Select the averages corresponding to the selected category.
    x = pm.Deterministic('x', pm.math.switch(
        pm.math.eq(category, 0), mus[0][0],
        pm.math.switch(pm.math.eq(category, 1), mus[1][0],
        pm.math.switch(pm.math.eq(category, 2), mus[2][0],
        pm.math.switch(pm.math.eq(category, 3), mus[3][0],
        pm.math.switch(pm.math.eq(category, 4), mus[4][0], mus[5][0]))))))
    
    y = pm.Deterministic('y', pm.math.switch(
        pm.math.eq(category, 0), mus[0][1],
        pm.math.switch(pm.math.eq(category, 1), mus[1][1],
        pm.math.switch(pm.math.eq(category, 2), mus[2][1],
        pm.math.switch(pm.math.eq(category, 3), mus[3][1],
        pm.math.switch(pm.math.eq(category, 4), mus[4][1], mus[5][1]))))))

    z = pm.Deterministic('z', pm.math.switch(
        pm.math.eq(category, 0), mus[0][1],
        pm.math.switch(pm.math.eq(category, 1), mus[1][2],
        pm.math.switch(pm.math.eq(category, 2), mus[2][2],
        pm.math.switch(pm.math.eq(category, 3), mus[3][2],
        pm.math.switch(pm.math.eq(category, 4), mus[4][2], mus[5][2]))))))
    
    # Calculate t using the theoretical function
    t = S_P_t(x, y, z)

    # Likelihood of the observed data
    obs = pm.Normal('obs', mu=t, sigma=0.1, observed=t_observed)

with model:
    trace = pm.sample(300, tune=50, cores=4)

# Trace summary
summary_df = pm.summary(trace)

# Convert summary to dataframe
summary_df = pd.DataFrame(summary_df)

# Show DataFrame
print(summary_df)

#pm.traceplot(trace)
#pm.autocorrplot(trace)

The t_observed value by EQT is: 0.85


  return wrapped_(*args_, **kwargs_)
Only 300 samples in chain.
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>CategoricalGibbsMetropolis: [category]
>NUTS: [mu5, mu4, mu3, mu2, mu1, mu0]


Sampling 4 chains for 50 tune and 300 draw iterations (200 + 1_200 draws total) took 18 seconds.
There was 1 divergence after tuning. Increase `target_accept` or reparameterize.
The rhat statistic is larger than 1.05 for some parameters. This indicates slight problems during sampling.
The estimated number of effective samples is smaller than 200 for some parameters.
Got error No model on context stack. trying to find log_likelihood in translation.


                mean        sd     hdi_3%     hdi_97%  mcse_mean  mcse_sd  \
category       3.358     0.805      3.000       5.000      0.172    0.124   
mu0[0]     31933.864  4906.535  22152.189   40506.226    179.888  127.644   
mu0[1]    100151.170  5084.953  90605.492  109762.189    162.829  115.172   
mu0[2]      4823.747  1871.170   1585.359    8612.759     87.509   65.623   
mu1[0]     47355.618  2828.807  42005.783   52574.510    101.314   72.509   
mu1[1]     75343.362  5183.481  65833.771   85232.692    193.633  136.975   
mu1[2]      7967.098  2858.386   2362.264   13150.532    129.181   93.212   
mu2[0]     70358.726  6298.888  57860.266   81844.431    250.617  177.295   
mu2[1]     85310.890  5462.806  74658.275   94906.451    154.515  109.973   
mu2[2]      5394.625  1641.549   2623.008    8541.727     74.421   52.655   
mu3[0]     58450.894  3275.488  52628.712   64295.725    103.757   73.389   
mu3[1]     55467.935  3922.629  50098.831   64449.570    181.812  129.920   

In [7]:
# Extract 'x', 'y', 'z' from 'mean' column
x_utm = summary_df.at['x', 'mean']
y_utm = summary_df.at['y', 'mean']
z_utm = summary_df.at['z', 'mean']

# Transformer from (EPSG:32611 - UTM11N, WGS84) to (EPSG:4326 - lat, long, WGS84)
latlon_proj = pyproj.Transformer.from_crs(32611, 4326, always_xy=True)

# Transformer from (EPSG:4326 - lat, long, WGS84) to (EPSG:32611 - UTM11N, WGS84)
utm_proj = pyproj.Transformer.from_crs(4326, 32611, always_xy=True)

# Relative reference origin coordinates
ref_latitude = 35.2
ref_longitude = -118.2
ref_depth = 0

# Transform reference coordinates to UTM
ref_longitude_utm, ref_latitude_utm = utm_proj.transform(ref_longitude, ref_latitude)

# Correct relative reference origin effect
x_utm += ref_longitude_utm
y_utm += ref_latitude_utm

# Convert depth from meters [m] to kilometers [km]
z_depth = z_utm/1000

# Transform coordinates from UTM to latitude, longitude
x_longitude, y_latitude = latlon_proj.transform(x_utm,y_utm)

# Print transformed coordinates
print(f'Longitude: {x_longitude}°')
print(f'Latitude: {y_latitude}°')
print(f'Depth: {z_depth} [km]')


Latitude: -117.54913547167894°
Longitude: 35.702019381210256°
Depth: 8.253553 [km]


In [8]:
def metolatlon(x_utm, y_utm, z_utm):
    # Transformer from (EPSG:32611 - UTM11N, WGS84) to (EPSG:4326 - lat, long, WGS84)
    latlon_proj = pyproj.Transformer.from_crs(32611, 4326, always_xy=True)

    # Transformer from (EPSG:4326 - lat, long, WGS84) to (EPSG:32611 - UTM11N, WGS84)
    utm_proj = pyproj.Transformer.from_crs(4326, 32611, always_xy=True)

    # Relative reference origin coordinates
    ref_latitude = 35.2
    ref_longitude = -118.2
    ref_depth = 0

    # Transform reference coordinates to UTM
    ref_longitude_utm, ref_latitude_utm = utm_proj.transform(ref_longitude, ref_latitude)

    # Correct relative reference origin effect
    x_utm += ref_longitude_utm
    y_utm += ref_latitude_utm

    # Convert depth from meters [m] to kilometers [km]
    z_depth = z_utm / 1000

    # Transform coordinates from UTM to latitude, longitude
    x_longitude, y_latitude = latlon_proj.transform(x_utm, y_utm)

    return x_longitude, y_latitude, z_depth

# Example...
x_utm_example = summary_df.at['x', 'mean']
y_utm_example = summary_df.at['y', 'mean']
z_utm_example = summary_df.at['z', 'mean']

result = metolatlon(x_utm_example, y_utm_example, z_utm_example)

print(f'Latitude: {result[0]}°')
print(f'Longitude: {result[1]}°')
print(f'Depth: {result[2]} [km]')


Latitude: -117.54913547167894°
Longitude: 35.702019381210256°
Depth: 8.253553 [km]


In [None]:
path_white_catalog = 'Users/cecilia/Figuras Tesis Lia/csv/Ridgecrest_filtrado_UTC.csv'