### Código para unir salidas de EQT y CNQ

In [1]:
# Importing libraries
import numpy as np
import pymc3 as pm
import pandas as pd
import theano
import theano.tensor as tt
import json
from obspy.core import UTCDateTime
import pyproj
import os

## Step 1: EQT output

In [3]:
# Path to the EQT prediction results (CSV)
eqt_output = 'X_prediction_results.csv'

# Load the complete EQT output into a DataFrame
df = pd.read_csv(eqt_output)

# Keep only the rows whose detection, S and P probabilities are all >= 0.80.
# The result is copied so that the column assignments below do not write
# into a view of `df`.
probability_columns = ['detection_probability', 's_probability', 'p_probability']
is_confident = df[probability_columns].ge(0.80).all(axis=1)
df_filtered = df[is_confident].copy()

# Parse both pick-time columns and wrap every value in an obspy UTCDateTime
for pick_column in ('p_arrival_time', 's_arrival_time'):
    parsed_picks = pd.to_datetime(df_filtered[pick_column])
    df_filtered[pick_column] = parsed_picks.apply(UTCDateTime)

# Store the S-minus-P arrival time difference in a new column 't_observed'
s_picks = df_filtered['s_arrival_time']
p_picks = df_filtered['p_arrival_time']
df_filtered['t_observed'] = s_picks - p_picks

# Display the filtered DataFrame, including the new 't_observed' column
df_filtered

Unnamed: 0,file_name,network,station,instrument_type,station_lat,station_lon,station_elv,event_start_time,event_end_time,detection_probability,detection_uncertainty,p_arrival_time,p_probability,p_uncertainty,p_snr,s_arrival_time,s_probability,s_uncertainty,s_snr,t_observed
42,CLC_CI_HH_2019-07-04T03:20:12.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 03:20:34.808300,2019-07-04 03:20:44.978300,1.00,,2019-07-04T03:20:34.778300Z,0.80,,16.5,2019-07-04T03:20:39.088300Z,0.81,,10.0,4.31
48,CLC_CI_HH_2019-07-04T04:14:48.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 04:15:19.448300,2019-07-04 04:15:27.328300,1.00,,2019-07-04T04:15:19.438300Z,0.85,,8.8,2019-07-04T04:15:22.758300Z,0.83,,7.2,3.32
114,CLC_CI_HH_2019-07-04T17:18:48.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 17:19:32.368300,2019-07-04 17:19:38.348300,0.99,,2019-07-04T17:19:32.288300Z,0.86,,11.9,2019-07-04T17:19:34.898300Z,0.80,,5.9,2.61
123,CLC_CI_HH_2019-07-04T17:27:12.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 17:27:38.288300,2019-07-04 17:27:44.338300,1.00,,2019-07-04T17:27:38.278300Z,0.82,,28.7,2019-07-04T17:27:40.798300Z,0.82,,7.4,2.52
126,CLC_CI_HH_2019-07-04T17:30:00.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 17:30:38.128300,2019-07-04 17:30:44.068300,1.00,,2019-07-04T17:30:38.058300Z,0.84,,10.1,2019-07-04T17:30:40.618300Z,0.83,,6.4,2.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6309,CLC_CI_HH_2019-07-06T23:52:12.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:52:46.608300,2019-07-06 23:52:52.918300,1.00,,2019-07-06T23:52:46.578300Z,0.80,,29.8,2019-07-06T23:52:49.248300Z,0.83,,4.7,2.67
6310,CLC_CI_HH_2019-07-06T23:52:54.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:53:13.088300,2019-07-06 23:53:15.858300,0.97,,2019-07-06T23:53:13.008300Z,0.85,,11.7,2019-07-06T23:53:14.248300Z,0.80,,3.7,1.24
6314,CLC_CI_HH_2019-07-06T23:54:18.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:54:40.648300,2019-07-06 23:54:44.458300,0.98,,2019-07-06T23:54:40.648300Z,0.84,,9.0,2019-07-06T23:54:42.228300Z,0.82,,4.4,1.58
6318,CLC_CI_HH_2019-07-06T23:55:42.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:56:36.128300,2019-07-06 23:56:39.438300,0.99,,2019-07-06T23:56:36.128300Z,0.92,,34.4,2019-07-06T23:56:37.388300Z,0.80,,15.1,1.26


In [4]:
# Number of rows currently held in df_filtered
len(df_filtered.index)

1290

In [7]:
# Directory containing one CNQ detection file per calendar day.
# NOTE(review): this and the Excel path below are absolute, machine-specific
# locations; consider moving them to a configuration cell.
CNQ_DIR = '/Users/cecilia/CONVN/output/july_detections/from_stream'

# Each CNQ file is parsed once and reused for every EQT row of the same date,
# instead of being re-read from disk on every iteration.
cnq_by_date = {}

# Row label -> 'clusters_prob' of the first CNQ window that contains the
# P arrival. Rows that never get an entry here are discarded after the loop;
# the DataFrame itself is not modified while it is being iterated.
matched_weights = {}

for index, row in df_filtered.iterrows():
    # Date part of the current row's event start time selects the CNQ file.
    # NOTE(review): the file is chosen from 'event_start_time', while the
    # window test below uses 'p_arrival_time' — confirm this is intended for
    # picks close to midnight.
    event_date = row['event_start_time'].split(' ')[0]
    if event_date not in cnq_by_date:
        cnq_output = f'{CNQ_DIR}/CI.CLC.{event_date}.csv'
        cnq_by_date[event_date] = pd.read_csv(cnq_output)
    df_cnq = cnq_by_date[event_date]

    # P-wave arrival time taken from the filtered EQT catalog
    p_arrival_time = row['p_arrival_time']
    p_times = UTCDateTime(p_arrival_time)

    # CNQ rows for which start_time <= P arrival <= end_time
    find_times = df_cnq[(df_cnq['start_time'] <= p_times) & (df_cnq['end_time'] >= p_times)]

    # Only clusters_prob is extracted; when several windows match, the first wins
    if not find_times.empty:
        clusters_prob = find_times['clusters_prob'].iloc[0]
        matched_weights[index] = clusters_prob

# Keep only the rows that matched a CNQ window and attach the matched value
# as the new column 'clusters_weight' (aligned on the row labels).
has_match = df_filtered.index.isin(list(matched_weights))
df_filtered = df_filtered[has_match].copy()
df_filtered['clusters_weight'] = pd.Series(matched_weights, dtype=object)

xlsx_path = '/Users/cecilia/EQNeMix/PYMC3/Results/df_filtered.xlsx'

# Write the joined table to the Excel file, without the index column
df_filtered.to_excel(xlsx_path, index=False)

# Display the DataFrame with the new column
df_filtered

Unnamed: 0,file_name,network,station,instrument_type,station_lat,station_lon,station_elv,event_start_time,event_end_time,detection_probability,...,p_arrival_time,p_probability,p_uncertainty,p_snr,s_arrival_time,s_probability,s_uncertainty,s_snr,t_observed,clusters_weight
42,CLC_CI_HH_2019-07-04T03:20:12.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 03:20:34.808300,2019-07-04 03:20:44.978300,1.00,...,2019-07-04T03:20:34.778300Z,0.80,,16.5,2019-07-04T03:20:39.088300Z,0.81,,10.0,4.31,"[0.06659277, 0.13351446, 0.018104438, 0.268382..."
48,CLC_CI_HH_2019-07-04T04:14:48.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 04:15:19.448300,2019-07-04 04:15:27.328300,1.00,...,2019-07-04T04:15:19.438300Z,0.85,,8.8,2019-07-04T04:15:22.758300Z,0.83,,7.2,3.32,"[0.066614, 0.13338098, 0.018089224, 0.26825786..."
114,CLC_CI_HH_2019-07-04T17:18:48.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 17:19:32.368300,2019-07-04 17:19:38.348300,0.99,...,2019-07-04T17:19:32.288300Z,0.86,,11.9,2019-07-04T17:19:34.898300Z,0.80,,5.9,2.61,"[0.06661966, 0.13420713, 0.018133879, 0.269701..."
123,CLC_CI_HH_2019-07-04T17:27:12.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 17:27:38.288300,2019-07-04 17:27:44.338300,1.00,...,2019-07-04T17:27:38.278300Z,0.82,,28.7,2019-07-04T17:27:40.798300Z,0.82,,7.4,2.52,"[0.06663134, 0.13450064, 0.018150516, 0.270214..."
126,CLC_CI_HH_2019-07-04T17:30:00.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-04 17:30:38.128300,2019-07-04 17:30:44.068300,1.00,...,2019-07-04T17:30:38.058300Z,0.84,,10.1,2019-07-04T17:30:40.618300Z,0.83,,6.4,2.56,"[0.06665968, 0.13470304, 0.018156696, 0.270392..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6298,CLC_CI_HH_2019-07-06T23:45:12.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:45:56.248300,2019-07-06 23:46:02.578300,0.99,...,2019-07-06T23:45:56.228300Z,0.83,,16.7,2019-07-06T23:45:58.198300Z,0.85,,8.7,1.97,"[0.06663991, 0.13487642, 0.018153632, 0.271021..."
6309,CLC_CI_HH_2019-07-06T23:52:12.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:52:46.608300,2019-07-06 23:52:52.918300,1.00,...,2019-07-06T23:52:46.578300Z,0.80,,29.8,2019-07-06T23:52:49.248300Z,0.83,,4.7,2.67,"[0.066505425, 0.13513586, 0.018229175, 0.27154..."
6310,CLC_CI_HH_2019-07-06T23:52:54.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:53:13.088300,2019-07-06 23:53:15.858300,0.97,...,2019-07-06T23:53:13.008300Z,0.85,,11.7,2019-07-06T23:53:14.248300Z,0.80,,3.7,1.24,"[0.066487, 0.13532399, 0.018168757, 0.2721459,..."
6314,CLC_CI_HH_2019-07-06T23:54:18.008300Z,CI,CLC,HH,35.81574,-117.59751,775,2019-07-06 23:54:40.648300,2019-07-06 23:54:44.458300,0.98,...,2019-07-06T23:54:40.648300Z,0.84,,9.0,2019-07-06T23:54:42.228300Z,0.82,,4.4,1.58,"[0.06672571, 0.13502866, 0.018162727, 0.271245..."


In [8]:
# Number of rows currently held in df_filtered
len(df_filtered.index)

1125