In [None]:
import logging
import sys
# Candidate locations of the local SeisRoutine checkout. Every entry is added
# to the module search path, in the order listed.
lib_path = [
    r'C:\Users\ikahbasi\OneDrive\Applications\GitHub\SeisRoutine',
    r'C:\Users\ikahb\OneDrive\Applications\GitHub\SeisRoutine',
]
sys.path.extend(lib_path)
##########################################################################
import SeisRoutine.catalog as src
import SeisRoutine.waveform as srw
import SeisRoutine.config as srconf
import SeisRoutine.statistics as srs

In [None]:
import seisbench.generate as sbg
import seisbench.models as sbm
import torch


In [None]:
def auto_labeling(data, dl_pickers):
    """Run every model in ``dl_pickers`` on one waveform array.

    NOTE: later cells of this notebook define ``auto_labeling`` again with a
    stream-based signature; whichever definition is executed last is the one
    that is active.

    Parameters
    ----------
    data : array-like
        A single, un-batched input sample. A batch axis of length one is
        added before the sample is passed to each model.
    dl_pickers : dict
        Maps a name to a callable torch model that exposes a ``device``
        attribute.

    Returns
    -------
    dict
        Maps each name in ``dl_pickers`` to the first element of that model's
        output, converted to a NumPy array on the CPU.
    """
    results = {}
    for key, model in dl_pickers.items():
        with torch.no_grad():
            # Use a separate name so the caller's `data` is not overwritten
            # and every model receives the same un-batched input.
            batch = torch.tensor(data, device=model.device).unsqueeze(0)
            pred = model(batch)
            pred = pred[0].cpu().numpy()
        results[key] = pred
    return results

In [None]:
def auto_labeling(stream, dl_pickers):
    """Collect the pick times produced by several pickers for one stream.

    NOTE: this function is defined more than once in the notebook; the
    definition executed last is the one that is active.

    Parameters
    ----------
    stream
        Waveform stream handed unchanged to each picker's ``classify``.
    dl_pickers : dict
        Maps a picker name to an object whose ``classify(stream)`` result has
        a ``picks`` attribute; each pick provides ``phase`` and ``peak_time``.

    Returns
    -------
    dict
        ``{phase: {picker_name: peak_time}}``. The keys ``'P'`` and ``'S'``
        are always present. When a picker yields several picks for the same
        phase, the last one overwrites the earlier ones.
    """
    outputs = {'P': {}, 'S': {}}
    for name, picker in dl_pickers.items():
        for pick in picker.classify(stream).picks:
            # setdefault keeps an unexpected phase label from raising KeyError.
            outputs.setdefault(pick.phase, {})[name] = pick.peak_time
    return outputs

In [None]:
def window_checking(array, window_len=0.2, min_num_picks=3):
    """Check that enough values lie within a sufficiently short span.

    Parameters
    ----------
    array : array-like
        Values to test (pick times in the calling code). The input is not
        modified.
    window_len : float
        Largest allowed spread, i.e. the sum of the differences between
        consecutive sorted values.
    min_num_picks : int
        Smallest number of values required.

    Returns
    -------
    bool
        True when the spread is at most ``window_len`` and the input holds at
        least ``min_num_picks`` values.
    """
    # np.sort returns a sorted copy, so the caller's array keeps its order and
    # plain lists are accepted as well.
    ordered = np.sort(array)
    s = np.diff(ordered).sum()
    logging.info(f'{s}')
    return bool((s <= window_len) and (min_num_picks <= ordered.size))

In [None]:
def find_optimum_pick_time(times, outlier_detector='Z-score',
                           window_len=1, min_num_picks=2):
    """Combine several candidate pick times into a single time.

    Outliers are removed first. If the remaining values pass
    ``window_checking``, their ``srs.distance_weighted_average`` is returned.

    Parameters
    ----------
    times : numpy.ndarray
        Candidate pick times; must support boolean-mask indexing.
    outlier_detector : {'Z-score', 'IQR'}
        Selects ``srs.detect_outliers_ztest`` (threshold 1) or
        ``srs.detect_outliers_iqr`` (multiplier 1.5).
    window_len : float
        Passed to ``window_checking``; defaults to the previously hard-coded 1.
    min_num_picks : int
        Passed to ``window_checking``; defaults to the previously hard-coded 2.

    Returns
    -------
    The weighted average of the inlier times, or ``numpy.nan`` when the
    inliers fail the window check.

    Raises
    ------
    ValueError
        If ``outlier_detector`` is not one of the supported names.
    """
    if outlier_detector == 'Z-score':
        outlier_msk = srs.detect_outliers_ztest(array=times, threshold=1)
    elif outlier_detector == 'IQR':
        outlier_msk = srs.detect_outliers_iqr(array=times, multiplier=1.5)
    else:
        # Without this branch an unknown name fell through and failed later
        # on the unbound `outlier_msk`.
        raise ValueError(
            f"Unknown outlier_detector {outlier_detector!r}; "
            "expected 'Z-score' or 'IQR'.")
    # The mask is inverted to keep the inliers (presumably True marks outliers).
    times_inlier = times[~outlier_msk]
    cond = window_checking(array=times_inlier,
                           window_len=window_len,
                           min_num_picks=min_num_picks)
    logging.info(f'{cond}')
    if cond:
        return srs.distance_weighted_average(array=times_inlier)
    return np.nan

In [None]:
# model = sbm.EQTransformer()
# for n in model.list_pretrained():
#     print(n)
#     try:
#         model.from_pretrained(n)
#     except Exception as error:
#         print(error)

In [None]:
if cfg.mk_dataset.autolabeling:
    # Pretrained pickers used for automatic labeling, keyed by
    # "<model>_<weights>". Weight sets that are currently switched off are
    # kept as commented-out entries.
    dl_pickers = {
        # --- PhaseNet ---
        'PhaseNet_stead': sbm.PhaseNet.from_pretrained("stead"),
        'PhaseNet_original': sbm.PhaseNet.from_pretrained("original"),
        # 'PhaseNet_scedc': sbm.PhaseNet.from_pretrained("scedc"),
        # 'PhaseNet_instance': sbm.PhaseNet.from_pretrained("instance"),
        # --- EQTransformer ---
        'EQTransformer_stead': sbm.EQTransformer.from_pretrained("stead"),
        # 'EQTransformer_original': sbm.EQTransformer.from_pretrained("original"),
        # 'EQTransformer_scedc': sbm.EQTransformer.from_pretrained("scedc"),
        # 'EQTransformer_instance': sbm.EQTransformer.from_pretrained("instance"),
        # --- GPD ---
        # sbm.GPD.from_pretrained("stead"),
        'GPD_original': sbm.GPD.from_pretrained("original"),
        # 'GPD_scedc': sbm.GPD.from_pretrained("scedc"),
        # 'GPD_instance': sbm.GPD.from_pretrained("instance"),
    }

    # Move every picker to the GPU when one is available; otherwise only log
    # that the CPU is used.
    if not torch.cuda.is_available():
        logging.info("Running on CPU")
    else:
        for key, dl_picker in dl_pickers.items():
            dl_picker.cuda()
            logging.info(f"{key} Running on GPU")

In [None]:
def get_picks_time_difference(picks):
    """Return each pick time relative to the earliest pick, in ascending order.

    Every element of ``picks`` must provide a ``time`` attribute. The first
    entry of the result is the earliest time minus itself; an empty input
    yields an empty list.
    """
    ordered_times = sorted(pick.time for pick in picks)
    if not ordered_times:
        return []
    earliest = ordered_times[0]
    return [moment - earliest for moment in ordered_times]

In [None]:
def reversing_dictionary(dictionary):
    """Return a new dict that maps every value of ``dictionary`` to its key.

    When several keys share one value, the key that comes last in iteration
    order is the one that is kept.
    """
    swapped = {}
    for original_key, original_value in dictionary.items():
        swapped[original_value] = original_key
    return swapped

In [None]:
def auto_labeling(stream, dl_pickers):
    """Collect the pick times produced by several pickers for one stream.

    NOTE: this function is defined more than once in the notebook; the
    definition executed last is the one that is active.

    Parameters
    ----------
    stream
        Waveform stream handed unchanged to each picker's ``classify``.
    dl_pickers : dict
        Maps a picker name to an object whose ``classify(stream)`` result has
        a ``picks`` attribute; each pick provides ``phase`` and ``peak_time``.

    Returns
    -------
    dict
        ``{phase: {picker_name: peak_time}}``. The keys ``'P'`` and ``'S'``
        are always present. When a picker yields several picks for the same
        phase, the last one overwrites the earlier ones.
    """
    outputs = {'P': {}, 'S': {}}
    for name, picker in dl_pickers.items():
        for pick in picker.classify(stream).picks:
            # setdefault keeps an unexpected phase label from raising KeyError.
            outputs.setdefault(pick.phase, {})[name] = pick.peak_time
    return outputs

In [None]:
#####################
### Auto Labeling ###
#####################
# Runs only when auto labeling is switched on in the configuration.
# NOTE(review): `st`, `starttime` and `phase_params` are not defined in any
# visible cell of this notebook, and `sps` is assigned in a later cell —
# confirm where these come from before running this cell.
if cfg.mk_dataset.autolabeling:
    # Earliest start and latest end over all traces of the stream.
    stime = min([tr.stats.starttime for tr in st])
    etime = max([tr.stats.endtime for tr in st])
    st.taper(0.2)
    ################################################################
    # Trim to a window widened by (60 - cut_time.before) at the start and
    # (60 - cut_time.after) at the end; samples added by the padding are 0.
    # NOTE(review): the literal 60 is presumably a window length in seconds — confirm.
    st.trim(
        starttime=stime-(60-cfg.mk_dataset.cut_time.before),
        endtime=etime+(60-cfg.mk_dataset.cut_time.after),
        pad=True, fill_value=0)
    ##
    # Disabled alternative: pad with noise instead of zeros.
    # waveforms_padded = srw.st_noise_padding(
    #     st=waveforms,
    #     stime=60-cfg.cut_time.before,
    #     etime=60-cfg.cut_time.after,
    #     std_windows=(cfg.noisepad.std_start, cfg.noisepad.std_end))
    # `st_padded` is only a second name for `st`; no copy is made.
    st_padded = st
    ################################################################
    auto_label = auto_labeling(stream=st_padded, dl_pickers=dl_pickers)
    for phase_hint, auto_picks in auto_label.items():
        for picker_dataset_name, picker_time in auto_picks.items():
            # print(phase_hint, picker_dataset_name, picker_time)
            # Offset of the pick from `starttime`, scaled by `sps` and
            # truncated to an integer sample index.
            sample = (picker_time - starttime) * sps
            phase_params[f"trace_autoDL_{picker_dataset_name}_{phase_hint}_arrival_sample"] = int(sample)

In [None]:
# Run every configured picker on the stream and store each pick in
# `trace_params` as an integer sample offset from `starttime`.
auto_label = auto_labeling(stream=stream, dl_pickers=dl_pickers)
for phase_hint in auto_label:
    auto_picks = auto_label[phase_hint]
    for picker_dataset_name in auto_picks:
        picker_time = auto_picks[picker_dataset_name]
        logging.info(f'{phase_hint}\n{picker_dataset_name}\n{picker_time}')
        # Time offset scaled by the sampling rate; int() truncates the result.
        sample = (picker_time - starttime) * sampling_rate
        trace_params[f"trace_{phase_hint}_arrival_sample_autoDL_{picker_dataset_name}"] = int(sample)

In [None]:
from scipy import signal

In [None]:
class Tapering:
    """Callable that applies a Tukey taper to an array stored in a state dict.

    Parameters
    ----------
    alpha : float
        Shape parameter handed to ``scipy.signal.windows.tukey``.
    key : str or pair
        A single string is used both as the key that is read and the key that
        is written. Otherwise the first element is the key to read and the
        second the key to write.
    """

    def __init__(self, alpha=0.3, key='X'):
        self.alpha = alpha  # Tapering coefficient
        self.key = (key, key) if isinstance(key, str) else key

    def __call__(self, state_dict):
        """Taper ``state_dict[read_key]`` and store it under the write key.

        The entry is an ``(array, metadata)`` pair. The window spans the last
        axis of the array and the metadata is passed through unchanged.
        Nothing is returned; ``state_dict`` is updated in place.
        """
        source_key = self.key[0]
        target_key = self.key[1]
        samples, metadata = state_dict[source_key]
        window = signal.windows.tukey(samples.shape[-1], self.alpha)
        state_dict[target_key] = (samples * window, metadata)

# codes

In [None]:
import os
import seisbench.data as sbd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Two-step configuration loading: the bootstrap file '0-init-cfg.yml' only
# provides the directory and file name of the actual configuration, which is
# then loaded into `cfg`.
init_cfg = srconf.load_config('0-init-cfg.yml')
cfg_path = os.path.join(init_cfg.target_config_filepath,
                        init_cfg.target_config_filename)
# NOTE(review): earlier cells already read `cfg` (e.g. `cfg.mk_dataset`), so
# this cell has to be executed before them on a fresh kernel.
cfg = srconf.load_config(cfg_path)

In [None]:
# Open the waveform dataset at the configured path; the sampling rate and
# component order are taken from the training section of the configuration.
dataset = sbd.WaveformDataset(
    path=cfg.dataset.path,
    sampling_rate=cfg.training.dataset.sampling_rate,
    component_order=cfg.training.dataset.component_order,
          )

In [None]:
# NOTE(review): presumably samples per second — confirm it matches the
# sampling rate the dataset is loaded with.
sps = 100

# Pre-processing applied to every sample drawn from the generator, in order:
# high-pass filter, demean + peak normalization, cast to float32.
augmentations = []
# augmentations.append(Tapering())
augmentations.append(
    sbg.Filter(N=4, Wn=[0.5], btype='highpass', forward_backward=True)
)
augmentations.append(
    sbg.Normalize(demean_axis=-1, amp_norm_axis=-1, amp_norm_type="peak")
)
# augmentations.append(
#     sbg.FixedWindow(
#         p0=-15*sps,
#         windowlen=1*60*sps,
#         strategy="pad",
#         key='X'))
augmentations.append(sbg.ChangeDtype(np.float32))

generator = sbg.GenericGenerator(dataset)
generator.add_augmentations(augmentations)

In [None]:
# Models used for automatic picking, keyed by a short name that is also
# embedded in the metadata column names written later.
# `from_pretrained` is called on the class itself: it builds the model from
# the stored weights, so constructing `PhaseNet(phases='NPS')` first only
# created an instance that was thrown away.
# NOTE(review): confirm the 'original' weights use the N, P, S channel order,
# because the picking loop reads channel 1 as P and channel 2 as S.
models = {'phasenet-original': sbm.PhaseNet.from_pretrained('original')}
for key, model in models.items():
    # The models are called directly for inference, so switch layers such as
    # batch normalization to evaluation behaviour.
    model.eval()
    # Only move to the GPU when one exists, as the picker setup cell does.
    if torch.cuda.is_available():
        model.cuda()

In [None]:
from scipy.ndimage import label

In [None]:
def find_peaks(data, treshold):
    """Return the index of the maximum inside every run of values above a threshold.

    Parameters
    ----------
    data : array-like, 1-D
        Values to scan, e.g. a probability trace. Only the first axis is
        considered when locating the runs.
    treshold : float
        A sample belongs to a run when it is strictly greater than this value.

    Returns
    -------
    list
        One index per contiguous run, in ascending order. For ties inside a
        run the first maximum is used. Empty when no sample exceeds the
        threshold.
    """
    data = np.asarray(data)  # lets plain lists be passed as well
    # `label` numbers each contiguous above-threshold run 1..num_features.
    labeled, num_features = label(data > treshold)
    peaks = []
    for i in range(1, num_features + 1):
        segment_indices = np.where(labeled == i)[0]
        # argmax is relative to the run; map it back to an index into `data`.
        peaks.append(segment_indices[np.argmax(data[segment_indices])])
    return peaks

In [None]:
from tqdm import tqdm

In [None]:
# Record the shape of every generated sample and report each one that is not
# the expected (3, 3001), together with how often that shape occurs.
# The number of samples is read from `dataset.metadata` directly, because the
# `metadata` variable is only created by a later cell and is therefore
# undefined here on a fresh kernel.
lst = []
for ii in tqdm(range(len(dataset.metadata))):
    data = generator[ii]
    data_X = data['X']
    lst.append(data_X.shape)
for index, el in enumerate(lst):
    if el != (3, 3001):
        print(f'{el=}\t{index=}\t{lst.count(el)=}')

In [None]:
# Probability threshold above which a run of samples counts as a pick.
treshold = 0.3
metadata = dataset.metadata.copy()
# One object-typed column per model and phase; each cell later holds the list
# of picked sample indices for that trace.
for key, model in models.items():
    for phase in ['P', 'S']:
        metadata[f'trace_{phase}_{key}-AutoPik'] = None  # or object
        metadata = metadata.astype({f'trace_{phase}_{key}-AutoPik': 'object'}, copy=False)

# The models are called directly below, so make sure they are in evaluation mode.
for model in models.values():
    model.eval()

for ii in tqdm(range(len(metadata))):
    data = generator[ii]
    data_X = data['X']
    # Samples without the expected shape are skipped; their pick cells stay None.
    if data_X.shape != (3, 3001):
        continue
    # plt.plot(data_X.T); plt.show()
    for key, model in models.items():
        with torch.no_grad():
            # Keep `data_X` untouched: rebinding it to the batched tensor would
            # hand a wrong-rank input to every model after the first one.
            batch = torch.tensor(data_X, device=model.device).unsqueeze(0)
            pred = model(batch)
            pred = pred.cpu().detach().numpy().squeeze()
        # Channel 1 is read as P and channel 2 as S.
        p_peaks = find_peaks(pred[1], treshold=treshold)
        s_peaks = find_peaks(pred[2], treshold=treshold)
        # print(p_peaks, s_peaks)
        # NOTE(review): `.at[ii, ...]` addresses rows by label, so this assumes
        # the metadata index is 0..n-1 — confirm.
        metadata.at[ii, f'trace_P_{key}-AutoPik'] = p_peaks
        metadata.at[ii, f'trace_S_{key}-AutoPik'] = s_peaks
        #
        # plt.plot(pred.T[:, 1:]); plt.legend([_ for _ in 'PS'])
        # plt.vlines(p_peaks, ymin=0, ymax=1, colors='red')
        # plt.vlines(s_peaks, ymin=0, ymax=1, colors='blue')
    # plt.show()
    # if ii == 1:
    #     break
# NOTE(review): absolute, machine-specific output path — consider moving it
# into the configuration file.
path = r'F:\DataSets-Local\Merged_All_DataSets_2025-07-10 (Ahar-Ilam-Kaki-Qeshm)\metadata-with-AutoPicks.pkl'
metadata.to_pickle(path)

    

In [None]:
import pandas as pd

In [None]:
def find_ps_pairs(metadata):
    """Flag rows whose P and S arrival information is consistent.

    Columns are selected case-insensitively: P columns start with
    ``trace_P`` and S columns start with ``trace_S``; both must end with
    ``_arrival_sample``. A row "has" a phase when at least one of the
    selected columns is not null.

    Returns
    -------
    pandas.Series of bool
        True where a row has both a P and an S arrival, and also where it has
        neither of them.
        NOTE(review): if only rows with both phases are wanted, the
        comparison should be a logical AND — confirm the intent.
    """
    suffix = '_arrival_sample'.upper()

    def _has_arrival(prefix):
        # Boolean Series: does any matching arrival column hold a value?
        wanted = [name for name in metadata.keys()
                  if name.upper().startswith(prefix.upper())
                  and name.upper().endswith(suffix)]
        return metadata[wanted].notna().any(axis=1)

    p_condition = _has_arrival('trace_P')
    s_condition = _has_arrival('trace_S')
    return s_condition == p_condition

In [None]:
# Reload the metadata table (including the auto-pick columns) from the pickle
# file at `path`.
# NOTE: unpickling can execute arbitrary code — only load files you created yourself.
df = pd.read_pickle(path)

In [None]:
# Add a boolean 'PS-pairs' column to `df` (the frame is modified in place).
df['PS-pairs'] = find_ps_pairs(metadata=df)

In [None]:
# Names of the columns that hold the automatic picks, in column order.
keys = [column for column in df.keys() if column.endswith('AutoPik')]
keys

In [None]:
# Keep only the rows flagged in the boolean 'PS-pairs' column.
df0 = df.loc[df['PS-pairs']]

In [None]:
# Drop rows whose first auto-pick column is null.
df1 = df0[df0[keys[0]].notna().to_numpy()]

In [None]:
# Rows where both of the first two auto-pick columns hold exactly one pick.
func = lambda x: len(x)
single_first = df1[keys[0]].apply(func) == 1
single_second = df1[keys[1]].apply(func) == 1
cond1 = single_first & single_second
sum(cond1)

In [None]:
# Among the rows selected by cond1: is the single pick of the first auto-pick
# column less than 100 samples away from sample 500?
cond2 = df1.loc[cond1, keys[0]].apply(lambda picks: abs(picks[0] - 500) < 100)
sum(cond2)

In [None]:
# Rows that satisfy cond1 and, within those, cond2.
df_good = df1[cond1][cond2]
# Index labels of the selected rows.
index_good = df_good.index.to_list()
# NOTE(review): these are index labels, so `.loc` rather than `.iloc` would be
# the matching accessor if the line below is re-enabled — confirm.
# df.iloc[index_good]

In [None]:
# Histogram of network codes among the selected ("good") rows.
df_good['station_network_code'].hist()

In [None]:
# Histogram of network codes among all rows flagged in 'PS-pairs', for comparison.
df0['station_network_code'].hist()