In [1]:
import joblib 
import time
import pandas as pd
import os
import sys
import numpy as np
import threadpoolctl
import glob
# Put the parent directory of the working directory on sys.path (once) so the
# project-local imports below can be resolved.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

from src.processing import GatherFeatureDatasets
from scripts.pMagnitude import create_features

In [2]:
# Station code used below to filter the arrival catalog, to process the station's
# feature dataset, and to build the model/scaler file names.
stat = "YUF"

# Time how long it takes to extract waveform information

In [3]:
# Waveform archives: every file named archive_????.h5 in archive_dir.
archive_dir = '/uufs/chpc.utah.edu/common/home/koper-group4/bbaker/waveformArchive/archives/'
# os.path.join avoids the doubled '/' that string concatenation produced (archive_dir
# already ends with a separator); sorted() fixes the otherwise arbitrary glob order.
h5_archive_files = sorted(glob.glob(os.path.join(archive_dir, 'archive_????.h5')))
if not h5_archive_files:
    # Fail here with a clear message instead of handing an empty list to the archive manager.
    raise FileNotFoundError(f"No archive_????.h5 files found in {archive_dir}")

# Arrival catalog (3-component, with gains) read into a DataFrame.
catalog_dir = '/uufs/chpc.utah.edu/common/home/koper-group3/alysha/ben_catalogs/20240220'
arrival_catalog_3c = f'{catalog_dir}/currentEarthquakeArrivalInformation3CWithGains.csv'
# 'location' is forced to object dtype so pandas does not infer a numeric type for it.
arrival_catalog_3c_df = pd.read_csv(arrival_catalog_3c, dtype={'location': object})

In [4]:
# Just get one example: the rows for event 60001407 at station `stat` with phase "P".
is_example_arrival = (
    (arrival_catalog_3c_df["evid"] == 60001407)
    & (arrival_catalog_3c_df["station"] == stat)
    & (arrival_catalog_3c_df["phase"] == "P")
)
ex_arrival_df = arrival_catalog_3c_df.loc[is_example_arrival]
ex_arrival_df

Unnamed: 0,evid,network,station,location,channelz,channel1,channel2,phase,arrival_id,arrival_time,...,low_freq_corners_2,high_freq_corners_z,high_freq_corners_1,high_freq_corners_2,channel_dip_z,channel_azimuth_z,channel_dip_1,channel_azimuth_1,channel_dip_2,channel_azimuth_2
987,60001407,WY,YUF,1,HHZ,HHN,HHE,P,10001508,1350270000.0,...,40.0,3.530537,3.530537,3.530537,-90.0,0.0,0.0,0.0,0.0,90.0


In [5]:
# pyWaveformArchive is imported from a specific build directory, so that directory
# is placed at the front of sys.path before the import.
pwa_build_dir = '/uufs/chpc.utah.edu/common/home/koper-group4/bbaker/mlmodels/features/np4_build'
sys.path.insert(0, pwa_build_dir)
import pyWaveformArchive as pwa

# Open every archive file found earlier for reading.
archive_manager = pwa.ArchiveManager()
archive_manager.open_files_for_reading(h5_archive_files)

# Single call outside the timed loop, using the same arguments as the timed calls.
create_features(
    archive_manager,
    ex_arrival_df,
    magnitude_type='l',
    output_file='time_ex.csv',
)

In [6]:
# Mean wall-clock time of one create_features call, averaged over repeated calls.
# The repeat count is named once so the loop bound and the divisor cannot drift apart.
# NOTE(review): every call is passed output_file='time_ex.csv'; if create_features
# writes that file, the measured time includes the write — confirm.
n_extraction_runs = 1000
with threadpoolctl.threadpool_limits(limits=1):
    t_begin = time.perf_counter()
    for _ in range(n_extraction_runs):
        create_features(
            archive_manager,
            ex_arrival_df,
            magnitude_type='l',
            output_file='time_ex.csv',
        )
    t_finish = time.perf_counter()

extraction_time = (t_finish - t_begin) / n_extraction_runs
print("Time:", extraction_time)

Time: 0.009597017209976912


# Time how long it takes to compute features from extracted info

In [7]:
# Load the P training split and take a single row for the station under test.
data_file = "/uufs/chpc.utah.edu/common/home/koper-group3/alysha/magnitudes/feature_splits/p.train.csv"
train_df = pd.read_csv(data_file)

# Filter on `stat` (not a hard-coded station code) so the example always belongs to the
# same station that is passed to process_station_datasets below and used for the model files.
# iloc[0:1] keeps the result a one-row DataFrame rather than a Series.
example = train_df[train_df.station == stat].iloc[0:1]
if example.empty:
    raise ValueError(f"No rows for station {stat!r} in {data_file}")

proc = GatherFeatureDatasets(is_p=True)
# Returns the feature arrays, metadata, and the feature (column) names for the example.
feature_dict, meta_dict, feature_names = proc.process_station_datasets(
    stat,
    example,
    scaler=False,
    linear_model=False,
    source_dist_type='dist',
)

YUF
X shape: (1, 45), y shape: (1,)


In [8]:
# Mean wall-clock time of one proc.get_X_y call on the one-row example,
# averaged over repeated calls.
n_feature_runs = 1000
with threadpoolctl.threadpool_limits(limits=1):
    t_begin = time.perf_counter()
    for _ in range(n_feature_runs):
        proc.get_X_y(
            example,
            freq_max=18,
            scaler=False,
            source_dist_type='dist',
            linear_model=False,
            target_column='Event-Mean-YPML-S',
            verbose=False,
        )
    t_finish = time.perf_counter()

feature_comp_time = (t_finish - t_begin) / n_feature_runs
print("Time:", feature_comp_time)

Time: 0.007951987776905298


# Time how long it takes to scale the example and evaluate the model

In [9]:
# Load the station's fitted SVR model and its feature scaler from modeldir.
# joblib.load unpickles the file, so only point this at trusted files.
modeldir = "/uufs/chpc.utah.edu/common/home/koper-group3/alysha/magnitudes/p_models/selected_features_constHP"
svr_path = os.path.join(modeldir, f"{stat}.P.SVR.joblib")
scaler_path = os.path.join(modeldir, f"{stat}.P.scaler.joblib")
model = joblib.load(svr_path)
scaler = joblib.load(scaler_path)


In [10]:
# Names of the feature columns kept for the model: the amplitude/variance features
# followed by the source_* features.
amp_and_variance_feats = ["amp_1", "amp_2", "signal_variance", "noise_variance"]
source_feats = [
    "source_depth_km",
    "source_receiver_distance_logkm",
    "source_receiver_back_azimuth_deg",
]
selected_feats = amp_and_variance_feats + source_feats

In [11]:
# Positions in feature_names of the selected features.
# np.where on the boolean mask yields ascending indices, so the resulting column order
# follows feature_names, not the order of selected_feats.
# NOTE(review): confirm this matches the column order the scaler and model were fit with.
feature_subset_inds = np.where(np.isin(feature_names, selected_feats))[0]

# np.isin silently ignores requested names that are absent; surface them explicitly.
known_feature_names = set(np.ravel(feature_names).tolist())
missing_feats = [name for name in selected_feats if name not in known_feature_names]
if missing_feats:
    raise ValueError(f"Selected features not found in feature_names: {missing_feats}")

In [12]:
# First row of the "X_train" array, restricted to the selected feature columns;
# the 0:1 row slice keeps the result two-dimensional.
train_features = feature_dict["X_train"]
filtered_example = train_features[0:1, feature_subset_inds]
filtered_example.shape

(1, 7)

In [13]:
# Mean wall-clock time to scale the one-row example and evaluate the model on it,
# averaged over repeated calls.
n_predict_runs = 10000
with threadpoolctl.threadpool_limits(limits=1):
    t_begin = time.perf_counter()
    for _ in range(n_predict_runs):
        model.predict(scaler.transform(filtered_example))
    t_finish = time.perf_counter()

scale_eval_time = (t_finish - t_begin) / n_predict_runs
print("Time:", scale_eval_time)

Time: 0.00041623956002295016


# Estimate total time

In [20]:
# Per-example total: waveform extraction + feature computation + scaling/model evaluation.
stage_times = (extraction_time, feature_comp_time, scale_eval_time)
total_time = sum(stage_times)
print(f"Total time: {total_time:0.4f} s")

Total time: 0.0180 s


In [22]:
# Scale the per-example estimate by a fixed count.
# NOTE(review): the meaning of 52 is not stated in this notebook (presumably a count of
# stations/models or examples) — confirm and rename accordingly.
n_repeats = 52
total_time * n_repeats

0.9341927164390682