## FDSN Waveform Downloader & Converter

Downloads I06AU and I52GB waveforms from the IRIS FDSN services using the ObsPy Mass Downloader and then converts them to .SAC files, together with their stats, for further processing. 

This notebook consists of 3 parts:
1. Downloading Data using ObsPy Mass Downloader
2. Conversion of MSEED Files to .SAC
3. Conversion of .SAC files to CSV

In [1]:
import sys
sys.path.append('/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/')

import obspy
from obspy import UTCDateTime, read, read_inventory
from obspy.clients.fdsn.mass_downloader import GlobalDomain, \
    Restrictions, MassDownloader
from obspy.core.util.attribdict import AttribDict
import os
import subprocess
import glob
from tqdm.notebook import tqdm
from tqdm import tqdm
from src.utils.converter import *
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor

### 1. Downloading Data Using Obspy Mass Downloader

Download the data ranging from 2018-06-24T00:00:00 until 2019-09-03T00:00:00

In [None]:
# 1.1 I06AU Waveform Download
#
# Fetches the daily BDF waveforms and the StationXML metadata of the I06H*
# stations (network IM) from IRIS and stores them on disk.

# No geographical constraint: stations are selected by name only.
domain = GlobalDomain()

restrictions = Restrictions(
    # Get the data from 2018-06-24 up to 2019-09-03.
    starttime=obspy.UTCDateTime(2018, 6, 24),
    endtime=obspy.UTCDateTime(2019, 9, 3),
    # Chunk it to have one file per day.
    chunklength_in_sec=86400,
    network="IM", station="I06H*", location="", channel="BDF",
    # Keep channels with gaps; gaps are dealt with at a later stage.
    reject_channels_with_gaps=False,
    # Same is true with the minimum length. All data might be useful.
    minimum_length=0.0,
    # Guard against the same station having different names.
    minimum_interstation_distance_in_m=100.0)

mdl = MassDownloader(providers=["IRIS"])
# Store the files in the directories that the MSEED -> SAC conversion of
# section 2.1 reads from (same layout as the I52GB download).
mdl.download(domain, restrictions, mseed_storage="/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_MSEED",
             stationxml_storage="/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_STATIONS")

In [None]:
# 1.2 I52GB Waveform Download
#
# Same request as for I06AU, but for the I52H* stations.

# Stations are picked by name, so no geographical limits are applied.
domain = GlobalDomain()

restrictions = Restrictions(
    # Which channels to request.
    network="IM",
    station="I52H*",
    location="",
    channel="BDF",
    # Requested time span, stored as one file per day (86400 s).
    starttime=UTCDateTime("2018-06-24T00:00:00"),
    endtime=UTCDateTime("2019-09-03T00:00:00"),
    chunklength_in_sec=86400,
    # Keep everything: traces with gaps and traces of any length.
    reject_channels_with_gaps=False,
    minimum_length=0.0,
    # Stations closer than 100 m to an already selected one are not selected.
    minimum_interstation_distance_in_m=100.0,
)

mdl = MassDownloader(providers=["IRIS"])
mdl.download(
    domain,
    restrictions,
    mseed_storage="/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I52GB/I52GB_MSEED",
    stationxml_storage="/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I52GB/I52GB_STATIONS",
)

### 2. Conversion of MSEED Files to .SAC
Converting MSEED Files to SAC Complete with Important Headers

In [2]:
# 2.1 I06AU Waveform Conversion

# Station metadata (StationXML) downloaded together with the waveforms.
stationxml_directory = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_STATIONS'
# Folder holding the downloaded MSEED files.
input_directory = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_MSEED'
# Folder passed to the converter as the SAC output location.
output_directory = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_SAC'

# Convert the I06AU files.
mseed_to_sac(
    input_directory,
    output_directory,
    stationxml_directory,
)

In [None]:
# 2.2 I52GB Waveform Conversion

# Station metadata (StationXML) downloaded together with the waveforms.
stationxml_directory = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I52GB/I52GB_STATIONS'
# Folder holding the downloaded MSEED files.
input_directory = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I52GB/I52GB_MSEED'
# Folder passed to the converter as the SAC output location.
output_directory = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I52GB/I52GB_SAC'

# Convert the I52GB files.
mseed_to_sac(
    input_directory,
    output_directory,
    stationxml_directory,
)


### 3. Conversion of SAC files to CSV

In [2]:
# 3.1 I06AU SAC to csv conversion

folder_path = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_SAC'
output_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_CSV'
freq = 0.1
df_grouped_file_paths = sac_path_grouping(folder_path)

# Submit one conversion job per grouped file path to a pool of worker processes
with concurrent.futures.ProcessPoolExecutor() as executor:
    # Map every future to the file pattern it was submitted with, so that a
    # failure can be reported together with the pattern that caused it
    future_to_file = {
        executor.submit(sac_to_csv, pattern, output_folder, freq): pattern
        for pattern in df_grouped_file_paths['GroupedFilePath']
    }

    # Report the jobs in the order in which they finish
    for future in tqdm(
        concurrent.futures.as_completed(future_to_file),
        total=len(future_to_file),
        desc="Processing SAC files",
    ):
        file_path_pattern = future_to_file[future]
        try:
            # result() re-raises any exception raised inside the worker
            csv_output_path = future.result()
            print(f'CSV file created at: {csv_output_path}')
        except Exception as e:
            # Best effort: report the failure and carry on with the other jobs
            print(f"Error processing file pattern {file_path_pattern}: {e}")

Processing SAC files:   0%|          | 0/436 [00:00<?, ?it/s]

In [None]:
# 3.2 I52GB SAC to csv conversion

# Read the SAC files written by the I52GB conversion of section 2.2 and keep
# the CSV files next to them.
folder_path = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I52GB/I52GB_SAC'
output_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I52GB/I52GB_CSV'
freqmin = 0.7 
freqmax = 4.0 
df_grouped_file_paths = sac_path_grouping(folder_path)

# Iterate over the DataFrame rows and process each file path
for index, row in df_grouped_file_paths.iterrows():
    file_path_pattern = row['GroupedFilePath']
    try:
        # Process each file path pattern with the sac_to_csv function
        # NOTE(review): the I06AU cell calls sac_to_csv with a single `freq`
        # argument while this call passes freqmin and freqmax - confirm which
        # form the current sac_to_csv signature expects.
        csv_output_path = sac_to_csv(file_path_pattern, output_folder, freqmin, freqmax)
        print(f'CSV file created at: {csv_output_path}')
    except Exception as e:
        # Best effort: report the failure and continue with the next pattern
        print(f"Error processing file pattern {file_path_pattern}: {e}")

### 4. Running Spectral Yield Estimation (SpYE)

In [11]:
# Print the usage help of infrapy's `run_spye regional` command.
!infrapy run_spye regional -h


Usage: infrapy run_spye regional [OPTIONS]

  Run Spectral Yield Estimation (SpYE) methods to estimate the equivalent TNT
  yield of an above-ground explosion using a single set of transmission loss
  models (TLMs)

  Example usage (run from infrapy/examples directory):
      infrapy run_spye regional --local-wvfrms '../infrapy-data/hrr-5/*/*.sac' --local-detect-label data/HRR-5.dets.json --src-lat 33.5377 --src-lon -106.333961 --tlm-label "../infrapy/propagation/priors/tloss/2007_08-" --local-yld-label "HRR-5"

Options:
  --config-file TEXT         Configuration file
  --local-wvfrms TEXT        Local waveform data files
  --fdsn TEXT                FDSN source for waveform data files
  --db-config TEXT           Database configuration file
  --local-detect-label TEXT  Detection results path
  --local-loc-label TEXT     Localization results path
  --local-yld-label TEXT     Output file for results
  --tlm-label TEXT           Transmission loss model (TLM) path
  --src-lat TEXT        

In [3]:
# JSON file with the I06AU detections.
detection_json = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/processed/I06AU.dets.json'
# Folder with the I06AU CSV files produced by the SAC to csv conversion.
csv_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_CSV'
# Folder passed to the extraction as the output location.
output_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/processed/I06AU_FULL'

extract_det_from_csv(
    csv_folder,
    detection_json,
    output_folder,
)

Saved trimmed data to /run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/processed/I06AU_FULL/IM_det_635_20190105_000105_to_20190105_235550.csv


Processing detections for IM.I06H_..BDF__20180917T000000Z__20180918T000000Z.csv:   0%|          | 0/906 [00:00…

Processing detections for IM.I06H_..BDF__20190722T000000Z__20190723T000000Z.csv:   0%|          | 0/906 [00:00…

Processing detections for IM.I06H_..BDF__20181016T000000Z__20181017T000000Z.csv:   0%|          | 0/906 [00:00…