# Download the inventory data for the following criteria
Select an earthquake event (by id).\
Get the station inventory within the study area which recorded this event.\
Use this station inventory data to download earthquake data (`.mseed`) and station data (`.txt` or `.xml` format).\
Three-component earthquake data are recorded along the x, y and z axes. The channels are usually named HHE, HHN and HHZ, or HH1, HH2 and HHZ.\
I want to select the following channels in priority order: if the first one is available, get only that one and stop; if not, search for the second one, and so on.
1. HH*
2. BH*
3. HN*
4. EH* \
Lastly, 


In [2]:
import os
import time
import pandas as pd
import numpy as np
from obspy.clients.fdsn import Client
from obspy import UTCDateTime, read_inventory, Inventory, read, Stream

In [26]:
%reload_ext autoreload
# Read the earthquake catalogue and keep only events with magnitude >= 2.5
eqdf = pd.read_csv("../data/above_slab_eq_0.2_grid.csv", parse_dates=["time"])
eqdf = eqdf[eqdf.mag >= 2.5].reset_index(drop=True)

# Extract unique grid codes and sort them
unique_grid_codes = np.sort(eqdf['grid_code'].unique())
print(f"Number grids : {len(unique_grid_codes)}")

# Depths of exactly 0, 5 or 10 km are treated as fixed depths;
# events with such a depth are never selected.
fixed_depths_km = (0, 5, 10)

# For every grid, pick the largest-magnitude event whose depth is not fixed.
# Only the row labels are collected here, so the final frame is built in one
# step and keeps the column dtypes of `eqdf` (no repeated pd.concat).
picked_labels = []
for grid_code in unique_grid_codes:
    # events of this grid, largest magnitude first; the stable sort keeps the
    # catalogue order between events of equal magnitude
    grid_events = eqdf[eqdf.grid_code == grid_code].sort_values(
        by='mag', ascending=False, kind='mergesort'
    )

    # drop the fixed-depth events; a grid left with no event is skipped
    usable_events = grid_events[~grid_events['depth'].isin(fixed_depths_km)]
    if not usable_events.empty:
        picked_labels.append(usable_events.index[0])

# one row per grid that has a usable event, ordered by grid code
selected_eq = eqdf.loc[picked_labels].reset_index(drop=True)



Number grids : 47


In [None]:
# make a data folder to store all data in the same folder
data_path = "../data/eq_data/all_data"
os.makedirs(data_path, exist_ok=True)

# define the datacenters and channel list (identical for every event)
client_list = ['IRIS', 'NCEDC', 'SCEDC']
channel_list = 'HH*,BH*,HN*,EH*' # select broadband and high sample rate channels

# all inventories go to the same folder
output_folder = data_path

# Loop through each selected event
for _, row in selected_eq.iterrows():
    # get the details of the event
    event_id = row['id']
    inventory_file = f"{data_path}/{event_id}_station_inventory.txt"

    # Read the grid code from the event row itself: `selected_eq` has no row
    # for grids without a usable event, so its row position does not have to
    # match the position in `unique_grid_codes`.
    print(f"{'#'*5} Selected event: {event_id}, mag: {row['mag']} in grid {row['grid_code']}")

    # if an event is already downloaded, skip
    if os.path.exists(inventory_file):
        print(f"{'#'*10} Event {event_id} already downloaded. Skipping...")
        continue

    # Time window of the station query: 40 s before to 120 s after the event
    event_time = UTCDateTime(pd.to_datetime(row.time))
    starttime = event_time - 40
    endtime = event_time + 120

    merged_inventory = Inventory()

    # Loop through each client (IRIS, NCEDC, SCEDC data centers)
    for client_name in client_list:
        try:
            # the client is created inside the try block so that a data
            # center that cannot be reached is reported and skipped instead
            # of stopping the whole loop
            client = Client(client_name, debug=False, timeout=60)
            inv = client.get_stations(
                network="*",
                station="*",
                location="*",
                channel=channel_list,
                starttime=starttime,
                endtime=endtime,
                level="channel",
                minlatitude=39,
                maxlatitude=42,
                minlongitude=-128,
                maxlongitude=-122.5, # extend the area by 0.5 deg
            )
        except Exception as e:
            # best effort: keep going with the remaining data centers
            print(f"Error fetching data from {client_name}: {e}")
        else:
            merged_inventory.networks.extend(inv.networks)

    # The inventory file doubles as the "already downloaded" marker above, so
    # do not write it when nothing was retrieved; the event is then retried
    # the next time this cell runs.
    if not merged_inventory.networks:
        print(f"{'#'*10} No station inventory retrieved for event {event_id}. Nothing written.")
        continue

    # write the whole inventory to a file
    # merged_inventory = merged_inventory.remove(network="SY")
    # merged_inventory.write(f"{data_path}/{event_id}_station_inventory.xml", format="STATIONXML")
    merged_inventory.write(inventory_file, format="STATIONTXT")
    # merged_inventory.plot(projection="local", resolution="i", label=False, show=False);

    # break # test with only one grid



# Read the inventory files already downloaded 
# And download `.mseed` and station_data (`.xml & .txt`)

Here I will read the inventory file, which contains details about all the stations that recorded a particular earthquake event. \
From that inventory file I will get all the information I need to download the seismic data (a numpy time series in `.mseed` format). \
I will also download the metadata for that record in `.xml` and `.txt` formats.\

This process uses `multiprocessing.Pool.imap_unordered` to run the downloads in parallel.\
For the code, see `./code/my_funcs/get_waveforms_parallel_v3.py`, where I defined the download function combined with parallel processing. \
This significantly improves the runtime.


In [83]:
# reload the module to get the latest changes
import sys
sys.path.append('./my_funcs')
# %load_ext autoreload
%autoreload 2
%reload_ext autoreload

import glob
import os 
from tqdm.notebook import tqdm
# import all the `get_waveforms` function
from my_funcs.get_waveforms_parallel_v3 import *

# define the client list i.e. the data centers to download data from
client_list = ['NCEDC', 'IRIS'] #, 'SCEDC']

# get a list of all the event id folders
event_paths = glob.glob("../data/eq_data/*")
event_ids = [os.path.basename(path) for path in event_paths if os.path.isdir(path)] # get the event ids from folder names

# Read earthquake data
eqdf = pd.read_csv("../data/above_slab_eq_df.csv", parse_dates=["time"])

# define the priority channels
priority_channels = ['HH*', 'BH*', 'HN*', 'EH*']

event_ids = ['nc73783911'] # test with one event #################################### change it ####################

# Wrap the event list in tqdm so the bar advances once per event,
# including the events that are skipped below
progress_bar = tqdm(event_ids, desc="Downloading events")

# loop through each event id and download the data
for event_id in progress_bar:

    # define the output folder
    output_folder = f"../data/eq_data/{event_id}/"

    # check if the event data is already downloaded
    if os.path.exists(f"{output_folder}event_waveforms.mseed"):
        print(f"Event {event_id} already downloaded. Skipping...")
        continue

    # Skip folders that hold no inventory for this event (for example a
    # folder under ../data/eq_data that is not an event folder) instead of
    # failing in read_inventory
    inventory_path = f"{output_folder}inventory/station_inventory_{event_id}.xml"
    if not os.path.exists(inventory_path):
        print(f"No inventory file found for event {event_id} ({inventory_path}). Skipping...")
        continue

    # get the event details; skip ids that are not in the catalogue instead
    # of failing on the empty selection
    eq = eqdf[eqdf.id == event_id]
    if eq.empty:
        print(f"Event {event_id} not found in the earthquake catalogue. Skipping...")
        continue

    print(f"{' '*8}Getting data for event {event_id}")

    # Read the inventory
    inventory = read_inventory(inventory_path)

    # get the event time, start time and end time
    event_time = UTCDateTime(pd.to_datetime(eq.time.values[0])) # get the event time in UTC format
    starttime = event_time - 30 # start time is 30 seconds before the event time
    endtime = event_time + 120 # end time is 120 seconds after the event time

    # Call the function with the desired parameters
    # this will download and write the data to a file, to change path, edit the function
    get_waveforms_parallel(client_list, inventory, starttime, endtime, output_folder, priority_channels)

    # break # test with only one event

# close the progress bar (harmless if the loop above already finished it)
progress_bar.close()

Downloading events:   0%|          | 0/1 [00:00<?, ?it/s]

Event nc73783911 already downloaded. Skipping...



