# Earthquake Data Download
This notebook uses the events selected in step 2.
It then downloads the `mseed` file and the associated inventory (`txt`) file for each event in the study area.
Since this is a fairly small study area, a start time of 30 s before the event time is enough even for the most distant ones.

---

### Import all necessary libraries here; this will make things easier later

In [2]:
import os
import time
from tqdm.auto import tqdm
import glob

import pandas as pd
import numpy as np
from obspy.clients.fdsn import Client
from obspy import UTCDateTime, read_inventory, Inventory, read, Stream
import matplotlib.pyplot as plt


# Setup all directories

In [3]:
# Project root: the parent of the current working directory.
project_dir = f"{os.getcwd()}/../"

# Root data folder: holds the eq catalogs, waveforms, and station catalogs.
data_dir = os.path.join(project_dir, "data/eq_data")

# One sub-folder per data product:
#   catalog_dir  -> all eq catalogs (orig and edited)
#   waveform_dir -> mseed files
#   station_dir  -> station xml files
catalog_dir, waveform_dir, station_dir = (
    os.path.join(data_dir, sub_folder)
    for sub_folder in ("1_eq_catalogs", "2_waveforms", "3_station_catalogs")
)

# Load the selected-event catalog, parsing the `time` column as datetimes.
selected_eq = f'{catalog_dir}/usgs_aboveslab_3D_grid_coarse.csv'
selected_eq_df = pd.read_csv(selected_eq, parse_dates=['time'])

## 2.1 Make a master inventory file
Because the selected events span 2008 through the end of 2023, a master station inventory containing all stations within this time frame should work for all later operations.

In [5]:
# Build one master station inventory by querying every data center for the
# requested channels, merging the results, and removing duplicate channel rows.

# define the datacenters and channel list
client_list = ['IRIS', 'NCEDC', 'SCEDC']
channel_list = ['HHZ', 'BHZ', 'HNZ', 'EHZ', 'SHZ', 'DPZ', 'SLZ', 'CNZ', 'HN3', 'HH3', 'ENZ']
# the FDSN query takes the channel codes as a single comma-separated string
channel_list = ','.join(channel_list)

# time range the station query must cover (not a waveform window)
starttime = UTCDateTime("2008-01-01")
endtime = UTCDateTime("2024-01-01")

# latitude/longitude bounding box passed to the station query
minlat = 39.75 - 2
maxlat = 41.5 + 2
minlon = -125.5 - 2
maxlon = -123 + 3

# create an empty inventory object that collects the networks from every client
merged_inventory = Inventory()

# Loop through each client (IRIS, NCEDC, SCEDC data centers)
for client_name in client_list:
    client = Client(client_name, debug=False, timeout=60)
    try:
        client_inv = client.get_stations(
            network="*",
            station="*",
            location="*",
            channel=channel_list,
            starttime=starttime,
            endtime=endtime,
            level="channel", # since we specify the channel, we must use level=channel
            minlatitude=minlat,
            maxlatitude=maxlat,
            minlongitude=minlon,
            maxlongitude=maxlon,
        )
    except Exception as e:
        # best-effort: report a failing data center and keep going with the others
        print(f"Error fetching data from {client_name}: {e}")
    else:
        merged_inventory.extend(client_inv.networks)

# make sure the output folder exists before anything is written into it
os.makedirs(station_dir, exist_ok=True)

# cleanup before writing: round-trip through the STATIONTXT table to drop
# channels that were returned by more than one data center
temp_inv_file = "temp_inv.txt"
merged_inventory.write(temp_inv_file, format="STATIONTXT")

# Read every field as a plain string and disable NaN detection. With pandas'
# default type inference a location code such as "00" can be rewritten as
# "0" / "0.0", and a literal "NA" code would be turned into a missing value.
inv_df = pd.read_csv(
    temp_inv_file, sep='|', dtype=str, keep_default_na=False
).drop_duplicates(subset=['#Network', 'Station', 'Location', 'Channel'])
inv_df.to_csv(temp_inv_file, sep='|', index=False)

# reload the de-duplicated table as an inventory and save it in both formats
inv = read_inventory(temp_inv_file, format='STATIONTXT')
inv.write(f"{station_dir}/00_station_inventory_master.xml", format="STATIONXML")
inv.write(f"{station_dir}/00_station_inventory_master.txt", format="STATIONTXT")

In [7]:

# NOTE(review): `sta_df` is not defined in any cell visible in this notebook, so
# this cell raises NameError on a fresh kernel (Restart & Run All). The saved
# output shows two different row counts (23520, then 6466), which suggests a step
# that modified `sta_df` between the two prints was removed -- TODO restore the
# code that builds and filters `sta_df`.
print(sta_df.shape[0])
# NOTE(review): a bare expression that is not the last line of a cell displays nothing.
sta_df
print(sta_df.shape[0])

# Save the table as a '|'-separated text file without the index column.
sta_df.to_csv(f"{station_dir}/00_station_inventory_master_921.txt", index=False, sep='|')

23520
6466


# 3. Download `.mseed` [and (optional) station_data]

## Read the inventory files already downloaded

Here I read the inventory file, which contains details about all the stations that recorded a particular earthquake event. \
From that inventory file I get all the information I need to download the seismic data (a numpy time series in `.mseed` format). \
I also download the metadata for that record in `.xml` and `.txt` formats.

This process uses `multiprocessing.Pool.imap_unordered` for parallel processing of the download.\
For the code, see `./code/my_funcs/get_waveforms_parallel_v3.py`, where I defined the download function combined with parallel processing. \
This significantly improves the runtime.


# Data center names.
client_list = ['IRIS', 'NCEDC', 'SCEDC']

# Channel patterns as a list, and the same patterns joined into one
# comma-separated string.
priority_channels = ['HH*', 'BH*', 'HN*', 'EH*']
channels_string = ','.join(priority_channels)         # by default given to get_waveforms_parallel function

 ### Class function [Latest (after v3)]

In [3]:
from classes_functions.get_WF_parallel_class import GetWFInvParallel
%reload_ext autoreload
%autoreload 2

os.makedirs(f"{waveform_dir}/inv_txt", exist_ok=True)
os.makedirs(f"{waveform_dir}/xml", exist_ok=True)

# get a list of mseed files without folder names
mseed_files = [os.path.basename(f) for f in glob.glob(f"{waveform_dir}/*.mseed")]

client_list = ['IRIS', 'NCEDC', 'SCEDC']
priority_channels_list = ['HHZ', 'BHZ', 'HNZ', 'EHZ']

barlen = len(selected_eq_df)# - len(mseed_files)
progress_bar = tqdm(total=barlen, desc="Downloading events", dynamic_ncols=True)

# loop through the selected events and download the waveforms
for i, row in selected_eq_df.iterrows():
    event_id = row.id

    # check if the event data is already downloaded
    if os.path.exists(f"{waveform_dir}/{event_id}.mseed"):
        print(f"Event {event_id} already downloaded")
        progress_bar.update(1)
        continue

    event_time = UTCDateTime(pd.to_datetime(row.time))
    starttime = event_time - 30
    endtime = event_time + 120

    # initialize the class
    gwfip  = GetWFInvParallel(starttime, endtime)

    # inventory file
    inv_xml_file = f"{waveform_dir}/xml/{event_id}_event_inv.xml"
    inv_txt_file = f"{waveform_dir}/inv_txt/{event_id}_event_inv.txt"

    # check existing 
    if os.path.exists(inv_xml_file):
        print('Using existing inventory')
        inv = read_inventory(inv_xml_file, format="STATIONXML")
    
    elif not os.path.exists(inv_xml_file) or len(inv) == 0:
        invdf = gwfip.get_inventory(
            radial_search=True,
            event_lat=row.latitude,
            event_lon=row.longitude,
        )
        # Drop duplicate stations, keep '**Z' channels [sorted by net-sta-cha]
        invdf = invdf.drop_duplicates(subset=['#Network', 'Station'], keep='last')
        invdf.to_csv(inv_txt_file, sep='|', index=False)
        inv = read_inventory(inv_txt_file, format="STATIONTXT")
        inv.write(inv_xml_file, format="STATIONXML")


    # get the waveforms for the event and save them
    t1 = time.time()

    # get the waveforms
    st, evinv = gwfip.get_waveforms_parallel(inv_txt_file)

    # save the waveforms if valid
    if len(st) > 0:
        st.write(f"{waveform_dir}/{event_id}.mseed", format="MSEED")
        print(f"Download complete for {i, event_id}:{len(st)}. Took {time.time()-t1:.2f} seconds")
        progress_bar.update(1)
        # break

    else:
        progress_bar.update(1)
        print(f"No waveforms downloaded for {event_id}")
        continue
        # break


Downloading events:   0%|          | 0/921 [00:00<?, ?it/s]

Event nc71356495 already downloaded
Event nc72075736 already downloaded
Event nc71648526 already downloaded
Event nc72182326 already downloaded
Event nc72182151 already downloaded
Event nc71977175 already downloaded
Event nc71759805 already downloaded
Event nc71755770 already downloaded
Event nc72090976 already downloaded
Event nc72082251 already downloaded
Event nc72182136 already downloaded
Event nc72807961 already downloaded
Event nc72930205 already downloaded
Event nc73397316 already downloaded
Event nc71810766 already downloaded
Event nc72239881 already downloaded
Event nc71166021 already downloaded
Event nc71180461 already downloaded
Event nc72538480 already downloaded
Event nc71102321 already downloaded
Event nc51214355 already downloaded
Event nc71118291 already downloaded
Event nc71163066 already downloaded
Event nc71592110 already downloaded
Event nc72729510 already downloaded
Event nc71157011 already downloaded
Event nc72310146 already downloaded
Event nc73794901 already dow

This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (884, 'nc72117960'):169. Took 17.01 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (885, 'nc72322446'):205. Took 17.02 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (886, 'nc73076250'):197. Took 16.68 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (887, 'nc73074680'):193. Took 14.69 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (888, 'nc72318396'):201. Took 16.58 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (889, 'nc73683196'):235. Took 18.52 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (890, 'nc72090751'):169. Took 16.23 seconds


This might have a negative influence on the compatibility with other programs.


Download complete for (891, 'nc72941716'):186. Took 16.14 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (892, 'nc72074151'):170. Took 14.70 seconds


This might have a negative influence on the compatibility with other programs.


Download complete for (893, 'nc71388040'):117. Took 12.61 seconds
Event nc73316461 already downloaded


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (895, 'nc72736795'):154. Took 14.99 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (896, 'nc51203864'):157. Took 14.13 seconds


This might have a negative influence on the compatibility with other programs.


Download complete for (897, 'nc51222078'):137. Took 14.29 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (898, 'nc71958960'):200. Took 16.95 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (899, 'nc72318721'):203. Took 15.87 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (900, 'nc51207671'):152. Took 12.63 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (901, 'nc71842710'):173. Took 21.03 seconds
Event nc73255220 already downloaded


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (903, 'nc72909621'):168. Took 14.40 seconds
Event nc51207214 already downloaded
Event nc72942761 already downloaded


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (906, 'nc72895391'):150. Took 12.40 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (907, 'nc73659055'):221. Took 17.99 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (908, 'nc72705351'):143. Took 12.84 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (909, 'nc73619486'):205. Took 14.88 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (910, 'nc72706951'):138. Took 12.46 seconds
Event nc40238541 already downloaded


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (912, 'nc40231307'):134. Took 14.35 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (913, 'nc73955565'):239. Took 17.91 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (914, 'nc73782386'):223. Took 15.14 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (915, 'nc72484636'):182. Took 13.52 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (916, 'nc72461801'):187. Took 15.59 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


Download complete for (917, 'nc71108755'):127. Took 12.90 seconds


This might have a negative influence on the compatibility with other programs.


Download complete for (918, 'nc72746035'):125. Took 11.99 seconds


This might have a negative influence on the compatibility with other programs.


Download complete for (919, 'nc71650676'):87. Took 9.96 seconds
Download complete for (920, 'nc71138195'):111. Took 10.12 seconds


This might have a negative influence on the compatibility with other programs.
This might have a negative influence on the compatibility with other programs.


In [4]:
# Count the entries in the waveform folder whose name ends with '.mseed'.
mseed_files = list(
    filter(lambda name: name.endswith('.mseed'), os.listdir(waveform_dir))
)
print(len(mseed_files))

921


In [5]:
# NOTE(review): every line in this cell is commented out, so running it does
# nothing. The disabled code concatenates the per-event station text files in
# inv_txt, drops duplicate network/station/channel rows, and writes a combined
# inventory in text and StationXML form -- TODO either delete this cell or restore it.
# # gather all the stationtxt files into one
# stationtxt_files = os.listdir(f"{waveform_dir}/inv_txt")

# # read all the stationtxt files into one dataframe
# stationtxt_df = pd.concat([pd.read_csv(f"{waveform_dir}/inv_txt/{f}", sep='|') for f in stationtxt_files])

# # drop duplicates
# stationtxt_df = stationtxt_df.sort_values(by=['#Network', 'Station', 'Channel'])
# stationtxt_df = stationtxt_df.drop_duplicates(subset=['#Network', 'Station', 'Channel'], keep='last')

# # save the stationtxt file
# stationtxt_df.to_csv(f"{data_dir}/3_station_catalogs/00_station_inventory_921.txt", sep='|', index=False)

# # read the stationtxt file
# station_inv = read_inventory(f"{data_dir}/3_station_catalogs/00_station_inventory_921.txt", format="STATIONTXT")
# station_inv.write(f"{data_dir}/3_station_catalogs/00_station_inventory_921.xml", format="STATIONXML")