# Cataloging Exercise  
*:auth: Nate Stevens (PNSN)*

In this exercise we'll use ObsPy and ObsPlus to create a highly translatable earthquake
catalog of located events from our analyses and those in the USGS Comprehensive Catalog
(ComCat) and do some quick intercomparisons between the two. Finally we'll save these
catalogs into a variety of standardized formats (schema) that are easy to re-load and
re-analyze with ObsPy and Pandas.

In [None]:
import os
from pathlib import Path
from glob import glob
import pandas as pd
from obspy import read_events, UTCDateTime
import obsplus
from obspy.geodetics import gps2dist_azimuth, kilometer2degrees
from obspy.clients.fdsn import Client


In [None]:
# TODO: Make sure this points at wherever you saved your HypoDD outputs
# Every path used below is anchored to the current working directory.
ROOT = Path.cwd()
# Input directory that is searched for HypoDD phase (*.pha) files.
DATA = ROOT.joinpath('data')
# Output directory, created up front so that it is guaranteed to exist
# (presumably the destination for the catalog files saved later on).
CATD = ROOT.joinpath('catalog_files')
CATD.mkdir(parents=True, exist_ok=True)
print(f'The data directory is registered as {DATA}')

In [None]:
# Load the HypoDD output into an ObsPy `Catalog` object
# Sort the matches so the event order in `cat` is reproducible between runs
# (`glob` returns files in an arbitrary, filesystem-dependent order).
flist = sorted(glob(str(DATA/'*.pha')))
if not flist:
    # Fail early with an actionable message instead of a NameError on `cat`
    raise FileNotFoundError(f'No *.pha files found in {DATA}')

# Seed the catalog with the first file, then append the events of the others
cat = read_events(flist[0])
for _f in flist[1:]:
    cat += read_events(_f)


display(cat)

In [None]:
# Use ObsPlus to show a DataFrame representation of events (takes a little time)
# NOTE(review): `to_df` is presumably attached to the ObsPy `Catalog` class by
# the `import obsplus` statement above - confirm against the ObsPlus docs.
df_events = cat.to_df()

## Look at all those empty fields, just waiting for you to populate them!

In [None]:
# Display our new table (conveniently formatted in nearly ANSS EVENT table format!)
# `display` is not imported in this file; this assumes an IPython/Jupyter
# session where it is available as a built-in.
display(df_events)

## Although the ObsPlus documentation is sometimes sparse on examples, their coding is quite good!
Let's turn all of our picks into a dataframe

In [None]:
# Turns out the *.pha I/O reader has a little bug, so we need to apply a small
# correction to assign network and station codes to the correct fields.
# First try the conversion as-is; only patch the picks if it fails.
try:
    df_picks = cat.arrivals_to_df()
except Exception:
    # `Exception` rather than a bare `except`, so Ctrl-C (KeyboardInterrupt)
    # and SystemExit are not swallowed while the conversion is running.
    for event in cat.events:
        for pick in event.picks:
            wid = pick.waveform_id
            sn = wid.station_code
            # Leave codes that are missing or hold no '.' untouched: there is
            # nothing to split, and indexing the second part would raise.
            if not sn or '.' not in sn:
                continue
            parts = sn.split('.')
            # NOTE(review): this keeps the original assignment order (first
            # part -> station, second part -> network); confirm it matches the
            # layout of the station field in your *.pha files.
            wid.station_code = parts[0]
            wid.network_code = parts[1]
    # Retry now that the codes are split; a second failure is a real error
    # and is allowed to propagate.
    df_picks = cat.arrivals_to_df()

In [None]:
# Show the DataFrame built from the catalog's arrivals in the previous cell
display(df_picks)

In [None]:
# Let's populate some source-receiver geometry information
# Station metadata is requested from the 'IRIS' FDSN data center.
client = Client('IRIS')

# Comma-separated lists of every distinct network / station code in the picks
nets = ','.join(df_picks['network'].unique())
stas = ','.join(df_picks['station'].unique())

# Channel-level metadata for those stations over the one-day request window
inv = client.get_stations(
    network=nets,
    station=stas,
    level='channel',
    starttime=UTCDateTime('20221220'),
    endtime=UTCDateTime('20221221'),
)

# Use ObsPlus added methods to convert the inventory into a dataframe
df_stations = inv.to_df()

display(df_stations)

In [None]:
# Add the maximum azimuthal gap to each origin
# Here's a starting point:
# (The gaps are collected in a table keyed by origin resource ID; writing them
# back onto the origins themselves is left as the next step.)

# Index station coordinates once by (network, station) so each arrival is a
# dictionary lookup rather than a boolean filter over the whole DataFrame.
# `setdefault` keeps the first row per station, i.e. the same row that
# `.values[0]` on the filtered frame would have returned.
station_coords = {}
for _net, _sta, _lat, _lon in zip(df_stations.network, df_stations.station,
                                  df_stations.latitude, df_stations.longitude):
    station_coords.setdefault((_net, _sta), (_lat, _lon))

# Iterate across events
origin_gaps = []
for event in cat.events:
    # Iterate across origins
    for origin in event.origins:
        olon = origin.longitude
        olat = origin.latitude
        # Unique event-to-station azimuths of the arrivals tied to this origin
        bazs = set()
        # Iterate across associated arrivals
        for arrival in origin.arrivals:
            # Get pick observations
            pick = arrival.pick_id.get_referred_object()
            # Get station location; skip arrivals whose station is not in the
            # station table
            network = pick.waveform_id.network_code
            station = pick.waveform_id.station_code
            coords = station_coords.get((network, station))
            if coords is None:
                continue
            slat, slon = coords
            # Point A is the station and point B the origin, so the return is
            # (distance in m, azimuth station->event, azimuth event->station)
            dist_m, seaz, esaz = gps2dist_azimuth(slat, slon, olat, olon)
            # Convert distance to degrees
            arrival.distance = kilometer2degrees(dist_m*1e-3)
            # Assign the event-to-station azimuth
            arrival.azimuth = esaz
            bazs.add(esaz)
        # Without a single located station the gap is undefined: record NaN
        # instead of crashing on the empty azimuth list
        if not bazs:
            origin_gaps.append([origin.resource_id.id, float('nan')])
            continue
        # Calculate gaps between neighbouring azimuths, plus the wrap-around
        # gap from the largest azimuth back to the smallest
        bazs = sorted(bazs)
        gaps = [_hi - _lo for _lo, _hi in zip(bazs, bazs[1:])]
        gaps.append(360 - bazs[-1] + bazs[0])
        # Get maximum azimuthal gap
        maxgap = max(gaps)
        # associate with resourceID
        origin_gaps.append([origin.resource_id.id, maxgap])

display(pd.DataFrame(origin_gaps, columns=['resource_id','gap']))

# Now that we've populated an ObsPy Catalog object, we can write into a bunch of different formats