# QuakeML to REA
The goal of this notebook is to merge events from the QuakeML file into the REA database we created from the MiniSEED files in WAV.

In [17]:
import os
import glob
import pickle
from obspy import UTCDateTime, read, read_events
from obspy.core.event import Comment #Event, Origin, Arrival, Pick, WaveformStreamID, Catalog

SOURCE_DIR = '/data/Pinatubo/PHASE'
REPO_DIR = '/home/thompsong/Developer/Pinatubo1991SeismicData'
REA_DIR = '/data/SEISAN_DB/REA/PINAT2'
WAV_DIR = '/data/SEISAN_DB/WAV/PINAT'
catalogqml = os.path.join(REPO_DIR, 'metadata', 'pinatubo_catalog.xml') 
wavqmlfile = os.path.join(REPO_DIR, 'metadata', 'pinatubo_wavcatalog.xml') 
catalogpkl = catalogqml.replace('.xml', '.pkl')
if os.path.isfile(catalogpkl):
    with open(catalogpkl, "rb") as f:
        catalog = pickle.load(f)
else:
    catalog = read_events(catalogqml)
    with open(catalogpkl, "wb") as f:
        pickle.dump(catalog, f)


We need to try to associate each event in the catalog (which comes from picks, not waveforms), with a corresponding S-file. 

Let's associate each ObsPy Event object in the ObsPy Catalog object with the best matching Seisan WAV file.

In [20]:
import pandas as pd
def make_miniseed_dataframe(wav_directory):
    """
    Associates MiniSEED files from a Seisan WAV database with Pick objects in an ObsPy Catalog.
    If a Pick's time falls within a MiniSEED file's time range, a Comment is added to the Event.

    Parameters:
        catalog (obspy.core.event.Catalog): The ObsPy Catalog containing Events with Picks.
        wav_directory (str): Path to the Seisan WAV database (e.g., "WAV/").

    Returns:
        obspy.core.event.Catalog: The updated Catalog with associated MiniSEED file paths in Comments.
    """

    # 🔹 Step 1: Load MiniSEED files and extract time windows
    #miniseed_files = sorted(glob.glob(os.path.join(f"{wav_directory}/1991/??/1991*"), recursive=True))  # Adjust pattern if needed
    pattern = f"{wav_directory}/1991/[0-1][0-9]/1991*"
    miniseed_files = sorted(glob.glob(pattern))
    miniseed_time_ranges = []

    for file in miniseed_files:
        print(file)
        try:
            stream = read(file)
            start_time = min(tr.stats.starttime for tr in stream)
            end_time = max(tr.stats.endtime for tr in stream)
            miniseed_time_ranges.append((file, start_time, end_time))
        except Exception as e:
            print(f"❌ Error reading {file}: {e}")
    
    print(f"✅ Loaded {len(miniseed_time_ranges)} MiniSEED files for association.")
    # Create a pandas DataFrame
    df = pd.DataFrame(miniseed_time_ranges, columns=['file', 'start_time', 'end_time'])
    return df

print('catalog=',catalog)
csv_filename = os.path.join(REPO_DIR, 'metadata', "miniseed_time_ranges.csv")
if os.path.isfile(csv_filename):
    df = pd.read_csv(csv_filename, index_col=None)
    
    # Convert DataFrame columns to ObsPy UTCDateTime objects
    df["start_time"] = df["start_time"].apply(UTCDateTime)
    df["end_time"] = df["end_time"].apply(UTCDateTime)    
else:
    df = make_miniseed_dataframe(WAV_DIR)
    if len(df)>0:
        df.to_csv(csv_filename, index=False)
display(df)

catalog= 11135 Event(s) in Catalog:
1970-03-02T13:05:08.030000Z | None, None
1970-03-02T13:05:08.030000Z | None, None
...
1991-08-21T15:51:45.640000Z | None, None
1991-08-21T15:51:45.640000Z | None, None
To see all events call 'print(CatalogObject.__str__(print_all=True))'


Unnamed: 0,file,start_time,end_time
0,/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-0...,1991-05-01T03:10:09.950000Z,1991-05-01T03:10:50.572400Z
1,/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-0...,1991-05-01T03:39:42.521000Z,1991-05-01T03:40:58.696680Z
2,/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-1...,1991-05-01T10:45:26.647000Z,1991-05-01T10:46:12.348440Z
3,/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-1...,1991-05-01T10:49:40.597000Z,1991-05-01T10:50:26.298440Z
4,/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-1...,1991-05-01T11:20:14.116000Z,1991-05-01T11:20:59.817440Z
...,...,...,...
21446,/data/SEISAN_DB/WAV/PINAT/1991/12/1991-12-05-2...,1991-12-05T21:12:24.596000Z,1991-12-05T21:13:40.771680Z
21447,/data/SEISAN_DB/WAV/PINAT/1991/12/1991-12-05-2...,1991-12-05T21:19:46.469000Z,1991-12-05T21:21:02.644680Z
21448,/data/SEISAN_DB/WAV/PINAT/1991/12/1991-12-05-2...,1991-12-05T21:22:03.602000Z,1991-12-05T21:23:19.777680Z
21449,/data/SEISAN_DB/WAV/PINAT/1991/12/1991-12-05-2...,1991-12-05T23:13:17.408000Z,1991-12-05T23:14:33.583680Z


Now we need to loop through each Event and try to match this origin time against the wfdisc dataframe, and look for the corresponding S-file name. Then replace that S-file with a new one we write with _write_nordic. or write a new event, if no match found.

In [28]:
import pandas as pd
import shutil
from obspy.io.nordic.core import blanksfile, _write_nordic
def associate_miniseed_with_events(catalog, df):
    """
    Associates MiniSEED files from a Seisan WAV database with Pick objects in an ObsPy Catalog.
    If a Pick's time falls within a MiniSEED file's time range, a Comment is added to the Event.

    Parameters:
        catalog (obspy.core.event.Catalog): The ObsPy Catalog containing Events with Picks.
        wav_directory (str): Path to the Seisan WAV database (e.g., "WAV/").

    Returns:
        obspy.core.event.Catalog: The updated Catalog with associated MiniSEED file paths in Comments.
    """

    # Iterate through Events & Picks to check time association
    total_matched = 0
    total_unmatched = 0

    for event in catalog:
        associated_files = set()  # Track unique MiniSEED files per event
        
        for pick in event.picks:
            pick_time = pick.time  # Extract pick time

            # Filter DataFrame to find matching file_path
            matching_rows = df[(df["start_time"] <= pick_time) & (df["end_time"] >= pick_time)]
            
            # Add matching file paths to the set
            matching_files = matching_rows["file"].tolist()
            if matching_files:
                associated_files.update(matching_files)

        # Print or use associated_files as needed
        print(f"Event: {event.resource_id}, Associated Files: {associated_files}") 

        # Add MiniSEED file paths as Comments in Event
        if associated_files:
            associated_files = sorted(list(associated_files))
            basenames = [os.path.basename(f) for f in associated_files]
            comment_text = "Associated MiniSEED files:\n" + "\n".join(basenames)
            event.comments.append(Comment(text=comment_text))
            print(f"📌 Added MiniSEED association for Event at {event.origins[0].time}")

        otime = event.origins[0].time
        ymdir = os.path.join(REA_DIR, otime.strftime('%Y'), otime.strftime('%m'))
        os.makedirs(ymdir, exist_ok=True)    

        num_wavfiles = len(associated_files)
        if num_wavfiles>0:
            # rewrite the S-file, with the picks too
            wavfile = associated_files[-1]
            st = read(wavfile, format='MSEED')
            stime = st[0].stats.starttime
            filename = stime.strftime("%d-%H%M-%S") + "L.S" + stime.strftime("%Y%m") 
            print(f'Found {num_wavfiles} matching WAVs. creating {filename} in {ymdir}')
            _write_nordic(event, filename, userid='gt  ', evtype='L', outdir=ymdir, wavefiles=os.path.basename(wavfile), 
                      explosion=False, nordic_format='OLD', overwrite=True, high_accuracy=False)  
            total_matched += 1
        else:
            # write a new sfile with the picks
            ymdir2 = ymdir.replace('PINAT', 'PINATnew')
            os.makedirs(ymdir2, exist_ok=True)    
            sfilename = blanksfile('', 'L', 'gt  ', evtime=otime, nordic_format='OLD')
            sfile_path = os.path.join(ymdir2, sfilename) 
            print(f'Found {num_wavfiles} matching WAVs. creating {sfilename} in {ymdir2}')
            shutil.move(sfilename, sfile_path)
            total_unmatched += 1
        print(f'Matched {total_matched}, Unmatched {total_unmatched}')
         
# Associate MiniSEED files with events
wav_catalog = catalog.copy()
associate_miniseed_with_events(wav_catalog, df)

# Save the updated catalog with comments
wav_catalog.write(wavqmlfile, format="QUAKEML")
print(f"\n✅ Updated catalog saved as {wavqmlfile}")

Event: smi:local/38f0539c-f72c-47b6-ae20-450a556f4d12, Associated Files: set()
Found 0 matching WAVs. creating 02-1305-09L.S197003 in /data/SEISAN_DB/REA/PINATnew2/1970/03
Matched 0, Unmatched 1




Event: smi:local/416d7503-22fc-42a6-863c-7db30b0923fa, Associated Files: set()
Found 0 matching WAVs. creating 02-1305-09L.S197003 in /data/SEISAN_DB/REA/PINATnew2/1970/03
Matched 0, Unmatched 2
Event: smi:local/6fd8a2f8-329d-4987-8fdb-ca8f6185bc1f, Associated Files: {'/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-0310-09M.PINAT_005'}
📌 Added MiniSEED association for Event at 1991-05-01T03:10:19.170000Z
Found 1 matching WAVs. creating 01-0310-09L.S199105 in /data/SEISAN_DB/REA/PINAT2/1991/05
Matched 1, Unmatched 2




Event: smi:local/df4fef67-7c57-4b2c-bf60-64b89a82e609, Associated Files: {'/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-0339-42M.PINAT_005'}
📌 Added MiniSEED association for Event at 1991-05-01T03:39:53.670000Z
Found 1 matching WAVs. creating 01-0339-42L.S199105 in /data/SEISAN_DB/REA/PINAT2/1991/05
Matched 2, Unmatched 2
Event: smi:local/fbddf66a-2bf3-4fbe-901a-320d8d3e721a, Associated Files: {'/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-1045-26M.PINAT_004'}
📌 Added MiniSEED association for Event at 1991-05-01T10:45:39.420000Z
Found 1 matching WAVs. creating 01-1045-26L.S199105 in /data/SEISAN_DB/REA/PINAT2/1991/05
Matched 3, Unmatched 2
Event: smi:local/d7d1a59c-d5dc-4bb6-9c43-c47577b8ee11, Associated Files: {'/data/SEISAN_DB/WAV/PINAT/1991/05/1991-05-01-1049-40M.PINAT_004'}
📌 Added MiniSEED association for Event at 1991-05-01T10:49:51.510000Z
Found 1 matching WAVs. creating 01-1049-40L.S199105 in /data/SEISAN_DB/REA/PINAT2/1991/05
Matched 4, Unmatched 2
Event: smi:local/44641197-fc

NordicParsingError: Pick is 6940 days from the origin, must be < 48 hours