Convert KSC event files

In [None]:
import os
import glob
import pandas as pd
from obspy import read, UTCDateTime, Stream
import numpy as np

def split_trace_at_midnight(tr):
    """
    Split a Trace at UTC midnight boundaries. Return list of Trace objects.

    Every returned piece is an independent copy that only holds samples
    belonging to one UTC day, so the piece's start time identifies the day it
    should be filed under. The input trace is left untouched.

    Parameters
    ----------
    tr : obspy.core.trace.Trace
        Trace to split.

    Returns
    -------
    list of obspy.core.trace.Trace
        Pieces in chronological order. A trace without samples yields an
        empty list; a trace contained in a single day yields one piece.
    """
    if tr.stats.npts == 0:
        return []

    out = []
    t1 = tr.stats.starttime
    t2 = tr.stats.endtime

    # The loop always exits through the ``break`` on the final piece. Using an
    # explicit last-piece test (instead of ``while t1 < t2``) keeps
    # single-sample traces, whose start and end times are equal.
    while True:
        next_midnight = UTCDateTime(t1.date) + 86400
        is_last = t2 < next_midnight
        trim_end = t2 if is_last else next_midnight

        # nearest_sample=False selects the first sample at/after ``t1`` and the
        # last sample at/before ``trim_end``. With nearest_sample=True a piece
        # starting at midnight could snap back to the last sample of the
        # previous day and would then be filed under the wrong day.
        tr_piece = tr.copy().trim(starttime=t1, endtime=trim_end,
                                  nearest_sample=False)

        # A sample lying exactly on the midnight boundary belongs to the next
        # day; drop it here so it is not stored twice.
        if (not is_last and tr_piece.stats.npts > 0
                and tr_piece.stats.endtime >= next_midnight):
            tr_piece.data = tr_piece.data[:-1]

        if tr_piece.stats.npts > 0:
            out.append(tr_piece)

        if is_last:
            break
        t1 = next_midnight
    return out

def traces_overlap_and_match(tr1, tr2):
    """
    Compare two traces over the time window they have in common.

    Returns a tuple ``(overlap, values_match)``:

    * ``(False, True)`` when the later start time is not before the earlier
      end time, i.e. there is no shared window to compare.
    * ``(True, False)`` when the shared window holds a different number of
      samples in each trace, or the samples differ.
    * ``(True, True)`` when the shared window holds identical samples.
    """
    window_start = max(tr1.stats.starttime, tr2.stats.starttime)
    window_end = min(tr1.stats.endtime, tr2.stats.endtime)

    # Nothing to compare unless the shared window has a positive duration.
    if window_start >= window_end:
        return False, True

    window = dict(starttime=window_start, endtime=window_end,
                  nearest_sample=True)
    samples1 = tr1.slice(**window).data
    samples2 = tr2.slice(**window).data

    # A length mismatch counts as a conflict without comparing values.
    same = len(samples1) == len(samples2) and np.array_equal(samples1, samples2)
    return True, same

def write_trace_to_sds(tr, base_folder='/data/SDS_EVENTS'):
    """
    Write a Trace as MiniSEED day files below ``base_folder``.

    The trace is first split at UTC midnight. Each piece goes to
    ``<base_folder>/<year>/<network>/<station>/<channel>.D/<trace id>.D.<year>.<julian day>``.

    * If the day file does not exist, the piece is written as-is.
    * If it exists and no trace with the same id holds different samples in
      an overlapping window, the piece is merged into the file.
    * If the data conflict, or reading/merging/writing fails, the existing
      file is left alone and the piece is written next to it with the first
      unused numeric suffix (``.01``, ``.02``, ...).

    Parameters
    ----------
    tr : obspy.core.trace.Trace
        Trace to archive.
    base_folder : str
        Root directory of the archive.
    """
    for tr_piece in split_trace_at_midnight(tr):
        YYYY = tr_piece.stats.starttime.year
        JJJ = f'{int(tr_piece.stats.starttime.julday):03d}'
        sdsfolder = os.path.join(base_folder, str(YYYY), tr_piece.stats.network,
                                 tr_piece.stats.station, f'{tr_piece.stats.channel}.D')
        os.makedirs(sdsfolder, exist_ok=True)
        sdsfullpath = os.path.join(sdsfolder, f'{tr_piece.id}.D.{YYYY}.{JJJ}')

        if not os.path.isfile(sdsfullpath):
            tr_piece.write(sdsfullpath, format='MSEED')
            continue

        # ``problem`` stays None only if the merge succeeds.
        problem = None
        try:
            existing_st = read(sdsfullpath)
            for existing_tr in existing_st.select(id=tr_piece.id):
                overlap, values_match = traces_overlap_and_match(existing_tr, tr_piece)
                if overlap and not values_match:
                    problem = "Conflicting overlapping data"
                    break

            if problem is None:
                combined = existing_st + tr_piece
                combined.merge(method=1, fill_value=None)
                # merge() with fill_value=None leaves gaps as masked samples,
                # which the MiniSEED writer rejects. split() turns each
                # masked trace back into contiguous, unmasked segments so that
                # data separated by a gap can still share the day file.
                combined = combined.split()
                combined.write(sdsfullpath, format='MSEED')
                print(f"Merged and wrote to: {sdsfullpath}")
                continue
        except Exception as e:
            # Best effort: any failure falls through to the indexed file so
            # the new data is never lost.
            problem = e

        print(f"Conflict or error: {problem}")
        index = 1
        while os.path.isfile(f'{sdsfullpath}.{index:02d}'):
            index += 1
        sdsindexpath = f'{sdsfullpath}.{index:02d}'
        tr_piece.write(sdsindexpath, format='MSEED')
        print(f"Wrote indexed file: {sdsindexpath}")


def samplingrate2id(tr):
    """
    Replace the first letter (band code) of ``tr.stats.channel`` in place.

    The new letter is chosen from ``tr.stats.sampling_rate``. For rates of
    20 Hz and above there are two candidate letters per rate range; the second
    one is used when the current band code is one of B, H, D or G:

    ============  ===========  =========================
    rate (Hz)     default      current code in B/H/D/G
    ============  ===========  =========================
    >= 1000       F            G
    >= 250        C            D
    >= 80         E            H
    >= 20         S            B
    ============  ===========  =========================

    Lower rates map to M, L, V, U, R, P, T and finally Q for anything below
    1e-6 Hz (previously such rates raised UnboundLocalError).

    NOTE(review): the published SEED band-code table appears to pair C/F, not
    D/G, with long-corner-period sensors at >= 250 Hz. The mapping is kept
    unchanged here; confirm it is intentional before altering it.

    Parameters
    ----------
    tr : obspy.core.trace.Trace
        Trace whose channel code is rewritten. Nothing is returned.
    """
    fs = tr.stats.sampling_rate
    channel = tr.stats.channel
    # Slicing (rather than indexing) tolerates an empty channel code.
    broadband = channel[:1] in ('B', 'H', 'D', 'G')

    if fs >= 1000.0:
        band_code = 'G' if broadband else 'F'
    elif fs >= 250.0:
        band_code = 'D' if broadband else 'C'
    elif fs >= 80.0:
        band_code = 'H' if broadband else 'E'
    elif fs >= 20.0:
        band_code = 'B' if broadband else 'S'
    elif fs > 1.0:
        band_code = 'M'
    elif fs > 0.5:
        band_code = 'L'
    elif fs > 0.05:
        band_code = 'V'
    elif fs > 0.005:
        band_code = 'U'
    elif fs >= 0.0001:
        band_code = 'R'
    elif fs >= 0.00001:
        band_code = 'P'
    elif fs >= 0.000001:
        band_code = 'T'
    else:
        # Catch-all so that band_code is always defined.
        band_code = 'Q'
    tr.stats.channel = band_code + channel[1:]

def expand_channels(df):
    """
    Expand rows whose "channel" entry packs several channels into one string.

    A string longer than three characters is read as a two-character prefix
    followed by one component letter per channel (e.g. "HHZNE" stands for
    HHZ, HHN and HHE). Each such row is replaced by one copy per component.

    Returns a new DataFrame holding the rows whose channel (as a string) is
    exactly three characters long, followed by the expanded rows, with a
    fresh integer index.
    """
    extra_rows = []

    for _, record in df.iterrows():
        code = record["channel"]
        if not (isinstance(code, str) and len(code) > 3):
            continue
        prefix = code[:2]
        for component in code[2:]:
            clone = record.copy()
            clone["channel"] = prefix + component
            extra_rows.append(clone)

    expanded_df = pd.DataFrame(extra_rows)

    # Keep only rows that already describe a single three-character channel.
    is_single = df["channel"].apply(lambda x: len(str(x)) == 3)
    return pd.concat([df[is_single], expanded_df], ignore_index=True)



# --- Step 1: Load the station metadata from Excel ---
# Update path if necessary. This version of the spreadsheet has the FL
# network code replaced with 1R.
excel_path = "/home/thompsong/Dropbox/DATA/station_metadata/ksc_stations_master_v2.xls"
# Location codes are read as strings so that the text in the sheet is kept as-is.
df = pd.read_excel(excel_path, dtype={"location": str})

# --- Step 2: Prepare the DataFrame for lookup ---
# Parse both epoch columns, then convert every valid timestamp to
# UTCDateTime; values that cannot be parsed end up as None.
for date_col in ("ondate", "offdate"):
    parsed = pd.to_datetime(df[date_col], errors='coerce')
    df[date_col] = parsed.apply(lambda x: UTCDateTime(x) if pd.notnull(x) else None)

# One row per single channel code.
df = expand_channels(df)
#print(df)
#print('****')

# --- Step 3: Read each event file, normalise the trace ids and archive ---
TOP_INPUT_DIR='/data/KSC/event_files_to_convert_to_sds/'
alreadyexists = []
# trace id -> list of 'YYYY-MM-DD' start days for which no metadata row matched
unmatched_ids = {}

matches = 0
unmatches = 0

for f in sorted(glob.glob(os.path.join(TOP_INPUT_DIR, '*'))):
    # One unreadable entry (directory, non-waveform file, ...) should not
    # abort the whole batch.
    try:
        this_st = read(f)
    except Exception as e:
        print(f"Skipping unreadable file {f}: {e}")
        continue

    st = Stream()
    for tr in this_st:
        # Only traces sampled at 50 Hz or more are archived; LLS02 is excluded.
        if tr.stats.sampling_rate<50.0 or tr.stats.station=='LLS02':
            continue
        samplingrate2id(tr)

        # Station renames.
        if tr.stats.station=='CARL0':
            tr.stats.station='BCHH'
        if tr.stats.station=='378':
            tr.stats.station='DVEL1'
        if tr.stats.station=='FIRE' and tr.stats.starttime.year == 2018:
            tr.stats.station = 'DVEL2'

        if tr.stats.network=='FL':
            tr.stats.network='1R'

        # Collapse the location-code variants seen in the input onto '00'.
        if tr.stats.location in ['00', '0', '--', '', '10']:
            tr.stats.location='00'

        st.append(tr)

    # A failed merge is reported and the unmerged traces are processed as
    # they are (previously the failure was swallowed silently).
    try:
        st.merge(fill_value=0, method=0)
    except Exception as e:
        print(f"Could not merge traces from {f}: {e}")

    # --- Step 4: Match and set Trace.stats.location ---
    for tr in st:
        net = tr.stats.network
        sta = tr.stats.station
        cha = tr.stats.channel
        start = tr.stats.starttime
        end = tr.stats.endtime

        # Metadata rows for this net/sta/cha whose epoch begins on or before
        # the trace start and ends no earlier than one day before the trace end.
        match = df[
            (df["network"] == net) &
            (df["station"] == sta) &
            (df["channel"] == cha) &
            (df["ondate"] <= start) &
            (df["offdate"] >= end-86400)
        ]

        if not match.empty:
            location = match.iloc[0]["location"]
            tr.stats.location = str(location).zfill(2)  # pad if necessary
            matches += 1
        else:
            # Count each (trace id, start day) pair only once.
            ymd = start.strftime('%Y-%m-%d')
            days = unmatched_ids.setdefault(tr.id, [])
            if ymd not in days:
                days.append(ymd)
                unmatches += 1

    # Note: traces without a metadata match are archived too, keeping the
    # location code assigned above.
    for tr in st:
        write_trace_to_sds(tr)

    del st

# Summary: overall counters, then every unmatched trace id with the start
# days on which it was seen.
print(f'Matches: {matches}, Unmatches {unmatches}')
print('\nUnmatched IDs:\n')
for trace_id, days in unmatched_ids.items():
    print(trace_id, '->', days)


Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH1/DD1.D/1R.BCHH1.00.DD1.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH1/DD2.D/1R.BCHH1.00.DD2.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH1/DD3.D/1R.BCHH1.00.DD3.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH1/DHE.D/1R.BCHH1.00.DHE.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH1/DHN.D/1R.BCHH1.00.DHN.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH1/DHZ.D/1R.BCHH1.00.DHZ.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH2/DD4.D/1R.BCHH2.00.DD4.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH2/DD5.D/1R.BCHH2.00.DD5.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH2/DD6.D/1R.BCHH2.00.DD6.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH2/DD7.D/1R.BCHH2.00.DD7.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH2/DD8.D/1R.BCHH2.00.DD8.D.2018.037
Merged and wrote to: /data/SDS_EVENTS/2018/1R/BCHH2/DD9.D/1R.BCHH

"\nprint(f'Matches: {matches}, Unmatches {unmatches}')\nprint('\nUnmatched IDs:\n')\nfor key in unmatched_ids:\n    print(key, '->', unmatched_ids[key])\n"