# RomyEvents - Automatic Eventplots

Creates event plots automatically for the events listed in a catalog.

In [None]:
import os
import gc
import obspy as obs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from obspy.geodetics import gps2dist_azimuth
from pprint import pprint
from functions.add_distances_and_backazimuth import __add_distances_and_backazimuth
from functions.querrySeismoData import __querrySeismoData
from obspy import read

### Methods

In [None]:
def __makeplot(config, st):
    """Plot each trace of a stream in its own row of a shared-time figure.

    Parameters
    ----------
    config : dict
        Plot configuration; the keys 'title', 'fmin' and 'fmax' are read to
        build the figure title.
    st : obspy Stream
        Traces to plot. The stream is copied, the input is not modified.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was created (it is also shown via ``plt.show()``).
    """

    st_in = st.copy()

    # The layout has 11 rows by default. It grows when the stream holds more
    # traces, so that no trace is indexed past the last axis (previously a
    # 12th trace raised an IndexError).
    n_rows = max(11, len(st_in))
    fig, ax = plt.subplots(n_rows, 1, figsize=(15, 20*n_rows/11), sharex=True)

    font = 14

    # scaling of the time axis; traces are plotted in their native amplitude unit
    time_scaling, time_unit = 1, "sec"

    for i, tr in enumerate(st_in):
        ax[i].plot(tr.times()/time_scaling, tr.data, 'k', label=tr.stats.station+"."+tr.stats.channel)
        ax[i].legend(loc=1)

    # the time axis is labelled relative to the start time of the first trace;
    # an empty stream has no start time, so only the unit is shown then
    if len(st_in) > 0:
        ax[-1].set_xlabel(f"Time ({time_unit}) from {st[0].stats.starttime.date} {str(st[0].stats.starttime.time).split('.')[0]} UTC", fontsize=font)
    else:
        ax[-1].set_xlabel(f"Time ({time_unit})", fontsize=font)
    ax[0].set_title(config['title']+f" | {config['fmin']} - {config['fmax']} Hz", fontsize=font, pad=10)

    plt.show();
    return fig

In [None]:
def __makeplotStreamSpectra2(st, config, fscale=None):
    """Plot every trace of a stream next to its amplitude spectrum.

    The figure has one row per trace: the left column shows the scaled time
    series, the right column the spectrum returned by ``__fft``.

    Parameters
    ----------
    st : obspy Stream
        Traces to plot. A sorted copy is used; the input stream is left
        untouched.
    config : dict
        If both 'fmin' and 'fmax' are present they set the frequency-axis
        limits of the spectra.
    fscale : str, optional
        Axis scaling of the spectra: "loglog", "loglin" (logarithmic
        frequency axis), "linlog" (logarithmic amplitude axis); anything
        else gives linear axes.

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    ValueError
        If the stream contains no traces.
    """

    from andbro__fft import __fft
    import matplotlib.pyplot as plt

    st_in = st.copy()

    # sort the working copy (not the caller's stream) so the rows follow the channel order
    st_in.sort(keys=['channel'], reverse=True)

    NN = len(st_in)
    if NN == 0:
        raise ValueError("cannot plot spectra of an empty stream")

    rot_scaling, rot_unit = 1e9, r"nrad/s"
    trans_scaling, trans_unit = 1e6, r"$\mu$m/s"

    # squeeze=False keeps `axes` two-dimensional even for a single trace
    fig, axes = plt.subplots(NN, 2, figsize=(15, int(NN*2)), sharex='col', squeeze=False)

    font = 14

    plt.subplots_adjust(hspace=0.3)

    ## _______________________________________________

    for i, tr in enumerate(st_in):

        # second-to-last character of the channel code selects scaling and labels:
        # "J" channels use the rotation scaling, "H" channels the translation scaling
        instrument = tr.stats.channel[-2]
        if instrument == "J":
            scaling, sym, unit = rot_scaling, r"$\Omega$", rot_unit
        elif instrument == "H":
            scaling, sym, unit = trans_scaling, "v", trans_unit
        else:
            # unknown channel type: plot unscaled with a generic label
            scaling, sym, unit = 1, "Amplitude", "a.u."

        spec, ff, ph = __fft(tr.data*scaling, tr.stats.delta, window=None, normalize=None)

        ## _________________________________________________________________
        axes[i,0].plot(
                    tr.times(),
                    tr.data*scaling,
                    color='black',
                    label='{} {}'.format(tr.stats.station, tr.stats.channel),
                    lw=1.0,
                    )

        ## _________________________________________________________________
        if fscale == "loglog":
            axes[i,1].loglog(ff, spec, color='black', lw=1.0)
        elif fscale == "loglin":
            axes[i,1].semilogx(ff, spec, color='black', lw=1.0)
        elif fscale == "linlog":
            axes[i,1].semilogy(ff, spec, color='black', lw=1.0)
        else:
            axes[i,1].plot(ff, spec, color='black', lw=1.0)

        axes[i,0].set_ylabel(f'{sym} ({unit})',fontsize=font)
        axes[i,1].set_ylabel(f'ASD \n({unit}/Hz)',fontsize=font)
        axes[i,0].legend(loc='upper left',bbox_to_anchor=(0.8, 1.10), framealpha=1.0)

    # the x-axis is shared per column, so setting the limits once is enough
    if "fmin" in config and "fmax" in config:
        axes[NN-1,1].set_xlim(config['fmin'],config['fmax'])

    last = st_in[-1]
    axes[NN-1,0].set_xlabel(f"Time from {last.stats.starttime.date} {str(last.stats.starttime.time)[:8]} (s)",fontsize=font)
    axes[NN-1,1].set_xlabel("Frequency (Hz)",fontsize=font)

    return fig

### Configurations

In [None]:
# All settings of the notebook are collected in one dictionary.
config = {
    # ROMY coordinates
    'ROMY_lon': 11.275501,
    'ROMY_lat': 48.162941,

    # duration of event in seconds
    'duration': 7200,

    # frequency range for bandpass filter (in Hz)
    'fmin': 0.01,  # originally 0.01
    'fmax': 2,     # originally 0.1

    # path for figures to store
    'outpath_figs': "C:/Bachelorarbeit/figures/",

    # path for output data
    'outpath_data': "C:/Bachelorarbeit/data/waveformsROMYRLAS/",

    # seed codes of the stations that should be used for the analysis
    'seeds': [
        # "BW.ROMY.10.BJZ", "BW.ROMY..BJU", "BW.ROMY..BJV", "BW.ROMY..BJW",  # ringlaser ROMY
        "BW.RLAS..BJZ",  # ringlaser G
        # "GR.FUR..BHZ", "GR.FUR..BHN", "GR.FUR..BHE",  # seismometer ROMY
        # "GR.WET..BHZ", "GR.WET..BHN", "GR.WET..BHE",  # seismometer G
    ],

    # path to catalogs and name of the catalog file
    'path_to_catalog': "C:/Bachelorarbeit/data/catalogs/",
    'catalog': "ROMY_global_catalog_20190101_20250531.pkl",

    # set if existing files should be skipped
    'skip_existing': True,

    # set if figures should be saved
    'save_figures': True,

    # set if waveform data should be stored
    'store_waveforms': True,
}

## Load Events

In [None]:
# Load the event catalog (a pickled object, read with pandas).
# NOTE(review): unpickling executes arbitrary code -- only load catalogs from a trusted source.
events = pd.read_pickle(f"{config['path_to_catalog']}{config['catalog']}")

In [None]:
# expose the catalog timestamp under the name 'origin', which the cells below read
events['origin'] = events['timestamp']

In [None]:
# keep only the events whose magnitude exceeds 6
events = events.loc[events['magnitude'] > 6]
print("Event number: ", len(events))

In [None]:
# avoid events that are too close to each other in time
# NOTE(review): diff() compares each event with the row before it, so this
# presumably expects the catalog to be in chronological order -- confirm.
min_gap = pd.Timedelta(minutes=60)

# work on a copy so that adding a column does not write into a filtered slice
events = events.copy()
events['elapsed_time'] = events.timestamp.diff()

keep = events.elapsed_time > min_gap
# The first event has no predecessor, so its elapsed_time is NaT and the
# comparison above is False. Keep it explicitly instead of dropping it.
if len(keep) > 0:
    keep.iloc[0] = True

events = events[keep]
print("Event number: ", events.shape[0])

In [None]:
# Filter catalog events by date
#events = events[(events.timestamp > "2024-05-01") & (events.timestamp < "2024-12-24")]
#events = events[
 #   (events.timestamp > "2020-01-01") &
  #  (events.timestamp < "2025-06-03") 
   # ]
#print("Event number: ", events.shape[0])

Prepare a status dataframe that records, for each event and each seed, whether data was obtained.

In [None]:
# Status table: one row per event in 'events', indexed by a zero-padded,
# three-digit event number, holding the origin time of the event.
event_numbers = pd.Index(
    [f"{i:03d}" for i in range(len(events))],
    name='event_number',
)
event_status = pd.DataFrame(
    {'origin_time': events.origin.values},
    index=event_numbers,
)

# One column per seed; every entry starts as False.
for seed in config['seeds']:
    event_status[seed] = False


# RUN LOOP

Loop over the events in the catalog and request data for each event. Depending on the configuration, the loop
- skips events whose output files already exist (`skip_existing`),
- saves the figures (`save_figures`),
- stores the waveforms (`store_waveforms`).



In [None]:
# messages of all failed requests / failed writes, printed after the loop
errors = []
adr_status = []


def _ensure_dir(path):
    """Create directory `path` (including parents) unless it already exists."""
    if not os.path.isdir(path):
        print("created: ", path)
        os.makedirs(path)


def _request_seed(seed, config):
    """Request the waveforms of one seed id for the time window config['tbeg']..config['tend'].

    The repository is selected from the station name contained in `seed`.
    Returns the stream delivered by `__querrySeismoData`, or None if the seed
    matches none of the known stations.
    """
    if "FUR" in seed or "WET" in seed:
        repository, path = 'online', None
    elif "ROMY.22" in seed:
        # ADR data from archive
        # NOTE(review): 'path_to_archive' is not set in the configuration cell
        # of this notebook; a missing key is reported as a failed request.
        repository, path = 'archive', config['path_to_archive']+"temp_archive/"
    elif "ROMY" in seed or "RLAS" in seed:
        # ringlaser data from george
        repository, path = 'george', None
    else:
        return None

    stx, _ = __querrySeismoData(seed_id=seed,
                                starttime=config['tbeg'],
                                endtime=config['tend'],
                                repository=repository,
                                path=path,
                                restitute=True,
                                detail=None,
                                fill_value=None,
                                )
    return stx


raw_dir = config['outpath_figs']+"raw/"
filtered_dir = config['outpath_figs']+"filteredROMYRLAS/"

# Minimum number of traces an event needs in order to be plotted / stored.
# The default keeps the former threshold of 2 when several seeds are requested,
# but does not reject every event when only a single seed is configured.
min_traces = config.get('min_traces', max(1, min(2, len(config['seeds']))))

# loop over all events
for jj in range(events.shape[0]):

    # make event number
    num = str(jj).rjust(3, "0")
    origin = events.origin.iloc[jj]
    print(f"\n -> {num} {origin} ")

    # adjust event name string
    try:
        event_name = str(origin).replace("-","").replace(":","").replace(" ", "_").split(".")[0]
    except Exception:
        print(f" -> {num}: error for {origin}")
        continue

    # check if output directory exists
    _ensure_dir(raw_dir)

    # Skip the event if its raw figure already exists. The check uses the same
    # file name that is used when the figure is saved below, and it is
    # controlled by config['skip_existing'] only.
    raw_figure = raw_dir+f"{num}_{event_name}_raw.png"
    if config['skip_existing'] and os.path.isfile(raw_figure):
        print(f" -> file alread exits for {event_name}")
        continue

    # distance between event and station
    ev_lat = events.latitude.iloc[jj]
    ev_lon = events.longitude.iloc[jj]
    sta_lat = config['ROMY_lat']
    sta_lon = config['ROMY_lon']
    distance_m, az, baz = gps2dist_azimuth(ev_lat, ev_lon, sta_lat, sta_lon)
    distance_km = distance_m / 1000
    # NOTE(review): the division assumes the catalog depth is given in metres -- confirm
    ev_depth = events.depth.iloc[jj] / 1000
    origin_time = origin
    magnitude = events.magnitude.iloc[jj]

    # configuration adjustments for plots
    config['title'] = (f"M{magnitude:.1f} - {distance_km:.0f} km @ {ev_depth:.0f} km | {origin_time} UTC")
    config['tbeg'] = obs.UTCDateTime(str(origin))

    # same endtime for all
    config['tend'] = config['tbeg'] + config['duration']

    # reuse waveforms stored by an earlier run, otherwise request them
    waveform_file = config['outpath_data']+f"{num}_{event_name}.mseed"
    loaded_from_disk = os.path.isfile(waveform_file)

    if loaded_from_disk:
        print(f" -> loading stored waveforms: {waveform_file}")
        st0 = read(waveform_file)

        # mark the seeds found in the stored file as available
        for tr in st0:
            if tr.id in event_status.columns:
                event_status.loc[num, tr.id] = True
    else:
        st0 = obs.Stream()
        for seed in config['seeds']:
            try:
                stx = _request_seed(seed, config)

                if stx is None:
                    print(f" -> {seed} not found")
                    continue

                if len(stx) == 0:
                    print(f" -> data missing for {seed}")
                else:
                    event_status.loc[num, seed] = True
                st0 += stx

            except Exception as e:
                # a failing seed must not abort the event; log it and go on
                print(e)
                print(f" -> failed to request {seed} for event: {origin}")
                errors.append(f" -> failed to request {seed} for event: {origin}")
                continue

    # sort stream by channel
    st0 = st0.sort()

    # check if any data is masked
    for tr in st0:
        if isinstance(tr.data, np.ma.MaskedArray):
            print(f" -> {tr.stats.channel} has masked data. Filled with zeros.")
            tr.data = tr.data.filled(fill_value=0)

    # processing data stream
    print(" -> processing data stream ...")
    st1 = st0.copy()
    st1 = st1.detrend("linear")
    st1 = st1.taper(0.1)
    st1 = st1.filter("bandpass", freqmin=config['fmin'], freqmax=config['fmax'], corners=4, zerophase=True)

    # trim data stream
    st1 = st1.trim(config['tbeg'], config['tend'])
    st0 = st0.trim(config['tbeg'], config['tend'])

    if len(st0) < min_traces:
        print(f" -> only {len(st0)} trace(s) available, {min_traces} required: event skipped")
        continue

    st1.plot(equal_scale=False);

    # store waveform data (nothing to do if it was just read from that file)
    if config['store_waveforms'] and not loaded_from_disk:

        # check if subdirectory exists
        _ensure_dir(config['outpath_data'])

        # store waveform data
        try:
            st0.write(waveform_file)
            print(f" -> stored at: {waveform_file}")
        except Exception as e:
            print(f" -> error storing waveform: {e}")
            errors.append(f" -> error storing waveform: {e}")

    # saving figures
    if config['save_figures']:

        _ensure_dir(raw_dir)
        _ensure_dir(filtered_dir)

        # raw data
        fig1 = st0.plot(equal_scale=False, show=False)
        fig1.savefig(raw_figure, dpi=150, bbox_inches='tight', pad_inches=0.05)

        # filtered data
        fig2 = st1.plot(equal_scale=False, show=False)
        fig2.savefig(filtered_dir+f"{num}_{event_name}_filtered.png",
                     dpi=150, bbox_inches='tight', pad_inches=0.05)
        print("Saved")

        # pyplot keeps figures alive until they are closed; close them so
        # they do not pile up over the loop
        plt.close(fig1)
        plt.close(fig2)
        del fig1, fig2
        gc.collect()

pprint(errors)

Plot status of data retrieval

In [None]:
def create_interactive_status_plot(event_status, title="Event Data Retrieval Status"):
    """Build a plotly heatmap showing which cells of `event_status` are set.

    Parameters
    ----------
    event_status : pandas.DataFrame
        Status table; all columns except an optional 'origin_time' column are
        drawn as heatmap columns, every row of the table as a heatmap row.
    title : str
        Title of the figure.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    table = event_status.copy()

    # Row labels: the index value, extended by the origin time (without
    # fractional seconds) when that column is available.
    if 'origin_time' not in table.columns:
        row_labels = table.index.tolist()
    else:
        row_labels = [
            f"{number} ({str(origin).split('.')[0]})"
            for number, origin in zip(table.index, table['origin_time'])
        ]
        table = table.drop(columns=['origin_time'])

    flags = table.values

    # a check mark or a cross is written into each cell
    marks = [["✓" if flag else "✗" for flag in row] for row in flags]

    heatmap = go.Heatmap(
        z=flags.astype(int),
        x=table.columns,
        y=row_labels,
        colorscale=[[0, 'rgb(255,80,80)'], [1, 'rgb(80,220,80)']],
        showscale=False,
        text=marks,
        texttemplate="%{text}",
        textfont={"color":"white"},
    )
    fig = go.Figure(data=heatmap)

    # figure size grows with the number of rows and columns
    n_rows, n_cols = len(table), len(table.columns)
    fig.update_layout(
        title=title,
        xaxis={'side': 'top'},
        height=max(500, n_rows * 25),
        width=max(800, n_cols * 60),
    )

    return fig

In [None]:
# Build the status figure, export it as a stand-alone HTML page and display it
fig = create_interactive_status_plot(event_status)
fig.write_html(f"{config['outpath_figs']}event_status_interactive.html")
fig.show()