# Ground Motion Displacement RMS vs Time

*A simple example tutorial for getting seismic data, computing the power spectral densities, extracting the RMS and plotting it.*

Required:

- python
- obspy (and its dependencies)
- pandas
- jupyter
- notebook
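- tqdm
- msnoise (provides `psd_ppsd_to_dataframe`, imported below)

This should be easy to set up in a conda env: ``conda create -c conda-forge -n covid python=3.7 obspy pandas jupyter notebook tqdm``. Note that `msnoise` is not part of that command and has to be installed into the environment separately (for example with ``pip install msnoise``).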

Author: Thomas Lecocq @seismotom, Fred Massin @fmassin, Claudio Satriano @claudiodsf

# Import Required Libraries

Sets up the Python environment with:
- Basic utilities: datetime, os, glob
- Data processing: numpy, pandas
- Visualization: matplotlib (configured for Adobe Illustrator compatibility)
- Seismic processing: obspy components (UTCDateTime, read, read_inventory, PPSD)
- Progress tracking: tqdm
- Custom utilities: seismosocialdistancing module

Also enables automatic module reloading for development.

In [None]:
import datetime
import os
from glob import glob

import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42  # to edit text in Illustrator
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.patheffects as pe
import numpy as np
import pandas as pd
import tqdm
import warnings

from obspy import UTCDateTime, read, read_inventory
from obspy.clients.fdsn import Client
from obspy.clients.fdsn.client import FDSNNoDataException
from obspy.signal import PPSD
from msnoise.api import psd_ppsd_to_dataframe

%load_ext autoreload
%autoreload 2
import seismosocialdistancing

In [None]:
# Path to where you have copied the DATA/ folder (that contains the SDS, RESP etc folders).
# The cells below build their "SDS" and "RESP" paths relative to this folder.
DATA_PATH = "DATA"

# Configuration Parameters

Sets up analysis parameters:
- Time period: 2024-01-01 to 2024-01-08 (a first week; the full month is processed at the end of the notebook)
- Station details:
  - Network: 8N
  - Station: HB04
  - Location: 00
  - Channel: EHZ
- Additional settings:
  - Dataset name: "hautbois-grenoble"
  - Timezone: Europe/Brussels
  - Site description: "in Grenoble (FR)"
- Event markers:
  - New Year: 2024-01-01
  - Mid Month: 2024-01-15
  - End of Record: 2024-01-31

In [None]:
# Analysis window. Make sure you take at least a full week (>=7 days) before
# the first "ban". UTCDateTime() without argument means "now"; here we first
# compute a single week.
start = UTCDateTime("2024-01-01")
end = UTCDateTime("2024-01-08")

# Identifiers used below to locate the waveform files in the SDS folder
network, station, location, channel = "8N", "HB04", "00", "EHZ"

# `dataset` prefixes the names of the NPZ files written below; `time_zone`
# and `sitedesc` are handed over to the plotting calls at the end.
dataset = "hautbois-grenoble"
time_zone = "Europe/Brussels"
sitedesc = "in Grenoble (FR)"

# No logo by default; a URL can be given instead, e.g.
# 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/44/Logo_SED_2014.png/220px-Logo_SED_2014.png'
logo = None

# Date/time -> label of the events handed over to the plotting calls
bans = {
    "2024-01-01 00:00": 'New Year',
    "2024-01-15 00:00": 'Mid Month',
    "2024-01-31 23:59": "End of Record",
}

# One entry per day from `start` to `end`, never going past the current time
datelist = pd.date_range(
    start.datetime,
    min(end, UTCDateTime()).datetime,
    freq="D",
)

# Station Metadata Loading

Loads instrument response data from XML files in DATA/RESP/ directory.
This information is crucial for accurate ground motion calculations.

In [None]:
# Read the *.xml files found in <DATA_PATH>/RESP with ObsPy's read_inventory.
# `resp` is reused below: it is attached to the waveforms and passed as
# `metadata` to every PPSD.
resp = read_inventory(os.path.join(DATA_PATH, "RESP", "*.xml"))
# Bare expression so that the notebook displays the inventory
resp

# Power Spectral Density Computation

Processes daily seismic data with custom PPSD parameters:
- PPSD length: 1200 seconds
- Overlap: 50%
- Period settings:
  - Smoothing width: 0.025 octaves
  - Step size: 0.0125 octaves
  - Limits: 0.01 to 50 seconds
- Amplitude range: -200 to 20 dB (0.25 dB steps)

Saves results as NPZ files for each day.

Those parameters are 10x more "nervous" than the default ObsPy (and McNamara) values!

### First, Let's have a look at the importance of the PPSD parameters

In [None]:
# Load the waveforms of the first day of the date list
day = datelist[0]
year = day.strftime("%Y")
datestr = day.strftime("%j")  # day of year, zero-padded to three digits

# <DATA_PATH>/SDS/<year>/<network>/<station>/<channel>.D/*.<day of year>
sds_folder = os.path.join(DATA_PATH, "SDS", year, network, station, f"{channel}.D")
fn = os.path.join(sds_folder, f"*.{datestr}")

st = read(fn)
st.attach_response(resp)
print(st)

First cell: ObsPy's default parameters (mimicking the ones from McNamara et al.)

In [None]:
# PPSD settings of this cell (the trailing comments give ObsPy's defaults)
ppsd_length = 3600  # seconds
overlap = 0.5  # 0-1 range
period_smoothing_width_octaves = 1.0  # defaults to 1.0
period_step_octaves = 0.125  # defaults to 0.125
period_limits = None  # default from 0 to Nyquist
db_bins = (-200, -50, 1.)  # defaults to (-200, -50, 1.)

# Gather the settings once, then hand them over as keyword arguments
ppsd_settings = dict(
    ppsd_length=ppsd_length,
    overlap=overlap,
    period_smoothing_width_octaves=period_smoothing_width_octaves,
    period_step_octaves=period_step_octaves,
    period_limits=period_limits,
    db_bins=db_bins,
)
ppsd = PPSD(st[0].stats, metadata=resp, **ppsd_settings)
ppsd.add(st)
ppsd.plot()

Now, let's define a finer computation grid (less smoothing):

In [None]:
# Finer ("nervous") PPSD settings (the trailing comments give ObsPy's defaults)
ppsd_length = 1800  # seconds
overlap = 0.5  # 0-1 range
period_smoothing_width_octaves = 0.025  # defaults to 1.0
period_step_octaves = 0.0125  # defaults to 0.125
period_limits = (0.01, 50)  # seconds default from 0 to Nyquist
db_bins = (-200, 20, 0.25)  # defaults to (-200, -50, 1.)

# Gather the settings once, then hand them over as keyword arguments
nervous_settings = dict(
    ppsd_length=ppsd_length,
    overlap=overlap,
    period_smoothing_width_octaves=period_smoothing_width_octaves,
    period_step_octaves=period_step_octaves,
    period_limits=period_limits,
    db_bins=db_bins,
)
ppsd_nervous = PPSD(st[0].stats, metadata=resp, **nervous_settings)
ppsd_nervous.add(st)

# First with the default colour scale ...
ppsd_nervous.plot()

# ... then with a reduced one, since less "points" fall into each small grid cell
ppsd_nervous.plot(max_percentage=10)

ObsPy allows you to check the day-night variations of noise, for example:

In [None]:
def _mean_noise(time_windows):
    """Restrict the histogram of ``ppsd_nervous`` to *time_windows*, plot it and
    return ``(periods, mean)`` with every mean value above -50 replaced by NaN.
    """
    ppsd_nervous.calculate_histogram(time_of_weekday=time_windows)
    ppsd_nervous.plot(max_percentage=10)
    bin_periods, mean_values = ppsd_nervous.get_mean()
    # Multiplying by NaN blanks the selected values
    mean_values[mean_values > -50] *= np.nan
    return bin_periods, mean_values


# Each window is (weekday, start hour, end hour); -1 stands for any weekday
periods, night_noise = _mean_noise([(-1, 0, 2), (-1, 22, 24)])
periods, day_noise = _mean_noise([(-1, 9, 16)])

In [None]:
from obspy.signal.spectral_estimation import get_nhnm, get_nlnm

# Night and day mean curves, with the NLNM / NHNM curves drawn dashed
fig, ax = plt.subplots(figsize=(16, 8))
for curve, label in ((night_noise, "Night"), (day_noise, "Day")):
    ax.semilogx(periods, curve, label=label)
for model, label in ((get_nlnm, "NLNM"), (get_nhnm, "NHNM")):
    ax.semilogx(*model(), label=label, ls='--')

ax.legend()
ax.grid()
ax.set_xlabel("Period (s)")
ax.set_ylabel("Amplitude")
ax.set_xlim(0.01, 100)
plt.show()

## Looping over all files with the nervous parameters:

In [None]:
# PPSD settings picked up by the per-day loop in the next cell
# (the trailing comments give ObsPy's defaults for comparison).
ppsd_length = 1200 # seconds
overlap = 0.5 # 0-1 range
period_smoothing_width_octaves = 0.025 # defaults to 1.0
period_step_octaves = 0.0125 # defaults to 0.125
period_limits = (0.01, 50) # seconds default from 0 to Nyquist
db_bins = (-200, 20, 0.25) # defaults to (-200, -50, 1.)

In [None]:
# Compute one PPSD per day and per trace id, and store each one as an NPZ file.
# Days whose NPZ file already exists are skipped unless force_reprocess is True
# (also when the PPSD settings have changed in the meantime).
force_reprocess = False

# Create the very folder the files are written to. The folder used to be
# created as "seismoRMS" while the files go to "seismorms", which fails on
# case-sensitive file systems. "seismorms" is also where the NPZ files are
# read back from.
out_dir = "seismorms"
os.makedirs(out_dir, exist_ok=True)

pbar = tqdm.tqdm(datelist)
for day in pbar:
    datestr = day.strftime("%j")  # day of year, zero-padded to three digits
    year = day.strftime("%Y")

    fn = os.path.join(DATA_PATH, "SDS", year, network, station, f"{channel}.D", f"*.{datestr}")
    pbar.set_description("Processing %s" % fn)
    try:
        # Headers are enough to list the trace ids available for that day
        stall = read(fn, headonly=True)
    except Exception:
        # Nothing readable for that day (e.g. no file matches the pattern):
        # skip it. Unlike a bare `except:`, this does not swallow
        # KeyboardInterrupt, so the loop can still be interrupted.
        continue

    # Sorted so that the ids are processed in a reproducible order
    for mseedid in sorted({tr.id for tr in stall}):
        fn_out = os.path.join(out_dir, "{}_{}_{}.npz".format(dataset, datestr, mseedid))
        if os.path.isfile(fn_out) and not force_reprocess:
            continue
        # Read the full data, restricted to this trace id
        st = read(fn, sourcename=mseedid)
        st.attach_response(resp)
        ppsd = PPSD(st[0].stats, metadata=resp,
                    ppsd_length=ppsd_length,
                    overlap=overlap,
                    period_smoothing_width_octaves=period_smoothing_width_octaves,
                    period_step_octaves=period_step_octaves,
                    period_limits=period_limits,
                    db_bins=db_bins)
        with warnings.catch_warnings():
            # Silence the warnings emitted while the data are added
            warnings.simplefilter("ignore")
            ppsd.add(st)
        # save_npz is given the path without its ".npz" extension
        ppsd.save_npz(fn_out[:-4])
        # Release the day's data before moving on
        del st, ppsd
    del stall


# Load and Combine PPSDs

Reloads all computed PPSD files:
- Iterates through date range
- Combines data for each seismic channel
- Creates consolidated PPSD objects

In [None]:
# Trace id -> PPSD combining all the daily NPZ files found for that id
ppsds = {}
pbar = tqdm.tqdm(datelist)
for day in pbar:
    datestr = day.strftime("%j")  # day of year, zero-padded to three digits
    fn_pattern = os.path.join("seismorms", f"{dataset}_{datestr}_*.npz")
    pbar.set_description("Reading %s" % fn_pattern)
    for fn in glob(fn_pattern):
        # The trace id is the last "_"-separated part of the file name
        mseedid = fn.replace(".npz", "").split("_")[-1]
        if mseedid in ppsds:
            # Further files of a known id are merged into the existing PPSD
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                ppsds[mseedid].add_npz(fn)
        else:
            # The first file of an id creates the PPSD object
            ppsds[mseedid] = PPSD.load_npz(fn)

# Generate Standard PPSD Plots

Creates three standard ObsPy visualizations for each channel:
- Probability density functions
- Temporal evolution at target period (0.1s)
- Spectrogram representation

In [None]:
# Period used for the temporal plot
target_period = 0.1

# Three ObsPy plots per trace id: histogram, temporal evolution, spectrogram
for mseedid in ppsds:
    ppsd = ppsds[mseedid]
    print(f"OsbPy plots for {mseedid}")
    ppsd.plot(max_percentage=10)
    ppsd.plot_temporal(target_period)
    ppsd.plot_spectrogram(clim=(-160, -80))

# Advanced Analysis and Visualization

Multi-step process:
1. Select specific seed ID for analysis
2. Convert PPSD to DataFrame format
3. Create period vs time plots
4. Create frequency vs time plots
5. Calculate RMS displacement for multiple frequency bands:
   - 0.1-1.0 Hz
   - 1.0-20.0 Hz
   - 4.0-14.0 Hz
   - 4.0-20.0 Hz
   - 2.0-100.0 Hz

Final visualization options:
- Timeseries plots
- Daily patterns
- Clock plots (24-hour analysis)
- Clock maps (time distribution)
- Grid maps (spatial distribution)

Parameters can be customized for:
- Frequency band selection
- Time zone
- Site description
- Logo inclusion
- Event markers
- Output units and resampling

You'll have to repeat the next steps for different seed_ids:

In [None]:
# Trace id analysed in the following cells; it is used as key into `ppsds`
seed_id = "8N.HB04.00.EHZ"

Loading the content of the PPSD into a DataFrame:

In [None]:
# Convert the PPSD of the selected id and drop the columns that hold only NaN
data = psd_ppsd_to_dataframe(ppsds[seed_id]).dropna(axis=1, how="all")
# Bare expression so that the notebook displays the table
data

## Period vs Time plot

In [None]:
%matplotlib inline
fig, axes = plt.subplots(1,1, figsize=(16,7), sharex=True)
vmin, vmax = np.percentile(data, [5,95])
plt.pcolormesh(data.index, data.columns, data.T, cmap="viridis",
                       vmin=vmin, vmax=vmax, rasterized=True)
plt.colorbar(shrink=0.7).set_label("Amplitude (dB)")
plt.title(seed_id)
plt.ylim(0.05,20)
plt.yscale("log")
plt.ylabel("Period (s)")

minx, maxx = plt.xlim()

fig.autofmt_xdate()

plt.tight_layout()


## Frequency vs Time plot

In [None]:
%matplotlib inline
fig, axes = plt.subplots(1,1, figsize=(16,7), sharex=True)
vmin, vmax = np.percentile(data, [5,95])
data_f = data.copy()
data_f.columns = 1./ data_f.columns
data_f = data_f.sort_index(axis=1)
plt.pcolormesh(data_f.index, data_f.columns, data_f.T, cmap="viridis",
                       vmin=vmin, vmax=vmax, rasterized=True)
plt.colorbar(shrink=0.7).set_label("Amplitude (dB)")
plt.title(seed_id)
plt.ylim(0.05,20)
plt.yscale("log")
plt.ylabel("Frequency (Hz)")

minx, maxx = plt.xlim()
fig.autofmt_xdate()

plt.tight_layout()


## Process PSDs to extract the RMS(displacement)

This can be done for multiple filters at once (``freqs`` below):

In [None]:
# Frequency bands of interest, as (low, high) pairs:
freqs = [(0.1, 1.0), (1.0, 20.0), (4.0, 14.0), (4.0, 20.0), (2.0, 100.0)]

# Trace id -> result of df_rms; only the id selected in `seed_id` is processed
displacement_RMS = {}
pbar = tqdm.tqdm(ppsds.items())
for mseedid, ppsd in pbar:
    if mseedid == seed_id:
        pbar.set_description(f"Processing {mseedid}")
        # One row per PSD time, one column per frequency (1 / period), ascending.
        # NOTE: this rebinds the notebook-level name `data` used by the plots above.
        ind_times = pd.DatetimeIndex([t.datetime for t in ppsd.current_times_used])
        frequencies = 1. / ppsd.period_bin_centers
        data = pd.DataFrame(ppsd.psd_values, index=ind_times, columns=frequencies)
        data = data.sort_index(axis=1)
        rms = seismosocialdistancing.df_rms(data, freqs, output="DISP")
        displacement_RMS[mseedid] = rms
        # Keep a copy on disk, named after the trace id
        rms.to_csv("%s.csv" % mseedid)

## Weekday / Time of day Analysis

In [None]:
# Options shared by all the seismosocialdistancing.plot calls below
args = dict(
    band="4.0-14.0",        # might be None or commented ("4.0-14.0" per default) or any of the tuples in freqs
    time_zone=time_zone,    # required for clockplots
    sitedesc=sitedesc,      # might be None or commented
    logo=logo,              # might be None or commented
    bans=bans,              # might be None or commented
    save='./output/',       # might be None or commented or a path
    unit='nm',
    resample=("30", "min"),
)
seismosocialdistancing.plot(displacement_RMS, type='timeseries', **args)

In [None]:
# Same data and options as above, this time with the 'dailyplots' plot type
seismosocialdistancing.plot(displacement_RMS,
                            type='dailyplots',
                            **args)

In [None]:
# 'clockplots' plot type; this is the one that requires args['time_zone']
seismosocialdistancing.plot(displacement_RMS,
                            type='clockplots',
                            **args)

## Noise distribution over time of the day  

In [None]:
# Same data and options, with the 'clockmaps' plot type
seismosocialdistancing.plot(displacement_RMS,
                            type='clockmaps',
                            **args)

In [None]:
# Same data and options, with the 'gridmaps' plot type
seismosocialdistancing.plot(displacement_RMS,
                            type='gridmaps',
                            **args)

# Background computation

While we go to the I95 notebook and Koen introduces the next topic, please let your computer run the following cells:

In [None]:
# Analysis window for the background run. Make sure you take at least a full
# week (>=7 days) before the first "ban". UTCDateTime() without argument
# means "now".
start = UTCDateTime("2024-01-01")
end = UTCDateTime("2024-02-01")

# Identifiers used below to build the SDS file pattern; `station` and
# `channel` now contain wildcards.
network, station, location, channel = "8N", "HB0*", "00", "EH*"

# `dataset` prefixes the names of the NPZ files written below
dataset = "hautbois-grenoble"
time_zone = "Europe/Brussels"
sitedesc = "in Grenoble (FR)"

# No logo by default; a URL can be given instead, e.g.
# 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/44/Logo_SED_2014.png/220px-Logo_SED_2014.png'
logo = None

# Date/time -> label of the events
bans = {
    "2024-01-01 00:00": 'New Year',
    "2024-01-15 00:00": 'Half',
    "2024-01-31 23:59": "End of Record",
}

# One entry per day from `start` to `end`, never going past the current time
datelist = pd.date_range(
    start.datetime,
    min(end, UTCDateTime()).datetime,
    freq="D",
)

In [None]:
# PPSD settings picked up by the background loop in the next cell:
# one-hour segments without overlap, on the fine period/amplitude grid
# (the trailing comments give ObsPy's defaults for comparison).
ppsd_length = 3600 # seconds
overlap = 0.0 # 0-1 range
period_smoothing_width_octaves = 0.025 # defaults to 1.0
period_step_octaves = 0.0125 # defaults to 0.125
period_limits = (0.01, 50) # seconds default from 0 to Nyquist
db_bins = (-200, 20, 0.25) # defaults to (-200, -50, 1.)

In [None]:
# Compute one PPSD per day and per trace id, and store each one as an NPZ file.
# Days whose NPZ file already exists are skipped unless force_reprocess is True
# (also when the PPSD settings have changed in the meantime).
force_reprocess = False

# Create the very folder the files are written to. The folder used to be
# created as "seismoRMS" while the files go to "seismorms", which fails on
# case-sensitive file systems.
out_dir = "seismorms"
os.makedirs(out_dir, exist_ok=True)

pbar = tqdm.tqdm(datelist)
for day in pbar:
    datestr = day.strftime("%j")  # day of year, zero-padded to three digits
    year = day.strftime("%Y")

    fn = os.path.join(DATA_PATH, "SDS", year, network, station, f"{channel}.D", f"*.{datestr}")

    try:
        # Headers are enough to list the trace ids available for that day
        stall = read(fn, headonly=True)
    except Exception:
        # Nothing readable for that day (e.g. no file matches the pattern):
        # skip it. Unlike a bare `except:`, this does not swallow
        # KeyboardInterrupt, so the loop can still be interrupted.
        continue

    # Sorted so that the ids are processed in a reproducible order
    for mseedid in sorted({tr.id for tr in stall}):
        fn_out = os.path.join(out_dir, "{}_{}_{}.npz".format(dataset, datestr, mseedid))
        if os.path.isfile(fn_out) and not force_reprocess:
            continue
        pbar.set_description("Processing %s" % fn_out)
        # Read the full data, restricted to this trace id
        st = read(fn, sourcename=mseedid)
        st.attach_response(resp)
        ppsd = PPSD(st[0].stats, metadata=resp,
                    ppsd_length=ppsd_length,
                    overlap=overlap,
                    period_smoothing_width_octaves=period_smoothing_width_octaves,
                    period_step_octaves=period_step_octaves,
                    period_limits=period_limits,
                    db_bins=db_bins)
        with warnings.catch_warnings():
            # Silence the warnings emitted while the data are added
            warnings.simplefilter("ignore")
            ppsd.add(st)
        # save_npz is given the path without its ".npz" extension
        ppsd.save_npz(fn_out[:-4])
        # Release the day's data before moving on
        del st, ppsd
    del stall
