## Prepare the data for the model used in the paper ##

Read each processed file and split it into one-hour windows, writing each window to its own file. At an 8 Hz sampling rate, one hour corresponds to 28,800 samples.

In [None]:
import os
from obspy import read
import glob
import numpy as np
from obspy import UTCDateTime

# Define the processed and output folder paths
processed_folder = os.getcwd() + '/data/processed'
segmented_output_folder = os.getcwd() + '/data/segmented'
os.makedirs(segmented_output_folder, exist_ok=True)  # Create output directory if it doesn't exist

# Define the target segment duration (1 hour) in seconds
segment_duration = 3600  # seconds


def hourly_window_bounds(start, end, duration, delta):
    """Yield ``(first_sample_time, last_sample_time)`` for each full window.

    Windows are back to back and non-overlapping: each one starts
    ``duration`` after the previous one and ends one sample interval
    (``delta``) before the next one starts.

    Args:
        start: Time of the first sample (anything supporting ``+`` with a
            number and ``<=``, e.g. ``UTCDateTime`` or a float).
        end: Time of the LAST sample (not one past it).
        duration: Window length in seconds.
        delta: Sample spacing in seconds.

    Raises:
        ValueError: If ``delta`` is not in ``(0, duration]``.
    """
    if not 0 < delta <= duration:
        raise ValueError("delta must be positive and no longer than duration")

    # Offset from a window's first sample to its last sample.
    last_offset = duration - delta
    window_start = start
    # A window is complete as soon as its last sample exists, i.e. when
    # window_start + duration - delta <= end. Comparing against
    # window_start + duration instead would drop the final hour of a trace
    # that is an exact multiple of the window length.
    while window_start + last_offset <= end:
        yield window_start, window_start + last_offset
        window_start = window_start + duration


# Process each processed file for segmentation (sorted for a stable order)
for file_path in sorted(glob.glob(f"{processed_folder}/*.mseed")):
    # Read the processed file
    st = read(file_path)

    # Split each trace in the Stream object into one-hour segments
    for tr in st:
        windows = hourly_window_bounds(
            tr.stats.starttime, tr.stats.endtime, segment_duration, tr.stats.delta
        )

        for segment_start, segment_end in windows:
            # Trace.slice includes the sample at `endtime`, so the window ends
            # one sample before the next hour begins. This keeps segments at
            # duration * sampling_rate samples with no shared boundary sample.
            segment = tr.slice(starttime=segment_start, endtime=segment_end)

            # Format the filename for the segment
            segment_filename = f"{tr.stats.network}_{tr.stats.station}_{tr.stats.channel}_{segment_start.strftime('%Y%m%dT%H%M%S')}.mseed"
            segment_filepath = os.path.join(segmented_output_folder, segment_filename)

            # Save the one-hour segment as a new file
            segment.write(segment_filepath, format="MSEED")
            print(f'Saved segment file: {segment_filepath}')

Apply the short-time Fourier transform with the specified `window_length` and `hop_length` parameters to produce a spectrogram of size (96, 128) for each one-hour segment. The segments are read from, and the spectrograms are saved to, `segmented_output_folder`, which is defined in the cell above.

In [None]:
import librosa
import numpy as np
import glob
import os

# NOTE: this cell relies on `read` (imported from obspy) and
# `segmented_output_folder`, both defined in the segmentation cell above.

# Define parameters
window_length = 256  # STFT window length and FFT size, in samples
hop_length = 224  # samples between the starts of successive STFT frames
sampling_rate = 8  # in Hz
target_shape = (96, 128)  # (frequency bins, time frames)

# Process each segmented file for Fourier Transform and spectrogram generation
# (sorted for a stable processing order)
for segment_file in sorted(glob.glob(f"{segmented_output_folder}/*.mseed")):
    # Load the segment using ObsPy and convert to a numpy array
    st = read(segment_file)
    tr = st[0]  # assuming one trace per file
    signal = tr.data  # raw signal data from mseed file

    # librosa.stft only accepts floating-point input, while MiniSEED traces
    # are often stored as integer counts. Integer data is promoted to float64;
    # data that is already floating-point keeps its dtype.
    if not np.issubdtype(signal.dtype, np.floating):
        signal = signal.astype(np.float64)

    # Compute STFT using librosa
    stft_result = librosa.stft(signal, n_fft=window_length, hop_length=hop_length, win_length=window_length)

    # Convert to power spectrogram (squared magnitude)
    spectrogram = np.abs(stft_result) ** 2

    # Bring the spectrogram to the target shape:
    #   * time axis (axis=1): zero-padded or truncated to target_shape[1] frames
    #   * frequency axis (axis=0): only the lowest target_shape[0] of the
    #     n_fft // 2 + 1 frequency bins are kept
    spectrogram_resized = librosa.util.fix_length(spectrogram, size=target_shape[1], axis=1)[:target_shape[0], :]

    # Define the output filename for the spectrogram
    spectrogram_filename = os.path.basename(segment_file).replace(".mseed", "_spectrogram.npy")
    spectrogram_filepath = os.path.join(segmented_output_folder, spectrogram_filename)

    # Save the spectrogram as a .npy file
    np.save(spectrogram_filepath, spectrogram_resized)
    print(f'Saved spectrogram file: {spectrogram_filepath}')