### Cleaning/Processing Steps
The data will be demeaned similar to Zali et al., using a more conservative downsample rate of 20 Hz, given volcanic tremor is typically between 1-9 Hz.

*Read in mseed files <br>
*Check all are the same length <br>
*Demean and detrend <br>
*Anti-alias filter and downsample data to 8 Hz from 100 Hz

In [1]:
import warnings

# Suppress the specific warning from obspy
warnings.filterwarnings(
    "ignore",
    message="The encoding specified in trace.stats.mseed.encoding does not match the dtype of the data.*",
    category=UserWarning,
    module="obspy.io.mseed.core"
)


In [None]:
import os
from obspy import read
import glob
import numpy as np

# Define the folder paths
#input_folder = os.getcwd() + '/data/raw'
input_folder = 'data/raw'
processed_folder = os.getcwd() + '/data/processed'


display(input_folder)
display(processed_folder)

os.makedirs(processed_folder, exist_ok=True)  # Create output directory if it doesn't exist

# Target sampling rate after downsampling
target_sampling_rate = 8  # Hz

length = np.array([])

# Process each mseed file in the folder
for file_path in glob.glob(f"{input_folder}/*.mseed"):
    # Read the file
    st = read(file_path)
    #print(f'processing file: {file_path}')
       
    # Demean, detrend, and downsample each trace
    for tr in st:
        tr.detrend("demean")   # Remove mean
        tr.detrend("linear")   # Remove linear trend
        tr.filter("lowpass", freq=target_sampling_rate / 2)  # Anti-aliasing filter
        tr.resample(target_sampling_rate)  # Downsample to 8 Hz
        
        # tr.normalize()
        
    #add data length to array
    length = np.append(length, len(tr.data))

    # Save the processed data
    output_file = os.path.join(processed_folder, os.path.basename(file_path))
    st.write(output_file, format="MSEED")
    print(f'processed file: {file_path}')

'data/raw'

'/home/jen/snap/MLGEO2024_Geldingadalir/notebooks/NUPH_analysis/data/processed'

processed file: data/raw/20210430_9fnuph.mseed
processed file: data/raw/20210512_9fnuph.mseed
processed file: data/raw/20210608_9fnuph.mseed
processed file: data/raw/20210323_9fnuph.mseed
processed file: data/raw/20210505_9fnuph.mseed
processed file: data/raw/20210319_9fnuph.mseed
processed file: data/raw/20210406_9fnuph.mseed
processed file: data/raw/20210613_9fnuph.mseed
processed file: data/raw/20210504_9fnuph.mseed
processed file: data/raw/20210621_9fnuph.mseed
processed file: data/raw/20210429_9fnuph.mseed
processed file: data/raw/20210617_9fnuph.mseed
processed file: data/raw/20210409_9fnuph.mseed
processed file: data/raw/20210601_9fnuph.mseed
processed file: data/raw/20210407_9fnuph.mseed
processed file: data/raw/20210604_9fnuph.mseed
processed file: data/raw/20210322_9fnuph.mseed
processed file: data/raw/20210312_9fnuph.mseed
processed file: data/raw/20210503_9fnuph.mseed
processed file: data/raw/20210507_9fnuph.mseed
processed file: data/raw/20210609_9fnuph.mseed
processed fil

Check the lengths of the files.

In [3]:
# import os
# from obspy import read
# import glob
# import numpy as np

# Define the processed folder path = output_folder
processed_folder = os.getcwd() + '/data/processed'

# Array to store the lengths of each file
lengths = []

# Check each processed file's length
for file_path in glob.glob(f"{processed_folder}/*.mseed"):
    # Read only metadata
    st = read(file_path, headonly=True)
    
    # Extract the number of data points in each trace
    for tr in st:
        lengths.append(tr.stats.npts)

# Convert to numpy array for easier analysis
lengths = np.array(lengths)

# Check if all files have the same length
if np.all(lengths == lengths[0]):
    print(f"All files have the same length: {lengths[0]} data points.")
else:
    print("Files have different lengths:", lengths)

Files have different lengths: [760320 760320 760320 760320 760320 760320 194785 760320 760320 760320
 760320 760320 760320 760320 760320 760320 760320 331681 760320 760320
 760320 760320 760320 760320 760320 760320 760320 760320 760320 760320
 760320 760320 760320 760320 760320 760320 760320 760320 760320 760320
 760320 760320 760320 760320 760320 760320 760320 760320 760320 760320
 760320 760320 760320 760320 760320 760320 760320 760320 760320 760320
 760320 760320 760320 760320 760320 760320 760320 760320 760320 760320
 760320 760320 760320 760320 760320 760320  88059 760320 760320 760320
 760320 760320 760320 760320 760320 760320 760320 760320 760320 760320
 760320 760320 760320 760320 760320 760320 760320 760320 760320 760320]


~~This is the correct number of points (+1) for 24 hours of data sampled at 25 Hz. The single extra point is a product of the decimation, will be removed in the preparing ai ready data notebook.~~