### Cleaning/Processing Steps
The data will be demeaned in a similar way to Zali et al., but using a more conservative downsampling rate of 20 Hz, given that volcanic tremor typically lies between 1 and 9 Hz.

* Read in mseed files
* Check all are the same length
* Demean and detrend
* Anti-alias filter and downsample data from 100 Hz to 8 Hz

In [None]:
import warnings

# Hide one specific UserWarning issued from obspy.io.mseed.core: the one whose
# message starts with the "encoding ... does not match the dtype" text below.
# All other warnings are left untouched.
warnings.filterwarnings(
    module="obspy.io.mseed.core",
    category=UserWarning,
    message="The encoding specified in trace.stats.mseed.encoding does not match the dtype of the data.*",
    action="ignore",
)

In [None]:
import os
from obspy import read
import glob
import numpy as np

# Preprocess every .mseed file in data/raw and write the result, under the same
# file name, to data/processed. Each trace is demeaned, linearly detrended,
# low-pass filtered, resampled to `target_sampling_rate` and normalized.

# Define the folder paths (relative to the current working directory)
input_folder = os.path.join(os.getcwd(), 'data', 'raw')
processed_folder = os.path.join(os.getcwd(), 'data', 'processed')

# Collect the input files once. Sorting makes the processing order (and thus
# the order of `lengths`) reproducible; glob returns files in arbitrary order.
mseed_files = sorted(glob.glob(os.path.join(input_folder, '*.mseed')))
files_to_process = len(mseed_files)

os.makedirs(processed_folder, exist_ok=True)  # Create output directory if it doesn't exist

# Target sampling rate after downsampling
# NOTE(review): the notes accompanying this cell mention a 20 Hz rate and a
# 1-9 Hz tremor band; 8 Hz gives a Nyquist frequency of 4 Hz. Confirm which
# rate is intended before relying on the output.
target_sampling_rate = 8  # Hz

# Sample counts of every trace that was processed AND written successfully
lengths = []
# Paths of files that raised during read/processing/write
failed_files = []

# Position of the current file in `mseed_files` (1-based); stays 0 if there
# are no input files.
count = 0

if not mseed_files:
    print(f"No .mseed files found in {input_folder}")

# Process each mseed file in the folder
for count, file_path in enumerate(mseed_files, start=1):
    file_name = os.path.basename(file_path)
    try:
        # Read the file
        st = read(file_path)
        # Unprocessed copy of the stream. Not used in this cell.
        # NOTE(review): retained in case later cells reference it; drop it if
        # nothing does, since it duplicates the data in memory.
        st_check = st.copy()

        # Process each trace (all operations modify the trace in place)
        for tr in st:
            tr.detrend("demean")  # Remove mean
            tr.detrend("linear")  # Remove linear trend
            # Anti-aliasing filter; corner frequency is the new Nyquist frequency
            tr.filter("lowpass", freq=target_sampling_rate / 2)
            tr.resample(target_sampling_rate)  # Downsample to the target rate
            tr.normalize()  # Normalize the trace to between -1 and 1

        # Save the processed data
        output_file = os.path.join(processed_folder, file_name)
        st.write(output_file, format="MSEED")

    except Exception as e:
        # Deliberately broad: one unreadable or unwritable file should not
        # abort the whole batch. The failure is reported and recorded.
        print(f"Error processing file {file_path}: {e}")
        failed_files.append(file_path)
        continue

    # Record trace lengths only once the file has been written, so `lengths`
    # describes exactly the data present in the output folder.
    lengths.extend(len(tr.data) for tr in st)
    print(f'Processed file {count}/{files_to_process}: {file_name}')

# Convert lengths to a NumPy array
lengths = np.array(lengths)
print(f"Lengths of processed traces: {lengths}")

# Check that all processed traces have the same number of samples
unique_lengths = np.unique(lengths)
if unique_lengths.size > 1:
    print(f"Warning: processed traces differ in length: {unique_lengths}")

if failed_files:
    print(f"{len(failed_files)} of {files_to_process} files could not be processed")
