In [5]:
import os
import logging
from obspy import read, Stream, UTCDateTime
import numpy as np
from obspy import Trace

# Year component interpolated into the data path below.
year = '2024'
# First trace reported for this file when it is read:
#CI.MAN..BHE | 2024-01-16T00:00:00.000000Z - 2024-01-16T14:35:00.500000Z | 40.0 Hz, 2100021 samples
# NOTE(review): absolute, user-specific path -- this cell only runs on the machine that has it.
file_path = f'/home/gridsan/mknuth/01_Earthquake_Prediction/01_Data/01_Seismic_Wave_Data/{year}/MAN/BHE/MAN_BHE_{year}-01-16.mseed'
# Alternative (relative) path for station BRE, kept disabled:
#file_path = '01_Seismic_Wave_Data/2024/BRE/BHE/BRE_BHE_2024-01-01.mseed'

# Read the file into an ObsPy Stream; the bare name on the last line displays it as the cell output.
stream = read(file_path)
stream

1423 Trace(s) in Stream:

CI.MAN..BHE | 2024-01-16T00:00:00.000000Z - 2024-01-16T14:35:00.500000Z | 40.0 Hz, 2100021 samples
...
(1421 other traces)
...
CI.MAN..BHE | 2024-01-16T23:59:50.625000Z - 2024-01-16T23:59:59.975000Z | 40.0 Hz, 375 samples

[Use "print(Stream.__str__(extended=True))" to print all Traces]

In [2]:
# Station MAN, channel BHN, January 1st.
# `year` is not assigned in this cell; it must already exist in the kernel from another cell.
file_path = f'/home/gridsan/mknuth/01_Earthquake_Prediction/01_Data/01_Seismic_Wave_Data/{year}/MAN/BHN/MAN_BHN_{year}-01-01.mseed'

# Read the file and display the resulting Stream as the cell output.
stream = read(file_path)
stream

1 Trace(s) in Stream:
CI.MAN..BHN | 2024-01-01T00:00:00.000000Z - 2024-01-01T23:59:59.975000Z | 40.0 Hz, 3456000 samples

In [3]:
# Station MAN, channel BHZ, January 1st.
# `year` is not assigned in this cell; it must already exist in the kernel from another cell.
file_path = f'/home/gridsan/mknuth/01_Earthquake_Prediction/01_Data/01_Seismic_Wave_Data/{year}/MAN/BHZ/MAN_BHZ_{year}-01-01.mseed'

# Read the file and display the resulting Stream as the cell output.
stream = read(file_path)
stream

1 Trace(s) in Stream:
CI.MAN..BHZ | 2024-01-01T00:00:00.000000Z - 2024-01-01T23:59:59.975000Z | 40.0 Hz, 3456000 samples

In [6]:
# Display the metadata of the first trace of whichever `stream` was loaded last in the kernel.
stream[0].stats

         network: CI
         station: MAN
        location: 
         channel: BHE
       starttime: 2024-01-16T00:00:00.000000Z
         endtime: 2024-01-16T14:35:00.500000Z
   sampling_rate: 40.0
           delta: 0.025
            npts: 2100021
           calib: 1.0
         _format: MSEED
           mseed: AttribDict({'dataquality': 'D', 'number_of_records': 4596, 'encoding': 'STEIM2', 'byteorder': '>', 'record_length': 512, 'filesize': 4396032})

In [None]:
import os
import logging
from obspy import read, Stream, UTCDateTime
import numpy as np
from obspy import Trace

def format_duration(duration):
    """Render a duration given in seconds as a zero-padded ``HH:MM`` string.

    Leftover seconds are dropped rather than rounded, and the hour count is
    not wrapped at 24.
    """
    whole_hours, leftover_seconds = divmod(duration, 3600)
    hours = int(whole_hours)
    minutes = int(leftover_seconds // 60)
    return f"{hours:02d}:{minutes:02d}"

def save_processed_trace(trace, original_path):
    """Write ``trace`` as MiniSEED next to the file it came from.

    The output keeps the directory and extension of ``original_path`` and
    inserts ``_processed`` between the base name and the extension. The
    destination path is logged at INFO level after writing.
    """
    stem, suffix = os.path.splitext(os.path.basename(original_path))
    new_path = os.path.join(
        os.path.dirname(original_path),
        stem + "_processed" + suffix,
    )
    trace.write(new_path, format='MSEED')
    logging.info(f"Saved processed trace to: {new_path}")

def process_waveform(file_path):
    """Merge all traces of one waveform file and save the merged result.

    Reads ``file_path`` with ObsPy's ``read``, prints the stream when it
    contains more than one trace, merges the traces in place with
    ``merge(method=1, fill_value=0)`` (gaps are filled with zeros), logs the
    time span of the first trace after merging, and passes that trace to
    ``save_processed_trace``.

    Any exception raised along the way is logged and printed together with
    ``file_path`` instead of being propagated, so a caller looping over many
    files continues with the next one.

    Args:
        file_path: Path of the waveform file to process.

    Returns:
        None.
    """
    try:
        stream = read(file_path)
        if len(stream) > 1:
            # Fragmented file: show the individual traces before they are merged.
            print(stream)

        stream.merge(method=1, fill_value=0)
        # NOTE(review): only the first trace is used from here on; this presumes
        # the file holds a single channel so the merge leaves one trace -- confirm.
        merged_trace = stream[0]
        start = merged_trace.stats.starttime
        end = merged_trace.stats.endtime
        duration_str = format_duration(end - start)
        logging.info(f"Stream goes from {start} to {end} (duration: {duration_str})")

        # Save the processed trace
        save_processed_trace(merged_trace, file_path)

    except Exception as e:
        # Report the path rather than stream[0]: `stream` is unbound when read()
        # itself fails (and may be empty), which previously made this handler
        # raise and hide the real error.
        message = f"Error processing file {file_path}: {e}"
        logging.error(message)
        print(message)


year = '2023'
station = 'MAN'
# Base directory to start searching (relative to the current working directory)
base_dir = f"01_Data/01_Seismic_Wave_Data/{year}/{station}"

# force=True (Python 3.8+) replaces any handlers already attached to the root
# logger. Without it basicConfig is a silent no-op when the cell is re-run with
# another year/station, or when a handler was already installed, and messages
# would keep going to the previous log file (or nowhere).
logging.basicConfig(
    filename=f"{year}_{station}_trace_preprocessing.log",
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
    filemode='w',
    force=True,
)

print(f'processing year {year}')
# Collect all mseed files recursively, sorted alphabetically
file_list = sorted(
    os.path.join(root, file)
    for root, dirs, files in os.walk(base_dir)
    for file in files
    if file.endswith(".mseed")
)
print(f"In total {len(file_list)} files")
if not file_list:
    # os.walk yields nothing for a missing directory, so make that case visible.
    logging.warning(f"No .mseed files found under {base_dir}")

# Process every file except outputs of an earlier run (names containing
# "_processed"); original comment noted: total 1095 in 2021
for file_path in file_list:
    # Skip already processed files
    if "_processed" in os.path.basename(file_path):
        logging.info(f"Skipping already processed file: {os.path.basename(file_path)}")
        continue

    logging.info(f"Processing file {os.path.basename(file_path)}")
    process_waveform(file_path)

processing year 2023
In total 1294 files
8 Trace(s) in Stream:
CI.MAN..BHE | 2023-01-03T00:00:00.000000Z - 2023-01-03T13:23:41.450000Z | 40.0 Hz, 1928859 samples
CI.MAN..BHE | 2023-01-03T13:23:50.850000Z - 2023-01-03T13:25:49.375000Z | 40.0 Hz, 4742 samples
CI.MAN..BHE | 2023-01-03T13:25:58.325000Z - 2023-01-03T13:26:59.700000Z | 40.0 Hz, 2456 samples
CI.MAN..BHE | 2023-01-03T13:27:08.525000Z - 2023-01-03T13:30:44.050000Z | 40.0 Hz, 8622 samples
CI.MAN..BHE | 2023-01-03T13:30:52.800000Z - 2023-01-03T13:31:20.075000Z | 40.0 Hz, 1092 samples
CI.MAN..BHE | 2023-01-03T13:31:29.375000Z - 2023-01-03T13:32:05.300000Z | 40.0 Hz, 1438 samples
CI.MAN..BHE | 2023-01-03T13:32:14.425000Z - 2023-01-03T13:32:41.475000Z | 40.0 Hz, 1083 samples
CI.MAN..BHE | 2023-01-03T13:32:50.725000Z - 2023-01-03T23:59:59.975000Z | 40.0 Hz, 1505171 samples
8 Trace(s) in Stream:
CI.MAN..BHE | 2023-01-04T00:00:00.000000Z - 2023-01-04T16:14:48.175000Z | 40.0 Hz, 2339528 samples
CI.MAN..BHE | 2023-01-04T16:14:54.800000Z 

In [13]:
# Station SDD, channel BHZ, 2024-11-21 (path relative to the current working directory).
file_path = '01_Data/01_Seismic_Wave_Data/2024/SDD/BHZ/SDD_BHZ_2024-11-21.mseed'

stream = read(file_path)
# Merge in place with default arguments (no method/fill_value given, unlike process_waveform);
# the displayed result is a single trace flagged "(masked)".
stream.merge()
stream

1 Trace(s) in Stream:
CI.SDD..BHZ | 2024-11-21T00:00:00.019539Z - 2024-11-21T23:59:59.994539Z | 40.0 Hz, 3456000 samples (masked)