In [1]:
import numpy as np
import matplotlib.pyplot as plt
from obspy import read, Stream
import os
import glob
import sys

# Add project root to path
sys.path.append('..')
from code.config import RAW_DATA_DIR, FIGURES_DIR

# Set matplotlib style
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10

ModuleNotFoundError: No module named 'code.config'; 'code' is not a package

In [None]:
def load_masw_data(data_dir):
    """Load all SAC files from directory"""
    sac_files = sorted(glob.glob(os.path.join(data_dir, "*.SAC")))
    
    if not sac_files:
        raise FileNotFoundError(f"No SAC files found in {data_dir}")
    
    print(f"Found {len(sac_files)} SAC files")
    print(f"First file: {os.path.basename(sac_files[0])}")
    print(f"Last file: {os.path.basename(sac_files[-1])}")
    
    # Load all traces into a Stream
    stream = Stream()
    for sac_file in sac_files:
        st = read(sac_file)
        stream += st
    
    return stream, sac_files

In [None]:
# Load data
print("=" * 60)
print("LOADING GEOPHYDOG MASW DATA")
print("=" * 60)
stream, sac_files = load_masw_data(RAW_DATA_DIR)
print(f"\nLoaded {len(stream)} traces successfully\n")

In [None]:
def analyze_trace_info(stream):
    """Extract and display comprehensive trace information"""
    
    info = {
        'n_traces': len(stream),
        'sampling_rates': [],
        'n_samples': [],
        'durations': [],
        'distances': [],
        'stations': [],
        'start_times': [],
        'end_times': []
    }
    
    print("=" * 60)
    print("TRACE INFORMATION")
    print("=" * 60)
    
    for i, tr in enumerate(stream):
        # Basic info
        info['sampling_rates'].append(tr.stats.sampling_rate)
        info['n_samples'].append(tr.stats.npts)
        info['durations'].append(tr.stats.npts / tr.stats.sampling_rate)
        info['stations'].append(tr.stats.station)
        info['start_times'].append(tr.stats.starttime)
        info['end_times'].append(tr.stats.endtime)
        
        # Distance info (if available in SAC header)
        if hasattr(tr.stats.sac, 'dist'):
            info['distances'].append(tr.stats.sac.dist)
        elif hasattr(tr.stats, 'distance'):
            info['distances'].append(tr.stats.distance)
        
        # Print first trace details
        if i == 0:
            print(f"\nFirst Trace Details (Trace {i}):")
            print(f"  Station: {tr.stats.station}")
            print(f"  Channel: {tr.stats.channel}")
            print(f"  Sampling rate: {tr.stats.sampling_rate} Hz")
            print(f"  Delta (sample interval): {tr.stats.delta} s")
            print(f"  Number of samples: {tr.stats.npts}")
            print(f"  Duration: {tr.stats.npts / tr.stats.sampling_rate:.3f} s")
            print(f"  Start time: {tr.stats.starttime}")
            print(f"  End time: {tr.stats.endtime}")
            
            # Check SAC header fields
            if hasattr(tr.stats, 'sac'):
                print(f"\n  SAC Header Fields:")
                if hasattr(tr.stats.sac, 'dist'):
                    print(f"    Distance: {tr.stats.sac.dist} m")
                if hasattr(tr.stats.sac, 'az'):
                    print(f"    Azimuth: {tr.stats.sac.az}°")
                if hasattr(tr.stats.sac, 'baz'):
                    print(f"    Back-azimuth: {tr.stats.sac.baz}°")
    
    # Summary statistics
    print(f"\n" + "=" * 60)
    print("SUMMARY STATISTICS")
    print("=" * 60)
    print(f"Total number of traces: {info['n_traces']}")
    print(f"Sampling rate: {info['sampling_rates'][0]} Hz (all traces)")
    print(f"Nyquist frequency: {info['sampling_rates'][0]/2:.1f} Hz")
    print(f"Number of samples per trace: {info['n_samples'][0]} (all traces)")
    print(f"Record duration: {info['durations'][0]:.3f} seconds")
    
    if info['distances']:
        print(f"\nReceiver Array Configuration:")
        print(f"  Minimum offset (source to nearest receiver): {min(info['distances']):.1f} m")
        print(f"  Maximum offset: {max(info['distances']):.1f} m")
        print(f"  Array length: {max(info['distances']) - min(info['distances']):.1f} m")
        
        # Calculate spacing
        sorted_dist = sorted(info['distances'])
        spacings = np.diff(sorted_dist)
        print(f"  Receiver spacing: {np.median(spacings):.1f} m (median)")
        if len(set(spacings.round(2))) > 1:
            print(f"  Warning: Spacing varies from {min(spacings):.1f} to {max(spacings):.1f} m")
    
    return info, stream