In [None]:
# Import required libraries
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from datetime import timezone
import pytz
import matplotlib.dates as mdates

# Put the parent of the current working directory at the front of sys.path so
# the `src` package imported below can be resolved.
# NOTE(review): this relies on the kernel's working directory sitting one level
# below the directory that contains `src` — confirm how the notebook is launched.
sys.path.insert(0, str(Path.cwd().parent))

# Project-local helpers: model factories, detection/merging routines,
# plotting utilities and directory handling.
from src.seisbench_models import create_detector, create_classifier, convert_picks_to_detections
from src.detect import (
    smooth_moving_avg, detect_event_windows, multi_class_detection,
    classify_waveform_windows, merge_picks_and_classifications,
    filter_noise_events, get_event_type_counts
)
from src.interactive_plots import plot_detection_results, interactive_detection_viewer, plot_event_summary
from src.utils import ensure_dir

# Render matplotlib figures inline in the notebook output (static images;
# an interactive backend would need a different magic)
%matplotlib inline

# Reaching this line means every import above resolved
print("✓ Imports successful!")

## 1. Configuration

In [None]:
# Run settings for the UW.DREAM mudslide-detection workflow.
# Later cells read every tunable value from this single mapping.
config = dict(
    # -- Station selection (handed to the FDSN client) --
    network='UW',
    station='DREAM',
    channel='HH*',  # High-bandwidth seismometer
    location='*',

    # -- Analysis window length, in days --
    days_back=5,

    # -- Phase picker model --
    picker_model='phasenet',
    picker_version='stead',

    # -- Event classifier model --
    classifier_model='quakexnet',
    classifier_version='base',

    # -- Compute device handed to the model factories --
    device='auto',

    # -- Picker detection parameters --
    # NOTE(review): units of min_duration / merge_distance / smooth_window are
    # not visible here (samples vs. seconds) — confirm against src.detect.
    picker_threshold=0.5,
    min_duration=10,
    merge_distance=50,
    apply_smoothing=True,
    smooth_window=100,

    # -- Classification windowing --
    # presumably window_duration and stride are in seconds — TODO confirm
    window_duration=100.0,
    stride=50.0,
    batch_size=12,
    include_sliding=True,
    include_event_centered=True,

    # -- Pick/classification merging --
    time_tolerance=10.0,

    # -- Outputs --
    save_plots=True,
    save_results=True,
    local_timezone='America/Los_Angeles',
)

# Calculate time window.
# By default the window ends "now", so each run analyses the most recent
# `days_back` days. Because that makes a run impossible to repeat later, an
# optional config['end_time'] (any value UTCDateTime accepts, e.g.
# '2024-01-15T00:00:00') pins the end of the window. When the key is absent or
# None the behaviour is exactly the rolling window described above.
end_override = config.get('end_time')
endtime = UTCDateTime(end_override) if end_override is not None else UTCDateTime.now()
starttime = endtime - config['days_back'] * 24 * 3600

print("UW.DREAM Mudslide Detection")
print(f"Time window: {starttime} to {endtime}")
print(f"Duration: {config['days_back']} days ({(endtime - starttime) / 3600:.1f} hours)")

# Create output directories used by the plot- and CSV-saving cells
ensure_dir('../plots')
ensure_dir('../logs')

## 2. Download Data from IRIS

In [None]:
# Fetch the waveforms and the matching station metadata from the FDSN service
print(f"Downloading data for {config['network']}.{config['station']}...")

client = Client("IRIS")

try:
    # Both requests address the same channels over the same time span, so the
    # selection is assembled once and reused.
    fdsn_selection = {
        code: config[code] for code in ('network', 'station', 'channel', 'location')
    }
    fdsn_selection.update(starttime=starttime, endtime=endtime)

    stream = client.get_waveforms(**fdsn_selection)

    # Merge traces in place; gaps are filled with zeros
    stream.merge(method=1, fill_value=0)

    print(f"✓ Downloaded {len(stream)} traces")
    print("\nStream info:")
    print(stream)

    # Response-level inventory, needed later for response removal
    print("\nFetching station inventory...")
    inventory = client.get_stations(level="response", **fdsn_selection)
    print("✓ Inventory retrieved")

except Exception as download_error:
    # Report the failure, then re-raise so the notebook stops here
    print(f"✗ Error downloading data: {download_error}")
    raise

## 3. Data Preprocessing and Visualization

In [None]:
# Remove instrumental response. The correction runs on a copy so the
# unmodified `stream` stays available next to the corrected one.
print("Removing instrumental response...")
stream_corrected = stream.copy()
stream_corrected.remove_response(inventory=inventory, output="VEL", water_level=60)

print("✓ Instrumental response removed")
print("Output: Velocity (m/s)")

# Plot the response-corrected waveforms, one panel per trace.
local_tz = pytz.timezone(config['local_timezone'])

# The panel count follows the stream instead of being fixed at three: the
# wildcard channel/location codes can return more traces than panels (which
# would index past the axes array) or fewer (which would leave empty panels).
n_traces = len(stream_corrected)
if n_traces == 0:
    raise ValueError("No traces available to plot")

# squeeze=False keeps `axes` a 2-D array even when there is a single trace
fig, axes = plt.subplots(n_traces, 1, figsize=(16, 10), sharex=True, squeeze=False)
axes = axes[:, 0]

for ax, tr in zip(axes, stream_corrected):
    # Ask ObsPy for the sample times as matplotlib date numbers in one
    # vectorised call. Building a Python datetime per sample does not scale to
    # multi-day traces. The local-time display is handled by the tz-aware
    # DateFormatter installed on the x-axis below.
    times_mpl = tr.times("matplotlib")

    # Plot
    ax.plot(times_mpl, tr.data, 'k-', linewidth=0.3, alpha=0.8)
    ax.set_ylabel(f'{tr.stats.channel}\nVelocity (m/s)', fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.text(0.01, 0.95, f'{tr.stats.network}.{tr.stats.station}.{tr.stats.location}.{tr.stats.channel}',
            transform=ax.transAxes, fontsize=9, va='top',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Format x-axis (panels share x, so this is set on the bottom one)
axes[-1].xaxis.set_major_formatter(mdates.DateFormatter('%m-%d %H:%M', tz=local_tz))
axes[-1].xaxis.set_major_locator(mdates.HourLocator(interval=12))
plt.setp(axes[-1].xaxis.get_majorticklabels(), rotation=45, ha='right')
axes[-1].set_xlabel(f'Time ({local_tz.zone})', fontsize=11)

fig.suptitle(f'UW.DREAM - {config["days_back"]} Day Waveform Overview', 
             fontsize=14, fontweight='bold')
plt.tight_layout()

if config['save_plots']:
    save_path = f"../plots/DREAM_{starttime.date}_waveforms.png"
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"✓ Plot saved to {save_path}")

plt.show()

## 4. Load Detection Models

In [None]:
# Build the phase picker from the configured model name, version and device
print(f"Loading {config['picker_model']} picker...")
picker_spec = {
    'model_name': config['picker_model'],
    'version': config['picker_version'],
    'device': config['device'],
}
picker = create_detector(**picker_spec)
print(f"✓ Picker loaded on {picker.device}")

# Build the event classifier the same way
print(f"\nLoading {config['classifier_model']} classifier...")
classifier_spec = {
    'model_name': config['classifier_model'],
    'version': config['classifier_version'],
    'device': config['device'],
}
classifier = create_classifier(**classifier_spec)
print("✓ Classifier loaded")
print("\n✓ Both models ready for detection")

## 5. Phase Picking with PhaseNet

In [None]:
print("Running PhaseNet phase picker...")

# Continuous inference over the downloaded stream
annotated_stream = picker.model.annotate(stream)
print("✓ Picker inference complete")

# Pull the per-phase probability arrays out of the annotated stream
pick_probabilities = picker._extract_predictions(annotated_stream)

print("\nPhase probabilities extracted:")
for phase_label, probability_trace in pick_probabilities.items():
    n_samples = len(probability_trace)
    peak_probability = np.max(probability_trace)
    print(f"  {phase_label}: {n_samples} samples, max prob: {peak_probability:.3f}")

# Discrete picks from the model's own classify() step
picks_output = picker.model.classify(stream)
discrete_picks = picks_output.picks
print(f"\nDeterministic picks: {len(discrete_picks)} picks found")

if len(discrete_picks) > 0:
    print("\nFirst 10 picks:")
    for rank, pick in enumerate(discrete_picks[:10], start=1):
        print(f"  {rank}. {pick.phase} at {pick.peak_time} (confidence: {pick.peak_value:.3f})")

In [None]:
# Visualize picker probabilities.
# Stream.plot() displays the figure itself by default, so a following
# plt.suptitle()/plt.tight_layout() no longer acts on the probability plot and
# ends up on a fresh, empty figure. Requesting the figure handle (and
# suppressing the automatic show) lets the title be applied to the right
# figure before it is rendered.
prob_fig = annotated_stream.plot(size=(900, 600), show=False, handle=True)
prob_fig.suptitle('PhaseNet Phase Probabilities', fontsize=14, fontweight='bold')
prob_fig.tight_layout(rect=(0, 0, 1, 0.96))  # leave headroom for the title
plt.show()

## 6. Detect Pick Windows

In [None]:
print("Detecting pick windows from probabilities...")

# Threshold-based detection; every tuning value comes from the config
detection_options = {
    option: config[option]
    for option in ('min_duration', 'merge_distance', 'apply_smoothing', 'smooth_window')
}
pick_detections = multi_class_detection(
    pick_probabilities,
    threshold=config['picker_threshold'],
    **detection_options,
)

print("✓ Pick detection complete")
print("\nDetected picks by phase:")

# Tally per phase first, then report each count and the grand total
picks_per_phase = {name: len(found) for name, found in pick_detections.items()}
for name, n_found in picks_per_phase.items():
    print(f"  {name}: {n_found} picks")
total_picks = sum(picks_per_phase.values())

print(f"\nTotal picks: {total_picks}")

## 7. Event Classification with QuakeXNet

In [None]:
print("Running QuakeXNet event classification...")
print("This will classify windows as: earthquake (eq), explosion (px), noise (no), or surface event (su)")

# Hybrid classification: the sliding and event-centred window modes are both
# switched by the config flags forwarded here
window_options = {
    option: config[option]
    for option in ('window_duration', 'stride', 'batch_size',
                   'include_sliding', 'include_event_centered')
}
classification_results = classify_waveform_windows(
    stream=stream,
    classifier=classifier,
    picker_detections=pick_detections,
    **window_options,
)

print("\n✓ Classification complete")
print(f"Total windows classified: {len(classification_results)}")

# Per-class tally and a tabular preview, only when something was classified
if classification_results:
    class_labels = [result['class_label'] for result in classification_results]
    print("\nClassification distribution:")
    for title, code in (('Earthquakes', 'eq'), ('Explosions', 'px'),
                        ('Noise', 'no'), ('Surface events', 'su')):
        print(f"  {title} ({code}): {class_labels.count(code)}")

    # Tabular view of the raw classification records
    df_classifications = pd.DataFrame(classification_results)
    print("\nClassification results preview:")
    print(df_classifications.head(15))

## 8. Merge Picks and Classifications

In [None]:
print("Merging picks and classifications...")

# Sampling rate is taken from the first trace of the downloaded stream
sampling_rate = stream[0].stats.sampling_rate


def _tag_with_phase(detection, phase_name):
    """Return a copy of one detection record labelled with its phase name."""
    record = detection.copy()
    record['phase'] = phase_name
    record['class_name'] = phase_name
    return record


# Flatten the per-phase detections into one list of phase-tagged records
picker_results = [
    _tag_with_phase(detection, phase_name)
    for phase_name, detections in pick_detections.items()
    for detection in detections
]

# Merge results
df_merged = merge_picks_and_classifications(
    picker_results=picker_results,
    classification_results=classification_results,
    time_tolerance=config['time_tolerance'],
    sampling_rate=sampling_rate,
)

print("✓ Merging complete")
print("\nMerged events summary:")
print(f"  Total events: {len(df_merged)}")

# One summary line per match category
match_types = df_merged['match_type']
for caption, tag in (('Matched (pick + class)', 'matched'),
                     ('Pick only', 'pick_only'),
                     ('Classification only', 'class_only')):
    print(f"  {caption}: {(match_types == tag).sum()}")

# Save merged results
if config['save_results']:
    csv_path = f"../logs/DREAM_{starttime.date}_merged_events.csv"
    df_merged.to_csv(csv_path, index=False)
    print(f"\n✓ Merged results saved to {csv_path}")

## 9. Identify Surface Events (Mudslides)

In [None]:
# Keep only the merged rows labelled as surface events
su_mask = df_merged['class_label'] == 'su'
surface_events = df_merged.loc[su_mask].copy()

banner = "=" * 70
print("SURFACE EVENT DETECTION RESULTS")
print(banner)
print(f"Total surface events detected: {len(surface_events)}")

if surface_events.empty:
    print("\nNo surface events detected in this time window.")
else:
    # Highest classifier probability first
    surface_events = surface_events.sort_values('class_prob', ascending=False)
    confidence = surface_events['class_prob']

    print("\nTop surface events by confidence:")
    preview_columns = ['pick_time', 'pick_phase', 'class_prob', 'match_type']
    print(surface_events[preview_columns].head(20))

    # Statistics
    print("\nStatistics:")
    print(f"  Mean confidence: {confidence.mean():.3f}")
    print(f"  Max confidence: {confidence.max():.3f}")
    for cutoff in (0.8, 0.9):
        print(f"  Events with confidence > {cutoff}: {(confidence > cutoff).sum()}")

    # Save surface events
    if config['save_results']:
        surface_csv = f"../logs/DREAM_{starttime.date}_surface_events.csv"
        surface_events.to_csv(surface_csv, index=False)
        print(f"\n✓ Surface events saved to {surface_csv}")

print(banner)

## 10. Visualization - Event Timeline

In [None]:
# Timeline scatter: classification confidence against time, one series per class
fig, ax = plt.subplots(figsize=(16, 6))

# Marker colour per class label
colors = {
    'eq': 'red',
    'px': 'orange',
    'no': 'gray',
    'su': 'green'
}

# Legend text per class label
labels = {
    'eq': 'Earthquake',
    'px': 'Explosion',
    'no': 'Noise',
    'su': 'Surface Event (Mudslide)'
}

# One scatter series per class; classes without rows are skipped
for event_type, marker_color in colors.items():
    class_rows = df_merged[df_merged['class_label'] == event_type]
    if class_rows.empty:
        continue

    # NOTE(review): assumes pick_time is an offset in seconds from `starttime`
    # and is present on every row — confirm against the merge output, since
    # rows without a pick may carry missing values.
    local_times = [
        (starttime + offset).datetime.replace(tzinfo=timezone.utc).astimezone(local_tz)
        for offset in class_rows['pick_time'].values
    ]

    # Surface events are drawn larger and outlined so they stand out
    highlight = event_type == 'su'
    ax.scatter(
        local_times,
        class_rows['class_prob'].values,
        c=marker_color,
        label=f"{labels[event_type]} (n={len(class_rows)})",
        alpha=0.6,
        s=50 if highlight else 20,
        edgecolors='black' if highlight else 'none',
        linewidths=1.5 if highlight else 0,
    )

# Axis labels, title, legend
ax.set_xlabel(f'Time ({local_tz.zone})', fontsize=12)
ax.set_ylabel('Classification Confidence', fontsize=12)
ax.set_title(f'UW.DREAM Event Timeline - {config["days_back"]} Days', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
ax.legend(loc='upper right', fontsize=10)
ax.set_ylim([0, 1.05])

# Local-time tick labels every 12 hours
time_axis = ax.xaxis
time_axis.set_major_formatter(mdates.DateFormatter('%m-%d %H:%M', tz=local_tz))
time_axis.set_major_locator(mdates.HourLocator(interval=12))
plt.setp(time_axis.get_majorticklabels(), rotation=45, ha='right')

plt.tight_layout()

if config['save_plots']:
    save_path = f"../plots/DREAM_{starttime.date}_event_timeline.png"
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"✓ Plot saved to {save_path}")

plt.show()

## 11. Summary Statistics

In [None]:
rule = "=" * 70

# Header
print("\n" + rule)
print("UW.DREAM MUDSLIDE DETECTION SUMMARY")
print(rule)
print("Station: UW.DREAM")
print(f"Time window: {starttime} to {endtime}")
print(f"Duration: {config['days_back']} days")

# Counts per classifier label (rows without a label are left out)
print("\n--- Detection Results ---")
print(f"Total events detected: {len(df_merged)}")
print("\nBy event type:")
class_counts = df_merged['class_label'].dropna().value_counts()
for event_type, count in class_counts.items():
    print(f"  {labels.get(event_type, event_type)}: {count}")

# Surface-event highlights
print("\n--- Surface Events (Mudslides) ---")
if surface_events.empty:
    print("No surface events detected")
else:
    high_conf = surface_events[surface_events['class_prob'] > 0.8]
    print(f"Total surface events: {len(surface_events)}")
    print(f"High confidence (>0.8): {len(high_conf)}")

    if not high_conf.empty:
        print("\nMost likely mudslide event:")
        # First row of surface_events; the surface-event cell sorts it by
        # descending class_prob
        best = surface_events.iloc[0]
        event_time = starttime + best['pick_time']
        print(f"  Time: {event_time}")
        print(f"  Confidence: {best['class_prob']:.3f}")
        print(f"  Phase: {best.get('pick_phase', 'N/A')}")

# Counts per picked phase (rows without a pick are left out)
print("\n--- Phase Picks ---")
phase_counts = df_merged['pick_phase'].dropna().value_counts()
for phase, count in phase_counts.items():
    print(f"  {phase}: {count}")

print(rule)

## Notes

### Interpreting Results
- **Surface events (su)** are the primary indicator of mudslides/landslides
- High confidence scores (>0.8) indicate strong mudslide signatures
- Look for clusters of surface events in time for sustained slope failure
- Compare with local weather/precipitation data for correlation

### Next Steps
1. Examine waveforms around high-confidence surface events
2. Check for spectral characteristics typical of mass movements
3. Correlate with precipitation data and known slope failures
4. Compare with nearby stations for spatial extent
5. Export results for further geotechnical analysis