<a href="https://colab.research.google.com/github/cedamusk/final-year/blob/main/weontosomething22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install obspy tensorflow  matplotlib

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from obspy import UTCDateTime, Stream, Trace
from obspy.clients.fdsn import Client
from obspy.signal.trigger import classic_sta_lta
from obspy.signal.filter import bandpass
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns

def fetch_iris_data(network, station, location, channel, start_time, end_time):
    """
    Request waveforms from the "IRIS" FDSN client.

    All six arguments are forwarded, in order, to ``Client.get_waveforms``.

    Returns:
        The object returned by ``get_waveforms``, or ``None`` when creating
        the client or issuing the request raised. The error message is
        printed rather than propagated so callers can fall back.
    """
    request = (network, station, location, channel, start_time, end_time)
    try:
        return Client("IRIS").get_waveforms(*request)
    except Exception as err:
        # Deliberately broad: any failure simply means "no data available".
        print(f"Error fetching data from IRIS: {err!s}")
    return None

def generate_synthetic_data(num_samples, sample_rate, event_duration, noise_level):
    """
    Generate synthetic seismic data for testing and validation.

    The trace is Gaussian background noise plus one to three exponentially
    decaying sine bursts ("events") placed at random start indices between
    1/8 and 7/8 of the trace.

    Args:
        num_samples: Number of samples in the trace.
        sample_rate: Samples per second; converts indices to seconds.
        event_duration: Nominal length of each event in seconds.
        noise_level: Standard deviation of the background noise.

    Returns:
        Tuple ``(data, events, event_locations)``:
        ``data`` is ``background + events``; ``events`` is the noise-free
        event signal; ``event_locations`` is a list of ``(start, end)`` sample
        indices (end exclusive, never larger than ``num_samples``).
    """
    time = np.arange(num_samples) / sample_rate
    background = np.random.normal(0, noise_level, num_samples)

    # Create multiple events for more complex training
    num_events = np.random.randint(1, 4)
    events = np.zeros(num_samples)
    event_locations = []
    event_length = int(event_duration * sample_rate)

    for _ in range(num_events):
        event_start = np.random.randint(num_samples // 8, num_samples * 7 // 8)
        # Clamp so the reported end index never points past the trace; the
        # slices below would silently truncate anyway, but the location
        # tuple would otherwise disagree with what was actually written.
        event_end = min(event_start + event_length, num_samples)
        event_locations.append((event_start, event_end))

        # Create event with varying frequency and amplitude
        freq = np.random.uniform(3, 8)
        amp = np.random.uniform(0.8, 1.2)
        decay = np.random.uniform(0.1, 0.3)

        elapsed = time[event_start:event_end] - time[event_start]
        event = np.sin(2 * np.pi * freq * elapsed) * amp * np.exp(-elapsed / decay)
        # Superpose rather than assign: overlapping events add up instead of
        # the later one cutting the earlier waveform off mid-oscillation.
        events[event_start:event_end] += event

    data = background + events
    return data, events, event_locations

def create_windows(data, window_size, step):
    """
    Create overlapping sliding windows with per-sample features for RNN input.

    Each window holds four feature columns per sample: the raw signal, its
    absolute value, its first derivative and its second derivative (both via
    ``np.gradient`` computed inside the window).

    Args:
        data: 1-D sequence of samples.
        window_size: Number of samples per window.
        step: Offset in samples between the starts of consecutive windows.

    Returns:
        Tuple ``(windows, labels)`` where ``windows`` has shape
        ``(n_windows, window_size, 4)`` and ``labels`` holds the mean of the
        raw signal in each window, shape ``(n_windows,)``. When ``data`` is
        shorter than one window both arrays are empty but keep those shapes.
    """
    data = np.asarray(data)
    windows = []
    labels = []

    for start in range(0, len(data) - window_size + 1, step):
        window = data[start:start + window_size]
        first_derivative = np.gradient(window)
        windows.append(np.column_stack((
            window,  # Raw signal
            np.abs(window),  # Absolute amplitude
            first_derivative,  # First derivative
            np.gradient(first_derivative),  # Second derivative
        )))
        labels.append(window.mean())

    if not windows:
        # Keep the 3-D layout so callers can still read the feature count
        # from ``shape[2]`` even when no window fits.
        return np.empty((0, window_size, 4)), np.empty((0,))
    return np.array(windows), np.array(labels)

def build_enhanced_rnn_model(input_shape):
    """
    Assemble and compile the stacked-LSTM binary classifier.

    The network is two bidirectional LSTM stages (128 and 96 units, both
    returning sequences), one plain LSTM (64 units), and a small dense head
    ending in a single sigmoid unit. Every recurrent stage and the first
    dense layer are followed by batch normalisation and dropout.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.

    Returns:
        The compiled ``Sequential`` model (Adam, learning rate 0.001,
        binary cross-entropy loss).
    """
    model = Sequential()

    # Recurrent stage 1: bidirectional, keeps the full sequence.
    model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    # Recurrent stage 2: bidirectional, keeps the full sequence.
    model.add(Bidirectional(LSTM(96, return_sequences=True)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    # Recurrent stage 3: collapses the sequence to one vector.
    model.add(LSTM(64))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # Dense classification head.
    model.add(Dense(32, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy', 'Precision', 'Recall'],
    )
    return model

def process_stream(stream, freqmin=0.5, freqmax=20):
    """
    Return a detrended, tapered and band-pass filtered copy of ``stream``.

    The input is copied first and every operation is applied to the copy.

    Args:
        stream: Object providing ``copy()``; the copy must provide
            ``detrend``, ``taper`` and ``filter``.
        freqmin: Lower corner passed to the band-pass filter.
        freqmax: Upper corner passed to the band-pass filter.

    Returns:
        The processed copy.
    """
    bandpass_options = {
        'freqmin': freqmin,
        'freqmax': freqmax,
        'corners': 4,
        'zerophase': True,
    }

    result = stream.copy()
    result.detrend('linear')
    result.taper(max_percentage=0.05)
    result.filter('bandpass', **bandpass_options)
    return result

def evaluate_model(model, X_test, y_test, predictions):
    """
    Print classification metrics and show a confusion-matrix heatmap.

    ``predictions`` are thresholded at 0.5 and compared with ``y_test``.
    ``model`` and ``X_test`` are accepted but not used here.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    predicted_labels = (predictions > 0.5).astype(int)

    # Scalar scores, computed in the order they are reported.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    scores = tuple(scorer(y_test, predicted_labels) for scorer in scorers)

    text_report = classification_report(y_test, predicted_labels)
    matrix = confusion_matrix(y_test, predicted_labels)

    print("\n=== Model Performance Metrics ===")
    for caption, value in zip(("Accuracy", "Precision", "Recall", "F1 Score"), scores):
        print(f"{caption}:{value:.4f}")

    print("\n=== Classification report ===")
    print(text_report)

    plt.figure(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted label')
    plt.show()

    accuracy, precision, recall, f1 = scores
    return accuracy, precision, recall, f1

def _label_windows(indicator, window_size, step):
    """
    Label each sliding window 1 if any sample of ``indicator`` in it is truthy.

    Window starts follow ``range(0, len(indicator) - window_size + 1, step)``,
    the same sequence ``create_windows`` iterates, so the labels line up
    one-to-one with its windows for an input of the same length.
    """
    starts = range(0, len(indicator) - window_size + 1, step)
    return np.array(
        [int(np.any(indicator[start:start + window_size])) for start in starts],
        dtype=int,
    )


def main(use_real_data=True):
    """
    Run the full pipeline: load data, window it, train the RNN, evaluate, plot.

    Args:
        use_real_data: When true, try to fetch the last hour of waveforms via
            ``fetch_iris_data``; if that yields nothing, synthetic data is
            generated instead.

    Window labels:
        * real data: a window is positive when the classic STA/LTA ratio
          exceeds ``config['threshold']`` anywhere inside it;
        * synthetic data: a window is positive when it contains any non-zero
          sample of the noise-free event signal.
    """
    # Configuration parameters
    # NOTE(review): sta/lta lengths are sample counts; the "seconds" remarks
    # assume config['sample_rate'] — a fetched trace may differ, confirm.
    config = {
        'window_size': 200,
        'step': 20,
        'sample_rate': 100,
        'sta_length': 50,  # 0.5 seconds
        'lta_length': 500,  # 5 seconds
        'threshold': 1.5,
        'num_samples': 20000
    }

    true_events = None

    if use_real_data:
        # Fetch real data from IRIS
        end_time = UTCDateTime.now()
        start_time = end_time - 3600  # One hour

        stream = fetch_iris_data('YS', 'BAOP', '', 'BHZ', start_time, end_time)
        if stream is None or len(stream) == 0:
            print("Falling back to synthetic data...")
            use_real_data = False
        else:
            filtered_stream = process_stream(stream)

    if not use_real_data:
        # Generate synthetic data with multiple events
        synthetic_data, true_events, event_locations = generate_synthetic_data(
            config['num_samples'],
            config['sample_rate'],
            event_duration=2,
            noise_level=0.1
        )

        trace = Trace(data=synthetic_data)
        trace.stats.starttime = UTCDateTime("2021-01-01T00:00:00")
        trace.stats.delta = 1.0/config['sample_rate']
        trace.stats.channel = 'SHZ'
        filtered_stream = Stream([trace])
        filtered_stream = process_stream(filtered_stream)

    # Create training data (only the first trace of the stream is used)
    trace_data = filtered_stream[0].data
    X, _ = create_windows(
        trace_data,
        config['window_size'],
        config['step']
    )
    if X.ndim != 3 or len(X) == 0:
        raise ValueError("Trace is shorter than one window; nothing to train on")

    if use_real_data:
        # For real data, use the STA/LTA characteristic function as initial
        # labels: samples where the ratio exceeds the threshold are "event".
        sta_lta = classic_sta_lta(
            trace_data,
            config['sta_length'],
            config['lta_length']
        )
        indicator = sta_lta > config['threshold']
    else:
        # Ground truth is known for synthetic data: the noise-free events.
        indicator = true_events != 0

    y = _label_windows(indicator, config['window_size'], config['step'])

    print(f"X shape:{X.shape}")
    print(f"y shape: {y.shape}")
    # Prepare data for training
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Normalize data with statistics from the training split only
    mean = X_train.mean(axis=(0, 1), keepdims=True)
    std = X_train.std(axis=(0, 1), keepdims=True)
    std = np.where(std == 0, 1.0, std)  # constant feature: avoid dividing by zero
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std

    # Build and train model
    input_shape = (config['window_size'], X.shape[2])
    model = build_enhanced_rnn_model(input_shape)

    # Add callbacks
    callbacks = [
        EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6
        )
    ]

    # Train model
    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )

    # Make predictions
    # NOTE(review): train_test_split shuffles by default, so these test-set
    # predictions are not in chronological window order when plotted.
    predictions = model.predict(X_test)

    accuracy, precision, recall, f1 = evaluate_model(model, X_test, y_test, predictions)

    # Plot results
    plot_results(
        filtered_stream,
        predictions,
        config,
        use_real_data,
        metrics={'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}
    )
    plot_training_history(history)

def plot_results(stream, predictions, config, use_real_data, metrics=None):
    """
    Show a four-panel summary figure for one run.

    Panels, top to bottom: the first trace's samples, the predicted
    probabilities, the predictions thresholded at 0.5, and a spectrogram of
    the first trace.

    Args:
        stream: Stream whose first trace is plotted.
        predictions: Model output probabilities, one per window.
        config: Accepted for interface compatibility; not used here.
        use_real_data: Accepted for interface compatibility; not used here.
        metrics: Optional dict with ``accuracy``, ``precision``, ``recall``
            and ``f1``; when given, the values are added to the first title.
    """
    plt.figure(figsize=(15, 10))

    # All panels share one 4x1 grid so that none of them overlap.
    # Plot original data
    plt.subplot(4, 1, 1)
    plt.plot(stream[0].data)
    if metrics:
        plt.title(f"Original Seismic Data\nAccuracy:{metrics['accuracy']:.4f},"
                  f"Precision:{metrics['precision']:.4f},"
                  f"Recall:{metrics['recall']:.4f},"
                  f"F1:{metrics['f1']:.4f}")
    else:
        plt.title("Original Seismic Data")
    plt.xlabel("Samples")
    plt.ylabel("Amplitude")

    # Plot predictions
    plt.subplot(4, 1, 2)
    plt.plot(predictions)
    plt.title("Detection Predictions")
    plt.xlabel("Windows")
    plt.ylabel("Probability")

    plt.subplot(4, 1, 3)
    plt.plot((predictions > 0.5).astype(int))
    plt.title("Binary Predictions (Threshold=0.5)")
    plt.xlabel("Windows")
    plt.ylabel('Detection')

    # Plot spectrogram into this figure's last panel; without ``axes`` the
    # spectrogram call opens a figure of its own and this panel stays empty.
    spectrogram_axes = plt.subplot(4, 1, 4)
    stream[0].spectrogram(axes=spectrogram_axes, show=False)
    spectrogram_axes.set_title("Spectrogram")

    plt.tight_layout()
    plt.show()

def plot_training_history(history):
    """
    Plot loss, accuracy and precision/recall curves side by side.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).

    Precision and recall are looked up case-insensitively and with any
    trailing ``_<number>`` suffix ignored, because the exact key spelling
    differs between Keras versions; a curve whose key is absent is skipped
    instead of raising ``KeyError``.
    """
    logs = history.history

    def _find_series(name):
        """Return the logged series for ``name`` or None if not recorded."""
        if name in logs:
            return logs[name]
        wanted = name.lower()
        for key, values in logs.items():
            normalised = key.lower().rstrip('0123456789').rstrip('_')
            if normalised == wanted:
                return values
        return None

    # One 1x3 grid for all panels; widened so three panels stay readable.
    plt.figure(figsize=(18, 4))

    plt.subplot(1, 3, 1)
    plt.plot(logs['loss'], label='Training Loss')
    plt.plot(logs['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 3, 2)
    plt.plot(logs['accuracy'], label='Training Accuracy')
    plt.plot(logs['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 3, 3)
    plotted_any = False
    for metric_name in ('Precision', 'Recall'):
        series = _find_series(metric_name)
        if series is not None:
            plt.plot(series, label=metric_name)
            plotted_any = True
    plt.title('Precision and Recall')
    plt.xlabel('Epoch')
    plt.ylabel("Score")
    if plotted_any:
        plt.legend()

    plt.tight_layout()
    plt.show()

# Script entry point: run the pipeline asking for real IRIS data. main()
# switches to synthetic data itself when the fetch returns None.
if __name__ == "__main__":
    main(use_real_data=True)