<a href="https://colab.research.google.com/github/cedamusk/final-year/blob/main/Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install obspy tensorflow numpy pandas matplotlib

In [None]:
import obspy
from obspy.clients.fdsn import Client
from obspy import UTCDateTime
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
import tensorflow as tf
from concurrent.futures import ThreadPoolExecutor, as_completed
import matplotlib.pyplot as plt

# Waveform sources handed to ``parallel_download``: one dict per station.
# ``client`` is the key passed to the FDSN ``Client`` constructor; the other
# four values are forwarded, in order, to ``Client.get_waveforms``.
# NOTE(review): the "USGS" FDSN endpoint may only offer an event service, not
# waveforms -- confirm this entry actually returns data.
SOURCES = [
    {
        "client": "IRIS",
        "network": "IU",
        "station": "ANMO",
        "location": "00",
        "channel": "BHZ",
    },
    {
        "client": "USGS",
        "network": "GS",
        "station": "TPNV",
        "location": "00",
        "channel": "BHZ",
    },
    {
        "client": "GEOFON",
        "network": "GE",
        "station": "RUE",
        "location": "",
        "channel": "BHZ",
    },
]

def download_and_preprocess_data(start_time, end_time, source):
    """Fetch one station's waveform, band-pass filter it and standardise it.

    Args:
        start_time: Start of the requested window; passed to
            ``Client.get_waveforms`` and ``Stream.trim``.
        end_time: End of the requested window.
        source: Mapping with the keys ``client``, ``network``, ``station``,
            ``location`` and ``channel``.

    Returns:
        A 1-D array with the zero-mean / unit-variance samples of the first
        trace of the downloaded stream, or ``None`` when any step fails (the
        error is printed, never raised).
    """
    try:
        # Constructing the client can itself fail (it contacts the data
        # centre), so it must sit inside the ``try``. Otherwise a single
        # unreachable service raises out of the worker and aborts the whole
        # parallel download instead of being skipped like other failures.
        client = Client(source["client"])
        st = client.get_waveforms(source["network"], source["station"], source["location"], source["channel"], start_time, end_time)

        st.filter('bandpass', freqmin=1, freqmax=20)

        st.trim(starttime=start_time, endtime=end_time)

        # Only the first trace is used; any further traces in the stream
        # (e.g. after a data gap) are ignored.
        scaler = StandardScaler()
        normalized_data = scaler.fit_transform(st[0].data.reshape(-1, 1)).flatten()

        return normalized_data
    except Exception as e:
        print(f"Error downloading data from {source['client']} for {source['station']}: {str(e)}")
        return None



def parallel_download(start_time, end_time, sources):
    """Download all sources concurrently and stack them into one 2-D array.

    Args:
        start_time: Start of the requested window, forwarded to
            ``download_and_preprocess_data``.
        end_time: End of the requested window.
        sources: Sequence of source mappings (see ``SOURCES``).

    Returns:
        A 2-D array with one row per successfully downloaded source, in the
        same order as ``sources``. Shorter rows are right-padded with zeros
        to the length of the longest one. If ``sources`` is empty or every
        download failed, an empty array of shape ``(0, 0)`` is returned.
    """
    # ThreadPoolExecutor rejects max_workers=0, so bail out early.
    if len(sources) == 0:
        return np.empty((0, 0))

    with ThreadPoolExecutor(max_workers=len(sources)) as executor:
        futures = [executor.submit(download_and_preprocess_data, start_time, end_time, source) for source in sources]
        # Collect in submission order (not completion order) so that row i
        # always corresponds to the i-th usable source.
        downloads = [future.result() for future in futures]

    traces = [trace for trace in downloads if trace is not None]
    if not traces:
        return np.empty((0, 0))

    max_len = max(len(trace) for trace in traces)
    padded_results = [np.pad(trace, (0, max_len - len(trace)), 'constant') for trace in traces]
    return np.array(padded_results)



def sta_lta(data, nsta, nlta):
    """Compute the classic STA/LTA characteristic function of ``data``.

    Both averages are trailing moving averages of the squared signal,
    obtained from a single cumulative sum.

    Args:
        data: 1-D array-like of samples.
        nsta: Short-term window length in samples (must be > 0).
        nlta: Long-term window length in samples (must be > 0).

    Returns:
        Float array with the same length as ``data`` holding STA / LTA.
        The first ``nlta - 1`` values are 0 because the long-term window is
        not yet filled there.

    Raises:
        ValueError: If ``nsta`` or ``nlta`` is not positive.
    """
    if nsta <= 0 or nlta <= 0:
        raise ValueError("nsta and nlta must be positive window lengths")

    energy = np.cumsum(np.asarray(data, dtype=float) ** 2)

    sta = energy.copy()
    sta[nsta:] = energy[nsta:] - energy[:-nsta]
    sta /= nsta

    lta = energy.copy()
    lta[nlta:] = energy[nlta:] - energy[:-nlta]
    lta /= nlta

    # Until the long window is full, both averages are just scaled copies of
    # the same running sum, so the raw ratio is the constant nlta / nsta and
    # would trip any threshold below that. Zero the warm-up instead.
    sta[:nlta - 1] = 0

    # Silent stretches (e.g. zero padding) give LTA == 0; clamp to the
    # smallest positive float so the result is 0 rather than NaN/inf.
    tiny = np.finfo(float).tiny
    lta[lta < tiny] = tiny

    return sta / lta

def create_rnn_model(input_shape):
    """Build and compile the binary window classifier.

    The network is two stacked ``SimpleRNN`` layers (64 units returning the
    full sequence, then 32 units) followed by a single sigmoid output.

    Args:
        input_shape: Shape of one input sample, excluding the batch axis.

    Returns:
        A compiled ``tf.keras.Sequential`` model using the Adam optimiser,
        binary cross-entropy loss and an accuracy metric.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input layer; passing
        # ``input_shape=`` to the first layer is deprecated in current Keras.
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.SimpleRNN(64, return_sequences=True),
        tf.keras.layers.SimpleRNN(32),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

def integrated_detection(data, sta_lta_threshold, rnn_model, window_size):
    """Detect events with an STA/LTA pre-trigger confirmed by the RNN.

    ``data`` is cut into consecutive, non-overlapping windows of
    ``window_size`` samples. A window becomes a candidate when any STA/LTA
    value inside it exceeds ``sta_lta_threshold``; candidates are then
    classified by ``rnn_model`` and kept when the score is above 0.5.

    Args:
        data: 1-D NumPy array of samples.
        sta_lta_threshold: Trigger level for the STA/LTA ratio.
        rnn_model: Object whose ``predict`` accepts an array of shape
            ``(n, window_size, 1)`` and returns one score per row.
        window_size: Window length in samples.

    Returns:
        List of start indices of the windows flagged as events.
    """
    # Not even one full window available: nothing to classify.
    if len(data) < window_size:
        return []

    sta_lta_ratio = sta_lta(data, nsta=int(0.5*window_size), nlta=int(2*window_size))

    # "+ 1" so that the last full window is examined as well. The
    # element-wise comparison ignores NaNs instead of depending on their
    # position, as Python's max() would.
    candidates = [
        start
        for start in range(0, len(data) - window_size + 1, window_size)
        if np.any(sta_lta_ratio[start:start + window_size] > sta_lta_threshold)
    ]
    if not candidates:
        return []

    # One batched predict call instead of one call per window.
    batch = np.stack([data[start:start + window_size] for start in candidates])
    scores = rnn_model.predict(batch.reshape(-1, window_size, 1))[:, 0]

    return [start for start, score in zip(candidates, scores) if score > 0.5]

def evaluate_performance(true_events, detected_events, total_windows):
    """Compute precision, recall and F1 by exact matching of event indices.

    Both inputs are treated as sets, so an index listed several times (for
    example the same window detected on more than one station) is counted
    once rather than being booked as an extra false positive/negative.

    Args:
        true_events: Iterable of hashable reference event indices.
        detected_events: Iterable of hashable detected event indices.
        total_windows: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(precision, recall, f1_score)``. A metric whose denominator
        is zero is reported as 0.
    """
    truth = set(true_events)
    detected = set(detected_events)

    true_positives = len(truth & detected)
    false_positives = len(detected) - true_positives
    false_negatives = len(truth) - true_positives

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1_score

def load_earthquake_catalog(start_time, end_time, events_per_day=5, seed=None):
    """Generate a synthetic earthquake catalogue for the given time span.

    No real catalogue is read: event times are drawn uniformly between
    ``start_time`` and ``end_time`` and magnitudes uniformly from [2, 6).

    Args:
        start_time: Start of the span. ``end_time - start_time`` must yield
            a number of seconds and ``start_time + seconds`` a time value.
        end_time: End of the span.
        events_per_day: Average number of events generated per day.
        seed: Optional seed for reproducible output. When ``None`` the
            global NumPy random state is used.

    Returns:
        DataFrame with a ``time`` column (sorted ascending) and a
        ``magnitude`` column; empty when the span is not positive.
    """
    time_diff_seconds = end_time - start_time
    num_days = time_diff_seconds / (24 * 3600)
    # A reversed or empty span would otherwise request a negative count.
    num_events = max(0, int(num_days * events_per_day))

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Times are drawn before magnitudes, keeping the draw order stable.
    offsets = rng.rand(num_events) * time_diff_seconds
    event_times = sorted(start_time + offset for offset in offsets)
    magnitudes = rng.uniform(2, 6, num_events)

    return pd.DataFrame({'time': event_times, 'magnitude': magnitudes})

if __name__ == "__main__":
    # End-to-end run: download waveforms, build labelled windows from the
    # catalogue, train and score the RNN, score the combined STA/LTA + RNN
    # detector, then plot the first station next to the catalogue.
    start_time = UTCDateTime("2023-01-01T00:00:00")
    end_time = UTCDateTime("2023-01-02T00:00:00")  # Changed to one day for quicker execution
    total_seconds = end_time - start_time

    print("Downloading and preprocessing data...")
    data = parallel_download(start_time, end_time, SOURCES)
    if len(data) == 0:
        # Everything below indexes data[0]; stop with a clear message.
        raise SystemExit("No waveform data could be downloaded; nothing to process.")

    print("Loading earthquake catalog...")
    catalog = load_earthquake_catalog(start_time, end_time)

    window_size = 3600
    step_size = 1800
    X = []
    y = []

    print("Preparing data for RNN...")
    for station_data in data:
        # Sample index -> time, assuming the row spans the whole request.
        seconds_per_sample = total_seconds / len(station_data)
        # "+ 1" so that the last full window is not dropped.
        for i in range(0, len(station_data) - window_size + 1, step_size):
            window_start = start_time + i * seconds_per_sample
            window_end = window_start + window_size * seconds_per_sample
            X.append(station_data[i:i+window_size])

            # Label is 1 when at least one catalogue event falls in the window.
            in_window = (catalog['time'] >= window_start) & (catalog['time'] < window_end)
            y.append(1 if in_window.any() else 0)

    X = np.array(X).reshape(-1, window_size, 1)
    y = np.array(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    print("Training RNN model...")
    rnn_model = create_rnn_model((window_size, 1))
    rnn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

    print("Evaluating RNN model...")
    y_pred = rnn_model.predict(X_test)
    rnn_precision, rnn_recall, rnn_f1, _ = precision_recall_fscore_support(y_test, y_pred.round(), average='binary')
    # roc_auc_score raises when the test split contains a single class.
    rnn_auc = roc_auc_score(y_test, y_pred) if len(np.unique(y_test)) > 1 else float('nan')

    print("RNN Performance:")
    print(f"Precision: {rnn_precision:.2f}")
    print(f"Recall: {rnn_recall:.2f}")
    print(f"F1 Score: {rnn_f1:.2f}")
    print(f"AUC: {rnn_auc:.2f}")

    print("Evaluating Integrated STA/LTA and RNN...")
    sta_lta_threshold = 2.5
    detected_events = []
    for station_data in data:
        detected_events.extend(integrated_detection(station_data, sta_lta_threshold, rnn_model, window_size))
    # Several stations can flag the same window; count each window once.
    detected_events = sorted(set(detected_events))

    # integrated_detection reports window START indices, so catalogue events
    # must be snapped to the start of their containing window before they are
    # compared; raw sample indices would practically never match.
    true_events = sorted({
        int((event_time - start_time) / (end_time - start_time) * len(data[0])) // window_size * window_size
        for event_time in catalog['time']
    })

    integrated_precision, integrated_recall, integrated_f1 = evaluate_performance(true_events, detected_events, len(data[0]))

    print("\nIntegrated STA/LTA and RNN Performance:")
    print(f"Precision: {integrated_precision:.2f}")
    print(f"Recall: {integrated_recall:.2f}")
    print(f"F1 Score: {integrated_f1:.2f}")

    plt.figure(figsize=(15, 10))
    plt.subplot(2, 1, 1)
    plt.plot(data[0])
    plt.title("Seismic Data (First Station)")
    plt.xlabel('Time')
    plt.ylabel("Amplitude")

    plt.subplot(2, 1, 2)
    plt.plot(catalog['time'], catalog['magnitude'], 'ro', markersize=3)
    plt.title("Earthquake Catalog")
    plt.xlabel('Time')
    plt.ylabel("Magnitude")

    plt.tight_layout()
    plt.show()