In [None]:
import numpy as np
import pandas as pd
from obspy import read
import pywt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import os
import matplotlib.pyplot as plt

In [None]:
# Folders containing the .mseed waveform files used for training and prediction
train_data_directory = './data/mars/training/data/'
test_data_directory = './data/mars/test/data/'

# Training catalog; later cells read its 'filename' and 'time_rel(sec)' columns
cat_directory = './data/mars/training/catalogs/'
cat_file = f'{cat_directory}Mars_InSight_training_catalog_final.csv'
cat = pd.read_csv(filepath_or_buffer=cat_file)

In [None]:
# Wavelet Transform Function
def wavelet_transform(signal, wavelet='db4', level=6):
    """Summarise a signal by statistics of its wavelet decomposition.

    The signal is decomposed with ``pywt.wavedec`` and every coefficient
    array it returns contributes four numbers, in this order: mean,
    standard deviation, maximum, minimum.

    Parameters
    ----------
    signal : array-like
        Samples to decompose; passed unchanged to ``pywt.wavedec``.
    wavelet : str, optional
        Wavelet name forwarded to ``pywt.wavedec``.
    level : int, optional
        Decomposition level forwarded to ``pywt.wavedec``.

    Returns
    -------
    list
        Flat list holding four statistics per coefficient array, in the
        order the arrays are returned by ``pywt.wavedec``.
    """
    # Order matters: downstream feature columns follow this sequence.
    summary_stats = (np.mean, np.std, np.max, np.min)
    bands = pywt.wavedec(signal, wavelet, level=level)
    return [stat(band) for band in bands for stat in summary_stats]

In [None]:

# Prepare the training data, fit the regressor, and predict arrival times
# for every .mseed file in the test directory.


def _extract_mseed_features(mseed_file):
    """Read one .mseed file and return its wavelet feature list.

    Only the first trace of the stream is used. A warning is printed and
    ``None`` is returned when the stream has no traces, the first trace has
    no samples, or ``wavelet_transform`` yields no features. Training and
    test files go through this same routine so both get identical checks.

    Parameters
    ----------
    mseed_file : str
        Path of the file handed to ``read``.

    Returns
    -------
    list or None
        Result of ``wavelet_transform`` for the first trace, or ``None``
        when the file cannot be used.
    """
    stream = read(mseed_file)

    # Check if the stream contains data
    if len(stream) == 0:
        print(f"Warning: No data found in {mseed_file}")
        return None

    # Get the signal data
    signal_values = stream[0].data

    # An empty trace has nothing to decompose, so skip it.
    if len(signal_values) == 0:
        print(f"Warning: No signal data found in {mseed_file}")
        return None

    # Extract wavelet features
    wavelet_features = wavelet_transform(signal_values)
    if not wavelet_features:
        print(f"Warning: No features extracted from {mseed_file}")
        return None

    return wavelet_features


X_all = []
arrival_times = []

# Extract features and target from the training data
for index, row in cat.iterrows():
    test_filename = row['filename'].replace('.csv', '')  # Remove .csv extension if it exists
    mseed_file = f'{train_data_directory}{test_filename}.mseed'  # Append .mseed instead of .csv

    # Check if the .mseed file exists
    if not os.path.exists(mseed_file):
        print(f"Warning: No data found for file {mseed_file}")
        continue

    wavelet_features = _extract_mseed_features(mseed_file)
    if wavelet_features is None:
        continue

    # Features and target are appended together so the two lists stay aligned.
    X_all.append(wavelet_features)
    arrival_times.append(row['time_rel(sec)'])

# Check if features were extracted
if len(X_all) == 0:
    print("Error: No features were extracted.")
else:
    # Convert lists to numpy arrays
    X_all = np.array(X_all)
    arrival_times = np.array(arrival_times)

    # Handle NaNs and Infs in the data (if needed)
    X_all = np.nan_to_num(X_all, nan=0.0, posinf=0.0, neginf=0.0)
    # NOTE(review): NaN/inf targets become 0.0 here, so such rows are trained
    # on as an arrival time of 0 -- confirm this is intended.
    arrival_times = np.nan_to_num(arrival_times, nan=0.0, posinf=0.0, neginf=0.0)

    # Ensure X_all is 2D
    if len(X_all.shape) == 1:
        X_all = X_all.reshape(-1, 1)

    # Scale the features; the fitted scaler is reused for the test data below
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_all)

    # Since the dataset is small, skip validation and train the model on the full training set
    X_train = X_scaled
    y_train = arrival_times

    # Example model: two hidden Dense layers with LeakyReLU and dropout,
    # ending in a single linear output for the arrival time
    model = Sequential()
    model.add(Dense(128, input_dim=X_train.shape[1]))
    model.add(LeakyReLU(alpha=0.1))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(64))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dropout(0.3))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    # Train the model (without validation split)
    history = model.fit(X_train, y_train, epochs=100, batch_size=64)

    # Test the model on new test data located in `./data/mars/test/data/`.
    # os.listdir returns names in arbitrary order; sorting keeps the output
    # catalog's row order reproducible between runs.
    test_filenames = sorted(
        f for f in os.listdir(test_data_directory) if f.endswith('.mseed')
    )
    test_X_all = []
    test_filenames_clean = []

    for test_file in test_filenames:
        mseed_file = os.path.join(test_data_directory, test_file)

        # Load the test mseed file
        if not os.path.exists(mseed_file):
            print(f"Warning: No data found for file {test_file}")
            continue

        # Same validation as the training files, including the empty-trace check
        wavelet_features = _extract_mseed_features(mseed_file)
        if wavelet_features is None:
            continue

        # Keep the filename next to its features so predictions can be labelled
        test_X_all.append(wavelet_features)
        test_filenames_clean.append(test_file)

    # If we have test data, proceed with predictions
    if len(test_X_all) > 0:
        test_X_all = np.array(test_X_all)
        test_X_all = np.nan_to_num(test_X_all, nan=0.0, posinf=0.0, neginf=0.0)

        # Ensure test data is scaled the same way as training data
        test_X_scaled = scaler.transform(test_X_all)

        # Generate predictions
        test_y_pred = model.predict(test_X_scaled)

        # Create a DataFrame for the catalog
        test_catalog_df = pd.DataFrame({
            'filename': test_filenames_clean,
            'predicted_arrival_time': test_y_pred.flatten()
        })

        # Save the catalog as CSV in the current working directory
        output_file = 'predicted_arrival_times_catalog.csv'
        test_catalog_df.to_csv(output_file, index=False)
        print(f"Catalog file saved: {output_file}")
    else:
        print("No valid test data available for predictions.")