# Library Import
This section imports all the necessary libraries required for data processing, feature extraction, and cloud interaction.

In [3]:
import pandas as pd
import numpy as np
from obspy import read
from utils.feature_extraction_mars import *
from timeit import default_timer as timer
from multiprocessing import Pool, cpu_count
import os
# from google.cloud import storage # Uncomment this line if you want to use Cloud Storage to upload the dataframe

In [2]:
folder_path = './data/mars/test/data'
files = {os.path.splitext(f)[0] for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))}
print(files)

{'XB.ELYSE.02.BHV.2019-07-26HR12_evid0033', 'XB.ELYSE.02.BHV.2019-05-23HR02_evid0041', 'XB.ELYSE.02.BHV.2022-05-04HR23_evid0001', 'XB.ELYSE.02.BHV.2022-04-09HR22_evid0002', 'XB.ELYSE.02.BHV.2019-09-21HR03_evid0032', 'XB.ELYSE.02.BHV.2021-12-24HR22_evid0007', 'XB.ELYSE.02.BHV.2021-10-11HR23_evid0011', 'XB.ELYSE.02.BHV.2021-05-02HR01_evid0017', 'XB.ELYSE.02.BHV.2019-07-26HR12_evid0034'}


# Function for Processing Events (CSV and mseed)
- All data from the CSV files and the statuses from the mseed files are being extracted.
- **Additionally, we have a utility file for creating new event features using mathematical equations with the Scipy library; all these new features are utilized in training the neural network.**

## Each row represents an event from the lunar dataset. For each event:
- CSV data is loaded and processed. **We are adding the filename to the dataframe**.
- Data from mseed files (including network and station status) is extracted.
- The CSV and mseed data are combined and returned as a single DataFrame.

In [3]:
# Initialize an empty DataFrame to hold all concatenated data
df_combined = pd.DataFrame()

for index, filename in enumerate(files):
    try:
        print(index)
        start = timer()
        
        data_directory = f'./data/mars/test/data/{filename}'
        
        # Read CSV data
        df_data_csv = pd.read_csv(f"{data_directory}.csv", parse_dates=['time(%Y-%m-%dT%H:%M:%S.%f)'])
        df_data_csv['filename']=filename
    
        # Concatenate features from mseed
        mseed_file = f'{data_directory}.mseed'
        st = read(mseed_file)
        df_data_csv['network'] = st[0].stats['network']
        df_data_csv['station'] = st[0].stats['station']
        df_data_csv['location'] = st[0].stats['location']
        df_data_csv['channel'] = st[0].stats['channel']
        df_data_csv['sampling_rate'] = st[0].stats['sampling_rate']
        df_data_csv['delta'] = st[0].stats['delta']
        df_data_csv['npts'] = st[0].stats['npts']
        df_data_csv['calib'] = st[0].stats['calib']
        
        # Concatenando novas features
        sampling_rate = st[0].stats['sampling_rate']
        features = process_seismic_data(df_data_csv, sampling_rate)
        df_data_csv['mean_velocity']= features['mean_velocity']
        df_data_csv['std_velocity']= features['std_velocity']
        df_data_csv['max_velocity']= features['max_velocity']
        df_data_csv['min_velocity']= features['min_velocity']
        df_data_csv['total_energy']= features['total_energy']
        df_data_csv['rms_value']= features['rms_value']
        df_data_csv['peak_count']= features['peak_count']
        df_data_csv['valley_count']= features['valley_count']
        df_data_csv['fft_values']= features['fft_values']
        df_data_csv['fft_freqs']= features['fft_freqs']
        df_data_csv['autocorrelation']= features['autocorrelation']
        df_data_csv['acceleration']= features['acceleration']
        df_data_csv['jerk']= features['jerk']
        df_data_csv['cumulative_energy']= features['cumulative_energy']
    
        # Concatenate df_data_csv to the main DataFrame
        df_combined = pd.concat([df_combined, df_data_csv], ignore_index=True)
        end = timer()  # Stop the timer
        elapsed_time = end - start  # Calculate elapsed time
        print(f"Took {elapsed_time:.4f} seconds to process index {index}",end='\n')
    except Exception as e:
        print(e)
        continue
    

0
Took 23.3906 seconds to process index 0
1
Took 22.3285 seconds to process index 1
2
Took 22.0801 seconds to process index 2
3
Took 22.1471 seconds to process index 3
4
Took 21.7625 seconds to process index 4
5
Took 21.5100 seconds to process index 5
6
Took 21.3077 seconds to process index 6
7
Took 21.5255 seconds to process index 7
8
Took 22.0616 seconds to process index 8


In [4]:
df_combined.head()

Unnamed: 0,time(%Y-%m-%dT%H:%M:%S.%f),rel_time(sec),velocity(c/s),filename,network,station,location,channel,sampling_rate,delta,...,total_energy,rms_value,peak_count,valley_count,fft_values,fft_freqs,autocorrelation,acceleration,jerk,cumulative_energy
0,2019-07-26 12:00:00.010,0.0,0.0,XB.ELYSE.02.BHV.2019-07-26HR12_evid0033,XB,ELYSE,2,BHV,20.0,0.05,...,1925330000.0,163.525816,21827,21826,-7.653055e+03-0.000000e+ 00j,0.0,1925330000.0,0.001337,-0.015385,0.0
1,2019-07-26 12:00:00.060,0.05,6.7e-05,XB.ELYSE.02.BHV.2019-07-26HR12_evid0033,XB,ELYSE,2,BHV,20.0,0.05,...,1925330000.0,163.525816,21827,21826,5.032086e+05+4.519214e+ 04j,0.000278,1877502000.0,0.000568,-0.179246,1.11761e-10
2,2019-07-26 12:00:00.110,0.1,5.7e-05,XB.ELYSE.02.BHV.2019-07-26HR12_evid0033,XB,ELYSE,2,BHV,20.0,0.05,...,1925330000.0,163.525816,21827,21826,-1.293472e+06-3.854668e+ 05j,0.000556,1860496000.0,-0.016587,0.155938,3.041675e-10
3,2019-07-26 12:00:00.160,0.15,-0.001592,XB.ELYSE.02.BHV.2019-07-26HR12_evid0033,XB,ELYSE,2,BHV,20.0,0.05,...,1925330000.0,163.525816,21827,21826,-7.829146e+05+4.120607e+ 05j,0.000833,1875679000.0,0.016162,0.657648,6.373614e-08
4,2019-07-26 12:00:00.210,0.2,0.001673,XB.ELYSE.02.BHV.2019-07-26HR12_evid0033,XB,ELYSE,2,BHV,20.0,0.05,...,1925330000.0,163.525816,21827,21826,1.182444e+06+8.483111e+ 05j,0.001111,1848085000.0,0.049177,-0.520651,1.970583e-07


# Saving the File Locally

In [5]:
df_combined.to_csv("./test_mars.csv")