# Feature extraction
To obtain more information about each seismic recording, additional variables are calculated so that a model can interpret these characteristics and determine whether a signal is a moonquake/marsquake or not.

In [None]:
# We import the necessary libraries to process the data for each earthquake in the catalog (for both Mars and the Moon).
from NASA_Seismic_Detection_COD import process_file, extract_features
import os
import pandas as pd

The function *extract_features* processes data from the .mseed files and creates a dataframe with important information such as:
- Statistical Measures
- Frequency-Based Features
- Time-Domain Features
- Energy-Based Features
- Waveform Shape Features


## Mars
We process the training and test data and combine them into one table.

### Train Set

In [None]:
# Run process_file on every Mars training recording to learn whether it is
# flagged as a marsquake. Each result is stored as one row of df_1
# (presumably a (filename, seismic_detected) pair -- confirm in process_file).

directory_path = 'data/mars/training/data/'

results = []  # one process_file result per .mseed file
i = 0         # number of .mseed files handled so far
for entry in os.listdir(directory_path):
    # Anything that is not a miniSEED file is ignored.
    if not entry.endswith(".mseed"):
        continue
    print(entry)
    results.append(process_file(os.path.join(directory_path, entry)))
    i += 1

# Turn the collected results into a two-column table.
df_1 = pd.DataFrame(results, columns=["filename", "seismic_detected"])
print("Finished processing all files.")
print(f"Total of files analyzed: {i}")

In [None]:
# Apply extract_features to every Mars training recording and stack the
# per-file feature tables into df_2.
#
# The per-file tables are collected in a list and concatenated once at the
# end: concatenating inside the loop re-copies the growing table for every
# file, and the old "first iteration" bootstrap left df_2 undefined (or
# silently stale from a previous cell) when the directory had no .mseed files.

directory_path = 'data/mars/training/data/'

feature_frames = []
for filename in os.listdir(directory_path):
    # Only miniSEED recordings are processed.
    if not filename.endswith(".mseed"):
        continue
    print('Processing: ', filename)
    feature_frames.append(extract_features(os.path.join(directory_path, filename)))

# Fail loudly instead of continuing with a missing or stale df_2.
if not feature_frames:
    raise FileNotFoundError(f"No .mseed files found in {directory_path!r}")

df_2 = pd.concat(feature_frames)

In [None]:
# Join the extracted features onto the labels by filename (the left join keeps
# every labelled file even when it has no feature row), then tag the rows as
# training data (test = 0) coming from Mars.
df_train = pd.merge(df_1, df_2, on='filename', how='left').assign(
    test=0,
    source='mars',
)

### Test Set

In [None]:
# Run process_file on every Mars test recording to learn whether it is
# flagged as a marsquake. Each result is stored as one row of df_1
# (presumably a (filename, seismic_detected) pair -- confirm in process_file).

directory_path = 'data/mars/test/data/'

results = []  # one process_file result per .mseed file
i = 0         # number of .mseed files handled so far
for entry in os.listdir(directory_path):
    # Anything that is not a miniSEED file is ignored.
    if not entry.endswith(".mseed"):
        continue
    print(entry)
    results.append(process_file(os.path.join(directory_path, entry)))
    i += 1

# Turn the collected results into a two-column table.
df_1 = pd.DataFrame(results, columns=["filename", "seismic_detected"])
print("Finished processing all files.")
print(f"Total of files analyzed: {i}")

In [None]:
# Apply extract_features to every Mars test recording and stack the per-file
# feature tables into df_2.
#
# The per-file tables are collected in a list and concatenated once at the
# end: concatenating inside the loop re-copies the growing table for every
# file, and the old "first iteration" bootstrap left df_2 pointing at a table
# from an earlier cell when the directory had no .mseed files.

directory_path = 'data/mars/test/data/'

feature_frames = []
for filename in os.listdir(directory_path):
    # Only miniSEED recordings are processed.
    if not filename.endswith(".mseed"):
        continue
    print('Processing: ', filename)
    feature_frames.append(extract_features(os.path.join(directory_path, filename)))

# Fail loudly instead of continuing with a missing or stale df_2.
if not feature_frames:
    raise FileNotFoundError(f"No .mseed files found in {directory_path!r}")

df_2 = pd.concat(feature_frames)

In [None]:
# Join the extracted features onto the labels by filename (the left join keeps
# every labelled file even when it has no feature row), then tag the rows as
# test data (test = 1) coming from Mars.
df_test = pd.merge(df_1, df_2, on='filename', how='left').assign(
    test=1,
    source='mars',
)

In [None]:
# Stack the Mars training and test tables and save them as a single CSV
# (with a header row, without the DataFrame index).
df = pd.concat((df_train, df_test))
df.to_csv('features_mars.csv', index=False, header=True)

## Moon
We process the training and test data and combine them into one table.

### Train Set

In [None]:
# Run process_file on every lunar training recording to learn whether it is
# flagged as a moonquake. Each result is stored as one row of df_1
# (presumably a (filename, seismic_detected) pair -- confirm in process_file).

directory_path = 'data/lunar/training/data/S12_GradeA/'

results = []  # one process_file result per .mseed file
i = 0         # number of .mseed files handled so far
for entry in os.listdir(directory_path):
    # Anything that is not a miniSEED file is ignored.
    if not entry.endswith(".mseed"):
        continue
    print(entry)
    results.append(process_file(os.path.join(directory_path, entry)))
    i += 1

# Turn the collected results into a two-column table.
df_1 = pd.DataFrame(results, columns=["filename", "seismic_detected"])
print("Finished processing all files.")
print(f"Total of files analyzed: {i}")

In [None]:
# Apply extract_features to every lunar training recording and stack the
# per-file feature tables into df_2.
#
# The per-file tables are collected in a list and concatenated once at the
# end: concatenating inside the loop re-copies the growing table for every
# file, and the old "first iteration" bootstrap left df_2 pointing at a table
# from an earlier cell when the directory had no .mseed files.

directory_path = 'data/lunar/training/data/S12_GradeA/'

feature_frames = []
for filename in os.listdir(directory_path):
    # Only miniSEED recordings are processed.
    if not filename.endswith(".mseed"):
        continue
    print('Processing: ', filename)
    feature_frames.append(extract_features(os.path.join(directory_path, filename)))

# Fail loudly instead of continuing with a missing or stale df_2.
if not feature_frames:
    raise FileNotFoundError(f"No .mseed files found in {directory_path!r}")

df_2 = pd.concat(feature_frames)

In [None]:
# Join the extracted features onto the labels by filename (the left join keeps
# every labelled file even when it has no feature row), then tag the rows as
# training data (test = 0) coming from the Moon.
df_train = pd.merge(df_1, df_2, on='filename', how='left').assign(
    test=0,
    source='moon',
)

### Test Set

In [None]:
# Run process_file on every lunar test recording to learn whether it is
# flagged as a moonquake. The test set is spread over four station/grade
# folders; each folder yields its own label table (df_1 .. df_4, in the order
# listed below).

lunar_test_dirs = (
    'data/lunar/test/data/S15_GradeA/',
    'data/lunar/test/data/S15_GradeB/',
    'data/lunar/test/data/S16_GradeA/',
    'data/lunar/test/data/S16_GradeB/',
)

label_tables = []
for directory_path in lunar_test_dirs:
    results = []  # one process_file result per .mseed file in this folder
    i = 0         # number of .mseed files handled in this folder
    for entry in os.listdir(directory_path):
        # Anything that is not a miniSEED file is ignored.
        if not entry.endswith(".mseed"):
            continue
        print(entry)
        results.append(process_file(os.path.join(directory_path, entry)))
        i += 1

    # Turn this folder's results into a two-column table.
    label_tables.append(
        pd.DataFrame(results, columns=["filename", "seismic_detected"])
    )
    print("Finished processing all files.")
    print(f"Total of files analyzed: {i}")

# Expose the per-folder tables under the names the following cells expect.
df_1, df_2, df_3, df_4 = label_tables

In [None]:
# Stack the four per-folder label tables of the lunar test set into one.
df_5 = pd.concat((df_1, df_2, df_3, df_4))

In [None]:
# Apply extract_features to every lunar test recording. The test set is
# spread over four station/grade folders; each folder yields its own feature
# table (df_6 .. df_9, in the order listed below).
#
# The per-file tables are collected in a list and concatenated once per
# folder: concatenating inside the loop re-copies the growing table for every
# file, and the old "first iteration" bootstrap left the folder's table
# undefined (or stale from an earlier run) when a folder had no .mseed files.

lunar_test_dirs = (
    'data/lunar/test/data/S15_GradeA/',
    'data/lunar/test/data/S15_GradeB/',
    'data/lunar/test/data/S16_GradeA/',
    'data/lunar/test/data/S16_GradeB/',
)

feature_tables = []
for directory_path in lunar_test_dirs:
    feature_frames = []
    for filename in os.listdir(directory_path):
        # Only miniSEED recordings are processed.
        if not filename.endswith(".mseed"):
            continue
        print('Processing: ', filename)
        feature_frames.append(
            extract_features(os.path.join(directory_path, filename))
        )

    # Fail loudly instead of continuing with a missing or stale table.
    if not feature_frames:
        raise FileNotFoundError(f"No .mseed files found in {directory_path!r}")

    feature_tables.append(pd.concat(feature_frames))

# Expose the per-folder tables under the names the following cells expect.
df_6, df_7, df_8, df_9 = feature_tables

In [None]:
# Stack the four per-folder feature tables of the lunar test set into one.
df_10 = pd.concat((df_6, df_7, df_8, df_9))

In [None]:
# Join the extracted features onto the labels by filename (the left join keeps
# every labelled file even when it has no feature row), then tag the rows as
# test data (test = 1) coming from the Moon.
df_test = pd.merge(df_5, df_10, on='filename', how='left').assign(
    test=1,
    source='moon',
)

In [None]:
# Stack the lunar training and test tables and save them as a single CSV
# (with a header row, without the DataFrame index).
df = pd.concat((df_train, df_test))
df.to_csv('features_moon.csv', index=False, header=True)