In [10]:
!wget http://dtn-max.ifa.hawaii.edu/SpaceApps/2024/spaceappschallenge_seismicdetection_data.zip

--2024-10-05 21:27:47--  http://dtn-max.ifa.hawaii.edu/SpaceApps/2024/spaceappschallenge_seismicdetection_data.zip
Resolving dtn-max.ifa.hawaii.edu (dtn-max.ifa.hawaii.edu)... 206.196.177.123
Connecting to dtn-max.ifa.hawaii.edu (dtn-max.ifa.hawaii.edu)|206.196.177.123|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 756718846 (722M) [application/zip]
Saving to: ‘spaceappschallenge_seismicdetection_data.zip’


2024-10-05 21:27:59 (62.2 MB/s) - ‘spaceappschallenge_seismicdetection_data.zip’ saved [756718846/756718846]



In [11]:
!unzip spaceappschallenge_seismicdetection_data.zip

Archive:  spaceappschallenge_seismicdetection_data.zip
   creating: data/
   creating: data/lunar/
   creating: data/lunar/catalog/
  inflating: data/lunar/catalog/apollo12_catalog_GradeA_final.csv  
   creating: data/lunar/train/
  inflating: data/lunar/train/xa.s12.00.mhz.1971-04-17HR00_evid00030.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1973-08-21HR00_evid00127.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1973-07-04HR00_evid00114.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1975-05-04HR00_evid00192.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1973-01-18HR00_evid00088.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1974-06-25HR00_evid00149.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1971-05-13HR00_evid00033.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1974-04-26HR00_evid00144.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1971-01-28HR00_evid00023.mseed  
  inflating: data/lunar/train/xa.s12.00.mhz.1973-03-01HR00_evid00093.mseed  

In [48]:
%cd /content
!ls

/content
data  sample_data  spaceappschallenge_seismicdetection_data.zip


In [None]:
#Install prerequisite packages
%pip install obspy

In [1]:
#Import modules
import numpy as np
import pandas as pd
import obspy
from obspy import read
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import os
import typing
from typing import *

In [50]:
#GLOBAL VARIABLES
DATASET_ROOT_PATH="data"

In [173]:
#Dataset
'''
class to assimilate data ,
Input: paths to dataset
Output: Multi-dim matrix,  np.ndarray or torch.tensor
'''
class SeismicDataset:

  def __init__(self,dataset_root:str)->None:
    self.dataset_root=dataset_root
    self.list_of_planets=["lunar","mars"]
    self.modes=["train","test"]
    self.dataset=None

  def loadDataset(self)->np.ndarray:
      '''
      dataset of shape (Number of samples, timestamps)
      '''
      signal_stack=[]
      for filename in self.dataset["filename"]:
        signal_stack.append(obspy.read(filename+".mseed")[0].data)
      return np.array(signal_stack)

  def generatePathToLogs(self)->None:
    '''
    returns a DataFrame with columns: filename,logname,time_rel(label)
    '''
    # Extract Catalog for time_rel aka label data
    database_roots={"catalog":
                    {"lunar":
                    {"train":"lunar/catalog"},
                    "mars":
                    {"train":"mars/catalog"}
                    }
                  }
    list_of_csvs_path=[]
    for planet_data in database_roots.values():
      for planet,data_cap in planet_data.items():
        for data_category,path in data_cap.items():
          path1=os.path.join(self.dataset_root,path)
          list_dirs=os.listdir(path1)
          for csvpath in list_dirs:
              path2=os.path.join(path1,csvpath)
              assert os.path.isfile(path2), "Path not found"
              list_of_csvs_path.append(path2)

    catalog=[]

    for csv_path in list_of_csvs_path:
      df=pd.read_csv(csv_path)
      df["logname"]=df["filename"].apply(lambda name: name.strip(".csv").strip(".mseed"))
      df = df.sort_values(by='logname')
      df=df.reset_index(drop=True)
      df.name="_".join(csv_path.split("/")[1:3])
      catalog.append(df)
    lunar_catalog=catalog[0]
    mars_catalog=pd.merge(catalog[1],catalog[2])
    #Extract signal data

    database_roots={"data":
                  {"lunar":
                  {"train":"lunar/train",
                  "test":"lunar/test"},
                  "mars":
                  {"train":"mars/train",
                  "test":"mars/test"}
                  }
                }
    list_of_csvs=[]

    for planet_data in database_roots.values():
        for planet,data_cap in planet_data.items():
            for data_category,path in data_cap.items():
                df=pd.DataFrame(columns=["filename",])

                path1=os.path.join(self.dataset_root,path)
                scandir=os.scandir(path1)
                respaths=[]
                lognames=[]
                for sample in scandir:

                  res_path=os.path.join(path1,sample.name.strip(".csv").strip(".mseed"))
                  log_name=sample.name.strip(".csv").strip(".mseed")
                  respaths.append(res_path)
                  lognames.append(log_name)

                df=pd.concat([df,pd.DataFrame({"filename":res_path,"logname":lognames})])
                df.attrs["metadata"]={"name":planet,"type":data_category}
                df= df.groupby(level=0).first()
                df = df.sort_values(by='logname')
                df = df.reset_index(drop=True)
                list_of_csvs.append(df)
                del df

    lunar_train_signals_table=list_of_csvs[0]
    lunar_test_signals_table=list_of_csvs[1]
    mars_train_signals_table=list_of_csvs[2]
    mars_test_signals_table=list_of_csvs[3]

    # Perform an inner join with logname to get a dataframe
    lunar_training_data=pd.merge(lunar_train_signals_table,lunar_catalog, on='logname')
    lunar_training_data=lunar_training_data.drop(columns=["filename_y"])
    lunar_training_data=lunar_training_data.rename(columns={"filename_x":"filename"})

    self.dataset=lunar_training_data


'''
Test block
'''
dataTester=SeismicDataset(DATASET_ROOT_PATH)
dataTester.generatePathToLogs()
train_data=dataTester.loadDataset()

In [None]:
#PreProcessing
'''
Input:Signal
Output: Transformed / Augmented data
'''

In [None]:
# Model Definition
'''
Define model architectures
Input: tensor signal
Output: binary signal
'''

In [None]:
# Model Training
'''
Define optimizers, hyperparams,
Start/end Training, store model checkpoint

'''

In [None]:
#Evaluation
'''
Input: Model, test data
Output: pd.Dataframe or similar table with evaluation results
'''

In [None]:
#Demo app
'''
Input: Model, sample input signal
'''