In [19]:
import obspy
from obspy import read
import numpy as np
import pandas as pd
import os
from scipy.signal import stft

In [6]:
#Make folder for AI ready data
! mkdir data/ai_ready
filepath = os.getcwd() + '/data/ai_ready/'
clean_filepath = os.getcwd() + '/data/clean/'

mkdir: data/ai_ready: File exists


In [7]:
#read in cleaned mseed files, save as daily npy files and one giant 100 day file
hundred_data = np.array([])

for i in range(100):
    i += 1

    #read mseed
    st = read(clean_filepath+str(i)+'_cleaned.mseed')

    #save data to day long npys
    data = st[0].data
    np.save(filepath+str(i)+'_ready.npy', data)

    #append to overall array
    hundred_data = np.append(hundred_data, data)

#save hundred day data npy
np.save(filepath+'100_days.npy', hundred_data)

----------------------------------------------------------
This section of the notebook computes stft and statistics on the data seismic data

In [57]:
#function for calculating desired stats
def stats(x): #x being an array
    return np.array([np.nanmean(x), np.nanmax(x), np.nanmin(x), np.nanmedian(x), np.ptp(x), np.nanstd(x)])

In [60]:
#run stft following Zali et al. -- detailed in Figure 4
#want 96 frequency bins and 128 time bins, 691200 data points
fs = 8 #sampling rate Hz
nperseg = 256 #samples per window
noverlap = 32 #window overlap

#initialize variable to hold spectrograms
spectras = []

#initialize variable for waveform and spectral statistics
time = ['mean', 'max', 'min', 'median','range', 'std']
spectral = ['spec_' + t for t in time]
df = pd.DataFrame(columns=time+spectral)

for i in range(100):
    i += 1

    #read mseed
    st = read(clean_filepath+str(i)+'_cleaned.mseed')
    data = st[0].data

    #go hour by hour computing stfts and saving to array
    for hour in np.split(data, 24):
        out = stft(hour, fs=fs, nperseg=nperseg, noverlap=noverlap, return_onesided=True)

        #get magnitude of complex spectra
        spectra = np.abs(out[2])

        #flatten spectra to feed into clustering
        spectra = spectra.flatten()

        #add row to dataframe
        df.loc[len(df)] = np.concatenate([stats(data), stats(spectra)])

        #normalize spectra
        spectra /= np.max(spectra)

        if len(spectras) < 1:
            spectras = spectra

        else: #catch for hours with missing data
            try:
                spectras = np.vstack([spectras, spectra])
            except:
                print('Unequal data lengths')

#save hourly spectrogram file
np.save(filepath+'spectra.npy', spectras)

#save statistics file
df.to_csv(path_or_buf=filepath+'statistics.csv', index=False)

In [14]:
#inspect the dimensions of spectras array. We expect the first dimension to be 24 * n_days
spectras.shape

(2400, 16770)

In [61]:
#inspect statistics df
df.tail()

Unnamed: 0,mean,max,min,median,range,std,spec_mean,spec_max,spec_min,spec_median,spec_range,spec_std
2395,-0.000609,1.0,-0.643724,-0.000736,1.643724,0.041933,0.001138,0.074044,1.365549e-06,0.000318,0.074043,0.002634
2396,-0.000609,1.0,-0.643724,-0.000736,1.643724,0.041933,0.001117,0.041984,3.754943e-07,0.000329,0.041984,0.002499
2397,-0.000609,1.0,-0.643724,-0.000736,1.643724,0.041933,0.001135,0.141904,2.73548e-06,0.00032,0.141902,0.002977
2398,-0.000609,1.0,-0.643724,-0.000736,1.643724,0.041933,0.001167,0.06822,2.887628e-07,0.000327,0.068219,0.003127
2399,-0.000609,1.0,-0.643724,-0.000736,1.643724,0.041933,0.001215,0.079719,1.247811e-06,0.000348,0.079718,0.003212
