In [1]:
import matplotlib

# Memory leak with Matplotlib when in interactive mode and writing 1000+ plots
matplotlib.use('agg')
matplotlib.interactive(False)

import obspy, os, glob

CSV = 'Benz_catalog.csv' # 'OK_2014-2015-2016.csv' # 
FOLDER_NAME = 'benz_monthly'

# In seconds
DURATION = 20 

# Seconds before event
PRE_PADDING = 10

# Seconds after event
POST_PADDING = 10

DATA_PATH = os.path.join(os.getcwd(), 'data')
SPECTROGRAM_PATH = os.path.join(os.getcwd(), f'spectrograms/{FOLDER_NAME}')

stream_paths = glob.glob(os.path.join(DATA_PATH, 'train_mseed/*.mseed'))

In [2]:
# IMPORT SEISMIC TOOLBOX CODE
import sys
sys.path.insert(0, '/data/seismic_toolbox')

In [3]:
import seismic_code.spectrograms as spectro
import seismic_code.filter as filt
from seismic_code import helpers
from seismic_code.spectrograms import write_spectrogram

In [4]:
from pathlib import Path
from datetime import date
from collections import defaultdict
from obspy import read

class StreamPath:
    def __init__(self, path):
        self.raw_path = path
        self.path = Path(path)
        self.station = self.path.parts[-1].split('_')[0]
        date_str = self.path.parts[-1].split('_')[1].replace('.mseed', "")
        month, year = date_str.split('-')
        self.date_start = date(int(year), int(month), day=1)
        
    def load(self):
        """ Loads a stream from the path """
        return read(self.raw_path)
        
    def __str__(self):
        return self.raw_path
    
paths = list(map(StreamPath, stream_paths))
s1_date_paths = {stream.date_start: stream for stream in paths if stream.station == 'GSOK029'}

In [5]:
from obspy import Stream
from operator import add
from functools import reduce
from obspy import read

# Combine all streams into one
# stream = reduce(add, map(read, stream_paths))

In [6]:
import pandas as pd

quake_csv = pd.read_csv(os.path.join(DATA_PATH, CSV))

In [7]:
def change_column_name(df, column_index, new_name):
    columns = df.columns.values
    columns[column_index] = new_name
    df.columns = columns
    
change_column_name(quake_csv, 0, 'EventNum')

quake_csv.head()

Unnamed: 0,EventNum,Date,Time,Magnitude,Variance,origintime,utc_timestamp
0,0,02/15/2014,00:02:41,0.37,0.95,2014-02-15T00:02:41.000000Z,1392423000.0
1,1,02/15/2014,00:03:45,-0.44,0.86,2014-02-15T00:03:45.000000Z,1392423000.0
2,2,02/15/2014,00:08:07,-0.18,0.93,2014-02-15T00:08:07.000000Z,1392423000.0
3,3,02/15/2014,00:12:52,0.1,0.93,2014-02-15T00:12:52.000000Z,1392423000.0
4,4,02/15/2014,00:14:09,-0.47,0.89,2014-02-15T00:14:09.000000Z,1392423000.0


In [8]:
from itertools import islice
from obspy import UTCDateTime

def gen_row_date(df):
    for index, row in df.iterrows():
        year, month, day = row.origintime[:10].split('-')
        yield date(int(year), int(month), day=1), row
        
        
def gen_filter_waves(df, date_paths, pre_padding=PRE_PADDING, post_padding=POST_PADDING):
    curr_date = None
    stream = None

    for dat, row in gen_row_date(df):
        if dat != curr_date:
            try:
                stream_path = date_paths[dat]
            except KeyError:
                print(f"{dat} not in the stream_path")
                continue
                
            stream = stream_path.load()
            curr_date = dat
            
        # Start PRE_PADDING before event_time, end POST_PADDING after event_time
        yield filt.filter_waveform(stream, UTCDateTime(row.origintime), pre_padding, post_padding)
        
def gen_filter_waves_from_times(times, date_paths, pre_padding=PRE_PADDING, post_padding=POST_PADDING):
    curr_date = None
    stream = None
    i = 0
    for time in times:
        dat = date(int(time.year), int(time.month), day=1)
        if dat != curr_date:
            try:
                stream_path = date_paths[dat]
            except KeyError:
                print(f"{dat} not in the stream_path ({i})", end='\r')
                i += 1
                continue
                
            stream = stream_path.load()
            curr_date = dat
            
        # Start PRE_PADDING before event_time, end POST_PADDING after event_time
        yield filt.filter_waveform(stream, time, pre_padding, post_padding)


In [17]:
date_paths = s1_date_paths
amount_quakes = None   # Write all events if None
amount_noise = 10000

In [10]:
import warnings

### Create Paths for Each Month

In [11]:
quake_csv['Date'] = pd.to_datetime(quake_csv['Date'])

In [12]:
unique_months = quake_csv['Date'].dt.strftime("%m/%y").unique().tolist()

In [13]:
date_selector = lambda month_year: quake_csv['Date'].dt.strftime("%m/%y") == month_year

In [14]:
# The first month is not complete, so discard
del unique_months[0]

### Write Quakes

In [None]:
with warnings.catch_warnings():   
    warnings.simplefilter("ignore")
    
    for month_year in unique_months:
        path = os.path.join(os.path.join(SPECTROGRAM_PATH, month_year.replace('/', '-')), 'local')
        csv = quake_csv[date_selector(month_year)]
        quake_waves = gen_filter_waves(csv, date_paths)
        spectro.async_write_spectrograms(islice(quake_waves, amount_quakes), path, ignoreexceptions=True, write_streams=True)

Writing Files...
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_path
2014-03-01 not in the stream_pa

Process ForkPoolWorker-166:
Process ForkPoolWorker-205:
Process ForkPoolWorker-159:
Process ForkPoolWorker-121:
Process ForkPoolWorker-172:
Process ForkPoolWorker-218:
Process ForkPoolWorker-184:
Process ForkPoolWorker-163:
Process ForkPoolWorker-209:
Process ForkPoolWorker-157:
Process ForkPoolWorker-279:
Process ForkPoolWorker-164:
Process ForkPoolWorker-186:
Process ForkPoolWorker-217:
Process ForkPoolWorker-118:
Process ForkPoolWorker-221:
Process ForkPoolWorker-211:
Process ForkPoolWorker-103:
Process ForkPoolWorker-161:
Process ForkPoolWorker-207:
Process ForkPoolWorker-154:
Process ForkPoolWorker-170:
Process ForkPoolWorker-212:
Process ForkPoolWorker-156:
Process ForkPoolWorker-68:
Process ForkPoolWorker-84:
Process ForkPoolWorker-198:
Process ForkPoolWorker-199:
Process ForkPoolWorker-155:
Process ForkPoolWorker-150:
Process ForkPoolWorker-204:
Process ForkPoolWorker-189:
Process ForkPoolWorker-52:
Process ForkPoolWorker-108:
Process ForkPoolWorker-195:
Process ForkPoolWorker-

Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-4d4e221ab8fb>", line 8, in <module>
    spectro.async_write_spectrograms(islice(quake_waves, amount_quakes), path, ignoreexceptions=True, write_streams=True)
  File "/data/seismic_toolbox/seismic_code/spectrograms.py", line 133, in async_write_spectrograms
    pool.starmap(write_spectrogram_ignore_exceptions, work)  # Map the 'write_spectrogram' function to the work
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 276, in starmap
    return self._map_async(func, iterable, starmapstar, chunksize).get()
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 651, in get
    self.wait(timeout)
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 648, in wait
    self._event.wait(timeout)
  File "/opt/conda/lib/python3.7/threading.py", line

Process ForkPoolWorker-43:
Process ForkPoolWorker-179:


KeyboardInterrupt: 

Process ForkPoolWorker-90:
Process ForkPoolWorker-196:
Traceback (most recent call last):
Process ForkPoolWorker-107:
Traceback (most recent call last):
Process ForkPoolWorker-130:
Process ForkPoolWorker-147:
Process ForkPoolWorker-193:
Traceback (most recent call last):
Process ForkPoolWorker-5:
Process ForkPoolWorker-192:
Process ForkPoolWorker-91:
Process ForkPoolWorker-139:
Process ForkPoolWorker-116:
Process ForkPoolWorker-222:
Traceback (most recent call last):
Process ForkPoolWorker-74:
Traceback (most recent call last):
Process ForkPoolWorker-126:
Process ForkPoolWorker-219:
Traceback (most recent call last):
Process ForkPoolWorker-110:
Process ForkPoolWorker-185:
Process ForkPoolWorker-61:
Process ForkPoolWorker-44:
Process ForkPoolWorker-32:
Process ForkPoolWorker-57:
Process ForkPoolWorker-2:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-138:
Process ForkPoolWorker-21:
Process ForkPoolWorker-132:
Process ForkPoolWorker-1:
Proces



Process ForkPoolWorker-4:
Process ForkPoolWorker-140:
Traceback (most recent call last):
Process ForkPoolWorker-248:
Process ForkPoolWorker-37:




Process ForkPoolWorker-51:
  File "/data/seismic_toolbox/seismic_code/spectrograms.py", line 93, in write_spectrogram_ignore_exceptions
    return write_spectrogram(*args, **kwargs)
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 47, in starmapstar
    return list(itertools.starmap(args[0], args[1]))
Process ForkPoolWorker-280:
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-50:
KeyboardInterrupt
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
Traceback (most rece















Process ForkPoolWorker-229:




<class 'SystemError'>


Process ForkPoolWorker-230:
Process ForkPoolWorker-240:
Process ForkPoolWorker-264:
Process ForkPoolWorker-262:
Process ForkPoolWorker-260:
Process ForkPoolWorker-226:
Process ForkPoolWorker-236:
Process ForkPoolWorker-249:
Traceback (most recent call last):
Process ForkPoolWorker-231:
Process ForkPoolWorker-246:
Process ForkPoolWorker-228:
Process ForkPoolWorker-233:
Process ForkPoolWorker-27:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()




  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-281:




Process ForkPoolWorker-284:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/opt/conda/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
KeyboardInterrupt
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/opt/cond

### Write Noise

In [28]:
def get_csv_times(df):
    times = df[['origintime']]
    for row in times.iterrows():
        time = row[1].origintime
        yield UTCDateTime(time)



In [30]:
# Noise times centers around the given time by default... so pad X seconds in each direction
noise_pad = DURATION / 2

In [32]:
with warnings.catch_warnings():   
    warnings.simplefilter("ignore")
    
    for month_year in unique_months:
        path = os.path.join(os.path.join(SPECTROGRAM_PATH, month_year.replace('/', '-')), 'noise')
        csv = quake_csv[date_selector(month_year)]
        
        # Choose noise times only from the month
        csv_month = quake_csv[date_selector(month_year)]
        times = sorted(get_csv_times(csv_month))
        noise_times = helpers.get_noise_times(times_to_exclude=times, 
                                              startafter=times[0], 
                                              endbefore=times[-1],
                                              amount=amount_noise, 
                                              duration=DURATION)
        noise_times = sorted(noise_times)  # important, to make lazy_loading the streams better for the next function        noise_waves = gen_filter_waves_from_times(noise_times, date_paths, noise_pad, noise_pad)
        noise_waves = gen_filter_waves_from_times(noise_times, date_paths, noise_pad, noise_pad)
        
        # Write
        spectro.async_write_spectrograms(noise_waves, path, ignoreexceptions=False, write_streams=True)

Writing Files...
2014-03-01 not in the stream_path (9999)
Wrote Files
Writing Files...
2014-04-01 not in the stream_path (9999)
Wrote Files
Writing Files...
2014-05-01 not in the stream_path (9999)
Wrote Files
Writing Files...
2014-06-01 not in the stream_path (9999)
Wrote Files
Writing Files...

Wrote Files
Writing Files...
2014-08-01 not in the stream_path (9999)
Wrote Files


### Clean Up

In [None]:
def clean_up(path):
    """ Removes empty dirs """
    folders = glob.glob(os.path.join(path, '*/*/*/'))
    for folder in folders:
        if not os.listdir(folder):
             os.rmdir(folder)

clean_up(SPECTROGRAM_PATH)
