diff --git a/.gitignore b/.gitignore index a0c58d23f..cb53a92f1 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,6 @@ station.dat *L.S?????? *R.S?????? .idea +.coverage.* +eqcorrscan/core/subspace_statistic.c +eqcorrscan/core/sliding_normxcorr.c diff --git a/.travis.yml b/.travis.yml index d57bf8284..4700a455b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,16 +6,19 @@ matrix: env: PYTHON_VERSION=2.7 - os: linux env: PYTHON_VERSION=3.5 + - os: linux + env: PYTHON_VERSION=3.4 + # - os: linux + # env: PYTHON_VERSION=3.3 + # There isn't an openCV3 for python 3.3 on Conda -# Conda install on OSX is an issue - gives 0.238 xcorr when should give 1... -# - os: osx -# env: PYTHON_VERSION=2.7 -# - os: osx -# env: PYTHON_VERSION=3.5 + - os: osx + env: PYTHON_VERSION=2.7 + - os: osx + env: PYTHON_VERSION=3.5 sudo: false -# Test with multiple obspy versions, allow some backwards compatability env: global: - OBSPY_VERSION=1.0.1 @@ -26,7 +29,7 @@ install: export py=$PYTHON_VERSION; else export OS="Linux"; - export py=$TRAVIS_PYTHON_VERSION; + export py=$PYTHON_VERSION; fi - if [[ "$py" == "2.7_with_system_site_packages" ]]; then export py="2.7"; @@ -41,6 +44,7 @@ install: - hash -r - conda config --set always_yes yes --set changeps1 no - conda config --add channels menpo + - conda config --add channels obspy - conda update -q conda # Useful for debugging any issues with conda - conda info -a @@ -52,36 +56,47 @@ install: env_file="misc/py2_test_env.lis" PYFLAKES="pyflakes=0.9.0" fi + - echo $PYTHON_VERSION - | if [[ "$OS" == "MacOSX" ]]; then - conda create -q -n test-environment python=$PYTHON_VERSION numpy scipy matplotlib basemap pyproj flake8 future lxml decorator sqlalchemy mock nose gdal docopt coverage requests - else + conda create -q -n test-environment python=$PYTHON_VERSION numpy scipy matplotlib obspy flake8 mock coverage opencv3 + elif [[ "$py" == "3.5" ]]; then conda create -q -n test-environment --file $env_file + elif [[ "$py" == "2.7" ]]; then + conda create -q -n test-environment --file $env_file + else + conda create -q -n test-environment python=$PYTHON_VERSION numpy scipy matplotlib obspy flake8 mock coverage opencv3 fi - source activate test-environment - conda install $PYFLAKES - conda install pyproj - - | - if [[ "${py:0:1}" == "3" ]] && [[ "$OS" == "MacOSX" ]]; then - conda install -c menpo opencv3=3.1.0 - elif [[ "$OS" == "MacOSX" ]]; then - conda install opencv - fi + - conda install h5py # - | # if [[ "${py:0:1}" == "3" ]] && [[ "$OS" == "MacOSX" ]]; then # conda install opencv3 # conda install obspy # fi # elif [[ "$OS" == "MacOSX" ]]; then # conda install -c menpo opencv3=3.1.0 # fi - pip install coveralls - pip install geographiclib - pip install https://github.com/megies/PyImgur/archive/py3.zip - pip install pep8-naming - pip install pytest - pip install pytest-cov - - pip install obspy==$OBSPY_VERSION # - pip install obspy==$OBSPY_VERSION + - pip install Cython # - pip install obspy==$OBSPY_VERSION - pip freeze - conda list # done installing dependencies - git version - - pip install . + - python setup.py install + script: - python setup.py test + # Need to ignore the files in current directory to ensure we find the + # installed version of EQcorrscan with compiled code.
+ - python setup.py test after_success: # Check how much code is actually tested and send this report to coveralls diff --git a/CHANGES.md b/CHANGES.md index 9a9f6a867..5072d0728 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,17 @@ +## 0.1.3 +* Now testing on OSX (python 2.7 and 3.5) - also added Linux python 3.4; +* Add lag-calculation and tests for it; +* Change how lag-calc does the trace splitting to reduce memory usage; +* Added pick-filtering utility to clean up tutorials; +* Change template generation function names for clarity (wrappers for +deprecated names); +* Add more useful error messages when picks are not associated with +waveforms; +* Add example plots for more plotting functions; +* Add subspace detector including docs and tutorial; +* Add *delayed* option to all template_gen functions, set to True by +default which retains old behaviour. + ## 0.1.2 * Add handling for empty location information in sfiles. * Added project setup script which creates a useful directory structure and copies diff --git a/README.md b/README.md index 43369e0f0..b9bdef8d7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # EQcorrscan -## A python package to conduct matched-filter earthquake detections. +## A python package for the detection and analysis of repeating and near-repeating earthquakes. [![Join the chat at https://gitter.im/calum-chamberlain/EQcorrscan](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/calum-chamberlain/EQcorrscan?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![TravisCIStatus](https://travis-ci.org/calum-chamberlain/EQcorrscan.svg?branch=master)](https://travis-ci.org/calum-chamberlain/EQcorrscan) @@ -9,10 +9,10 @@ [![DocumentationStatus](http://readthedocs.org/projects/eqcorrscan/badge/?version=latest)](http://eqcorrscan.readthedocs.org/en/latest/?badge=latest) # Installation -Installation has been tested on both OSX and Linux (Ubuntu), and now -Windows systems. We support Python versions 2.7 and 3.5. The codes likely -work on Py 3.4 too, but we currently don't test this and recommend users to -work in Py 3.5. +Installation has been tested on both OSX and Linux (Ubuntu), and +Windows systems. We support Python versions 2.7, 3.4 and 3.5. +Note that, although we support Windows, EQcorrscan is optimized for +Linux-style distributions. Instructions for installing EQcorrscan and the required dependency, openCV are linked from the [docs](http://eqcorrscan.readthedocs.io/en/latest/intro.html#installation) @@ -56,13 +56,13 @@ the gh-pages branch. # Functionality -This package contains routines to enable the user to conduct match-filter earthquake +This package contains routines to enable the user to conduct matched-filter earthquake detections using [obspy](https://github.com/obspy/obspy/wiki) bindings when reading and writing seismic data, and the correlation routine in [openCV](http://opencv.org/). -The OpendCV package is not installed by this software, due to a need to build from +The OpenCV package is not installed by this software, due to a need to build from source. The user should follow the instructions above for OpenCV install. -This package was written to implement the Matlab routines -used by Chamberlain et al. (2014) for the detection of low-frequency earthquakes. + +We have also added subspace detection and correlation-derived pick adjustment.
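A minimal usage sketch of the matched-filter core described above, assuming `template_names`, `templates` and `st` have already been prepared elsewhere; the threshold and trigger values are illustrative only:

```python
# Hedged sketch: drive the matched-filter core over continuous data.
# Assumes template_names (list of str), templates (list of obspy Streams)
# and st (a continuous obspy Stream) exist; values are illustrative.
from eqcorrscan.core.match_filter import match_filter

detections = match_filter(template_names=template_names,
                          template_list=templates, st=st,
                          threshold=8.0, threshold_type='MAD',
                          trig_int=6.0, plotvar=False, cores=4)
for detection in detections:
    # DETECTION objects carry the template name, time and statistic value
    print(detection.template_name, detection.detect_time,
          detection.detect_val)
```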
Also within this package are: * Clustering routines for seismic data; * Peak finding algorithm (basic); * Automatic amplitude picker for local magnitude scale; * [Seisan](http://seisan.info) S-file integration for database management and routine earthquake location; * Stacking routines including phase-weighted stacking based on Thurber et al. (2014); * Brightness based template creation based on the work of Frank et al. (2014); * Singular Value Decomposition derived magnitude calculations based on Rubinstein & Ellsworth (2010). -We are currently hovering around 9,000 lines of code (including doc-strings) - it is probably worth -having a look at the docs to check what functions we have. We plan to write a series of tutorials to be -included on the EQcorrscan API to highlight key functions, currently our tutorials only show -how to do the core matched-filter detection. +We are currently hovering around 15,000 lines of code (including doc-strings) - it is probably worth +having a look at the docs to check what functions we have. We are writing a series of tutorials, +included in the EQcorrscan API docs, to highlight key functions. # Licence @@ -100,18 +99,6 @@ Please document your functions following the other documentation within the functions, these doc-scripts will then be built into the main documentation using Sphinx. -We are trying to implement a better branching model, following that found [here](http://nvie.com/posts/a-successful-git-branching-model/). -To this end, please fork the development branch if you want to develop -things, and flag issues in the master for us to bugfix. -If you have a feature you want to develop please create a new branch -from the development branch and work on it there, we can then merge -it back in to the development branch when it is stable enough. - -This branching model (git-flow) is pretty well established, and I would recommend -you to install [git-flow](https://github.com/nvie/gitflow/wiki/Installation) and -read their [documentation](https://github.com/nvie/gitflow). It seems pretty intuitive and -will keep us all branching in the same way. - # References * CJ Chamberlain, DR Shelly, J Townend, TA Stern (2014) [Low‐frequency earthquakes reveal punctuated slow slip on the deep extent of the Alpine Fault, New Zealand](http://onlinelibrary.wiley.com/doi/10.1002/2014GC005436/full), __G-cubed__, doi:10.1002/2014GC005436 * Thurber, C. H., Zeng, X., Thomas, A. M., & Audet, P. (2014). [Phase‐Weighted Stacking Applied to Low‐Frequency Earthquakes](http://www.bssaonline.org/content/early/2014/08/12/0120140077.abstract), __BSSA__, doi:10.1785/0120140077. diff --git a/appveyor.yml b/appveyor.yml index 9d1a66469..52558d8ad 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,5 @@ + # AppVeyor.com is a Continuous Integration service to build and run tests under Windows environment: @@ -29,14 +30,16 @@ install: - "%CMD_IN_ENV% python --version" # Install the build and runtime dependencies of the project. - "conda update -q --yes conda" + - "conda config --add channels menpo" + - "conda config --add channels obspy" # Create a conda environment using the astropy bonus packages - "conda create -q --yes -n test python=%PYTHON_VERSION%" - "activate test" # Install default dependencies - - "conda install -q --yes pip numpy scipy matplotlib=1.3.1 lxml sqlalchemy flake8 mock nose gdal decorator requests basemap pytest pyflakes=0.9.0" + - "conda install -q --yes pip numpy scipy opencv3 matplotlib obspy mock flake8 pytest pyflakes=0.9.0 cython h5py" # additional dependencies - - "choco install opencv -version 2.4.9.20140518" - - "powershell copy-item C:\\OpenCV249\\opencv\\build\\python\\2.7\\x64\\cv2.pyd C:\\conda\\envs\\test\\lib\\site-packages\\."
+ # - "choco install opencv" + # - "powershell copy-item C:\\OpenCV\\opencv\\build\\python\\2.7\\x64\\cv2.pyd C:\\conda\\envs\\test\\lib\\site-packages\\." - "pip install pyimgur" - "pip install -U future" - "pip install pytest-cov" @@ -49,4 +52,4 @@ build: false test_script: - "%CMD_IN_ENV% python setup.py develop" - - "%CMD_IN_ENV% py.test" + - "%CMD_IN_ENV% py.test" \ No newline at end of file diff --git a/eqcorrscan/__init__.py b/eqcorrscan/__init__.py index b876ac8c5..b995b6a83 100755 --- a/eqcorrscan/__init__.py +++ b/eqcorrscan/__init__.py @@ -34,9 +34,11 @@ import importlib import warnings -__all__ = ['core', 'utils', 'par'] +__all__ = ['core', 'utils'] + + +__version__ = '0.1.3' -__version__ = '0.1.2' # Cope with changes to name-space to remove most of the camel-case _import_map = { diff --git a/eqcorrscan/core/__init__.py b/eqcorrscan/core/__init__.py index f3c1528fc..be006d6e5 100644 --- a/eqcorrscan/core/__init__.py +++ b/eqcorrscan/core/__init__.py @@ -28,7 +28,8 @@ python-pylab - used for plotting """ -__all__ = ['template_gen', 'match_filter', 'bright_lights'] +__all__ = ['template_gen', 'match_filter', 'bright_lights', 'lag_calc', + 'subspace', 'subspace_statistic'] if __name__ == '__main__': import doctest diff --git a/eqcorrscan/core/lag_calc.py b/eqcorrscan/core/lag_calc.py new file mode 100644 index 000000000..b680008f4 --- /dev/null +++ b/eqcorrscan/core/lag_calc.py @@ -0,0 +1,364 @@ +""" +Functions to generate pick-corrections for events detected by correlation. + + +""" +import numpy as np +from eqcorrscan.core.match_filter import normxcorr2 +import scipy +import warnings + + +def _xcorr_interp(ccc, dt): + """ + Interpolate around the maximum correlation value for sub-sample precision. + + :param ccc: Cross-correlation array + :type ccc: numpy.ndarray + :param dt: Sample interval + :type dt: float + + :return: Position of interpolated maximum in seconds from start of ccc + :rtype: float + """ + if ccc.shape[0] == 1: + cc = ccc[0] + else: + cc = ccc + # Code borrowed from obspy.signal.cross_correlation.xcorr_pick_correction + cc_curvature = np.concatenate((np.zeros(1), np.diff(cc, 2), np.zeros(1))) + cc_t = np.arange(0, len(cc) * dt, dt) + peak_index = cc.argmax() + first_sample = peak_index + # XXX this could be improved.. + while first_sample > 0 and cc_curvature[first_sample - 1] <= 0: + first_sample -= 1 + last_sample = peak_index + while last_sample < len(cc) - 1 and cc_curvature[last_sample + 1] <= 0: + last_sample += 1 + num_samples = last_sample - first_sample + 1 + if num_samples < 3: + msg = "Less than 3 samples selected for fit to cross " + \ + "correlation: %s" % num_samples + raise IndexError(msg) + if num_samples < 5: + msg = "Less than 5 samples selected for fit to cross " + \ + "correlation: %s" % num_samples + warnings.warn(msg) + coeffs, residual = scipy.polyfit( + cc_t[first_sample:last_sample + 1], + cc[first_sample:last_sample + 1], deg=2, full=True)[:2] + # check results of fit + if coeffs[0] >= 0: + msg = "Fitted parabola opens upwards!" + warnings.warn(msg) + if residual > 0.1: + msg = "Residual in quadratic fit to cross correlation maximum " + \ + "larger than 0.1: %s" % residual + warnings.warn(msg) + # X coordinate of vertex of parabola gives time shift to correct + # differential pick time. Y coordinate gives maximum correlation + # coefficient.
+ shift = -coeffs[1] / 2.0 / coeffs[0] + coeff = (4 * coeffs[0] * coeffs[2] - coeffs[1] ** 2) / (4 * coeffs[0])  # peak correlation of the fitted parabola (not currently returned) + return shift + +def _channel_loop(detection, template, min_cc, interpolate=False, i=0, + debug=0): + """ + Inner loop for correlating and assigning picks. + + Utility function to take a stream of data for the detected event and write + maximum correlation to absolute time as picks in an obspy.core.event.Event + object. + Only outputs picks with a cross-correlation above min_cc. + + :type detection: obspy.core.stream.Stream + :param detection: Stream of data for the slave event detected using \ template. + :type template: obspy.core.stream.Stream + :param template: Stream of data as the template for the detection. + :type min_cc: float + :param min_cc: Minimum cross-correlation value to allow a pick to be made. + :type interpolate: bool + :param interpolate: Interpolate the correlation function to achieve \ sub-sample precision. + :type i: int + :param i: Used to track which process has occurred when running in \ parallel. + + :returns: Event object containing net, sta, chan information + :rtype: obspy.core.event.Event + """ + from obspy.core.event import Event, Pick, WaveformStreamID + from obspy.core.event import ResourceIdentifier + event = Event() + s_stachans = {} + used_s_sta = [] + for tr in template: + temp_net = tr.stats.network + temp_sta = tr.stats.station + temp_chan = tr.stats.channel + image = detection.select(station=temp_sta, + channel=temp_chan) + if image: + ccc = normxcorr2(tr.data, image[0].data) + # Convert the maximum cross-correlation time to an actual time + if debug > 3: + print('********DEBUG: Maximum cross-corr=%s' % np.amax(ccc)) + if np.amax(ccc) > min_cc: + if interpolate: + try: + interp_max = _xcorr_interp(ccc=ccc, + dt=image[0].stats.delta) + except IndexError: + print('Could not interpolate ccc, not smooth') + interp_max = np.argmax(ccc) * image[0].stats.delta + picktime = image[0].stats.starttime + interp_max + else: + picktime = image[0].stats.starttime + (np.argmax(ccc) * + image[0].stats.delta) + else: + continue + # Perhaps weight each pick by the cc val or cc val^2? + # weight = np.amax(ccc) ** 2 + if temp_chan[-1:] == 'Z': + phase = 'P' + # Only take the S-pick with the best correlation + elif temp_chan[-1:] in ['E', 'N']: + phase = 'S' + if temp_sta not in s_stachans and np.amax(ccc) > min_cc: + s_stachans[temp_sta] = ((temp_chan, np.amax(ccc), + picktime)) + elif temp_sta in s_stachans and np.amax(ccc) > min_cc: + if np.amax(ccc) > s_stachans[temp_sta][1]: + picktime = picktime  # keep the new, better-correlated picktime + else: + picktime = s_stachans[temp_sta][2] + temp_chan = s_stachans[temp_sta][0] + elif np.amax(ccc) < min_cc and temp_sta not in used_s_sta: + used_s_sta.append(temp_sta) + else: + continue + else: + phase = None + _waveform_id = WaveformStreamID(network_code=temp_net, + station_code=temp_sta, + channel_code=temp_chan) + event.picks.append(Pick(waveform_id=_waveform_id, + time=picktime, + method_id=ResourceIdentifier('EQcorrscan'), + phase_hint=phase)) + return (i, event) + + +def _day_loop(detection_streams, template, min_cc, interpolate=False, + cores=False, debug=0): + """ + Function to loop through multiple detections for one template. + + Designed to run for the same day of data for I/O simplicity, but as you + are passing stream objects it could run for all the detections ever, as + long as you have the RAM! + + :type detection_streams: list + :param detection_streams: List of all the detections for this template that + you want to compute the optimum pick for. Each item in the list + should be an obspy.core.stream.Stream object.
+ :type template: obspy.core.stream.Stream + :param template: The original template used to detect the detections passed + :type min_cc: float + :param min_cc: Minimum cross-correlation value to be allowed for a pick. + :type interpolate: bool + :param interpolate: Interpolate the correlation function to achieve \ + sub-sample precision. + + :returns: Catalog object containing Event objects for each detection + created by this template. + :rtype: obspy.core.event.Catalog + """ + from multiprocessing import Pool, cpu_count + # Used to run detections in parallel + from obspy.core.event import Catalog + if not cores: + num_cores = cpu_count() + else: + num_cores = cores + if num_cores > len(detection_streams): + num_cores = len(detection_streams) + pool = Pool(processes=num_cores) + # Parallelize generation of events for each detection: + # results is a list of (i, event class) + results = [pool.apply_async(_channel_loop, args=(detection_streams[i], + template, min_cc, + interpolate, i, debug)) + for i in range(len(detection_streams))] + pool.close() + events_list = [p.get() for p in results] + pool.join() + events_list.sort(key=lambda tup: tup[0]) # Sort based on i. + temp_catalog = Catalog() + temp_catalog.events = [event_tup[1] for event_tup in events_list] + return temp_catalog + + +def _prepare_data(detect_data, detections, zipped_templates, delays, + shift_len): + """Prepare data for lag_calc - reduce memory here. + + :type detect_data: obspy.core.Stream + :type detections: list + :type zipped_templates: zip + :type delays: list + :type shift_len: float + + :returns: List of detect_streams to be worked on + :rtype: list + """ + from obspy import Stream + detect_streams = [] + for detection in detections: + # Stream to be saved for new detection + detect_stream = [] + for tr in detect_data: + tr_copy = tr.copy() + # Right now, copying each trace hundreds of times... + template = [t for t in zipped_templates + if str(t[0]) == str(detection.template_name)] + if len(template) > 0: + template = template[0] + else: + warnings.warn('No template with name: %s' % + detection.template_name) + for t in zipped_templates: + print(t) + continue + template = template[1].select(station=tr.stats.station, + channel=tr.stats.channel) + if template: + # Save template trace length in seconds + template_len = len(template[0]) / \ + template[0].stats.sampling_rate + else: + continue + # If there is no template-data match then skip the rest + # of the trace loop. + # Grab the delays for the desired template: [(sta, chan, delay)] + delay = [delay for delay in delays if delay[0] == detection. + template_name][0][1] + # Now grab the delay for the desired trace for this template + delay = [d for d in delay if d[0] == tr.stats.station and + d[1] == tr.stats.channel][0][2] + detect_stream.append(tr_copy.trim(starttime=detection.detect_time - + shift_len + delay, + endtime=detection.detect_time + + delay + shift_len + + template_len)) + del tr_copy + for tr in detect_stream: + if len(tr.data) == 0: + detect_stream.remove(tr) + if not len(detect_stream) == 0: + # Create tuple of (template name, data stream) + detect_streams.append((detection.template_name, + Stream(detect_stream))) + return detect_streams + + +def lag_calc(detections, detect_data, template_names, templates, + shift_len=0.2, min_cc=0.4, cores=1, interpolate=False, plot=False): + """ + Main lag-calculation function for detections of specific events. 
+ + Overseer function to take a list of detection objects, cut the data + around them to the length of the template plus shift_len on either + side, and generate picks at the lag-times of maximum correlation, + provided that correlation is above the min_cc. The resulting events + can then be written out as SEISAN s-files or QuakeML. + + :type detections: list + :param detections: List of DETECTION objects + :type detect_data: obspy.core.stream.Stream + :param detect_data: All the data needed to cut from - can be a gappy Stream + :type template_names: list + :param template_names: List of the template names, used to help identify \ families of events. Must be in the same order as templates. + :type templates: list + :param templates: List of the templates, templates are of type: \ obspy.core.stream.Stream. + :type shift_len: float + :param shift_len: Shift length allowed for the pick in seconds, will be + plus/minus this amount - default=0.2 + :type min_cc: float + :param min_cc: Minimum cross-correlation value to be considered a pick, + default=0.4 + :type cores: int + :param cores: Number of cores to use in parallel processing, defaults to \ one. + :type interpolate: bool + :param interpolate: Interpolate the correlation function to achieve \ sub-sample precision. + :type plot: bool + :param plot: To generate a plot for every detection or not, defaults to \ False. + + :returns: Catalog of events with picks. No origin information is \ included, these events can then be written out via \ obspy.core.event functions, or to seisan Sfiles using Sfile_util \ and located. + :rtype: obspy.core.event.Catalog + + .. rubric:: Example + + >>> from eqcorrscan.core import lag_calc + + .. note:: Picks output in catalog are generated relative to the template \ start-time. For example, if you generated your template with a \ pre_pick time of 0.2 seconds, you should expect picks generated by \ lag_calc to occur 0.2 seconds before the true phase-pick. This \ is because we do not currently store template meta-data alongside the \ templates. + + .. warning:: Because of the above note, origin times will be consistently \ shifted by the static pre_pick applied to the templates. + """ + from obspy.core.event import Catalog + from eqcorrscan.utils.plotting import plot_repicked + + # First work out the delays for each template + delays = [] # List of tuples of (tempname, (sta, chan, delay)) + zipped_templates = list(zip(template_names, templates)) + for template in zipped_templates: + temp_delays = [] + for tr in template[1]: + temp_delays.append((tr.stats.station, tr.stats.channel, + tr.stats.starttime - template[1].
+ sort(['starttime'])[0].stats.starttime)) + delays.append((template[0], temp_delays)) + # Segregate detections by template, then feed to day_loop + initial_cat = Catalog() + for template in zipped_templates: + template_detections = [detection for detection in detections + if detection.template_name == template[0]] + detect_streams = _prepare_data(detect_data=detect_data, + detections=template_detections, + zipped_templates=zipped_templates, + delays=delays, shift_len=shift_len) + detect_streams = [detect_stream[1] for detect_stream in detect_streams] + if len(template_detections) > 0: + template_cat = _day_loop(detection_streams=detect_streams, + template=template[1], min_cc=min_cc, + interpolate=interpolate, cores=cores) + initial_cat += template_cat + if plot: + for i, event in enumerate(template_cat): + if len(event.picks) == 0: + print('Made no picks for event') + print(event) + continue + plot_repicked(template=template[1], picks=event.picks, + det_stream=detect_streams[i]) + return initial_cat + + +if __name__ == '__main__': + import doctest + doctest.testmod() \ No newline at end of file diff --git a/eqcorrscan/core/match_filter.py b/eqcorrscan/core/match_filter.py index 8496bad94..e6dcf673c 100644 --- a/eqcorrscan/core/match_filter.py +++ b/eqcorrscan/core/match_filter.py @@ -267,104 +267,116 @@ def normxcorr2(template, image): return ccc -def _template_loop(template, chan, station, channel, debug=0, i=0): - """ - Internal loop for parallel processing. - Sister loop to handle the correlation of a single template (of \ +def _template_loop(template, chan, station, channel, do_subspace=False, + debug=0, i=0): + r"""Sister loop to handle the correlation of a single template (of \ multiple channels) with a single channel of data. - :type template: obspy.Stream + :type template: obspy.Stream or list of obspy.Stream if subspace is True :type chan: np.array :type station: string :type channel: string + :type do_subspace: bool + :param do_subspace: Flag for running subspace detection. Defaults to False. :type i: int :param i: Optional argument, used to keep track of which process is being \ run. :returns: tuple of (i, ccc) with ccc as an ndarray - - .. note:: This function currently assumes only one template-channel per \ - data-channel, while this is normal for a standard matched-filter \ - routine, if we wanted to impliment a subspace detector, this would be \ - the function to change, I think. E.g. where I currently take only \ - the first matching channel, we could loop through all the matching \ - channels and then sum the correlation sums - however I haven't yet - implimented detection based on that. More reading of the Harris \ - document required. """ from eqcorrscan.utils.timer import Timer - - ccc = np.array([np.nan] * (len(chan) - len(template[0].data) + 1), - dtype=np.float16) - ccc = ccc.reshape((1, len(ccc))) # Set default value for + from eqcorrscan.core import subspace + if do_subspace: + temp_len = len(template[0][0].data) + else: + temp_len = len(template[0].data) + cstat = np.array([np.nan] * (len(chan) - temp_len + 1), dtype=np.float16) + cstat = cstat.reshape((1, len(cstat))) # Set default value for # cross-channel correlation in case there are no data that match our # channels. 
- with Timer() as t: # While each bit of this loop isn't slow, looping through the if # statement when I don't need to adds up, I should work this out # earlier - template_data = template.select(station=station, - channel=channel) - # I will for now assume that you only have one template per-channel - template_data = template_data[0] - delay = template_data.stats.starttime - \ - template.sort(['starttime'])[0].stats.starttime - pad = np.array([0] * int(round(delay * - template_data.stats.sampling_rate))) - image = np.append(chan, pad)[len(pad):] - ccc = (normxcorr2(template_data.data, image)) - ccc = ccc.astype(np.float16) + if do_subspace: + sin_vecs = [st.select(station=station, channel=channel)[0].data + for st in template + if len(st.select(station=station, + channel=channel)) != 0] + # Convert trace data to np array + detector = np.asarray(sin_vecs) + cstat = subspace.det_statistic(detector, data=chan) + cstat = cstat.reshape((1, len(cstat))) + # Do not convert subspace statistic to float16 due to overrunning + # 16 bit precision in the mean calculation. np.isinf(np.mean())=T + # cstat = cstat.astype(np.float16) + else: + template_data = template.select(station=station, + channel=channel) + # I will for now assume that you only have one template per-channel + template_data = template_data[0] + delay = template_data.stats.starttime - \ + template.sort(['starttime'])[0].stats.starttime + pad = np.array([0] * int(round(delay * + template_data.stats.sampling_rate))) + image = np.append(chan, pad)[len(pad):] + cstat = (normxcorr2(template_data.data, image)) + cstat = cstat.astype(np.float16) # Convert to float16 to save memory for large problems - lose some # accuracy which will affect detections very close to threshold # # There is an interesting issue found in the tests that sometimes what # should be a perfect correlation results in a max of ccc of 0.99999994 # Converting to float16 'corrects' this to 1.0 - bad workaround. - if debug >= 2 and t.secs > 4: - print("Single if statement took %s s" % t.secs) - if not template_data: - print("Didn't even correlate!") - print(station + ' ' + channel) - elif debug >= 2: - print("If statement without correlation took %s s" % t.secs) if debug >= 3: print('********* DEBUG: ' + station + '.' + - channel + ' ccc MAX: ' + str(np.max(ccc[0]))) + channel + ' ccc MAX: ' + str(np.max(cstat[0]))) print('********* DEBUG: ' + station + '.' 
+ - channel + ' ccc MEAN: ' + str(np.mean(ccc[0]))) - if np.isinf(np.mean(ccc[0])): + channel + ' ccc MEAN: ' + str(np.mean(cstat[0]))) + if np.isinf(np.mean(cstat[0])): warnings.warn('Mean of ccc is infinite, check!') if debug >= 3: - np.save('inf_cccmean_ccc.npy', ccc[0]) - np.save('inf_cccmean_template.npy', template_data.data) - np.save('inf_cccmean_image.npy', image) + np.save('inf_cccmean_ccc_%02d.npy' % i, cstat[0]) + if do_subspace: + np.save('inf_cccmean_template_%02d.npy' % i, sin_vecs) + np.save('inf_cccmean_image_%02d.npy' % i, chan) + else: + np.save('inf_cccmean_template_%02d.npy' % i, template_data.data) + np.save('inf_cccmean_image_%02d.npy' % i, image) + cstat = np.zeros(len(cstat)) + cstat = cstat.reshape((1, len(cstat))) + # Returns zeros if debug >= 3: - print('shape of ccc: ' + str(np.shape(ccc))) - print('A single ccc is using: ' + str(ccc.nbytes / 1000000) + 'MB') - print('ccc type is: ' + str(type(ccc))) + print('shape of ccc: ' + str(np.shape(cstat))) + print('A single ccc is using: ' + str(cstat.nbytes / 1000000) + 'MB') + print('ccc type is: ' + str(type(cstat))) if debug >= 3: - print('shape of ccc: ' + str(np.shape(ccc))) + print('shape of ccc: ' + str(np.shape(cstat))) print("Parallel worker " + str(i) + " complete") - return (i, ccc) + return (i, cstat) -def _channel_loop(templates, stream, cores=1, debug=0): +def _channel_loop(templates, stream, cores=1, do_subspace=False, debug=0): """ Internal loop for parallel processing. + Loop to generate cross channel correlation sums for a series of templates; \ hands off the actual correlations to a sister function which can be run \ in parallel. - :type templates: :class: 'obspy.Stream' + :type templates: list :param templates: A list of templates, where each one should be an \ obspy.Stream object containing multiple traces of seismic data and \ - the relevant header information. + the relevant header information. If do_subspace is True, templates \ + should be a list of lists of obspy.Stream objects, one list for each \ + detector of length n, where n is the number of singular vectors. :param stream: A single obspy.Stream object containing daylong seismic \ data to be correlated through using the templates. This is in effect \ the image. - :type core: int - :param core: Number of cores to loop over + :type cores: int + :param cores: Number of cores to loop over + :type do_subspace: bool + :param do_subspace: Flag for running subspace detection. Defaults to False. :type debug: int :param debug: Debug level.
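As a toy illustration of the two per-channel statistics `_template_loop` can now dispatch to, here is a hedged sketch; the array shapes are made up, and the random vectors are not an orthonormal basis, so this only exercises the calls rather than producing meaningful detection values:

```python
# Toy sketch: matched-filter vs subspace statistics on one channel.
# Shapes are illustrative only; real inputs come from obspy Trace.data.
import numpy as np
from eqcorrscan.core.match_filter import normxcorr2
from eqcorrscan.core import subspace

chan = np.random.randn(2000).astype(np.float32)       # continuous channel
template = np.random.randn(200).astype(np.float32)    # one template trace
ccc = normxcorr2(template, chan)                       # matched-filter path

sin_vecs = np.random.randn(4, 200).astype(np.float32)  # stand-in for 4 singular vectors
cstat = subspace.det_statistic(sin_vecs, data=chan)     # subspace path
print(np.shape(ccc), np.shape(cstat))
```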
@@ -389,8 +401,12 @@ def _channel_loop(templates, stream, cores=1, debug=0): # Note: This requires all templates to be the same length, and all channels # to be the same length + if do_subspace: + temp_len = len(templates[0][0][0].data) + else: + temp_len = len(templates[0][0].data) cccs_matrix = np.array([np.array([np.array([0.0] * (len(stream[0].data) - - len(templates[0][0].data) + 1))] * + temp_len + 1))] * len(templates))] * 2, dtype=np.float32) # Initialize number of channels array no_chans = np.array([0] * len(templates)) @@ -409,7 +425,7 @@ def _channel_loop(templates, stream, cores=1, debug=0): pool = Pool(processes=num_cores) results = [pool.apply_async(_template_loop, args=(templates[i], tr_data, station, - channel, debug, i)) + channel, do_subspace, debug, i)) for i in range(len(templates))] pool.close() if debug >= 1: @@ -603,7 +619,8 @@ def match_filter(template_names, template_list, st, threshold, if not type(st) == Stream: msg = 'st must be of type: obspy.core.stream.Stream' raise IOError(msg) - if threshold_type not in ['MAD', 'absolute', 'av_chan_corr']: + if str(threshold_type) not in [str('MAD'), str('absolute'), + str('av_chan_corr')]: msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr' raise IOError(msg) @@ -628,12 +645,20 @@ def match_filter(template_names, template_list, st, threshold, print(template_stachan) print('I have daylong data for these stations:') print(data_stachan) - # Perform a check that the daylong vectors are daylong + # Perform a check that the daylong vectors are all the same length + min_start_time = min([tr.stats.starttime for tr in stream]) + max_end_time = max([tr.stats.endtime for tr in stream]) + longest_trace_length = stream[0].stats.sampling_rate * (max_end_time - + min_start_time) for tr in stream: - if not tr.stats.sampling_rate * 86400 == tr.stats.npts: - msg = ' '.join(['Data are not daylong for', tr.stats.station, - tr.stats.channel]) - raise ValueError(msg) + if not tr.stats.npts == longest_trace_length: + msg = 'Data are not equal length, padding short traces' + warnings.warn(msg) + start_pad = np.zeros(int(tr.stats.sampling_rate * + (tr.stats.starttime - min_start_time))) + end_pad = np.zeros(int(tr.stats.sampling_rate * + (max_end_time - tr.stats.endtime))) + tr.data = np.concatenate([start_pad, tr.data, end_pad]) # Perform check that all template lengths are internally consistent for i, temp in enumerate(template_list): if len(set([tr.stats.npts for tr in temp])) > 1: @@ -648,7 +673,7 @@ def match_filter(template_names, template_list, st, threshold, # data make the data NaN to return NaN ccc_sum # Note: this works if debug >= 2: - print('Ensuring all template channels have matches in daylong data') + print('Ensuring all template channels have matches in long data') template_stachan = [] for template in templates: for tr in template: @@ -688,7 +713,10 @@ def match_filter(template_names, template_list, st, threshold, template += nulltrace if debug >= 2: print('Starting the correlation run for this day') - [cccsums, no_chans, chans] = _channel_loop(templates, stream, cores, debug) + [cccsums, no_chans, chans] = _channel_loop(templates=templates, + stream=stream, + cores=cores, + debug=debug) if len(cccsums[0]) == 0: raise ValueError('Correlation has not run, zero length cccsum') outtoc = time.clock() @@ -705,11 +733,11 @@ def match_filter(template_names, template_list, st, threshold, det_cat = Catalog() for i, cccsum in enumerate(cccsums): template = templates[i] - if threshold_type == 'MAD': + if str(threshold_type) 
== str('MAD'): rawthresh = threshold * np.median(np.abs(cccsum)) - elif threshold_type == 'absolute': + elif str(threshold_type) == str('absolute'): rawthresh = threshold - elif threshold_type == 'av_chan_corr': + elif str(threshold_type) == str('av_chan_corr'): rawthresh = threshold * no_chans[i] # Findpeaks returns a list of tuples in the form [(cccsum, sample)] print(' '.join(['Threshold is set at:', str(rawthresh)])) @@ -773,9 +801,11 @@ def match_filter(template_names, template_list, st, threshold, detecttime = stream[0].stats.starttime +\ peak[1] / stream[0].stats.sampling_rate # Detect time must be valid QuakeML uri within resource_id. - # This will write a formatted string which is still readable by UTCDateTime + # This will write a formatted string which is still + # readable by UTCDateTime rid = ResourceIdentifier(id=template_names[i] + '_' + - str(detecttime.strftime('%Y%m%dT%H%M%S.%f')), + str(detecttime. + strftime('%Y%m%dT%H%M%S.%f')), prefix='smi:local') ev = Event(resource_id=rid) cr_i = CreationInfo(author='EQcorrscan', @@ -794,7 +824,8 @@ def match_filter(template_names, template_list, st, threshold, if (tr.stats.station, tr.stats.channel) not in chans[i]: continue else: - pick_tm = detecttime + (tr.stats.starttime - min_template_tm) + pick_tm = detecttime + (tr.stats.starttime - + min_template_tm) wv_id = WaveformStreamID(network_code=tr.stats.network, station_code=tr.stats.station, channel_code=tr.stats.channel) diff --git a/eqcorrscan/core/match_filter_internal.py b/eqcorrscan/core/match_filter_internal.py deleted file mode 100644 index 3724da976..000000000 --- a/eqcorrscan/core/match_filter_internal.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -Functions written to be compilled by Cython as the inner loops of \ -the match_filter.py routine. - -:copyright: - Calum Chamberlain, Chet Hopp. - -:license: - GNU Lesser General Public License, Version 3 - (https://www.gnu.org/copyleft/lesser.html) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -import numpy as np - - -def _channel_loop(templates, stream, delays, ktemplates, savedir=False, - cores=10): - """ - Loop to generate cross channel correaltion sums for a series of templates - hands off the actual correlations to a sister function which can be run in - parallel. - - :type templates: .ndarray - :param templates: A series of templates, organized into one np.ndarray \ - to the extent that the returned ccc[i][:] will correspond to \ - template[i][:] - :type stream: np.ndarray - :param stream: Image stream - np.ndarray, should be daylong - :type delays: np.ndarray - :param delays: Delays for each template in the templates array - must be \ - in samples, these will be the length of the pads applied to the \ - stream arrays - :type ktemplates: int - :param ktemplates: The number of templates given, should be the actual \ - number of templates, with each template potentially containing \ - multiple traces. - :type savedir: Str or bool - :param savedir: If false, data will be kept in memory, otherwise, data \ - will be stored on disk if memory is tight. - :type cores: int - :param cores: Number of cores to use. - - :return: :class: 'numpy.ndarray' objects. These will contain the \ - correlation sums for each template for this day of data. - :return: list of ints as number of channels used for each cross-correlation - - .. rubric: Note - templates must be arranged into a numpy array of numpy arrays. 
The inner \ - numpy arrays must be shaped (len(template_trace),) - the outer np.ndarray \ - must be shaped (number of channels in stream*number of templates,), such \ - that if there are 5 traces in the stream (e.g. stream is shaped (5,n)) \ - and there are 10 templates each of length 20, the templates ndarray is \ - shaped (50,20). - """ - # cimport numpy as np - import os - import multiprocessing as mp - DTYPE = np.float32 - # ctypedef np.float32_t DTYPE_t - num_cores = cores - if len(templates) < num_cores: - num_cores = len(templates) - - # from cython.parallel import parallel, prange - # from libc.stdlib cimport abort, malloc, free - - # Make some lovely ctypes static declarations - # cdef int image_ind, template_ind, i - # cdef np.ndarray ccc = np.zeros(len(stream[0])-len(templates[0])+1) - # Initialize ndarray for cccsums, which should be one cccsum for - # each of the ktemplates, with each cccsum as long as the - # correlation overlap window. - # cdef np.ndarray cccsums=np.array([np.array([0.0]*len(stream[0])-\ - cccsums = np.array([np.array([0.0]*(len(stream[0])-len(templates[0])+1), - dtype=DTYPE)]*ktemplates) - # Initialize an empty array for the return of the number of channels - # cdef np.ndarray nchans=np.array([0]*ktemplates, dtype=int) - nchans = np.array([0] * ktemplates, dtype=int) - # Loop through the templates, using some kind of clever indexing - # Note, this is where we could parallelise this! - image_ind = 0 - template_ind = 0 - j_ind = np.concatenate([np.arange(0, len(stream))] * ktemplates) - # Array of indexes for stream! - pool = mp.Pool(processes=num_cores) - if savedir: - results = [pool.apply_async(_template_loop, args=(templates[i], - stream[j_ind[i]], - delays[i], - savedir+'/'+str(i), - i)) - for i in range(len(templates))] - else: - results = [pool.apply_async(_template_loop, args=(templates[i], - stream[j_ind[i]], - delays[i], False, i)) - for i in range(len(templates))] - pool.close() - if not savedir: - ccc_list = [p.get() for p in results] - ccc_list.sort(key=lambda tup: tup[0]) - ccc_list = [ccc[1] for ccc in ccc_list] - else: - # order_list = [p.get() for p in results] - del order_list - pool.join() - print("Finished parallel run") - for i in range(len(templates)): - # if i in range(0,len(templates),len(templates)/100): - # print(str(i/len(templates))+' % read back in') - # Check if there was data for that station for both the - if not (np.all(np.isnan(stream[image_ind])) or - np.all(np.isnan(templates[i]))): - nchans[template_ind] += 1 - if not savedir: - cccsums[template_ind] = np.sum([cccsums[template_ind], - ccc_list[i]], axis=0) - else: - cccsums[template_ind] = np.sum([cccsums[template_ind], - np.load(savedir+'/'+str(i) + - '.npy')], - axis=0) - os.remove(savedir+'/'+str(i)+'.npy') - if image_ind < len(stream) - 1: - image_ind += 1 - else: - # Move on to the next template - image_ind = 0 - template_ind += 1 - # Reshape the array to give what we want - for i in range(len(cccsums)): - cccsums[i] = cccsums[i].reshape(len(cccsums[i],)) - return cccsums, nchans - - -def _template_loop(template, stream, delay, savefile=False, i=0): - """ - Helper loop for parallelisation - """ - import cv2 - image = np.append(stream, - np.array([0] * int(round(delay))))[int(round(delay)):] - # Compute the cross correlation - ccc = cv2.matchTemplate(image.astype(np.float32), - template.astype(np.float32), - cv2.TM_CCOEFF_NORMED) - ccc = ccc.T.reshape(len(ccc),) - if savefile: - np.save(savefile, ccc) - del ccc - return i - else: - return(i, ccc) diff --git 
a/eqcorrscan/core/subspace.py b/eqcorrscan/core/subspace.py new file mode 100644 index 000000000..705421097 --- /dev/null +++ b/eqcorrscan/core/subspace.py @@ -0,0 +1,988 @@ +r"""This module contains functions relevant to executing subspace detection \ +for earthquake catalogs. + +We recommend that you read Harris' detailed report on subspace detection \ +theory which can be found here: https://e-reports-ext.llnl.gov/pdf/335299.pdf + +:copyright: + Calum Chamberlain, Chet Hopp. + +:license: + GNU Lesser General Public License, Version 3 + (https://www.gnu.org/copyleft/lesser.html) +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import numpy as np +import warnings +import time +import h5py +import getpass +import eqcorrscan +import copy +from obspy import Trace, UTCDateTime, Stream +from obspy.core.event import Event, CreationInfo, ResourceIdentifier, Comment,\ + WaveformStreamID, Pick +from eqcorrscan.utils.clustering import svd +from eqcorrscan.utils import findpeaks, pre_processing, stacking, plotting +from eqcorrscan.core.match_filter import DETECTION, extract_from_stream +import matplotlib.pyplot as plt + + + +class Detector(object): + """ + Class to serve as the base for subspace detections. + + :type name: str + :param name: Name of subspace detector, used for book-keeping + :type sampling_rate: float + :param sampling_rate: Sampling rate in Hz of original waveforms + :type multiplex: bool + :param multiplex: Is this detector multiplexed. + :type stachans: list + :param stachans: List of tuples of (station, channel) used in detector. \ + If multiplexed, these must be in the order that multiplexing was done. + :type delays: list + :param delays: List of individual trace delays in the order of stachans, \ + only used if multiplex=False. + :type lowcut: float + :param lowcut: Lowcut filter in Hz + :type highcut: float + :param highcut: Highcut filter in Hz + :type filt_order: int + :param filt_order: Number of corners for filtering + :type data: np.ndarray + :param data: The actual detector + :type u: np.ndarray + :param u: Full rank U matrix of left (input) singular vectors. + :type sigma: np.ndarray + :param sigma: Full rank vector of singular values. + :type v: np.ndarray + :param v: Full rank right (output) singular vectors. + :type dimension: int + :param dimension: Dimension of data. 
+ """ + def __init__(self, name=None, sampling_rate=None, multiplex=None, + stachans=None, lowcut=None, highcut=None, + filt_order=None, data=None, u=None, sigma=None, v=None, + dimension=None): + self.name = name + self.sampling_rate = sampling_rate + self.multiplex = multiplex + self.stachans = stachans + # self.delays = delays + self.lowcut = lowcut + self.highcut = highcut + self.filt_order = filt_order + self.data = data + self.u = u + self.sigma = sigma + self.v = v + self.dimension = dimension + + def __repr__(self): + if self.name: + out = 'Detector: ' + self.name + else: + out = 'Empty Detector object' + return out + + def __str__(self): + out = 'Detector object: \n' + for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', + 'filt_order', 'dimension']: + if self.__getattribute__(key): + out += ('\t' + key + ': ' + str(self.__getattribute__(key)) + + '\n') + return out + + def __eq__(self, other): + if not isinstance(other, Detector): + return False + for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', + 'filt_order', 'dimension', 'stachans']: + if not self.__getattribute__(key) == other.__getattribute__(key): + return False + for key in ['data', 'u', 'v', 'sigma']: + list_item = self.__getattribute__(key) + other_list = other.__getattribute__(key) + if not len(list_item) == len(other_list): + return False + for item, other_item in zip(list_item, other_list): + if not np.allclose(item, other_item): + return False + return True + + def __ne__(self, other): + return not self.__eq__(other) + + def __len__(self): + return len(self.data) + + def construct(self, streams, lowcut, highcut, filt_order, + sampling_rate, multiplex, name, align, shift_len=0, + reject=0.3, no_missed=True, plot=False): + """ + Construct a subspace detector from a list of streams, full rank. + + Subspace detector will be full-rank, further functions can be used \ + to select the desired dimensions. + + :type streams: list + :param streams: List of obspy.core.stream.Stream to be used to \ + generate the subspace detector. These should be pre-clustered \ + and aligned. + :type lowcut: float + :param lowcut: Lowcut in Hz, can be None to not apply filter + :type highcut: float + :param highcut: Highcut in Hz, can be None to not apply filter + :type filt_order: int + :param filt_order: Number of corners for filter. + :type sampling_rate: float + :param sampling_rate: Desired sampling rate in Hz + :type multiplex: bool + :param multiplex: Whether to multiplex the data or not. Data are \ + multiplexed according to the method of Harris, see the multi \ + function for details. + :type name: str + :param name: Name of the detector, used for book-keeping. + :type align: bool + :param align: Whether to align the data or not - needs to be done \ + at some point + :type shift_len: float + :param shift_len: Maximum shift allowed for alignment in seconds. + :type reject: float + :param reject: Minimum correlation to include traces - only used if \ + align=True. + :type no_missed: bool + :param no_missed: Reject streams with missed traces, defaults to \ + True. A missing trace from lots of events will reduce the quality \ + of the subspace detector if multiplexed. Only used when multiplex \ + is set to True. + :type plot: bool + :param plot: Whether to plot the alignment stage or not. + + .. note:: The detector will be normalized such that the data, before \ + computing the singular-value decomposition, will have unit energy. \ + e.g.
We divide the amplitudes of the data by the L1 norm of the \ + data. + + .. warning:: EQcorrscan's alignment will attempt to align over the \ + whole data window given. For long (more than 2s) chunks of data \ + this can give poor results and you might be better off using the \ + eqcorrscan.stacking.align_traces function externally, focusing \ + on a smaller window of data. To do this you would align the data \ + prior to running construct. + """ + self.lowcut = lowcut + self.highcut = highcut + self.filt_order = filt_order + self.sampling_rate = sampling_rate + self.name = name + self.multiplex = multiplex + # Pre-process data + p_streams, stachans = \ + _subspace_process(streams=copy.deepcopy(streams), + lowcut=lowcut, highcut=highcut, + filt_order=filt_order, + sampling_rate=sampling_rate, multiplex=multiplex, + align=align, shift_len=shift_len, reject=reject, + plot=plot, no_missed=no_missed) + # Compute the SVD, use the cluster.SVD function + v, sigma, u, svd_stachans = svd(stream_list=p_streams, full=True) + if not multiplex: + stachans = [tuple(stachan.split('.')) for stachan in svd_stachans] + self.stachans = stachans + # self.delays = delays + self.u = u + self.v = v + self.sigma = sigma + self.data = copy.deepcopy(u) # Set the data matrix to be full rank U. + self.dimension = np.inf + return self + + def partition(self, dimension): + """ + Partition subspace into desired dimension. + + :type dimension: int + :param dimension: Maximum dimension to use. + """ + # Take leftmost 'dimension' input basis vectors + for i, channel in enumerate(self.u): + if channel.shape[1] < dimension: + raise IndexError('Channel is max dimension %s' + % channel.shape[1]) + self.data[i] = channel[:, 0:dimension] + self.dimension = dimension + return self + + def energy_capture(self): + """ + Calculate the average percentage energy capture for this subspace. + + :return: Percentage energy capture + :rtype: float + """ + percent_capture = 0 + if np.isinf(self.dimension): + return 100 + for channel in self.sigma: + fc = np.sum(channel[0:self.dimension]) / np.sum(channel) + percent_capture += fc + return 100 * (percent_capture / len(self.sigma)) + + def detect(self, st, threshold, trig_int, moveout=0, min_trig=0, + process=True, extract_detections=False, debug=0): + """ + Detect within continuous data using the subspace method. + + :type st: obspy.core.stream.Stream + :param st: Un-processed stream to detect within using the subspace \ + detector + :type threshold: float + :param threshold: Threshold value for detections between 0-1 + :type trig_int: float + :param trig_int: Minimum trigger interval in seconds. + :type moveout: float + :param moveout: Maximum allowable moveout window for non-multiplexed, + network detection. See note. + :type min_trig: int + :param min_trig: Minimum number of stations exceeding threshold for \ + non-multiplexed, network detection. See note. + :type process: bool + :param process: Whether or not to process the stream according to the \ + parameters defined by the detector. Default is to process the \ + data (True). + :type extract_detections: bool + :param extract_detections: Whether to extract waveforms for each \ + detection or not, if true will return detections and streams. + :type debug: int + :param debug: Debug output level from 0-5. + + :return: list of detections + :rtype: list of eqcorrscan.core.match_filter.DETECTION + + .. note:: If running in bulk with detectors that all have the same \ + parameters then you can pre-process the data and set process to \ + False.
This will speed up this detect function dramatically. + + .. warning:: If the detector and stream are multiplexed then they must \ + contain the same channels and multiplexed in the same order. This \ + is handled internally when process=True, but if running in bulk \ + you must take care. + + .. note:: Non-multiplexed, network detection. When the detector is \ + not multiplexed, but there are multiple channels within the \ + detector, we do not stack the single-channel detection statistics \ + because we do not have a one-size-fits-all solution for computing \ + delays for a subspace detector (if you want to implement one, then \ + please contribute it!). Therefore, these parameters provide a \ + means for declaring a network coincidence trigger using \ + single-channel detection statistics, in a similar fashion to the \ + commonly used network-coincidence trigger with energy detection \ + statistics. + """ + return _detect(detector=self, st=st, threshold=threshold, + trig_int=trig_int, moveout=moveout, min_trig=min_trig, + process=process, extract_detections=extract_detections, + debug=debug) + + def write(self, filename): + """ + Write detector to a file - uses HDF5 file format. + + Meta-data are stored alongside numpy data arrays. See h5py.org for \ + details of the methods. + + :type filename: str + :param filename: Filename to save the detector to. + """ + f = h5py.File(filename, "w") + # Must store eqcorrscan version number, username would be useful too. + data_group = f.create_group(name="data") + for i, data in enumerate(self.data): + dset = data_group.create_dataset(name="data_" + str(i), + shape=data.shape, dtype=data.dtype) + dset[...] = data + data_group.attrs['length'] = len(self.data) + data_group.attrs['name'] = self.name.encode("ascii", "ignore") + data_group.attrs['sampling_rate'] = self.sampling_rate + data_group.attrs['multiplex'] = self.multiplex + data_group.attrs['lowcut'] = self.lowcut + data_group.attrs['highcut'] = self.highcut + data_group.attrs['filt_order'] = self.filt_order + data_group.attrs['dimension'] = self.dimension + data_group.attrs['user'] = getpass.getuser() + data_group.attrs['eqcorrscan_version'] = str(eqcorrscan.__version__) + # Convert station-channel list to something writable + ascii_stachans = ['.'.join(stachan).encode("ascii", "ignore") + for stachan in self.stachans] + stachans = f.create_dataset(name="stachans", + shape=(len(ascii_stachans),), + dtype='S10') + stachans[...] = ascii_stachans + u_group = f.create_group("u") + for i, u in enumerate(self.u): + uset = u_group.create_dataset(name="u_" + str(i), + shape=u.shape, dtype=u.dtype) + uset[...] = u + u_group.attrs['length'] = len(self.u) + sigma_group = f.create_group("sigma") + for i, sigma in enumerate(self.sigma): + sigmaset = sigma_group.create_dataset(name="sigma_" + str(i), + shape=sigma.shape, + dtype=sigma.dtype) + sigmaset[...] = sigma + sigma_group.attrs['length'] = len(self.sigma) + v_group = f.create_group("v") + for i, v in enumerate(self.v): + vset = v_group.create_dataset(name="v_" + str(i), + shape=v.shape, dtype=v.dtype) + vset[...] = v + v_group.attrs['length'] = len(self.v) + f.flush() + f.close() + return self + + def read(self, filename): + """ + Read detector from a file, must be HDF5 format. + + Reads a Detector object from an HDF5 file, usually created by \ + eqcorrscan. + + :type filename: str + :param filename: Filename to save the detector to. 
+ """ + f = h5py.File(filename, "r") + self.data = [] + for i in range(f['data'].attrs['length']): + self.data.append(f['data']['data_' + str(i)].value) + self.u = [] + for i in range(f['u'].attrs['length']): + self.u.append(f['u']['u_' + str(i)].value) + self.sigma = [] + for i in range(f['sigma'].attrs['length']): + self.sigma.append(f['sigma']['sigma_' + str(i)].value) + self.v = [] + for i in range(f['v'].attrs['length']): + self.v.append(f['v']['v_' + str(i)].value) + self.stachans = [tuple(stachan.decode('ascii').split('.')) + for stachan in f['stachans'].value] + self.dimension = f['data'].attrs['dimension'] + self.filt_order = f['data'].attrs['filt_order'] + self.highcut = f['data'].attrs['highcut'] + self.lowcut = f['data'].attrs['lowcut'] + self.multiplex = bool(f['data'].attrs['multiplex']) + self.sampling_rate = f['data'].attrs['sampling_rate'] + if isinstance(f['data'].attrs['name'], str): + self.name = f['data'].attrs['name'] + else: + self.name = f['data'].attrs['name'].decode('ascii') + return self + + def plot(self, stachans='all', size=(10, 7), show=True): + """ + Plot the output basis vectors for the detector at the given dimension. + + Corresponds to the first n horizontal vectors of the V matrix. + + :type stachans: list + :param stachans: List of tuples of (station, channel) to use. Can set \ + to 'all' to use all the station-channel pairs available. If \ + detector is multiplexed, will just plot that. + :type size: tuple + :param size: Figure size. + :type show: bool + :param show: Whether or not to show the figure. + + :returns: Figure + :rtype: matplotlib.pyplot.Figure + """ + if stachans == 'all' and not self.multiplex: + stachans = self.stachans + elif self.multiplex: + stachans = [('multi', ' ')] + fig, axes = plt.subplots(nrows=self.dimension, ncols=len(stachans), + sharex=True, sharey=True, figsize=size) + x = np.arange(len(self.v[0]), dtype=np.float32) + if self.multiplex: + x /= len(self.stachans) * self.sampling_rate + else: + x /= self.sampling_rate + for column, stachan in enumerate(stachans): + channel = self.v[column] + for row, vector in enumerate(channel.T[0:self.dimension]): + if len(stachans) == 1: + if self.dimension == 1: + axis = axes + else: + axis = axes[row] + else: + axis = axes[row, column] + if row == 0: + axis.set_title('.'.join(stachan)) + axis.plot(x, vector, 'k', linewidth=1.1) + if column == 0: + axis.set_ylabel('Basis %s' % (row + 1)) + if row == self.dimension - 1: + axis.set_xlabel('Time (s)') + plt.subplots_adjust(hspace=0.05) + plt.subplots_adjust(wspace=0.05) + if show: + plt.show() + return fig + + +def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0, + process=True, extract_detections=False, debug=0): + """ + Detect within continuous data using the subspace method. + + Not to be called directly, use the detector.detect method. + + :type detector: eqcorrscan.core.subspace.Detector + :param detector: Detector to use. + :type st: obspy.core.stream.Stream + :param st: Un-processed stream to detect within using the subspace \ + detector + :type threshold: float + :param threshold: Threshold value for detections between 0-1 + :type trig_int: float + :param trig_int: Minimum trigger interval in seconds. + :type moveout: float + :param moveout: Maximum allowable moveout window for non-multiplexed, + network detection. See note.
+ :type min_trig: int + :param min_trig: Minimum number of stations exceeding threshold for \ + non-multiplexed, network detection. See note. + :type process: bool + :param process: Whether or not to process the stream according to the \ + parameters defined by the detector. Default is to process the \ + data (True). + :type extract_detections: bool + :param extract_detections: Whether to extract waveforms for each \ + detection or not, if true will return detections and streams. + :type debug: int + :param debug: Debug output level from 0-5. + + :return: list of detections + :rtype: list of eqcorrscan.core.match_filter.DETECTION + """ + from eqcorrscan.core import subspace_statistic + detections = [] + # First process the stream + if process: + if debug > 0: + print('Processing Stream') + stream, stachans = _subspace_process(streams=[st.copy()], + lowcut=detector.lowcut, + highcut=detector.highcut, + filt_order=detector.filt_order, + sampling_rate=detector. + sampling_rate, + multiplex=detector.multiplex, + stachans=detector.stachans, + parallel=True, + align=False, + shift_len=None, + reject=False) + else: + # Check the sampling rate at the very least + for tr in st: + if not tr.stats.sampling_rate == detector.sampling_rate: + raise ValueError('Sampling rates do not match.') + stream = [st] + stachans = detector.stachans + outtic = time.clock() + if debug > 0: + print('Computing detection statistics') + stats = np.zeros((len(stream[0]), + len(stream[0][0]) - len(detector.data[0][0]) + 1), + dtype=np.float32) + for det_channel, in_channel, i in zip(detector.data, stream[0], + np.arange(len(stream[0]))): + stats[i] = subspace_statistic.\ + det_statistic(detector=det_channel.astype(np.float32), + data=in_channel.data.astype(np.float32)) + if debug > 0: + print(stats[i].shape) + if debug > 3: + plt.plot(stats[i]) + plt.show() + # Hard typing in Cython loop requires float32 type. 
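+    # At this point stats holds one detection-statistic trace per channel,
+    # each of length len(data) - len(detector) + 1 samples.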
+ # statistics + if detector.multiplex: + trig_int_samples = (len(detector.stachans) * + detector.sampling_rate * trig_int) + else: + trig_int_samples = detector.sampling_rate * trig_int + if debug > 0: + print('Finding peaks') + peaks = [] + for i in range(len(stream[0])): + peaks.append(findpeaks.find_peaks2_short(arr=stats[i], + thresh=threshold, + trig_int=trig_int_samples, + debug=debug)) + if not detector.multiplex: + # Conduct network coincidence triggering + peaks = findpeaks.coin_trig(peaks=peaks, + samp_rate=detector.sampling_rate, + moveout=moveout, min_trig=min_trig, + stachans=stachans, trig_int=trig_int) + else: + peaks = peaks[0] + if len(peaks) > 0: + for peak in peaks: + if detector.multiplex: + detecttime = st[0].stats.starttime + (peak[1] / + (detector.sampling_rate * + len(detector.stachans))) + else: + detecttime = st[0].stats.starttime + (peak[1] / + detector.sampling_rate) + rid = ResourceIdentifier(id=detector.name + '_' + + str(detecttime), + prefix='smi:local') + ev = Event(resource_id=rid) + cr_i = CreationInfo(author='EQcorrscan', + creation_time=UTCDateTime()) + ev.creation_info = cr_i + # All detection info in Comments for lack of a better idea + thresh_str = 'threshold=' + str(threshold) + ccc_str = 'detect_val=' + str(peak[0]) + used_chans = 'channels used: ' +\ + ' '.join([str(pair) for pair in detector.stachans]) + ev.comments.append(Comment(text=thresh_str)) + ev.comments.append(Comment(text=ccc_str)) + ev.comments.append(Comment(text=used_chans)) + for stachan in detector.stachans: + tr = st.select(station=stachan[0], channel=stachan[1]) + if tr: + net_code = tr[0].stats.network + else: + net_code = '' + pick_tm = detecttime + wv_id = WaveformStreamID(network_code=net_code, + station_code=stachan[0], + channel_code=stachan[1]) + ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id)) + detections.append(DETECTION(detector.name, + detecttime, + len(detector.stachans), + peak[0], + threshold, + 'subspace', detector.stachans, + event=ev)) + outtoc = time.clock() + print('Detection took %s seconds' % str(outtoc - outtic)) + if extract_detections: + detection_streams = extract_from_stream(st, detections) + return detections, detection_streams + return detections + + +def _subspace_process(streams, lowcut, highcut, filt_order, sampling_rate, + multiplex, align, shift_len, reject, no_missed=True, + stachans=None, parallel=False, plot=False): + """ + Process stream data, internal function. + + :type streams: list + :param streams: List of obspy.core.stream.Stream to be used to \ + generate the subspace detector. These should be pre-clustered \ + and aligned. + :type lowcut: float + :param lowcut: Lowcut in Hz, can be None to not apply filter + :type highcut: float + :param highcut: Highcut in Hz, can be None to not apply filter + :type filt_order: int + :param filt_order: Number of corners for filter. + :type sampling_rate: float + :param sampling_rate: Desired sampling rate in Hz + :type multiplex: bool + :param multiplex: Whether to multiplex the data or not. Data are \ + multiplexed according to the method of Harris, see the multi \ + function for details. + :type stachans: list of tuple + :param stachans: list of tuples of (station, channel) to use. + :type align: bool + :param align: Whether to align the data or not - needs to be done \ + at some point + :type shift_len: float + :param shift_len: Maximum shift allowed for alignment in seconds. + :type reject: float + :param reject: Minimum correlation for traces, only used if align=True. 
:type no_missed: bool
+    :param no_missed: Reject streams with missing traces, defaults to True. \
+        A missing trace from lots of events will reduce the quality of the \
+        subspace detector if multiplexed. Only used when multiplex is set \
+        to True.
+    :type plot: bool
+    :param plot: Passed down to align traces - used to check the alignment \
+        process.
+
+    :return: Processed streams
+    :rtype: list
+    :return: Station, channel pairs in order
+    :rtype: list of tuple
+    :return: List of delays
+    :rtype: list
+    """
+    from multiprocessing import Pool, cpu_count
+    processed_streams = []
+    if not stachans:
+        input_stachans = list(set([(tr.stats.station, tr.stats.channel)
+                                   for st in streams for tr in st.sort()]))
+    else:
+        input_stachans = stachans
+    input_stachans.sort()  # Make sure stations and channels are in order
+    # Check that all channels are the same length in seconds
+    first_length = len(streams[0][0].data) /\
+        streams[0][0].stats.sampling_rate
+    for st in streams:
+        for tr in st:
+            if not len(tr) / tr.stats.sampling_rate == first_length:
+                msg = 'All channels of all streams must be the same length'
+                raise IOError(msg)
+    for st in streams:
+        if not parallel:
+            processed_stream = Stream()
+            for stachan in input_stachans:
+                dummy, tr = _internal_process(st=st, lowcut=lowcut,
+                                              highcut=highcut,
+                                              filt_order=filt_order,
+                                              sampling_rate=sampling_rate,
+                                              first_length=first_length,
+                                              stachan=stachan, debug=0)
+                processed_stream += tr
+            processed_streams.append(processed_stream)
+        else:
+            pool = Pool(processes=cpu_count())
+            results = [pool.apply_async(_internal_process, (st,),
+                                        {'lowcut': lowcut,
+                                         'highcut': highcut,
+                                         'filt_order': filt_order,
+                                         'sampling_rate': sampling_rate,
+                                         'first_length': first_length,
+                                         'stachan': stachan,
+                                         'debug': 0,
+                                         'i': i})
+                       for i, stachan in enumerate(input_stachans)]
+            pool.close()
+            processed_stream = [p.get() for p in results]
+            pool.join()
+            processed_stream.sort(key=lambda tup: tup[0])
+            processed_stream = Stream([p[1] for p in processed_stream])
+            processed_streams.append(processed_stream)
+        if no_missed and multiplex:
+            for tr in processed_stream:
+                if np.count_nonzero(tr.data) == 0:
+                    processed_streams.remove(processed_stream)
+                    print('Removed stream with empty trace')
+                    break
+    if align:
+        processed_streams = align_design(design_set=processed_streams,
+                                         shift_len=shift_len,
+                                         reject=reject, multiplex=multiplex,
+                                         plot=plot, no_missed=no_missed)
+    output_streams = []
+    for processed_stream in processed_streams:
+        if len(processed_stream) == 0:
+            # All traces have been removed from this stream, skip it
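+            # (A stream can end up empty when all of its channels were
+            #  rejected for low correlation during alignment.)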
+            continue
+        # Need to order the stream according to input_stachans
+        _st = Stream()
+        for stachan in input_stachans:
+            tr = processed_stream.select(station=stachan[0],
+                                         channel=stachan[1])
+            if len(tr) >= 1:
+                _st += tr[0]
+            elif multiplex and len(tr) == 0:
+                raise IndexError('Missing data for %s.%s' %
+                                 (stachan[0], stachan[1]))
+        if multiplex:
+            st = multi(stream=_st)
+            st = Stream(Trace(st))
+            st[0].stats.station = 'Multi'
+            st[0].stats.sampling_rate = sampling_rate
+        else:
+            st = _st
+        for tr in st:
+            # Normalize the data
+            norm = np.linalg.norm(tr.data)
+            if not norm == 0:
+                tr.data /= norm
+        output_streams.append(st)
+    return output_streams, input_stachans
+
+
+def _internal_process(st, lowcut, highcut, filt_order, sampling_rate,
+                      first_length, stachan, debug, i=0):
+    tr = st.select(station=stachan[0], channel=stachan[1])
+    if len(tr) == 0:
+        tr = Trace(np.zeros(int(first_length * sampling_rate)))
+        tr.stats.station = stachan[0]
+        tr.stats.channel = stachan[1]
+        tr.stats.sampling_rate = sampling_rate
+        tr.stats.starttime = st[0].stats.starttime  # Do this to make more
+        # sensible plots
+        warnings.warn('Padding stream with zero trace for ' +
+                      'station ' + stachan[0] + '.' + stachan[1])
+    elif len(tr) == 1:
+        tr = tr[0]
+        tr.detrend('simple')
+        tr = pre_processing.process(tr=tr, lowcut=lowcut, highcut=highcut,
+                                    filt_order=filt_order,
+                                    samp_rate=sampling_rate, debug=debug,
+                                    seisan=False)
+    else:
+        msg = ('Multiple channels for ' + stachan[0] + '.' +
+               stachan[1] + ' in a single design stream.')
+        raise IOError(msg)
+    return i, tr
+
+
+def read_detector(filename):
+    """
+    Read detector from a filename.
+
+    :type filename: str
+    :param filename: Filename to read the detector from.
+
+    :return: Detector object
+    :rtype: eqcorrscan.core.subspace.Detector
+    """
+    detector = Detector()
+    detector.read(filename=filename)
+    return detector
+
+
+def multi(stream):
+    """
+    Internal multiplexer for subspace detection.
+
+    :type stream: obspy.core.stream.Stream
+    :param stream: Stream to multiplex
+
+    :return: Multiplexed data
+    :rtype: numpy.ndarray
+
+    .. note:: Requires all channels to be the same length.
+
+    Maps a standard stream of seismic data to a single trace of \
+        multiplexed data as follows:
+
+    Input:
+    x = [x1, x2, x3, ...]
+    y = [y1, y2, y3, ...]
+    z = [z1, z2, z3, ...]
+
+    Output:
+    xyz = [x1, y1, z1, x2, y2, z2, x3, y3, z3, ...]
+    """
+    stack = stream[0].data
+    for tr in stream[1:]:
+        stack = np.dstack(np.array([stack, tr.data]))
+    multiplex = stack.reshape(stack.size, )
+    return multiplex
+
+
+def align_design(design_set, shift_len, reject, multiplex, no_missed=True,
+                 plot=False):
+    """
+    Align individual traces within streams of the design set.
+
+    Perform before Detector.construct to align traces before computing the \
+        singular value decomposition.
+
+    :type design_set: list
+    :param design_set: List of obspy.core.stream.Stream to be aligned
+    :type shift_len: float
+    :param shift_len: Maximum shift (plus/minus) in seconds.
+    :type reject: float
+    :param reject: Minimum correlation for traces, only used if align=True.
+    :type multiplex: bool
+    :param multiplex: If you are going to multiplex the data, then there \
+        has to be data for all channels, so we will pad with zeros, \
+        otherwise there is no need.
+    :type no_missed: bool
+    :param no_missed: Reject streams with missing traces, defaults to True. \
+        A missing trace from lots of events will reduce the quality of the \
+        subspace detector if multiplexed.
+        Only used when multiplex is set to True.
+    :type plot: bool
+    :param plot: Whether to plot the aligned traces as we go or not.
+
+    :rtype: list
+    :return: List of obspy.core.stream.Stream of aligned streams
+
+    .. Note:: Assumes only one trace for each channel for each stream in the \
+        design_set. If more are present, only the first will be used.
+
+    .. Note:: Will cut all traces to be the same length as required for the \
+        svd, this length will be the shortest trace length - 2 * shift_len
+    """
+    trace_lengths = [tr.stats.endtime - tr.stats.starttime
+                     for st in design_set for tr in st]
+    clip_len = min(trace_lengths) - (2 * shift_len)
+    stachans = list(set([(tr.stats.station, tr.stats.channel)
+                         for st in design_set for tr in st]))
+    remove_set = []
+    for stachan in stachans:
+        trace_list = []
+        trace_ids = []
+        for i, st in enumerate(design_set):
+            tr = st.select(station=stachan[0], channel=stachan[1])
+            if len(tr) > 0:
+                trace_list.append(tr[0])
+                trace_ids.append(i)
+            if len(tr) > 1:
+                warnings.warn('Too many matches for %s %s' % (stachan[0],
+                                                              stachan[1]))
+        shift_len_samples = int(shift_len * trace_list[0].stats.sampling_rate)
+        shifts, cccs = stacking.align_traces(trace_list=trace_list,
+                                             shift_len=shift_len_samples,
+                                             positive=True)
+        for i, shift in enumerate(shifts):
+            st = design_set[trace_ids[i]]
+            start_t = st.select(station=stachan[0],
+                                channel=stachan[1])[0].stats.starttime
+            start_t += shift_len
+            start_t -= shift
+            st.select(station=stachan[0],
+                      channel=stachan[1])[0].trim(start_t,
+                                                  start_t + clip_len)
+            if cccs[i] < reject:
+                if multiplex and not no_missed:
+                    st.select(station=stachan[0],
+                              channel=stachan[1])[0].data =\
+                        np.zeros(int(clip_len *
+                                     (st.select(station=stachan[0],
+                                                channel=stachan[1])[0].
+                                      stats.sampling_rate) + 1))
+                    warnings.warn('Padding stream with zero trace for ' +
+                                  'station ' + stachan[0] + '.' + stachan[1])
+                elif multiplex and no_missed:
+                    remove_set.append(st)
+                    warnings.warn('Will remove stream due to low correlation')
+                    continue
+                else:
+                    st.remove(st.select(station=stachan[0],
+                                        channel=stachan[1])[0])
+                    print('Removed channel with correlation of %s, below '
+                          'the reject threshold' % cccs[i])
+                    continue
+    if no_missed:
+        for st in remove_set:
+            if st in design_set:
+                design_set.remove(st)
+    if plot:
+        for stachan in stachans:
+            trace_list = []
+            for st in design_set:
+                tr = st.select(station=stachan[0], channel=stachan[1])
+                if len(tr) > 0:
+                    trace_list.append(tr[0])
+            if len(trace_list) > 1:
+                plotting.multi_trace_plot(traces=trace_list, corr=True,
+                                          stack=None, title='.'.join(stachan))
+            else:
+                print('No plot for you, only one trace left after rejection')
+    return design_set
+
+
+def subspace_detect(detectors, stream, threshold, trig_int, moveout=0,
+                    min_trig=1, parallel=True, num_cores=None):
+    """
+    Conduct subspace detection with chosen detectors.
+
+    :type detectors: list
+    :param detectors: list of eqcorrscan.core.subspace.Detector to be used \
+        for detection
+    :type stream: obspy.core.stream.Stream
+    :param stream: Stream to detect within.
+    :type threshold: float
+    :param threshold: Threshold between 0 and 1 for detection, see \
+        Detector.detect.
+    :type trig_int: float
+    :param trig_int: Minimum trigger interval in seconds.
+    :type moveout: float
+    :param moveout: Maximum allowable moveout window for non-multiplexed,
+        network detection. See note.
+    :type min_trig: int
+    :param min_trig: Minimum number of stations exceeding threshold for \
+        non-multiplexed, network detection. See note in Detector.detect.
+    :type parallel: bool
+    :param parallel: Whether to run detectors in parallel in groups.
+    :type num_cores: int
+    :param num_cores: How many cpu cores to use if parallel==True. If set \
+        to None (default), will use all available cores.
+
+    :rtype: list
+    :return: List of eqcorrscan.core.match_filter.DETECTION detections.
+
+    .. Note:: This will loop through your detectors using their detect \
+        method. If the detectors are multiplexed it will run groups of \
+        detectors with the same channels at the same time.
+    """
+    from multiprocessing import Pool, cpu_count
+    # First check that detector parameters are the same
+    parameters = []
+    detections = []
+    for detector in detectors:
+        parameter = (detector.lowcut, detector.highcut,
+                     detector.filt_order, detector.sampling_rate,
+                     detector.multiplex, detector.stachans)
+        if parameter not in parameters:
+            parameters.append(parameter)
+    for parameter_set in parameters:
+        parameter_detectors = []
+        for detector in detectors:
+            det_par = (detector.lowcut, detector.highcut, detector.filt_order,
+                       detector.sampling_rate, detector.multiplex,
+                       detector.stachans)
+            if det_par == parameter_set:
+                parameter_detectors.append(detector)
+        # Process a copy of the stream for this parameter set, keeping the
+        # raw input stream intact for the remaining parameter sets.
+        processed_stream, stachans = \
+            _subspace_process(streams=[stream.copy()],
+                              lowcut=parameter_set[0],
+                              highcut=parameter_set[1],
+                              filt_order=parameter_set[2],
+                              sampling_rate=parameter_set[3],
+                              multiplex=parameter_set[4],
+                              stachans=parameter_set[5],
+                              parallel=True, align=False, shift_len=None,
+                              reject=False)
+        if not parallel:
+            for detector in parameter_detectors:
+                detections += _detect(detector=detector,
+                                      st=processed_stream[0],
+                                      threshold=threshold, trig_int=trig_int,
+                                      moveout=moveout, min_trig=min_trig,
+                                      process=False, extract_detections=False,
+                                      debug=0)
+        else:
+            if num_cores:
+                ncores = num_cores
+            else:
+                ncores = cpu_count()
+            pool = Pool(processes=ncores)
+            results = [pool.apply_async(_detect,
+                                        args=(detector, processed_stream[0],
+                                              threshold, trig_int, moveout,
+                                              min_trig, False, False, 0))
+                       for detector in parameter_detectors]
+            pool.close()
+            _detections = [p.get() for p in results]
+            pool.join()
+            for d in _detections:
+                if isinstance(d, list):
+                    detections += d
+                else:
+                    detections.append(d)
+    return detections
+
+
diff --git a/eqcorrscan/core/subspace_statistic.pyx b/eqcorrscan/core/subspace_statistic.pyx
new file mode 100644
index 000000000..57ca765aa
--- /dev/null
+++ b/eqcorrscan/core/subspace_statistic.pyx
@@ -0,0 +1,64 @@
+"""
+Internal loop for subspace detection statistic calculation. Testing speedups.
+
+:copyright:
+    Calum Chamberlain, Chet Hopp.
+
+:license:
+    GNU Lesser General Public License, Version 3
+    (https://www.gnu.org/copyleft/lesser.html)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy as np
+cimport numpy as np
+import cython
+
+DTYPE = np.float32
+ctypedef np.float32_t DTYPE_t
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def det_statistic(np.ndarray[DTYPE_t, ndim=2] detector,
+                  np.ndarray[DTYPE_t, ndim=1] data):
+    """
+    Base function to calculate the subspace detection statistic.
+
+    Calculates the statistic for a given subspace detector and data \
+    stream. The statistic is calculated by projecting the data onto the N \
+    dimensional subspace defined by the given detector following the \
+    equation: :math:`\\gamma = y^TUU^Ty` where y is the data stream, U is \
+    the subspace detector and :math:`\\gamma` is the detection statistic \
+    from 0 to 1.
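+
+    A hand-checkable sketch (values invented): if the detector's single
+    basis vector equals the data window itself, the statistic is 1:
+
+    >>> import numpy as np
+    >>> y = np.array([1, 0, 0, 0], dtype=np.float32)
+    >>> u = y.reshape(-1, 1)
+    >>> float(np.dot(y, np.dot(np.dot(u, u.T), y)))
+    1.0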
+ + :type detector: np.ndarray + :param detector: U matrix from singular value decomposition + :type data: np.ndarry + :param data: Data to detect within + + :returns: Detection statistic from 0-1 + :rtype: np.ndarray + """ + cdef int i + cdef int datamax = data.shape[0] + cdef int ulen = detector.shape[1] + cdef int umax = detector.shape[0] + cdef int imax = datamax - ulen + 1 + cdef np.ndarray[DTYPE_t, ndim=1] stats = np.zeros(imax, dtype=DTYPE) + # Check that there will not be an empty window + cdef np.ndarray[DTYPE_t, ndim=1] _data = \ + np.concatenate([data, np.zeros((umax * imax) - + datamax, dtype=DTYPE)]) + cdef np.ndarray[DTYPE_t, ndim=2] uut = np.dot(detector, detector.T) + # Actual loop after static typing + for i in range(imax): + stats[i] = np.dot(_data[i:i + umax], + np.dot(uut, _data[i:i + umax].T)) + # Cope with case of errored internal loop + if np.all(np.isnan(stats)): + return np.zeros(imax, dtype=DTYPE) + else: + return stats + diff --git a/eqcorrscan/core/template_gen.py b/eqcorrscan/core/template_gen.py index de45c1e7e..6d6a3f6f4 100644 --- a/eqcorrscan/core/template_gen.py +++ b/eqcorrscan/core/template_gen.py @@ -30,10 +30,11 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals +import warnings def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin, - prepick=0.05, debug=0, plot=False): + prepick, all_horiz=False, delayed=True, plot=False, debug=0): """ Generate a multiplexed template from a list of SAC files. @@ -66,10 +67,16 @@ def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin, defaults file. :type prepick: float :param prepick: Length to extract prior to the pick in seconds. + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type debug: int :param debug: Debug level, higher number=more output. :type plot: bool :param plot: Turns template plotting on or off. + :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. :returns: obspy.core.stream.Stream Newly cut template @@ -85,7 +92,7 @@ def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin, >>> sac_files = glob.glob('eqcorrscan/tests/test_data/SAC/2014p611252/*') >>> template = from_sac(sac_files=sac_files, lowcut=2.0, highcut=10.0, ... samp_rate=25.0, filt_order=4, length=2.0, - ... swin='all', prepick=0.1) + ... 
swin='all', prepick=0.1, all_horiz=True) >>> print(template[0].stats.sampling_rate) 25.0 >>> print(len(template)) @@ -110,16 +117,17 @@ def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin, event = sactoevent(st, debug=debug) # Process the data st.merge(fill_value='interpolate') - st = pre_processing.shortproc(st, lowcut, highcut, filt_order, - samp_rate, debug) - template = _template_gen(picks=event.picks, st=st, length=length, - swin=swin, prepick=prepick, plot=plot, - debug=debug) + st = pre_processing.shortproc(st=st, lowcut=lowcut, highcut=highcut, + filt_order=filt_order, + samp_rate=samp_rate, debug=debug) + template = template_gen(picks=event.picks, st=st, length=length, + swin=swin, prepick=prepick, plot=plot, + debug=debug, all_horiz=all_horiz, delayed=delayed) return template def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, - prepick=0.05, debug=0, plot=False): + prepick, all_horiz=False, delayed=True, plot=False, debug=0): """ Generate multiplexed template from a Nordic (Seisan) s-file. Function to read in picks from sfile then generate the template from \ @@ -148,10 +156,17 @@ def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, defaults file. :type prepick: float :param prepick: Length to extract prior to the pick in seconds. + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type debug: int :param debug: Debug level, higher number=more output. :type plot: bool :param plot: Turns template plotting on or off. + :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. + :returns: obspy.core.stream.Stream Newly cut template @@ -163,10 +178,12 @@ def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, .. rubric:: Example >>> from eqcorrscan.core.template_gen import from_sfile - >>> sfile = 'eqcorrscan/tests/test_data/REA/TEST_/01-0411-15L.S201309' + >>> import os + >>> sfile = os.path.join('eqcorrscan', 'tests', 'test_data', + ... 'REA', 'TEST_', '01-0411-15L.S201309') >>> template = from_sfile(sfile=sfile, lowcut=5.0, highcut=15.0, ... samp_rate=50.0, filt_order=4, swin='P', - ... prepick=0.2, length=6) + ... 
prepick=0.2, length=6, all_horiz=True) >>> print(len(template)) 15 >>> print(template[0].stats.sampling_rate) @@ -177,8 +194,9 @@ def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, from eqcorrscan.core.template_gen import from_sfile import os - sfile = os.path.realpath('../../..') + \ - '/tests/test_data/REA/TEST_/01-0411-15L.S201309' + sfile = os.path.realpath('../../..') + os.sep +\ + os.path.join('tests', 'test_data', 'REA', + 'TEST_', '01-0411-15L.S201309') template = from_sfile(sfile=sfile, lowcut=5.0, highcut=15.0, samp_rate=50.0, filt_order=4, swin='P', prepick=0.2, length=6) @@ -194,7 +212,7 @@ def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, from obspy import read as obsread # Read in the header of the sfile wavefiles = sfile_util.readwavename(sfile) - pathparts = sfile.split('/')[0:-1] + pathparts = sfile.split(os.sep)[0:-1] new_path_parts = [] for part in pathparts: if part == 'REA': @@ -205,6 +223,11 @@ def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, main_wav_parts.append(part) if part == 'WAV': break + if main_wav_parts[0] == 'C:': + main_wav_parts[1] = main_wav_parts[0] + os.sep + main_wav_parts[1] + new_path_parts[1] = new_path_parts[0] + os.sep + new_path_parts[1] + main_wav_parts.remove(main_wav_parts[0]) + new_path_parts.remove(new_path_parts[0]) mainwav = os.path.join(*main_wav_parts) + os.path.sep # * argument to allow .join() to accept a list wavpath = os.path.join(*new_path_parts) + os.path.sep @@ -246,20 +269,25 @@ def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin, if debug > 0: print("I have found the following picks") for pick in picks: + if not pick.waveform_id: + continue print(' '.join([pick.waveform_id.station_code, pick.waveform_id.channel_code, pick.phase_hint, str(pick.time)])) # Process waveform data st.merge(fill_value='interpolate') - st = pre_processing.shortproc(st, lowcut, highcut, filt_order, - samp_rate, debug) - st1 = _template_gen(picks=picks, st=st, length=length, swin=swin, - prepick=prepick, plot=plot, debug=debug) + st = pre_processing.shortproc(st=st, lowcut=lowcut, highcut=highcut, + filt_order=filt_order, samp_rate=samp_rate, + debug=debug) + st1 = template_gen(picks=picks, st=st, length=length, swin=swin, + prepick=prepick, all_horiz=all_horiz, + plot=plot, debug=debug, delayed=delayed) return st1 def from_contbase(sfile, contbase_list, lowcut, highcut, samp_rate, filt_order, - length, prepick, swin, debug=0, plot=False): + length, prepick, swin, all_horiz=False, delayed=True, + plot=False, debug=0): """ Generate multiplexed template from a Nordic file using continuous data. @@ -300,10 +328,16 @@ def from_contbase(sfile, contbase_list, lowcut, highcut, samp_rate, filt_order, :param prepick: Pre-pick time in seconds :type swin: str :param swin: Either 'all', 'P' or 'S', to select which phases to output. + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type debug: int :param debug: Level of debugging output, higher=more :type plot: bool :param plot: Turns template plotting on or off. + :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. 
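+
+    .. note:: *delayed* is new in this change: with delayed=False every \
+        channel is cut from a common start-time (the earliest pick time \
+        minus prepick) rather than from its own pick-time.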
:returns: obspy.Stream Newly cut template """ @@ -326,6 +360,10 @@ def from_contbase(sfile, contbase_list, lowcut, highcut, samp_rate, filt_order, used_picks = [] wavefiles = [] for pick in picks: + if not pick.waveform_id: + print('Pick not associated with waveforms, will not use.') + print(pick) + continue station = pick.waveform_id.station_code channel = pick.waveform_id.channel_code phase = pick.phase_hint @@ -363,22 +401,39 @@ def from_contbase(sfile, contbase_list, lowcut, highcut, samp_rate, filt_order, filt_order=filt_order, samp_rate=samp_rate, starttime=day, debug=debug) # Cut and extract the templates - st1 = _template_gen(picks, st, length, swin, prepick=prepick, plot=plot, - debug=debug) + st1 = template_gen(picks, st, length, swin, prepick=prepick, + all_horiz=all_horiz, plot=plot, debug=debug, + delayed=delayed) return st1 -def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order, - length, prepick, swin, debug=0, plot=False): +def from_quakeml(meta_file, st, lowcut, highcut, samp_rate, filt_order, + length, prepick, swin, all_horiz=False, delayed=True, + plot=False, debug=0): + """Depreciated wrapper.""" + warnings.warn('from_quakeml is depreciated, please use from_meta_file') + templates = from_meta_file(meta_file=meta_file, st=st, lowcut=lowcut, + highcut=highcut, samp_rate=samp_rate, + filt_order=filt_order, length=length, + prepick=prepick, swin=swin, debug=debug, + plot=plot, all_horiz=all_horiz, + delayed=delayed) + return templates + + +def from_meta_file(meta_file, st, lowcut, highcut, samp_rate, filt_order, + length, prepick, swin, all_horiz=False, delayed=True, + plot=False, debug=0): """ Generate a multiplexed template from a local quakeML file. Function to generate a template from a local quakeml file \ and an obspy.Stream object. - :type quakeml: str - :param quakeml: QuakeML file containing pick information, can contain \ - multiple events. + :type meta_file: str + :param meta_file: File containing pick information, can contain \ + multiple events. File must be formatted in a way readable by \ + obspy.core.event.read_events. :type st: obspy.core.stream.Stream :param st: Stream containing waveform data for template (hopefully). \ Note that this should be the same length of stream as you will use \ @@ -403,10 +458,16 @@ def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order, :param prepick: Pre-pick time in seconds :type swin: str :param swin: Either 'all', 'P' or 'S', to select which phases to output. + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type debug: int :param debug: Level of debugging output, higher=more :type plot: bool :param plot: Display template plots or not + :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. :returns: list of obspy.Stream Newly cut templates @@ -418,20 +479,21 @@ def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order, .. rubric:: Example >>> from obspy import read - >>> from eqcorrscan.core.template_gen import from_quakeml + >>> from eqcorrscan.core.template_gen import from_meta_file >>> st = read('eqcorrscan/tests/test_data/WAV/TEST_/' + ... '2013-09-01-0410-35.DFDPC_024_00') >>> quakeml = 'eqcorrscan/tests/test_data/20130901T041115.xml' - >>> templates = from_quakeml(quakeml=quakeml, st=st, lowcut=2.0, - ... highcut=9.0, samp_rate=20.0, filt_order=3, - ... 
length=2, prepick=0.1, swin='S') + >>> templates = from_meta_file(meta_file=quakeml, st=st, lowcut=2.0, + ... highcut=9.0, samp_rate=20.0, filt_order=3, + ... length=2, prepick=0.1, swin='S', + ... all_horiz=True) >>> print(len(templates[0])) 15 """ # Perform some checks first import os import warnings - if not os.path.isfile(quakeml): + if not os.path.isfile(meta_file): raise IOError('QuakeML file does not exist') import obspy if int(obspy.__version__.split('.')[0]) >= 1: @@ -452,19 +514,24 @@ def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order, else: daylong = False if daylong: - st = pre_processing.dayproc(st, lowcut, highcut, filt_order, - samp_rate, debug=debug, + st = pre_processing.dayproc(st=st, lowcut=lowcut, highcut=highcut, + filt_order=filt_order, samp_rate=samp_rate, + debug=debug, starttime=UTCDateTime(st[0].stats. starttime.date)) else: - st = pre_processing.shortproc(st, lowcut, highcut, filt_order, - samp_rate, debug=debug) + st = pre_processing.shortproc(st=st, lowcut=lowcut, highcut=highcut, + filt_order=filt_order, + samp_rate=samp_rate, debug=debug) data_start = min([tr.stats.starttime for tr in st]) data_end = max([tr.stats.endtime for tr in st]) # Read QuakeML file into Catalog class - catalog = read_events(quakeml) + catalog = read_events(meta_file) templates = [] for event in catalog: + if len(event.picks) == 0: + warnings.warn('No picks for event %s' % event.resource_id) + continue use_event = True # Check that the event is within the data for pick in event.picks: @@ -482,6 +549,10 @@ def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order, if debug > 0: print("I have found the following picks") for pick in event.picks: + if not pick.waveform_id: + print('Pick not associated with waveforms, will not use.') + print(pick) + continue if debug > 0: print(' '.join([pick.waveform_id.station_code, pick.waveform_id.channel_code, @@ -497,14 +568,16 @@ def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order, channels[i]) st1 = st.copy() # Cut and extract the templates - template = _template_gen(event.picks, st1, length, swin, - prepick=prepick, plot=plot, debug=debug) + template = template_gen(event.picks, st1, length, swin, + prepick=prepick, plot=plot, debug=debug, + all_horiz=all_horiz, delayed=delayed) templates.append(template) return templates def from_seishub(catalog, url, lowcut, highcut, samp_rate, filt_order, - length, prepick, swin, debug=0, plot=False): + length, prepick, swin, process_len=86400, data_pad=90, + all_horiz=False, delayed=True, debug=0, plot=False): """ Generate multiplexed template from SeisHub database. Function to generate templates from a SeisHub database. Must be given \ @@ -535,12 +608,28 @@ def from_seishub(catalog, url, lowcut, highcut, samp_rate, filt_order, :param prepick: Pre-pick time in seconds :type swin: str :param swin: Either 'all', 'P' or 'S', to select which phases to output. + :type process_len: int + :param process_len: Length of data in seconds to download and process. + :param data_pad: Length of data (in seconds) required before and after \ + any event for processing, use to reduce edge-effects of filtering on \ + the templates. + :type data_pad: int + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type debug: int :param debug: Level of debugging output, higher=more :type plot: bool :param plot: Plot templates or not. 
+ :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. :returns: obspy.core.stream.Stream Newly cut template + + .. note:: process_len should be set to the same length as used when \ + computing detections using match_filter.match_filter, e.g. if you read \ + in day-long data fro match_filter, process_len should be 86400. """ # This import section copes with namespace changes between obspy versions import obspy @@ -552,34 +641,40 @@ def from_seishub(catalog, url, lowcut, highcut, samp_rate, filt_order, from obspy import UTCDateTime client = Client(url, timeout=10) temp_list = [] - for event in catalog: + sub_catalogs = _group_events(catalog=catalog, process_len=process_len, + data_pad=data_pad) + for sub_catalog in sub_catalogs: # Figure out which picks we have - day = event.origins[0].time - picks = event.picks + all_waveform_info = [] + for event in sub_catalog: + for pick in event.picks: + if not pick.waveform_id: + print('Pick not associated with waveforms, will not use.') + print(pick) + continue + all_waveform_info.append(pick.waveform_id) + _all_waveform_info = [] + for w in all_waveform_info: + _all_waveform_info.append((w.network_code, + w.station_code, + w.channel_code, + w.location_code)) + all_waveform_info = list(set(_all_waveform_info)) + del _all_waveform_info + all_waveform_info.sort() print("Fetching the following traces from SeisHub") - for pick in picks: - if pick.waveform_id.network_code: - net = pick.waveform_id.network_code - else: - raise IOError('No network code defined for pick: ' + pick) - if pick.waveform_id.station_code: - sta = pick.waveform_id.station_code - else: - raise IOError('No station code defined for pick: ' + pick) - if pick.waveform_id.channel_code: - chan = pick.waveform_id.channel_code - else: - raise IOError('No channel code defined for pick: ' + pick) - if pick.waveform_id.location_code: - loc = pick.waveform_id.location_code - else: - loc = '*' - starttime = UTCDateTime(pick.time.date) - endtime = starttime + 86400 - # Here we download a full day of data. We do this so that minor - # differences in processing during processing due to the effect - # of resampling do not impinge on our cross-correlations. 
- + for waveform_info in all_waveform_info: + net = waveform_info[0] + sta = waveform_info[1] + chan = waveform_info[2] + loc = waveform_info[3] + if not loc: + loc = '' + starttime = UTCDateTime(sub_catalog[0].origins[0].time - + data_pad) + endtime = starttime + process_len + if not endtime > sub_catalog[-1].origins[0].time + data_pad: + raise IOError('Events do not fit in processing window') if debug > 0: print('start-time: ' + str(starttime)) print('end-time: ' + str(endtime)) @@ -588,30 +683,40 @@ def from_seishub(catalog, url, lowcut, highcut, samp_rate, filt_order, if sta in client.waveform.get_station_ids(network=net): if 'st' not in locals(): st = client.waveform.get_waveform(net, sta, loc, chan, - starttime, endtime) + starttime, endtime) else: st += client.waveform.get_waveform(net, sta, loc, chan, - starttime, endtime) + starttime, endtime) else: print('Station not found in SeisHub DB') if len(st) == 0: raise IOError('No waveforms found') if debug > 0: st.plot() - print('Preprocessing data for event: '+str(event.resource_id)) + print('Pre-processing data for event: '+str(event.resource_id)) st.merge(fill_value='interpolate') - st1 = pre_processing.dayproc(st, lowcut, highcut, filt_order, - samp_rate, starttime=starttime, - debug=debug) - template = _template_gen(event.picks, st1, length, swin, prepick, - plot=plot, debug=debug) - del st, st1 - temp_list.append(template) + # clients download chunks, we need to assert that the data are + # the desired length + for tr in st: + tr.trim(starttime, endtime) + print(len(tr)) + st1 = pre_processing.shortproc(st=st, lowcut=lowcut, highcut=highcut, + filt_order=filt_order, + samp_rate=samp_rate, debug=debug, + parallel=True) + for event in sub_catalog: + template = template_gen(picks=event.picks, st=st1, length=length, + swin=swin, prepick=prepick, + all_horiz=all_horiz, plot=plot, + debug=debug, delayed=delayed) + del st, st1 + temp_list.append(template) return temp_list def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order, - length, prepick, swin, debug=0, plot=False): + length, prepick, swin, process_len=86400, data_pad=90, + all_horiz=False, delayed=True, plot=False, debug=0): """ Generate multiplexed template from FDSN client. Function to generate templates from an FDSN client. Must be given \ @@ -643,13 +748,29 @@ def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order, :param prepick: Pre-pick time in seconds :type swin: str :param swin: Either 'all', 'P' or 'S', to select which phases to output. + :type process_len: int + :param process_len: Length of data in seconds to download and process. + :param data_pad: Length of data (in seconds) required before and after \ + any event for processing, use to reduce edge-effects of filtering on \ + the templates. + :type data_pad: int + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type debug: int :param debug: Level of debugging output, higher=more :type plot: bool :param plot: Plot templates or not. + :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. :returns: obspy.core.stream.Stream Newly cut template + .. note:: process_len should be set to the same length as used when \ + computing detections using match_filter.match_filter, e.g. if you read \ + in day-long data for match_filter, process_len should be 86400. + .. 
rubric:: Example >>> import obspy @@ -667,11 +788,11 @@ def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order, >>> templates = from_client(catalog=catalog, client_id='NCEDC', ... lowcut=2.0, highcut=9.0, samp_rate=20.0, ... filt_order=4, length=3.0, prepick=0.15, - ... swin='all') - Fetching the following traces from NCEDC + ... swin='all', process_len=300, + ... all_horiz=True) BG.CLV..DPZ BK.BKS.00.HHZ - Pre-processing data for event: quakeml:nc.anss.org/Event/NC/72572665 + Pre-processing data >>> templates[0].plot(equal_scale=False, size=(800,600)) # doctest: +SKIP .. figure:: ../../plots/template_gen.from_client.png @@ -690,18 +811,34 @@ def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order, client = Client(client_id) temp_list = [] - for event in catalog: - # Figure out which picks we have - day = event.origins[0].time - print("Fetching the following traces from " + client_id) + # Group catalog into days and only download the data once per day + sub_catalogs = _group_events(catalog=catalog, process_len=process_len, + data_pad=data_pad) + for sub_catalog in sub_catalogs: + all_waveform_info = [] + for event in sub_catalog: + for pick in event.picks: + if not pick.waveform_id: + print('Pick not associated with waveforms, will not use.') + print(pick) + continue + all_waveform_info.append(pick.waveform_id) + all_waveform_info = list(set([(w.network_code, w.station_code, + w.channel_code, w.location_code) + for w in all_waveform_info])) + all_waveform_info.sort() dropped_pick_stations = 0 - for pick in event.picks: - net = pick.waveform_id.network_code - sta = pick.waveform_id.station_code - chan = pick.waveform_id.channel_code - loc = pick.waveform_id.location_code - starttime = UTCDateTime(pick.time.date) - endtime = starttime + 86400 + for waveform_info in all_waveform_info: + net = waveform_info[0] + sta = waveform_info[1] + chan = waveform_info[2] + loc = waveform_info[3] + starttime = UTCDateTime(sub_catalog[0].origins[0].time - + data_pad) + endtime = starttime + process_len + # Check that endtime is after the last event + if not endtime > sub_catalog[-1].origins[0].time + data_pad: + raise IOError('Events do not fit in processing window') # Here we download a full day of data. We do this so that minor # differences in processing during processing due to the effect # of resampling do not impinge on our cross-correlations. 
@@ -729,22 +866,32 @@ def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order, st.plot() if not st and dropped_pick_stations == len(event.picks): raise FDSNException('No data available, is the server down?') - print('Pre-processing data for event: '+str(event.resource_id)) + print('Pre-processing data') st.merge(fill_value='interpolate') - st1 = pre_processing.dayproc(st, lowcut, highcut, filt_order, - samp_rate, starttime=starttime, - debug=debug, parallel=True) + # clients download chunks, we need to assert that the data are + # the desired length + for tr in st: + tr.trim(starttime, endtime) + if len(tr.data) == (process_len * tr.stats.sampling_rate) + 1: + tr.data = tr.data[1:len(tr.data)] + st1 = pre_processing.shortproc(st=st, lowcut=lowcut, highcut=highcut, + filt_order=filt_order, + samp_rate=samp_rate, + debug=debug, parallel=True) if debug > 0: st1.plot() - template = _template_gen(event.picks, st1, length, swin, prepick, - plot=plot, debug=debug) + for event in sub_catalog: + template = template_gen(picks=event.picks, st=st1, length=length, + swin=swin, prepick=prepick, + plot=plot, debug=debug, + all_horiz=all_horiz, delayed=delayed) + temp_list.append(template) del st, st1 - temp_list.append(template) return temp_list def multi_template_gen(catalog, st, length, swin='all', prepick=0.05, - plot=False, debug=0): + all_horiz=False, delayed=True, plot=False, debug=0): """ Generate multiple templates from one stream of data. Thin wrapper around _template_gen to generate multiple templates from \ @@ -762,10 +909,16 @@ def multi_template_gen(catalog, st, length, swin='all', prepick=0.05, :type prepick: float :param prepick: Length in seconds to extract before the pick time \ default is 0.05 seconds + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type plot: bool :param plot: To plot the template or not, default is True :type debug: int :param debug: Debug output level from 0-5. + :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. 
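+
+    .. note:: With all_horiz=True a single S-pick on one horizontal \
+        channel is used to cut both horizontal channels at that station; \
+        see the channel-matching logic in template_gen below.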
:returns: list of :class: obspy.core.Stream newly cut templates @@ -787,6 +940,11 @@ def multi_template_gen(catalog, st, length, swin='all', prepick=0.05, for event in working_catalog: picks = event.picks for pick in picks: + if not pick.waveform_id: + print('Pick not associated with waveforms, will not use.') + print(pick) + picks.remove(pick) + continue if st[0].stats.starttime < pick.time < st[0].stats.endtime: pick_stachan = (pick.waveform_id.station_code, pick.waveform_id.channel_code) @@ -799,14 +957,25 @@ def multi_template_gen(catalog, st, length, swin='all', prepick=0.05, picks.remove(pick) if len(picks) > 0: st_clip = st.copy() - template = _template_gen(picks, st_clip, length, swin, - prepick, plot, debug) + template = template_gen(picks=picks, st=st_clip, length=length, + swin=swin, prepick=prepick, plot=plot, + debug=debug, all_horiz=all_horiz, + delayed=delayed) templates.append(template) return templates def _template_gen(picks, st, length, swin='all', prepick=0.05, plot=False, - debug=0): + all_horiz=False, debug=0): + warnings.warn('_template_gen is depreciated, please use template_gen') + st1 = template_gen(picks=picks, st=st, length=length, swin=swin, + prepick=prepick, all_horiz=all_horiz, plot=plot, + debug=debug) + return st1 + + +def template_gen(picks, st, length, swin='all', prepick=0.05, + all_horiz=False, delayed=True, plot=False, debug=0): """ Master function to generate a multiplexed template for a single event. Function to generate a cut template in the obspy \ @@ -814,9 +983,10 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, plot=False, class. Should be given pre-processed data (downsampled and filtered). :type picks: list - :param picks: Picks to extract data around + :param picks: Picks to extract data around, where each pick in the \ + list is an obspy.core.event.origin.Pick object. :type st: obspy.core.stream.Stream - :param st: Stream to etract templates from + :param st: Stream to extract templates from :type length: float :param length: Length of template in seconds :type swin: str @@ -824,10 +994,16 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, plot=False, :type prepick: float :param prepick: Length in seconds to extract before the pick time \ default is 0.05 seconds + :type all_horiz: bool + :param all_horiz: To use both horizontal channels even if there is only \ + a pick on one of them. Defaults to False. :type plot: bool :param plot: To plot the template or not, default is True :type debug: int :param debug: Debug output level from 0-5. + :type delayed: bool + :param delayed: If True, each channel will begin relative to it's own \ + pick-time, if set to False, each channel will begin at the same time. :returns: obspy.core.stream.Stream Newly cut template. @@ -842,20 +1018,29 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, plot=False, .. warning:: If there is no phase_hint included in picks, and swin=all, \ all channels with picks will be used. """ - import copy from eqcorrscan.utils.plotting import pretty_template_plot as\ tplot from obspy import Stream import warnings + import numpy as np + import copy + stations = [] channels = [] st_stachans = [] + picks_copy = copy.deepcopy(picks) # Work on a copy of the picks and leave + # the users picks intact. 
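+    # The checks below validate swin, drop picks without waveform IDs and
+    # build the station and channel lists for the requested phase window.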
if swin not in ['P', 'all', 'S']:
+        raise IOError('Phase type is not in [all, P, S]')
-    for pick in picks:
+    for pick in picks_copy:
+        if not pick.waveform_id:
+            print('Pick not associated with waveform, will not use it.')
+            print(pick)
+            picks_copy.remove(pick)
+            continue
         # Check to see that we are only taking the appropriate picks
         if swin == 'all':
-            # Annoying compatability with seisan two channel codes
+            # Annoying compatibility with seisan two-channel codes
             stations.append(pick.waveform_id.station_code)
             channels.append(pick.waveform_id.channel_code[0] +
                             pick.waveform_id.channel_code[-1])
@@ -869,7 +1054,15 @@
             channels.append(pick.waveform_id.channel_code[0] +
                             pick.waveform_id.channel_code[-1])
     for tr in st:
-        st_stachans.append('.'.join([tr.stats.station, tr.stats.channel]))
+        # Check that the data can be represented by float16, and check they
+        # are not all zeros
+        if np.all(tr.data.astype(np.float16) == 0):
+            warnings.warn('Trace is all zeros at float16 level, either '
+                          'gain the data or check them. Not using in '
+                          'template.')
+            print(tr)
+            st.remove(tr)
+        else:
+            st_stachans.append('.'.join([tr.stats.station, tr.stats.channel]))
     for i, station in enumerate(stations):
         if '.'.join([station, channels[i]]) not in st_stachans and debug > 0:
             warnings.warn('No data provided for ' + station + '.' +
@@ -878,7 +1071,7 @@
     for tr in st:
         if tr.stats.station in stations:
             # This is used to cope with seisan handling channel codes as
-            # two charectar codes, internally we will do the same.
+            # two character codes, internally we will do the same.
             if len(tr.stats.channel) == 3:
                 temp_channel = tr.stats.channel[0] + tr.stats.channel[2]
             elif len(tr.stats.channel) == 2:
@@ -898,12 +1091,15 @@
     del st1
     if plot:
         stplot = st.copy()
+    # Get the earliest pick-time and use that if we are not using delayed.
+    event_start_time = min([pick.time for pick in picks_copy])
+    event_start_time -= prepick
     # Cut the data
     for tr in st:
         if 'starttime' in locals():
             del starttime
         if swin == 'all':
-            for pick in picks:
+            for pick in picks_copy:
                 if not pick.phase_hint:
                     msg = 'Pick for ' + pick.waveform_id.station_code + '.' +\
                         pick.waveform_id.channel_code + ' has no phase ' +\
@@ -927,16 +1123,24 @@
                 elif pick.waveform_id.station_code == tr.stats.station and\
                     tr.stats.channel[-1] in ['1', '2', 'N', 'E', 'R', 'T'] and\
-                        'S' in pick.phase_hint.upper():
+                        'S' in pick.phase_hint.upper() and\
+                        all_horiz:
+                    starttime = pick.time - prepick
+                elif pick.waveform_id.station_code == tr.stats.station and\
+                        pick.waveform_id.channel_code[0] + \
+                        pick.waveform_id.channel_code[-1] == \
+                        tr.stats.channel:
                     starttime = pick.time - prepick
         else:
-            for pick in picks:
+            for pick in picks_copy:
                 if pick.waveform_id.station_code == tr.stats.station and\
                         swin in pick.phase_hint.upper():
                     starttime = pick.time - prepick
         if 'starttime' in locals():
             if debug > 0:
                 print("Cutting " + tr.stats.station + '.'
+ tr.stats.channel) + if not delayed: + starttime = event_start_time tr.trim(starttime=starttime, endtime=starttime + length, nearest_sample=False) if debug > 0: @@ -958,7 +1162,7 @@ def _template_gen(picks, st, length, swin='all', prepick=0.05, plot=False, 10) tplot(st1, background=background, title='Template for '+str(st1[0].stats.starttime), - picks=picks) + picks=picks_copy) del stplot del st # st1.plot(size=(800,600)) @@ -1028,9 +1232,12 @@ def extract_from_stack(stack, template, length, pre_pick, pre_pad, for tr in new_template: # Process the data if necessary if not pre_processed: - new_template = pre_processing.shortproc(new_template, lowcut, - highcut, filt_order, - samp_rate, 0) + new_template = pre_processing.shortproc(st=new_template, + lowcut=lowcut, + highcut=highcut, + filt_order=filt_order, + samp_rate=samp_rate, + debug=0) # Find the matching delay delay = [d[2] for d in delays if d[0] == tr.stats.station and d[1] == tr.stats.channel[-1]] @@ -1053,6 +1260,42 @@ def extract_from_stack(stack, template, length, pre_pick, pre_pad, return new_template +def _group_events(catalog, process_len, data_pad): + """ + Internal function to group events into sub-catalogs based on process_len. + + :param catalog: Catalog to groups into sub-catalogs + :type catalog: obspy.core.event.Catalog + :param process_len: Length in seconds that data will be processed in + :type process_len: int + :param data_pad: Length of data (in seconds) required before and after \ + any event for processing, use to reduce edge-effects of filtering on \ + the templates. + :type data_pad: int + + :return: List of catalogs + :rtype: list + """ + from obspy.core.event import Catalog + # case for catalog only containing one event + if len(catalog) == 1: + return [catalog] + sub_catalogs = [] + # Sort catalog by date + cat_list = [(event, event.origins[0].time) for event in catalog] + cat_list.sort(key=lambda tup: tup[1]) + catalog = Catalog([tup[0] for tup in cat_list]) + sub_catalog = Catalog([catalog[0]]) + for event in catalog[1:]: + if (event.origins[0].time + data_pad) - \ + (sub_catalog[0].origins[0].time - data_pad) < process_len: + sub_catalog.append(event) + else: + sub_catalogs.append(sub_catalog) + sub_catalog = Catalog([event]) + sub_catalogs.append(sub_catalog) + return sub_catalogs + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/eqcorrscan/detections/20090512_20090512 b/eqcorrscan/detections/20090512_20090512 deleted file mode 100644 index 93f7c62ef..000000000 --- a/eqcorrscan/detections/20090512_20090512 +++ /dev/null @@ -1,3 +0,0 @@ -template, detect-time, cccsum, threshold, number of channels -2009-05-12T00:09:50.300000Z.ms, 2009-05-12T00:09:50.300000Z, 10.2474736571, 3.52316607535, 13 - diff --git a/eqcorrscan/doc/core.rst b/eqcorrscan/doc/core.rst index d6b8ea16e..ea1634365 100644 --- a/eqcorrscan/doc/core.rst +++ b/eqcorrscan/doc/core.rst @@ -13,8 +13,8 @@ miniseed files, where each file is a single template. .. _Obspy: http://docs.obspy.org/ -**Brightness** - +Brightness +---------- bright_lights_ contains a series of functions to detect events using the brightness-based beamforming method of `Frank et. al (2014)`_. This has been tested significantly, but has failed to detect events unambiguously in the @@ -23,8 +23,8 @@ central Southern Alps. As such development of these functions has ceased. .. _bright_lights: submodules/core.bright_lights.html .. _Frank et. 
al (2014): http://gji.oxfordjournals.org/content/197/2/1215.short
 
-**Template generation**
-
+Template generation
+-------------------
 template_gen_ contains routines for cutting waveforms around picks for use as
 templates in match_filter_. Included in this are wrappers to directly read in
 Seisan formattaed pick files and waveforms associated with the picks, and
 and catalogs, and seishub databases.
 
 .. _template_gen: submodules/core.template_gen.html
 
-**Matched-Filter**
-
+Matched-Filter
+--------------
 match_filter_ contains the core routines for earthquake detection by
 cross-correlation. This is optimized for large-scale, multi-paralleled
 detection, with large numbers of templates. Because we are unsure of your
 the batch job submission capability which distributes daily detections across
 multiple nodes. This allows us to detect earthquakes through > 6 years of
 multi-channel data using > 600 templates in less than 36 hours.
 
+Of note: EQcorrscan does not enforce a length of data to process; it is up to
+the user to exercise caution when thresholding cross-correlation sums. As the
+figure below shows, if using the median absolute deviation (MAD) thresholding
+metric, the user should be aware that this threshold changes with time, and
+those variations can be significant when using short windows of data.
+
+
+.. figure:: plots/range_of_threshold_windows_Parkfield1.png
+    :width: 800px
+    :align: center
+    :alt: plots/range_of_threshold_windows_Parkfield1.png
+
+    Plot of a few hours of cross-correlation sum (black) for a five-channel
+    template associated with the Parkfield 2004 earthquake, with various
+    windowed thresholds set to 8 x Median Absolute Deviation. The x-axis is
+    time in hours, the y-axis is cross-correlation sum.
+
 .. _match_filter: submodules/core.match_filter.html
 
+Lag-Calc
+--------
+lag_calc_ contains functions for generating pick-corrections from
+cross-correlations with a defined template. Originally this was designed
+for events detected by match_filter_; however, you can use any well-correlated
+events. Based on the method of `Shelly and Hardebeck (2010)`_.
+
+.. _lag_calc: submodules/core.lag_calc.html
+.. _Shelly and Hardebeck (2010): http://onlinelibrary.wiley.com/doi/10.1029/2010GL043672/full
+
+
+Subspace
+--------
+subspace_ contains a subspace detector for either single-channel cases, or
+network cases. This is modelled on the method described by Harris_. This
+method allows for slightly more variation in detected waveforms than the
+traditional matched-filter method. In this method, templates are constructed
+either by using the empirical subspace method, or by computing the basis
+vectors by singular-value decomposition. Both methods are provided as part of
+EQcorrscan in the clustering_ module.
 
-.. toctree::
-    :maxdepth: 1
+.. _subspace: submodules/core.subspace.html
+.. _Harris: https://e-reports-ext.llnl.gov/pdf/335299.pdf
+.. _clustering: submodules/utils.clustering.html
 
-    submodules/core.bright_lights
-    submodules/core.template_gen
-    submodules/core.match_filter
\ No newline at end of file
diff --git a/eqcorrscan/doc/index.rst b/eqcorrscan/doc/index.rst
index 54d909bf4..f17f99a16 100644
--- a/eqcorrscan/doc/index.rst
+++ b/eqcorrscan/doc/index.rst
@@ -1,22 +1,19 @@
-.. EQcorrscan documentation master file, created by
-   sphinx-quickstart on Mon Mar 23 21:20:41 2015.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
- -Welcome to EQcorrscan's documentation -===================================== - .. image:: EQcorrscan_logo.png - :width: 300px + :width: 600px :align: left :alt: EQcorrscan_logo.png :target: https://github.com/calum-chamberlain/EQcorrscan/releases EQcorrscan ----------- +========== -A Python package to conduct matched-filter earthquake detections. Codes are stored -on github, the master and development branches are |github_link|, or the latest stable(ish) release +A Python package for the detection and analysis of repeating and near-repeating seismicity. +EQcorrscan contains an efficient, multi-parallel, +:doc:`matched-filter ` detection routine, as well as +routines to implement :doc:`subspace ` detection, +and detection based on :doc:`brightness `. + +Code is stored on github, the development branches are |github_link|, or the latest stable release can be found |releases_link|. .. |releases_link| raw:: html @@ -27,20 +24,12 @@ can be found |releases_link|. on github -This package contains routines to enable the user to conduct :doc:`matched-filter ` earthquake -detections, and do some fun things with the detections (stacking, clustering, -singular-value decomposition, pick correction...). - -EQcorrscan uses -|Obspy_link| bindings when reading and writing seismic data, and for handling most +EQcorrscan uses |Obspy_link| bindings when reading and writing seismic data, and for handling most of the event metadata, which ensures that detections can be easily migrated between softwares. Matched-filter correlations are calculated using |opencv_link|. OpenCV is not installed alongside EQcorrscan and must be installed before using this package. -This package was written to implement the matlab routines -used by Chamberlain et al. (2014) for the detection of low-frequency earthquakes. - .. |Obspy_link| raw:: html Obspy @@ -62,9 +51,9 @@ Also within this package are: Seisan -This package is written by Calum Chamberlain of Victoria University of Wellington, and -is distributed under the LGPL GNU Licence, Copyright Calum Chamberlain & -Chet Hopp 2015 & 2016. +This package is written by Calum Chamberlain and Chet Hopp of Victoria +University of Wellington, New Zealand, and is distributed under the LGPL GNU +Licence, Copyright Calum Chamberlain & Chet Hopp 2015 & 2016. References ---------- diff --git a/eqcorrscan/doc/intro.rst b/eqcorrscan/doc/intro.rst index 2a4885688..faf5488ce 100644 --- a/eqcorrscan/doc/intro.rst +++ b/eqcorrscan/doc/intro.rst @@ -2,14 +2,14 @@ Introduction to the EQcorrscan package ====================================== This document is designed to give you an overview of the capabilities and -implementation of the EQcorrscan python module. +implementation of the EQcorrscan Python module. Why EQcorrscan? --------------- -EQcorrscan is designed to compute matched-filter detections of earthquakes, -or any seismic signal (explosions work *really* well) by comparing templates -with continuous data. The main benefit of EQcorrscan is the level of -parallel processing that can be achieved. By exploiting the fact that each template +EQcorrscan is designed to compute detections of earthquakes, or any seismic signal +(explosions work *really* well) by comparing templates with continuous data. +The main benefit of EQcorrscan's matched-filter routine is the level of parallel +processing that can be achieved. By exploiting the fact that each template does not rely on any other template, detections from a single template through a day of seismic data can be computed in parallel. 
By computing these in parallel rather than a single template through multiple days we reduce IO load. At a low @@ -23,23 +23,27 @@ development of software for the detection and analysis of repeating and near-repeating earthquakes. This repository will continue to grow and develop and any and all help/criticism will be appreciated. -We have a long way to go with this project - if you want to get involved the -best place to start, and the most valuable thing for your understanding, and -for the health of this repository would be to contribute tests and -documentation. Ideally we would like to have one test for every function! +There are a lot of things that could be added to this project - if you want to +get involved, the best place to start (and the most valuable thing for your +understanding and for the health of this package) would be to contribute tests and +documentation. Installation ------------ In general we recommend users to install EQcorrscan in a virtual environment, -for this the virtualenvwrapper package is handy. +for which the |virtualenvwrapper| package is handy. Within a virtual environment, a fresh install should be as simple as: **pip install eqcorrscan** -Most codes should work without any effort on your part. However you may need to -install the openCV-python package yourself. +Most codes should work without any effort on your part. However, you will need to +install the openCV-python package yourself. We recommend installing openCV version +3, and we recommend installing it from source - it is available via anaconda, but +it will run faster if you compile it yourself, and it will give more consistent +results. See |pyimagesearch| for installation details on all operating systems +(including raspberry pi, which EQcorrscan runs on too :) ). On Linux with Python 2.7: @@ -56,6 +60,10 @@ You can also install from source; for Python 3 this is a must as you will have to install openCV 3. |pyimagesearch| has lots of lovely tutorials like this |cv3_ubuntu|. +.. |virtualenvwrapper| raw:: html + + virtualenvwrapper + .. |pyimagesearch| raw:: html pyimagesearch @@ -89,7 +97,7 @@ required to give. Supported environments ---------------------- -We support Linux, OSX and Windows environments running Python 2.7 and 3.5. +We support Linux, OSX and Windows environments running Python 2.7, 3.4 and 3.5. We don't run our tests on other versions of Python so you might have some issues with other Python 3.x series; if you do, let us know.
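A quick way to confirm that an environment is correctly set up is to import the package and its key dependencies and print their versions. The following is a minimal sketch, not part of the EQcorrscan API: it assumes a completed install, that cv2 is the openCV-python binding, and that each package exposes the conventional __version__ attribute (true for obspy and cv2; assumed for eqcorrscan):

.. code-block:: python

    # Hypothetical smoke-test for an EQcorrscan environment
    import cv2  # openCV-python, installed separately as described above
    import obspy
    import eqcorrscan

    print('openCV:', cv2.__version__)
    print('obspy:', obspy.__version__)
    print('EQcorrscan:', eqcorrscan.__version__)

If any of these imports fail, revisit the corresponding install step before running the tutorials.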
diff --git a/eqcorrscan/doc/plots/detection_multiplot.png b/eqcorrscan/doc/plots/detection_multiplot.png new file mode 100644 index 000000000..9bd95c6ff Binary files /dev/null and b/eqcorrscan/doc/plots/detection_multiplot.png differ diff --git a/eqcorrscan/doc/plots/multi_event_singlechan.png b/eqcorrscan/doc/plots/multi_event_singlechan.png new file mode 100644 index 000000000..7c559146e Binary files /dev/null and b/eqcorrscan/doc/plots/multi_event_singlechan.png differ diff --git a/eqcorrscan/doc/plots/plot_repicked.png b/eqcorrscan/doc/plots/plot_repicked.png new file mode 100644 index 000000000..0d2463112 Binary files /dev/null and b/eqcorrscan/doc/plots/plot_repicked.png differ diff --git a/eqcorrscan/doc/plots/range_of_threshold_windows_Parkfield1.png b/eqcorrscan/doc/plots/range_of_threshold_windows_Parkfield1.png new file mode 100644 index 000000000..0a7f92bea Binary files /dev/null and b/eqcorrscan/doc/plots/range_of_threshold_windows_Parkfield1.png differ diff --git a/eqcorrscan/doc/plots/triple_plot.png b/eqcorrscan/doc/plots/triple_plot.png new file mode 100644 index 000000000..8e1cbe17f Binary files /dev/null and b/eqcorrscan/doc/plots/triple_plot.png differ diff --git a/eqcorrscan/doc/plots/xcorr_plot.png b/eqcorrscan/doc/plots/xcorr_plot.png new file mode 100644 index 000000000..26d0f2dc1 Binary files /dev/null and b/eqcorrscan/doc/plots/xcorr_plot.png differ diff --git a/eqcorrscan/doc/submodules/core.lag_calc.rst b/eqcorrscan/doc/submodules/core.lag_calc.rst new file mode 100644 index 000000000..6d6077c9a --- /dev/null +++ b/eqcorrscan/doc/submodules/core.lag_calc.rst @@ -0,0 +1,29 @@ +lag_calc +-------- + +.. currentmodule:: eqcorrscan.core.lag_calc +.. automodule:: eqcorrscan.core.lag_calc + + .. comment to end block + + Classes & Functions + ------------------- + .. autosummary:: + :toctree: autogen + :nosignatures: + + lag_calc + + .. comment to end block + + Private Functions + ----------------- + Note that these functions are not designed for public use and may change + at any point. + + .. autosummary:: + :toctree: autogen + :nosignatures: + + _channel_loop + _day_loop diff --git a/eqcorrscan/doc/submodules/core.subspace.Detector.rst b/eqcorrscan/doc/submodules/core.subspace.Detector.rst new file mode 100644 index 000000000..69ee27115 --- /dev/null +++ b/eqcorrscan/doc/submodules/core.subspace.Detector.rst @@ -0,0 +1,27 @@ +eqcorrscan.core.subspace.Detector +================================= + +.. currentmodule:: eqcorrscan.core.subspace + +.. autoclass:: Detector + + .. rubric:: Methods + + .. autosummary:: + + construct + detect + energy_capture + partition + read + write + + + .. automethod:: __init__ + .. automethod:: construct + .. automethod:: detect + .. automethod:: energy_capture + .. automethod:: partition + .. automethod:: read + .. automethod:: write + diff --git a/eqcorrscan/doc/submodules/core.subspace.rst b/eqcorrscan/doc/submodules/core.subspace.rst new file mode 100644 index 000000000..c7d4d6b6d --- /dev/null +++ b/eqcorrscan/doc/submodules/core.subspace.rst @@ -0,0 +1,26 @@ +subspace +-------- + +.. currentmodule:: eqcorrscan.core.subspace +.. automodule:: eqcorrscan.core.subspace + + .. comment to end block + + Classes + ------- + .. toctree:: + :maxdepth: 1 + + core.subspace.Detector + + Functions + --------- + .. autosummary:: + :toctree: autogen + :nosignatures: + + read_detector + multi + subspace_detect + + .. 
comment to end block diff --git a/eqcorrscan/doc/submodules/core.template_gen.rst b/eqcorrscan/doc/submodules/core.template_gen.rst index bd130c40f..4d47a4dcd 100644 --- a/eqcorrscan/doc/submodules/core.template_gen.rst +++ b/eqcorrscan/doc/submodules/core.template_gen.rst @@ -12,11 +12,11 @@ template_gen :toctree: autogen :nosignatures: - _template_gen + template_gen extract_from_stack from_client from_contbase - from_quakeml + from_meta_file from_sac from_seishub from_sfile diff --git a/eqcorrscan/doc/submodules/utils.catalog_utils.rst b/eqcorrscan/doc/submodules/utils.catalog_utils.rst new file mode 100644 index 000000000..68b47d024 --- /dev/null +++ b/eqcorrscan/doc/submodules/utils.catalog_utils.rst @@ -0,0 +1,17 @@ +catalog_utils +------------- + +.. currentmodule:: eqcorrscan.utils.catalog_utils +.. automodule:: eqcorrscan.utils.catalog_utils + + .. comment to end block + + Classes & Functions + ------------------- + .. autosummary:: + :toctree: autogen + :nosignatures: + + filter_picks + + .. comment to end block diff --git a/eqcorrscan/doc/submodules/utils.mag_calc.rst b/eqcorrscan/doc/submodules/utils.mag_calc.rst index dbc717f1d..de45e5314 100644 --- a/eqcorrscan/doc/submodules/utils.mag_calc.rst +++ b/eqcorrscan/doc/submodules/utils.mag_calc.rst @@ -28,13 +28,13 @@ mag_calc at any point. .. autosummary:: - :toctree: autogen - :nosignatures: - - _sim_WA - _pairwise - _max_p2t - _find_resp - _GSE2_PAZ_read + :toctree: autogen + :nosignatures: + + _sim_WA + _pairwise + _max_p2t + _find_resp + _GSE2_PAZ_read .. comment to end block diff --git a/eqcorrscan/doc/submodules/utils.parameters.rst b/eqcorrscan/doc/submodules/utils.parameters.rst new file mode 100644 index 000000000..2a672c569 --- /dev/null +++ b/eqcorrscan/doc/submodules/utils.parameters.rst @@ -0,0 +1,18 @@ +parameters +---------- + +.. currentmodule:: eqcorrscan.utils.parameters +.. automodule:: eqcorrscan.utils.parameters + + .. comment to end block + + Classes & Functions + ------------------- + .. autosummary:: + :toctree: autogen + :nosignatures: + + EQcorrscanParameters + read_parameters + + .. comment to end block diff --git a/eqcorrscan/doc/submodules/utils.picker.rst b/eqcorrscan/doc/submodules/utils.picker.rst index bdb386b0b..8195f4d9d 100644 --- a/eqcorrscan/doc/submodules/utils.picker.rst +++ b/eqcorrscan/doc/submodules/utils.picker.rst @@ -14,6 +14,5 @@ picker cross_net stalta_pick - synth_compare .. comment to end block diff --git a/eqcorrscan/doc/submodules/utils.plotting.rst b/eqcorrscan/doc/submodules/utils.plotting.rst index f0b2af3d6..7e66aabf3 100644 --- a/eqcorrscan/doc/submodules/utils.plotting.rst +++ b/eqcorrscan/doc/submodules/utils.plotting.rst @@ -22,6 +22,7 @@ plotting NR_plot obspy_3d_plot peaks_plot + plot_repicked plot_synth_real pretty_template_plot spec_trace @@ -29,5 +30,6 @@ plotting threeD_gridplot threeD_seismplot triple_plot + xcorr_plot .. 
comment to end block diff --git a/eqcorrscan/doc/submodules/utils.sfile_util.rst b/eqcorrscan/doc/submodules/utils.sfile_util.rst index 6c2262a1d..40f4bd693 100644 --- a/eqcorrscan/doc/submodules/utils.sfile_util.rst +++ b/eqcorrscan/doc/submodules/utils.sfile_util.rst @@ -13,10 +13,8 @@ sfile_util :nosignatures: blanksfile - eventtopick eventtosfile nordpick - picktoevent populatesfile readheader readpicks diff --git a/eqcorrscan/doc/tutorial.rst b/eqcorrscan/doc/tutorial.rst index bfe9ae659..86155dad6 100644 --- a/eqcorrscan/doc/tutorial.rst +++ b/eqcorrscan/doc/tutorial.rst @@ -1,5 +1,5 @@ -EQcorrscan tutorial -=================== +EQcorrscan tutorials +==================== Welcome to EQcorrscan - this package is designed to compute earthquake detections using a paralleled matched-filter network cross-correlation routine, and analyse the results. @@ -21,23 +21,35 @@ The core sub-module contains the main, high-level functions: :match_filter: The main matched-filter routines, this is split into several smaller functions to allow python-based parallel-processing; +:subspace: + Subspace detection routine based on |Harris2006|. :lag_calc: Routines for calculating optimal lag-times for events detected by the match-filter routine, these lags can then be used to define new picks - for high accuracy re-locations. *Under-development* + for high accuracy re-locations. + +Some other high-level functions are included in the :doc:`utils ` sub-module +and are documented here with tutorials: + +:mag_calc: + Simple local magnitude calculation and high-precision relative moment + calculation using singular-value decomposition. +:clustering: + Routines for clustering earthquakes based on a range of metrics using + agglomerative clustering methods. The :doc:`utils ` sub-module contains useful, but small functions. These functions are rarely cpu intensive, but perform vital operations, such -as reading *Seisan* s-files (:doc:`sfile_util `), +as reading |Seisan| s-files (:doc:`sfile_util `), finding peaks in noisy data (:doc:`findpeaks `), converting a seisan database to hypoDD formatted files and computing cross-correlations between -detections for hypoDD (a double difference relocation software) +detections for |hypoDD| (a double difference relocation software) (:doc:`catalog_to_dd `), calculating magnitudes (:doc:`mag_calc `), clustering detections (:doc:`clustering `), stacking detections (:doc:`stacking `), making pretty plots (:doc:`plotting `), -and processing seismic data in the same way repeatedly using *Obspy*'s +and processing seismic data in the same way repeatedly using |Obspy|'s functionality (:doc:`pre_processing `). What follows is an expanding set of tutorials that should take you @@ -49,5 +61,23 @@ through some of the key functionality of the EQcorrscan package. tutorials/template-creation.rst tutorials/matched-filter.rst + tutorials/subspace.rst + tutorials/lag-calc.rst tutorials/mag-calc.rst tutorials/clustering.rst + +.. |Harris2006| raw:: html + + Harris (2006) + +.. |HypoDD| raw:: html + + HypoDD + +.. |Seisan| raw:: html + + Seisan + +..
|Obspy| raw:: html + + Obspy \ No newline at end of file diff --git a/eqcorrscan/doc/tutorials/lag-calc.rst b/eqcorrscan/doc/tutorials/lag-calc.rst index b1e6f2100..f38d102d3 100644 --- a/eqcorrscan/doc/tutorials/lag-calc.rst +++ b/eqcorrscan/doc/tutorials/lag-calc.rst @@ -1,4 +1,20 @@ -Lag time calculation and pick correction - Unfinished -===================================================== +Lag-time and pick correction +============================ -To be completed +The following is a work-in-progress tutorial for lag-calc functionality. + +An important note +----------------- +Picks generated by lag-calc are relative to the start of the template waveform; +for example, if you generated your templates with a pre_pick of 0.2, you +should expect picks to occur 0.2 seconds before the actual phase arrival. +The result of this is that origin-times will be shifted by the same amount. + +If you have applied different pre_picks to different channels when generating +templates (currently not supported by any EQcorrscan functions), then picks +generated here will not give the correct location. + +Advanced Example: Parkfield 2004 +-------------------------------- + +.. literalinclude:: ../../tutorials/lag_calc.py diff --git a/eqcorrscan/doc/tutorials/matched-filter.rst b/eqcorrscan/doc/tutorials/matched-filter.rst index 4438a2364..d2b90ac2d 100644 --- a/eqcorrscan/doc/tutorials/matched-filter.rst +++ b/eqcorrscan/doc/tutorials/matched-filter.rst @@ -28,7 +28,8 @@ can be as simple as: detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=8, threshold_type='MAD', - plotvar=False, cores=4) + trig_int=6, plotvar=False, + cores=4) This will create a list of detections, which are of class detection. You can write out the detections to a csv (colon separated) using the detection.write @@ -41,6 +42,55 @@ if this is set and the file already exists, it will just add on to the old file. detection.write('my_first_detections.csv', append=True) +Memory limitations and what to do about it +------------------------------------------ + +You may (if you are running large numbers of templates, long data durations, or using +a machine with small memory) run into errors related to memory consumption. The +most obvious symptom of this is your computer freezing because it has allocated +all of its RAM, or declaring that it cannot allocate memory. Because EQcorrscan +computes correlations in parallel for multiple templates for the same data period, +it will generate a large number of correlation vectors. At start-up, EQcorrscan +will try to allocate the memory it needs (although it then requires a little more +later to do the summation across channels), so you might find that it fills your +memory very early - this is just to increase efficiency and ensure that the memory +is available when needed. + +To get around memory limitations you can: + +* Reduce the number of templates you run in parallel at once - for example you can + make groups of a number of templates and run that group in parallel, before running + the next group in parallel. This is not much less efficient, unless you have + a machine with more CPU cores than your group-size. +* Reduce the length of data you are correlating at any one time. The default is + to use day-long files, but there is nothing stopping you using shorter waveform + durations. +* Reduce the number of channels in templates to only those that you need.
Note: + EQcorrscan will generate vectors of zeros for templates that are missing a + channel that is present in other templates, again for processing efficiency, + if not memory efficiency. +* Reduce your sampling rate. Obviously this needs to be at least twice your upper + filter frequency, but much above this is wasted data. + +As an example of this: we run 100, 5-channel templates sampled at 20 Hz through +day-long data on a 128GB RAM machine without issue; however, running 200 templates +requires too much memory. + +The three threshold parameters +------------------------------ + +The match-filter routine has three key threshold parameters: + +* **threshold_type** can either be MAD, abs or av_chan_corr. MAD stands for Median Absolute + Deviation and is the most commonly used detection statistic in matched-filter studies. + abs is the absolute cross-channel correlation sum; note that if you have different + numbers of channels in your templates then this threshold metric probably isn't for you. + av_chan_corr sets a threshold in the cross-channel correlation sum based on av_chan_corr x number of channels. +* **threshold** is the value used for the above metric. +* **trig_int** is the minimum interval in seconds for a detection using the same template. + If there are multiple detections within this window for a single template then EQcorrscan + will only give the best one (that exceeds the threshold the most). + Advanced example ---------------- diff --git a/eqcorrscan/doc/tutorials/subspace.rst b/eqcorrscan/doc/tutorials/subspace.rst new file mode 100644 index 000000000..067a0575d --- /dev/null +++ b/eqcorrscan/doc/tutorials/subspace.rst @@ -0,0 +1,111 @@ +Subspace Detection +================== + +EQcorrscan's subspace detection methods are closely modelled on the method +described by |Harris2006|, Subspace Detectors: Theory. We offer options to +multiplex data or leave them as single channels (multiplexing is significantly +faster). + +Subspace detection is implemented in an object-oriented style, whereby individual +detectors are constructed from data, then used to detect within continuous data. +At the core of the subspace routine is a Cython-based, statically-typed routine to +calculate the detection statistics. We do this to make use of numpy's vectorized +calculations, while taking advantage of the speed-ups afforded by compiling +the sliding window loop. + +Important +--------- + +How you generate your detector is likely to be the most important step; careful +selection and alignment are key, and because of this we haven't provided a total +*cookie-cutter* system for doing this. You have freedom to choose your parameters, +how to process, how you align, what traces to keep, whether you multiplex or not, +etc. This also means you have a lot of freedom to **get it wrong**. You will +have to do significant testing with your own dataset to work out what works and +what doesn't. If you find anything that doesn't work well in EQcorrscan's +system, it would be great to hear about it so that we can make it better. + +The following examples demonstrate some of the options, but not all of them. +The advanced example is the one used to test and develop subspace, and took +a fair amount of effort over a number of weeks. + +Simple example +-------------- + +To begin with you will need to create a **Detector**: + +.. code-block:: python + + from eqcorrscan.core import subspace + detector = subspace.Detector() + +This will create an empty *detector* object.
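If you have already built and saved a detector, you do not need to construct it from data again - a minimal sketch using the read_detector function and write method added in this change set (the filename here is hypothetical; use whatever you passed to detector.write):

.. code-block:: python

    from eqcorrscan.core import subspace

    # Load a previously saved detector from disk ('my_detector.h5' is a
    # hypothetical file written earlier with detector.write)
    detector = subspace.read_detector('my_detector.h5')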
These objects have various attributes, +including the data to be used as a detector (*detector.data*), alongside the full +input and output basis vector matrices (*detector.u* and *detector.v* respectively) +and the vector of singular-values (*detector.sigma*). Meta-data are also included, +including whether the detector is multiplexed or not (*detector.multiplex*), the +filters applied (*detector.lowcut*, *detector.highcut*, *detector.filt_order*, +*detector.sampling_rate*), the dimension of the subspace (*detector.dimension*), +and the name of the detector, which you can use for book-keeping +(*detector.name*). + +To populate the empty detector you need a design set of streams that have been +aligned (see clustering submodule for alignment methods). + +.. code-block:: python + + detector.construct(streams=streams, lowcut=2, highcut=9, filt_order=4, + sampling_rate=20, multiplex=True, name='Test_1') + +This will populate all the attributes of your *detector* object, and fill the +*detector.data* with the full input basis vector matrix. + +You will want to reduce the dimensions of your subspace detector, such that +you are just describing the signal, preferably with a lot of generality. Details +for selecting dimensionality should be found in |Harris2006|. To do this in +EQcorrscan simply use the *partition* method: + +.. code-block:: python + + detector.partition(4) + +This will populate *detector.data* with the first four, left-most input basis +vectors. You can test to see how much of your original design set is +described by this detector by using the *energy_capture* method: + +.. code-block:: python + + percent_capture = detector.energy_capture() + +This will return a percentage capture; you can run this for multiple dimensions +to test what dimension best suits your application. Again, details for this +selection can be found in |Harris2006|. + +Finally, to use your detector to detect within continuous data you should use +the *detect* method. This requires a stream with the same stations and channels +used in the detector, and a threshold from 0-1, where 0 is no signal, and 1 is +totally described by your detector. You can extract waveform streams for the +detections at the same time by setting the extract_detections flag to +True. + +.. code-block:: python + + detections = detector.detect(st=stream, threshold=0.5, trig_int=3) + + +Advanced Example +---------------- + +This example computes detections for a short data-period during an earthquake +sequence in the Wairarapa region of New Zealand's North Island. This example only +shows one subspace detector, but could be extended, using the various :doc:`clustering <../submodules/utils.clustering>` +routines in EQcorrscan, to create many subspace detectors. These could be run +using the :doc:`subspace_detect <../submodules/autogen/eqcorrscan.core.subspace.subspace_detect>` +function, which runs similar +detectors in parallel through the given data. + +.. literalinclude:: ../../tutorials/subspace.py + +.. |Harris2006| raw:: html + + Harris (2006) \ No newline at end of file diff --git a/eqcorrscan/doc/tutorials/template-creation.rst b/eqcorrscan/doc/tutorials/template-creation.rst index b4ad0a406..657dd76ba 100644 --- a/eqcorrscan/doc/tutorials/template-creation.rst +++ b/eqcorrscan/doc/tutorials/template-creation.rst @@ -50,6 +50,23 @@ this. In practice, five picks (and therefore traces in a template) is often sufficient for matched-filter detections. However, you should test this on your own data.
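One way to get down to roughly five good traces per template is to trim the catalog picks before cutting waveforms - a minimal sketch using the filter_picks utility added in this change set (it assumes you already have an obspy Catalog named catalog in scope):

.. code-block:: python

    from eqcorrscan.utils.catalog_utils import filter_picks

    # Keep picks only on the five most frequently picked stations, so
    # that each template ends up with around five traces
    catalog = filter_picks(catalog=catalog, top_n_picks=5)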
+Some other things that you might want to consider when generating templates +include: + +* Template length: you probably only want to include the real earthquake signal in your template, + so really long templates are probably not the best idea. +* On the same note, don't include much (if any) data before the P-phase, unless you have + good reason to - assuming your noise is random, including noise will reduce the + correlations. +* Consider your frequency band - look for peak power in the chosen waveform + **relative to the noise**. +* Coda waves often describe scatterers - scattered waves are very interesting, + but may reduce the generality of your templates. If this is what you want, include + the coda; if you want a more general template, I would suggest not including it. + For examples of this you could try generating a lot of templates from a sequence + and computing the SVD of the templates to see where the most coherent energy is + (in the first basis vector), or just computing the stack of the waveforms. + Storing templates ----------------- diff --git a/eqcorrscan/doc/updates.rst b/eqcorrscan/doc/updates.rst index 9768e6eab..f32ef328d 100644 --- a/eqcorrscan/doc/updates.rst +++ b/eqcorrscan/doc/updates.rst @@ -1,6 +1,18 @@ What's new ========== +Version 0.1.3 +------------- +* Now testing on OSX (python 2.7 and 3.5) - also added linux python 3.4; +* Add lag-calculation and tests for it; +* Change how lag-calc does the trace splitting to reduce memory usage; +* Added pick-filtering utility to clean up tutorials; +* Change template generation function names for clarity (wrappers for deprecated names); +* Add more useful error messages when picks are not associated with waveforms; +* Add example plots for more plotting functions; +* Add subspace detector including docs and tutorial. +* Add *delayed* option to all template_gen functions, set to True by default which retains old behaviour. + Version 0.1.2 ------------- diff --git a/eqcorrscan/doc/utils.rst b/eqcorrscan/doc/utils.rst index 1dc70afe0..65c2f1dfd 100644 --- a/eqcorrscan/doc/utils.rst +++ b/eqcorrscan/doc/utils.rst @@ -18,17 +18,19 @@ there are now multiple options for location of events.
:maxdepth: 1 submodules/utils.archive_read + submodules/utils.catalog_to_dd + submodules/utils.catalog_utils submodules/utils.clustering - submodules/utils.plotting - submodules/utils.findpeaks submodules/utils.despike - submodules/utils.picker + submodules/utils.findpeaks submodules/utils.mag_calc + submodules/utils.parameters + submodules/utils.picker + submodules/utils.plotting submodules/utils.pre_processing + submodules/utils.sac_util submodules/utils.seismo_logs submodules/utils.sfile_util submodules/utils.stacking submodules/utils.synth_seis submodules/utils.trigger - submodules/utils.catalog_to_dd - submodules/utils.sac_util diff --git a/eqcorrscan/grid/.gitignore b/eqcorrscan/grid/.gitignore deleted file mode 100644 index 5e7d2734c..000000000 --- a/eqcorrscan/grid/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore everything in this directory -* -# Except this file -!.gitignore diff --git a/eqcorrscan/plot/.gitignore b/eqcorrscan/plot/.gitignore deleted file mode 100644 index 2c3cd16ad..000000000 --- a/eqcorrscan/plot/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.jpg -*.pdf diff --git a/eqcorrscan/scripts/MAD_stability_test.py b/eqcorrscan/scripts/MAD_stability_test.py new file mode 100644 index 000000000..9dcc3e36f --- /dev/null +++ b/eqcorrscan/scripts/MAD_stability_test.py @@ -0,0 +1,102 @@ +""" +Simple functions to assess the stability of MAD thresholding at short-time +intervals. MAD relies on the assumption that large outliers have little +effect on otherwise normally distributed data. Previous versions of EQcorrscan +have forced the use of day-long data, which has resulted in high memory +consumption; however, there is no theoretical need for this. It would be +desirable to use shorter chunks of data to be more memory efficient; +this would allow for even more parallel processing, and, potentially, allow +for data to be processed in near real-time.
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +def test_stability(): + """Test various threshold window lengths.""" + from eqcorrscan.core.match_filter import _channel_loop + from eqcorrscan.utils import pre_processing, catalog_utils, plotting + from eqcorrscan.core import template_gen + from obspy.clients.fdsn import Client + from obspy import UTCDateTime, Trace + import numpy as np + import matplotlib.pyplot as plt + + # Do some set-up + client = Client('NCEDC') + t1 = UTCDateTime(2004, 9, 28) + t2 = t1 + 86400 + catalog = client.get_events(starttime=t1, endtime=t2, minmagnitude=2, + minlatitude=35.7, maxlatitude=36.1, + minlongitude=-120.6, maxlongitude=-120.2, + includearrivals=True) + catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'], + top_n_picks=5) + templates = template_gen.from_client(catalog=catalog, client_id='NCEDC', + lowcut=2.0, highcut=9.0, + samp_rate=20.0, filt_order=4, + length=3.0, prepick=0.15, + swin='all') + bulk_info = [(tr.stats.network, tr.stats.station, '*', + tr.stats.channel[0] + 'H' + tr.stats.channel[1], + t2 - 3600, t2) for tr in templates[0]] + st = client.get_waveforms_bulk(bulk_info) + st.merge(fill_value='interpolate') + st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, + filt_order=4, samp_rate=20.0, + debug=0, num_cores=4) + i = 0 + cccsums, no_chans, chans = _channel_loop(templates, st) + + cccsum = cccsums[0] + MAD_thresh = 8 + MAD_daylong = MAD_thresh * np.median(np.abs(cccsum)) + MAD_hours = [] + MAD_five_mins = [] + MAD_minutes = [] + for hour in range(24): + ccc_hour_slice = cccsum[hour * 3600 * st[0].stats.sampling_rate: + (hour + 1) * 3600 * + st[0].stats.sampling_rate] + MAD_hour_slice = MAD_thresh * np.median(np.abs(ccc_hour_slice)) + MAD_hours.append(MAD_hour_slice) + for five_min in range(12): + ccc_five_slice = ccc_hour_slice[five_min * 300 * + st[0].stats.sampling_rate: + (five_min + 1) * 300 * + st[0].stats.sampling_rate] + MAD_five_slice = MAD_thresh * np.median(np.abs(ccc_five_slice)) + MAD_five_mins.append(MAD_five_slice) + for minute in range(60): + ccc_min_slice = ccc_hour_slice[minute * 60 * + st[0].stats.sampling_rate: + (minute + 1) * 60 * + st[0].stats.sampling_rate] + MAD_min_slice = MAD_thresh * np.median(np.abs(ccc_min_slice)) + MAD_minutes.append(MAD_min_slice) + plotting_cccsum = Trace(cccsum) + plotting_cccsum.stats.sampling_rate = st[0].stats.sampling_rate + plotting_cccsum = plotting.chunk_data(plotting_cccsum, 1, 'Maxabs') + x = np.arange(0, 24, 1.0 / (3600 * plotting_cccsum.stats.sampling_rate)) + x = x[0:len(plotting_cccsum.data)] + plt.plot(x, plotting_cccsum.data, linewidth=0.7, color='k') + plt.plot(np.arange(0, 24, 1.0/60), MAD_minutes, label='1 minute MAD', + color='y') + plt.plot(np.arange(0, 24, 1.0/60), [-1 * m for m in MAD_minutes], + color='y') + plt.plot(np.arange(0, 24, 1.0/12), MAD_five_mins, label='5 minute MAD', + color='b') + plt.plot(np.arange(0, 24, 1.0/12), [-1 * m for m in MAD_five_mins], + color='b') + plt.plot(np.arange(24), MAD_hours, label='Hourly MAD', + color='r', linewidth=1.4) + plt.plot(np.arange(24), [-1 * m for m in MAD_hours], + color='r', linewidth=1.4) + plt.plot([0, 24], [MAD_daylong, MAD_daylong], label='Day-long MAD', + linewidth=1.5, color='g') + plt.plot([0, 24], [-1 * MAD_daylong, -1 * MAD_daylong], + linewidth=1.5, color='g') + plt.legend() + plt.show() diff --git a/eqcorrscan/tests/catalog_utils_test.py 
b/eqcorrscan/tests/catalog_utils_test.py new file mode 100644 index 000000000..bd6283a7a --- /dev/null +++ b/eqcorrscan/tests/catalog_utils_test.py @@ -0,0 +1,49 @@ +""" +Functions to test the functions within the eqcorrscan.utils.catalog_utils \ +submodule. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import unittest + + +class CatalogUtilsTests(unittest.TestCase): + def test_filter_picks(self): + """ Test various methods of filtering picks in a catalog.""" + from obspy.clients.fdsn import Client + from eqcorrscan.utils.catalog_utils import filter_picks + from obspy import UTCDateTime + client = Client(str("NCEDC")) + t1 = UTCDateTime(2004, 9, 28) + t2 = t1 + 86400 + catalog = client.get_events(starttime=t1, endtime=t2, minmagnitude=3, + minlatitude=35.7, maxlatitude=36.1, + minlongitude=-120.6, maxlongitude=-120.2, + includearrivals=True) + stations = ['BMS', 'BAP', 'PAG', 'PAN', 'PBI', 'PKY', 'YEG', 'WOF'] + channels = ['SHZ', 'SHN', 'SHE', 'SH1', 'SH2'] + networks = ['NC'] + locations = [''] + top_n_picks = 5 + filtered_catalog = filter_picks(catalog=catalog, stations=stations, + channels=channels, networks=networks, + locations=locations, + top_n_picks=top_n_picks) + for event in filtered_catalog: + for pick in event.picks: + self.assertTrue(pick.waveform_id.station_code in stations) + self.assertTrue(pick.waveform_id.channel_code in channels) + self.assertTrue(pick.waveform_id.network_code in networks) + self.assertTrue(pick.waveform_id.location_code in locations) + filtered_catalog = filter_picks(catalog=catalog, + top_n_picks=top_n_picks) + filtered_stations = [] + for event in filtered_catalog: + for pick in event.picks: + filtered_stations.append(pick.waveform_id.station_code) + self.assertEqual(len(list(set(filtered_stations))), top_n_picks) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/eqcorrscan/tests/despike_test.py b/eqcorrscan/tests/despike_test.py index b32d222a7..cd8549f07 100644 --- a/eqcorrscan/tests/despike_test.py +++ b/eqcorrscan/tests/despike_test.py @@ -10,16 +10,12 @@ class DespikeTesting(unittest.TestCase): def test_median_filter(self): """Test the median filter implementation.""" - import numpy as np - from obspy.core import Trace + from obspy import read + import os from eqcorrscan.utils.despike import median_filter - spiked = np.random.randn(1000) - # Put some spikes in there - spiked[100] = 20 - spiked[400] = 40 - spiked[450] = -40 - spiked = Trace(spiked) - spiked.stats.sampling_rate = 100 + testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data') + spiked = read(os.path.join(testing_path, 'random_spiked.ms'))[0] despiked = median_filter(tr=spiked, multiplier=2, windowlength=0.5, interp_len=0.05) self.assertNotEqual(despiked.data[100], 20) @@ -28,16 +24,14 @@ def test_median_filter(self): def test_template_remove(self): """Test the despiker based on correlations.""" + from obspy import read + import os import numpy as np from obspy.core import Trace from eqcorrscan.utils.despike import template_remove - spiked = np.random.randn(1000) - # Put some spikes in there - spiked[100] = 20 - spiked[400] = 40 - spiked[450] = -40 - spiked = Trace(spiked) - spiked.stats.sampling_rate = 100 + testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data') + spiked = read(os.path.join(testing_path, 'random_spiked.ms'))[0] template = np.zeros(10) template[2] = 1
template = Trace(template) diff --git a/eqcorrscan/tests/find_peaks_test.py b/eqcorrscan/tests/find_peaks_test.py index 6967bb48c..d846b81aa 100644 --- a/eqcorrscan/tests/find_peaks_test.py +++ b/eqcorrscan/tests/find_peaks_test.py @@ -26,6 +26,15 @@ def test_main_find_peaks(self): 'updated?') self.assertTrue((np.array(peaks) == expected_peaks).all()) + def test_coincidence(self): + """Test the coincidence trigger.""" + from eqcorrscan.utils.findpeaks import coin_trig + peaks = [[(0.5, 100), (0.3, 800), (0.3, 105)], + [(0.4, 120), (0.7, 850)]] + triggers = coin_trig(peaks, [('a', 'Z'), ('b', 'Z')], samp_rate=10, + moveout=3, min_trig=2, trig_int=1) + self.assertEqual(triggers, [(0.45, 100)]) + if __name__ == '__main__': """ Run tests diff --git a/eqcorrscan/tests/lag_calc_test.py b/eqcorrscan/tests/lag_calc_test.py new file mode 100644 index 000000000..0b7877d74 --- /dev/null +++ b/eqcorrscan/tests/lag_calc_test.py @@ -0,0 +1,47 @@ +""" +A series of test functions for the core functions in EQcorrscan. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from eqcorrscan.core import lag_calc +import unittest + + +class TestMethods(unittest.TestCase): + def test_channel_loop(self): + """Test the main lag_calc function""" + import os + from eqcorrscan.core.template_gen import from_sfile + + testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'REA', 'TEST_') + template = from_sfile(sfile=os.path.join(testing_path, + '21-1412-02L.S201309'), + lowcut=5, highcut=15, samp_rate=40, + filt_order=4, length=3, swin='all', prepick=0.05) + detection = from_sfile(sfile=os.path.join(testing_path, + '21-1759-04L.S201309'), + lowcut=5, highcut=15, samp_rate=40, + filt_order=4, length=4, swin='all', prepick=0.55) + + i, event = lag_calc._channel_loop(detection=detection, + template=template, + min_cc=0.4, i=0) + matched_traces = [] + detection_stachans = [(tr.stats.station, tr.stats.channel) + for tr in detection] + picked_stachans = [(pick.waveform_id.station_code, + pick.waveform_id.channel_code) + for pick in event.picks] + for master_tr in template: + stachan = (master_tr.stats.station, master_tr.stats.channel) + if stachan in detection_stachans: + matched_traces.append(stachan) + + for picked_stachan in picked_stachans: + self.assertTrue(picked_stachan in matched_traces) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/eqcorrscan/tests/core_test.py b/eqcorrscan/tests/match_filter_test.py similarity index 82% rename from eqcorrscan/tests/core_test.py rename to eqcorrscan/tests/match_filter_test.py index 6680fd01a..f0fedf045 100644 --- a/eqcorrscan/tests/core_test.py +++ b/eqcorrscan/tests/match_filter_test.py @@ -23,7 +23,7 @@ def test_perfect_normxcorr2(self): self.assertEqual(ccc.max(), 1.0) def test_fail_normxcorr2(self): - """Ensure it template is nan then return is nan + """Ensure if template is nan then return is nan """ import numpy as np from eqcorrscan.core.match_filter import normxcorr2 @@ -68,7 +68,7 @@ def test_set_normxcorr2(self): self.assertTrue((np.gradient(expected_ccc).round(2) == np.gradient(ccc).round(2)).all()) if not (ccc == expected_ccc).all(): - warnings.warn('The expected result was not achieved') + warnings.warn('The expected result was not achieved, but it has the same shape') def test_perfect_template_loop(self): """Check that perfect correlations are carried through. 
@@ -130,6 +130,7 @@ def test_debug_range(self): # debug == 3 fails on travis for some reason: # doesn't output any detections, fine on appveyor and local machine for debug in range(0, 3): + print('Testing for debug level=%s' % debug) kfalse, ktrue = test_match_filter(debug=debug) if ktrue > 0: self.assertTrue(kfalse / ktrue < 0.25) @@ -174,6 +175,49 @@ def test_missing_data(self): # Test case where there are non-matching streams in the data test_match_filter(template_excess=True) + def test_short_match_filter(self): + """Test using short streams of data.""" + from obspy.clients.fdsn import Client + from obspy import UTCDateTime + from eqcorrscan.core import template_gen, match_filter + from eqcorrscan.utils import pre_processing, catalog_utils + + client = Client('NCEDC') + t1 = UTCDateTime(2004, 9, 28) + t2 = t1 + 86400 + catalog = client.get_events(starttime=t1, endtime=t2, + minmagnitude=4, + minlatitude=35.7, maxlatitude=36.1, + minlongitude=-120.6, + maxlongitude=-120.2, + includearrivals=True) + catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'], + top_n_picks=5) + templates = template_gen.from_client(catalog=catalog, + client_id='NCEDC', + lowcut=2.0, highcut=9.0, + samp_rate=50.0, filt_order=4, + length=3.0, prepick=0.15, + swin='all', process_len=3600) + # Download and process an hour of data + bulk_info = [(tr.stats.network, tr.stats.station, '*', + tr.stats.channel[0] + 'H' + tr.stats.channel[1], + t2 - 3600, t2) for tr in templates[0]] + st = client.get_waveforms_bulk(bulk_info) + st.merge(fill_value='interpolate') + st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, + filt_order=4, samp_rate=50.0, + debug=0, num_cores=4) + template_names = [str(template[0].stats.starttime) + for template in templates] + detections = match_filter.match_filter(template_names=template_names, + template_list=templates, + st=st, threshold=8.0, + threshold_type='MAD', + trig_int=6.0, plotvar=False, + plotdir='.', cores=4) + def test_match_filter(samp_rate=10.0, debug=0, plotvar=False, extract_detections=False, threshold_type='MAD', diff --git a/eqcorrscan/tests/stacking_test.py b/eqcorrscan/tests/stacking_test.py index cd605c977..894832e6a 100644 --- a/eqcorrscan/tests/stacking_test.py +++ b/eqcorrscan/tests/stacking_test.py @@ -61,7 +61,7 @@ def test_phase_weighted_stack(self): self.assertEqual(len(synth[0].data), len(stack[0].data)) def test_align_traces(self): - """Test the utils.stacking.align_traces fucntion.""" + """Test the utils.stacking.align_traces function.""" # Generate synth data import numpy as np from obspy import Trace @@ -93,6 +93,67 @@ def test_align_traces(self): for shift_in, shift_out in zip(shifts_in, shifts): self.assertEqual(-1 * shift_in, shift_out) + def test_known_align(self): + """Test alignment with a known outcome.""" + from obspy import read + import os + import glob + from eqcorrscan.utils.stacking import align_traces + testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'WAV', 'TEST_') + wavefiles = sorted(glob.glob(os.path.join(testing_path, '*'))) + trace_list = [] + for wavfile in wavefiles: + st = read(wavfile) + tr = st.select(station='FRAN', channel='SH1') + if len(tr) == 1: + tr.detrend('simple').filter('bandpass', freqmin=2, freqmax=20) + trace_list.append(tr[0]) + shifts, ccs = align_traces(trace_list=trace_list, shift_len=200) + ccs = [float(str(cc)) for cc in ccs] + f =
open(os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'known_alignment.csv'), 'r') + known_shifts = [line.rstrip().split(', ') for line in f] + f.close() + known_shifts = [(float(a[0]), float(a[1])) for a in known_shifts] + known_shifts, known_ccs = zip(*known_shifts) + self.assertEqual(shifts, list(known_shifts)) + ccs = [round(cc, 3) for cc in ccs] + known_ccs = [round(cc, 3) for cc in known_ccs] + self.assertEqual(ccs, list(known_ccs)) + + def test_known_align_positive(self): + """Test a known alignment case with forced positive correlation.""" + from obspy import read + import os + import glob + from eqcorrscan.utils.stacking import align_traces + testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'WAV', 'TEST_') + wavefiles = sorted(glob.glob(os.path.join(testing_path, '*'))) + trace_list = [] + for wavfile in wavefiles: + st = read(wavfile) + tr = st.select(station='FRAN', channel='SH1') + if len(tr) == 1: + tr.detrend('simple').filter('bandpass', freqmin=2, freqmax=20) + trace_list.append(tr[0]) + shifts, ccs = align_traces(trace_list=trace_list, shift_len=200, + positive=True) + ccs = [float(str(cc)) for cc in ccs] + f = open(os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'known_positive_alignment.csv'), + 'r') + known_shifts = [line.rstrip().split(', ') for line in f] + f.close() + known_shifts = [(float(a[0]), float(a[1])) for a in known_shifts] + known_shifts, known_ccs = zip(*known_shifts) + self.assertEqual(shifts, list(known_shifts)) + ccs = [round(cc, 3) for cc in ccs] + known_ccs = [round(cc, 3) for cc in known_ccs] + self.assertEqual(ccs, list(known_ccs)) + if __name__ == '__main__': """ Run stacking tests diff --git a/eqcorrscan/tests/subspace_test.py b/eqcorrscan/tests/subspace_test.py new file mode 100644 index 000000000..e98b28025 --- /dev/null +++ b/eqcorrscan/tests/subspace_test.py @@ -0,0 +1,419 @@ +""" +Functions for testing the core.subspace functions +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from eqcorrscan.core import subspace, subspace_statistic +from eqcorrscan.core.subspace import _subspace_process +import numpy as np +import unittest +from obspy import Stream +import obspy +if int(obspy.__version__.split('.')[0]) >= 1: + from obspy.clients.fdsn import Client +else: + from obspy.fdsn import Client +from obspy import read +import os +import copy + + +class SimpleSubspaceMethods(unittest.TestCase): + """ + Tests that do not require data to be downloaded.
+ """ + def test_read(self): + """Test reading from hdf5 file""" + detector = subspace.Detector() + path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'Test_detector.h5') + detector.read(path) + self.assertEqual(detector.name, 'Tester') + self.assertEqual(detector.multiplex, False) + self.assertEqual(detector.lowcut, 2) + self.assertEqual(detector.highcut, 9) + self.assertEqual(detector.filt_order, 4) + self.assertEqual(detector.dimension, 9) + self.assertEqual(detector.sampling_rate, 20) + + def test_read_func(self): + """Check that the read function works too.""" + path = os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'Test_detector.h5') + detector = subspace.read_detector(path) + _detector = subspace.Detector() + _detector.read(path) + self.assertEqual(detector, _detector) + + def test_align(self): + """Check that alignment does as expected.""" + test_stream = Stream(read()[0]) + # Shift it + length = 15 + st1 = test_stream.copy().trim(test_stream[0].stats.starttime + 3, + test_stream[0].stats.starttime + + 3 + length) + st2 = test_stream.trim(test_stream[0].stats.starttime, + test_stream[0].stats.starttime + length) + aligned = subspace.align_design(design_set=[st1.copy(), st2.copy()], + shift_len=5, reject=0.3, + multiplex=False, plot=False) + self.assertEqual(aligned[0][0].stats.starttime, + aligned[1][0].stats.starttime) + + def test_stat(self): + """Test that the statistic calculation is the same regardless of + system.""" + detector = subspace.Detector() + detector.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'subspace', + 'stat_test_detector.h5')) + stream = read(os.path.join(os.path.abspath(os.path.dirname(__file__)), + 'test_data', 'subspace', 'test_trace.ms')) + tr_data = stream[0].data + stat = subspace_statistic.det_statistic(detector.data[0]. + astype(np.float32), + tr_data.astype(np.float32)) + self.assertEqual((stat.max().round(6) - 0.252336).round(6), 0) + + +class SubspaceTestingMethods(unittest.TestCase): + """ + Main tests for the subspace module. 
+ """ + @classmethod + def setUpClass(cls): + """Set up the test templates.""" + cls.templates, cls.st = get_test_data() + + def test_write(self): + """Test writing to an hdf5 file""" + templates = copy.deepcopy(self.templates) + # Test a multiplexed version + detector = subspace.Detector() + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=True, + name=str('Tester'), align=True, shift_len=0.2) + detector.write('Test_file.h5') + self.assertTrue(os.path.isfile('Test_file.h5')) + os.remove('Test_file.h5') + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=False, + name=str('Tester'), align=True, shift_len=0.2) + detector.write('Test_file.h5') + self.assertTrue(os.path.isfile('Test_file.h5')) + os.remove('Test_file.h5') + + def test_create_multiplexed_unaligned(self): + """Test subspace creation - checks that np.dot(U.T, U) is identity.""" + templates = copy.deepcopy(self.templates) + templates = [template.select(station='HOWZ') for template in templates] + # Test a multiplexed version + detector = subspace.Detector() + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=True, + name=str('Tester'), align=False, shift_len=0) + for u in detector.data: + identity = np.dot(u.T, u).astype(np.float16) + self.assertTrue(np.allclose(identity, + np.diag(np.ones(len(identity), + dtype=np.float16)))) + comparison_detector = \ + subspace.read_detector(os.path.join(os.path. + abspath(os.path. + dirname(__file__)), + 'test_data', 'subspace', + 'master_detector_multi_unaligned.h5')) + for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', + 'filt_order', 'dimension', 'stachans']: + print(key) + self.assertEqual(comparison_detector.__getattribute__(key), + detector.__getattribute__(key)) + for key in ['data', 'u', 'v', 'sigma']: + print(key) + list_item = detector.__getattribute__(key) + other_list = comparison_detector.__getattribute__(key) + self.assertEqual(len(list_item), len(other_list)) + for item, other_item in zip(list_item, other_list): + if not np.allclose(item, other_item): + print(item) + print(other_item) + self.assertTrue(np.allclose(item, other_item)) + # Finally check that the __eq__ method works if all the above passes. + self.assertEqual(detector, comparison_detector) + + def test_create_nonmultiplexed_unaligned(self): + """Test creation of a non-multiplexed detector.""" + # Test a non-multiplexed version + detector = subspace.Detector() + templates = copy.deepcopy(self.templates) + templates = [template.select(station='HOWZ') for template in templates] + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=False, + name=str('Tester'), align=False, shift_len=0) + for u in detector.data: + identity = np.dot(u.T, u).astype(np.float16) + self.assertTrue(np.allclose(identity, + np.diag(np.ones(len(identity), + dtype=np.float16)))) + comparison_detector = \ + subspace.read_detector(os.path.join(os.path. + abspath(os.path. 
+ dirname(__file__)), + 'test_data', 'subspace', + 'master_detector_unaligned.h5')) + for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', + 'filt_order', 'dimension', 'stachans']: + print(key) + self.assertEqual(comparison_detector.__getattribute__(key), + detector.__getattribute__(key)) + for key in ['data', 'u', 'v', 'sigma']: + print(key) + list_item = detector.__getattribute__(key) + other_list = comparison_detector.__getattribute__(key) + self.assertEqual(len(list_item), len(other_list)) + for item, other_item in zip(list_item, other_list): + if not np.allclose(item, other_item): + print(item) + print(other_item) + self.assertTrue(np.allclose(item, other_item)) + # Finally check that the __eq__ method works if all the above passes. + self.assertEqual(detector, comparison_detector) + + def test_create_multiplexed_aligned(self): + """Test subspace creation - checks that np.dot(U.T, U) is identity.""" + templates = copy.deepcopy(self.templates) + templates = [template.select(station='HOWZ') for template in templates] + # Test a multiplexed version + detector = subspace.Detector() + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=True, + name=str('Tester'), align=True, shift_len=3.0, + reject=0.2) + for u in detector.data: + identity = np.dot(u.T, u).astype(np.float16) + self.assertTrue(np.allclose(identity, + np.diag(np.ones(len(identity), + dtype=np.float16)))) + comparison_detector = \ + subspace.read_detector(os.path.join(os.path. + abspath(os.path. + dirname(__file__)), + 'test_data', 'subspace', + 'master_detector_multi.h5')) + for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', + 'filt_order', 'dimension', 'stachans']: + print(key) + self.assertEqual(comparison_detector.__getattribute__(key), + detector.__getattribute__(key)) + for key in ['data', 'u', 'v', 'sigma']: + print(key) + list_item = detector.__getattribute__(key) + other_list = comparison_detector.__getattribute__(key) + self.assertEqual(len(list_item), len(other_list)) + for item, other_item in zip(list_item, other_list): + if not np.allclose(item, other_item): + print(item) + print(other_item) + self.assertTrue(np.allclose(item, other_item)) + # Finally check that the __eq__ method works if all the above passes. + self.assertEqual(detector, comparison_detector) + + def test_create_nonmultiplexed_aligned(self): + """Test creation of a non-multiplexed detector.""" + # Test a non-multiplexed version + detector = subspace.Detector() + templates = copy.deepcopy(self.templates) + templates = [template.select(station='HOWZ') for template in templates] + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=False, + name=str('Tester'), align=True, shift_len=6, + reject=0.2) + for u in detector.data: + identity = np.dot(u.T, u).astype(np.float16) + self.assertTrue(np.allclose(identity, + np.diag(np.ones(len(identity), + dtype=np.float16)))) + comparison_detector = \ + subspace.read_detector(os.path.join(os.path. + abspath(os.path. 
+ dirname(__file__)), + 'test_data', 'subspace', + 'master_detector.h5')) + for key in ['name', 'sampling_rate', 'multiplex', 'lowcut', 'highcut', + 'filt_order', 'dimension', 'stachans']: + print(key) + self.assertEqual(comparison_detector.__getattribute__(key), + detector.__getattribute__(key)) + for key in ['data', 'u', 'v', 'sigma']: + print(key) + list_item = detector.__getattribute__(key) + other_list = comparison_detector.__getattribute__(key) + self.assertEqual(len(list_item), len(other_list)) + for item, other_item in zip(list_item, other_list): + print(item.shape) + print(other_item.shape) + print('Next') + for item, other_item in zip(list_item, other_list): + self.assertEqual(item.shape, other_item.shape) + if not np.allclose(item, other_item): + print(item) + print(other_item) + self.assertTrue(np.allclose(item, other_item)) + # Finally check that the __eq__ method works if all the above passes. + self.assertEqual(detector, comparison_detector) + + def test_refactor(self): + """Test subspace refactoring, checks that np.dot(U.T, U) is\ + identity.""" + templates = copy.deepcopy(self.templates) + # Test a multiplexed version + detector = subspace.Detector() + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=True, + name=str('Tester'), align=False, shift_len=None) + for dim in range(2, len(detector.u[0])): + detector.partition(dim) + for u in detector.data: + identity = np.dot(u.T, u).astype(np.float16) + self.assertTrue(np.allclose(identity, + np.diag(np.ones(len(identity), + dtype=np.float16)))) + # Test a non-multiplexed version + detector = subspace.Detector() + templates = copy.deepcopy(self.templates) + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=False, + name=str('Tester'), align=True, shift_len=0.2, + reject=0.0) + for dim in range(2, len(detector.u[0])): + detector.partition(dim) + for u in detector.data: + identity = np.dot(u.T, u).astype(np.float16) + self.assertTrue(np.allclose(identity, + np.diag(np.ones(len(identity), + dtype=np.float16)))) + + def test_detect(self): + """Test standard detection with known result.""" + + templates = copy.deepcopy(self.templates) + detector = subspace.Detector() + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=True, + name=str('Tester'), align=True, + shift_len=6, reject=0.2).partition(9) + st = self.st + detections = detector.detect(st=st, threshold=0.009, trig_int=2, + debug=1) + self.assertEqual(len(detections), 1) + + def test_not_multiplexed(self): + """Test that a non-multiplexed detector gets the same result.""" + templates = copy.deepcopy(self.templates) + detector = subspace.Detector() + detector.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=False, + name=str('Tester'), align=True, + shift_len=6, reject=0.2).partition(9) + st = self.st + detections = detector.detect(st=st, threshold=0.05, trig_int=4, + debug=0, moveout=2, min_trig=5) + self.assertEqual(len(detections), 2) + + def test_multi_detectors(self): + """Test the efficient looping in subspace.""" + templates = copy.deepcopy(self.templates) + detector1 = subspace.Detector() + detector1.construct(streams=templates, lowcut=2, highcut=9, + filt_order=4, sampling_rate=20, multiplex=False, + name=str('Tester1'), align=True, + shift_len=6, reject=0.2).partition(9) + templates = copy.deepcopy(self.templates) + detector2 = subspace.Detector() + 
detector2.construct(streams=templates[0:20], lowcut=2, highcut=9,
+                            filt_order=4, sampling_rate=20, multiplex=False,
+                            name=str('Tester2'), align=True,
+                            shift_len=6, reject=0.2).partition(9)
+        detections = subspace.subspace_detect(detectors=[detector1, detector2],
+                                              stream=self.st.copy(),
+                                              threshold=0.05,
+                                              trig_int=10, moveout=5,
+                                              min_trig=5,
+                                              parallel=False, num_cores=2)
+        print(detections)
+        self.assertEqual(len(detections), 5)
+        detections = subspace.subspace_detect(detectors=[detector1, detector2],
+                                              stream=self.st.copy(),
+                                              threshold=0.05,
+                                              trig_int=10, moveout=5,
+                                              min_trig=5,
+                                              parallel=True, num_cores=2)
+        print(detections)
+        self.assertEqual(len(detections), 5)
+
+    def test_partition_fail(self):
+        """Check partitioning to more dimensions than available raises."""
+        templates = copy.deepcopy(self.templates)
+        detector2 = subspace.Detector()
+        with self.assertRaises(IndexError):
+            detector2.construct(streams=templates[0:10], lowcut=2, highcut=9,
+                                filt_order=4, sampling_rate=20, multiplex=False,
+                                name=str('Tester'), align=True,
+                                shift_len=6, reject=0.2).partition(9)
+
+    # def test_subspace_process(self):
+    #     """Test the processing against a fixed result."""
+    #     self.assertEqual("This isn't written yet", "Nup")
+    #
+    # def test_subspace_svd(self):
+    #     """Test the svd with a known outcome - attempting to debug why \
+    #     detectors are different on different systems."""
+    #     self.assertEqual("This isn't written yet", "Nup")
+
+
+def get_test_data():
+    """
+    Generate a set of waveforms from GeoNet for use in subspace testing.
+
+    :return: List of design streams with no filters applied, and the
+        continuous stream to scan
+    :rtype: tuple
+    """
+    from eqcorrscan.tutorials.get_geonet_events import get_geonet_events
+    from obspy import UTCDateTime
+    from eqcorrscan.utils.catalog_utils import filter_picks
+    from eqcorrscan.utils.clustering import space_cluster
+    from obspy.clients.fdsn import Client
+
+    cat = get_geonet_events(minlat=-40.98, maxlat=-40.85, minlon=175.4,
+                            maxlon=175.5, startdate=UTCDateTime(2016, 5, 1),
+                            enddate=UTCDateTime(2016, 5, 20))
+    cat = filter_picks(catalog=cat, top_n_picks=5)
+    stachans = list(set([(pick.waveform_id.station_code,
+                          pick.waveform_id.channel_code) for event in cat
+                         for pick in event.picks]))
+    clusters = space_cluster(catalog=cat, d_thresh=2, show=False)
+    cluster = sorted(clusters, key=lambda c: len(c))[-1]
+    client = Client('GEONET')
+    design_set = []
+    for event in cluster:
+        t1 = event.origins[0].time
+        t2 = t1 + 25
+        bulk_info = []
+        for station, channel in stachans:
+            bulk_info.append(('NZ', station, '*', channel[0:2] + '?', t1, t2))
+        st = client.get_waveforms_bulk(bulk=bulk_info)
+        st.trim(t1, t2)
+        design_set.append(st)
+    t1 = UTCDateTime(2016, 5, 11, 19)
+    t2 = UTCDateTime(2016, 5, 11, 20)
+    bulk_info = [('NZ', stachan[0], '*',
+                  stachan[1][0:2] + '?',
+                  t1, t2) for stachan in stachans]
+    st = client.get_waveforms_bulk(bulk_info)
+    st.merge().detrend('simple').trim(starttime=t1, endtime=t2)
+    return design_set, st
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/eqcorrscan/tests/template_gen_test.py b/eqcorrscan/tests/template_gen_test.py
index 67e8d9f01..752749550 100644
--- a/eqcorrscan/tests/template_gen_test.py
+++ b/eqcorrscan/tests/template_gen_test.py
@@ -81,6 +81,32 @@ def test_tutorial_template_gen(self):
             del(template)
             os.remove('tutorial_template_' + str(template_no) + '.ms')
 
+    def test_not_delayed(self):
+        """Test the method of template_gen without applying delays to
+        channels."""
+        from eqcorrscan.tutorials.get_geonet_events import get_geonet_events
+        from obspy import UTCDateTime
+        from eqcorrscan.utils.catalog_utils import filter_picks
+        cat = get_geonet_events(minlat=-40.98, maxlat=-40.85, minlon=175.4,
+                                maxlon=175.5,
+                                startdate=UTCDateTime(2016, 5, 1),
+                                enddate=UTCDateTime(2016, 5, 2))
+        cat = filter_picks(catalog=cat, top_n_picks=5)
+        template = from_client(catalog=cat, client_id='GEONET',
+                               lowcut=None, highcut=None, samp_rate=100.0,
+                               filt_order=4, length=10.0, prepick=0.5,
+                               swin='all', process_len=3600,
+                               debug=0, plot=False, delayed=False)[0]
+        for tr in template:
+            tr.stats.starttime.precision = 6
+        starttime = template[0].stats.starttime
+        length = template[0].stats.npts
+        print(template)
+        for tr in template:
+            self.assertTrue(abs(tr.stats.starttime - starttime) <=
+                            tr.stats.delta)
+            self.assertEqual(tr.stats.npts, length)
+
     def test_download_various_methods(self):
         """Will download data from server and store in various databases,
         then create templates using the various methods."""
@@ -180,11 +206,60 @@ def test_seishub(self):
         try:
             template = from_seishub(test_cat, url=test_url, lowcut=1.0,
                                     highcut=5.0, samp_rate=20, filt_order=4,
-                                    length=3, prepick=0.5, swin='all')
+                                    length=3, prepick=0.5, swin='all',
+                                    process_len=300)
         except URLError:
             warnings.warn('Timed out connection to seishub')
         if 'template' in locals():
             self.assertEqual(len(template), 3)
 
+    def test_catalog_grouping(self):
+        from obspy.core.event import Catalog
+        from eqcorrscan.utils.sfile_util import read_event
+        import glob
+        import os
+        from eqcorrscan.core.template_gen import _group_events
+
+        testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
+                                    'test_data', 'REA', 'TEST_', '*')
+        catalog = Catalog()
+        sfiles = glob.glob(testing_path)
+        for sfile in sfiles:
+            catalog.append(read_event(sfile=sfile))
+        for process_len, pads in [(60, [5]),
+                                  (300, [5, 60]),
+                                  (3600, [5, 60, 300]),
+                                  (86400, [5, 60, 300])]:
+            for data_pad in pads:
+                sub_catalogs = _group_events(catalog=catalog,
+                                             process_len=process_len,
+                                             data_pad=data_pad)
+                k_events = 0
+                for sub_catalog in sub_catalogs:
+                    min_time = min([event.origins[0].time
+                                    for event in sub_catalog])
+                    min_time -= data_pad
+                    for event in sub_catalog:
+                        self.assertTrue((event.origins[0].time +
+                                         data_pad) - min_time < process_len)
+                        k_events += 1
+                self.assertEqual(k_events, len(catalog))
+
+    def test_missing_waveform_id(self):
+        from obspy import read
+        from eqcorrscan.core.template_gen import from_meta_file
+        import os
+        testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
+                                    'test_data')
+        quakeml = os.path.join(testing_path,
+                               '20130901T041115_missingwavid.xml')
+        st = read(os.path.join(testing_path, 'WAV', 'TEST_',
+                               '2013-09-01-0410-35.DFDPC_024_00'))
+        templates = from_meta_file(meta_file=quakeml, st=st, lowcut=2.0,
+                                   highcut=9.0, samp_rate=20.0, filt_order=3,
+                                   length=2, prepick=0.1, swin='S')
+        self.assertEqual(len(templates), 1)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/eqcorrscan/tests/test_data/20130901T041115_missingwavid.xml b/eqcorrscan/tests/test_data/20130901T041115_missingwavid.xml
new file mode 100644
index 000000000..541d23692
--- /dev/null
+++ b/eqcorrscan/tests/test_data/20130901T041115_missingwavid.xml
@@ -0,0 +1,429 @@
[429 added lines of QuakeML 1.2 omitted: the XML markup was lost in extraction and cannot be reconstructed. The file defines one manually picked event (origin at -43.34, 170.376, 8500 m depth, ML 0.6, "Number of stations=8") with impulsive P, S and IAML picks and station amplitudes; it is the input for test_missing_waveform_id above.]
diff --git a/eqcorrscan/tests/test_data/Test_detector.h5 b/eqcorrscan/tests/test_data/Test_detector.h5
new file mode 100644
index 000000000..0d9f3678a
Binary files /dev/null and b/eqcorrscan/tests/test_data/Test_detector.h5 differ
diff --git a/eqcorrscan/tests/test_data/day_vols/Y2012/R086.01/EORO.AF..SHZ.2012.086 b/eqcorrscan/tests/test_data/day_vols/Y2012/R086.01/EORO.AF..SHZ.2012.086
new file mode 100644
index 000000000..6b335b271
Binary files /dev/null and b/eqcorrscan/tests/test_data/day_vols/Y2012/R086.01/EORO.AF..SHZ.2012.086 differ
diff --git a/eqcorrscan/tests/test_data/day_vols/Y2012/R086.01/WHYM.AF..SHZ.2012.086 b/eqcorrscan/tests/test_data/day_vols/Y2012/R086.01/WHYM.AF..SHZ.2012.086
new file mode 100644
index 000000000..ee575ee96
Binary files /dev/null and b/eqcorrscan/tests/test_data/day_vols/Y2012/R086.01/WHYM.AF..SHZ.2012.086 differ
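A recurring assertion in the subspace tests above is that `np.dot(u.T, u)` equals the identity: the detector stores left singular vectors of the design matrix, and those must remain orthonormal through `construct()` and every `partition()` call. A minimal numpy sketch of that invariant on synthetic data (this is not EQcorrscan's internal routine):

```python
import numpy as np

# Hypothetical design matrix: 200 samples by 10 aligned training events.
design = np.random.randn(200, 10)
u, sigma, vt = np.linalg.svd(design, full_matrices=False)
# Left singular vectors are orthonormal, so U.T @ U is the identity --
# the invariant the tests assert after construct() and partition().
identity = np.dot(u.T, u)
assert np.allclose(identity, np.eye(identity.shape[0]))
```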
diff --git a/eqcorrscan/tests/test_data/expected_tutorial_detections.txt b/eqcorrscan/tests/test_data/expected_tutorial_detections.txt index 82a52ea64..3464c7250 100644 --- a/eqcorrscan/tests/test_data/expected_tutorial_detections.txt +++ b/eqcorrscan/tests/test_data/expected_tutorial_detections.txt @@ -1,24 +1,23 @@ Template name; Detection time (UTC); Number of channels; Channel list; Detection value; Threshold; Detection type -tutorial_template_3.ms; 2016-01-04T00:13:23.498399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.03043; 1.78002929688; corr -tutorial_template_3.ms; 2016-01-04T00:13:39.398399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 4.00244; 1.78002929688; corr -tutorial_template_3.ms; 2016-01-04T00:45:58.148399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 5.0; 1.78002929688; corr -tutorial_template_3.ms; 2016-01-04T01:21:42.748399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.53477; 1.78002929688; corr -tutorial_template_3.ms; 2016-01-04T01:48:52.948399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 3.67236; 1.78002929688; corr -tutorial_template_3.ms; 2016-01-04T02:17:23.898399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.20337; 1.78002929688; corr -tutorial_template_3.ms; 2016-01-04T03:30:07.898399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 1.84586; 1.78002929688; corr -tutorial_template_3.ms; 2016-01-04T09:22:14.048399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.95605; 1.78002929688; corr -tutorial_template_1.ms; 2016-01-04T01:57:21.398399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.50073; 1.81201171875; corr -tutorial_template_1.ms; 2016-01-04T02:01:03.098399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 1.88281; 1.81201171875; corr -tutorial_template_1.ms; 2016-01-04T02:10:47.898399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 5.0; 1.81201171875; corr -tutorial_template_2.ms; 2016-01-04T00:15:11.898399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 3.90381; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T00:17:02.098399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.35736; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T00:18:42.248399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.69165; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T00:23:15.148399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.11035; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T00:25:08.198399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 5.0; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T03:24:01.398399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.39648; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T09:21:42.998399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), 
(u'CPWZ', u'EZ')]; 3.37061; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T12:57:19.298399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.39505; 1.759765625; corr -tutorial_template_2.ms; 2016-01-04T14:01:08.798399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 3.09814; 1.759765625; corr -tutorial_template_0.ms; 2016-01-04T00:08:01.848399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 5.0; 2.13610839844; corr -tutorial_template_0.ms; 2016-01-04T00:49:00.398399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.14001; 2.13610839844; corr -tutorial_template_0.ms; 2016-01-04T16:29:43.298399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.30841; 2.13610839844; corr +tutorial_template_1.ms; 2016-01-04T00:15:11.898399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 3.35962; 1.73596191406; corr +tutorial_template_1.ms; 2016-01-04T00:17:02.098399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.20946; 1.73596191406; corr +tutorial_template_1.ms; 2016-01-04T00:18:42.248399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.15381; 1.73596191406; corr +tutorial_template_1.ms; 2016-01-04T00:23:15.148399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.27966; 1.73596191406; corr +tutorial_template_1.ms; 2016-01-04T00:25:08.198399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 4.64209; 1.73596191406; corr +tutorial_template_2.ms; 2016-01-04T00:13:23.498399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.50732; 1.77685546875; corr +tutorial_template_2.ms; 2016-01-04T00:13:39.398399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 4.11426; 1.77685546875; corr +tutorial_template_2.ms; 2016-01-04T00:45:58.148399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 4.66797; 1.77685546875; corr +tutorial_template_0.ms; 2016-01-04T00:08:01.848399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 4.81543; 2.16552734375; corr +tutorial_template_0.ms; 2016-01-04T00:49:00.398399Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.19128; 2.16552734375; corr +tutorial_template_1.ms; 2016-01-04T01:21:42.798394Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.3335; 1.763671875; corr +tutorial_template_1.ms; 2016-01-04T01:57:21.598394Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.82428; 1.763671875; corr +tutorial_template_2.ms; 2016-01-04T01:48:52.998394Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.74585; 1.80651855469; corr +tutorial_template_3.ms; 2016-01-04T02:01:03.048394Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 1.8984; 1.81433105469; corr +tutorial_template_3.ms; 2016-01-04T02:10:47.898394Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 3.86377; 
1.81433105469; corr +tutorial_template_2.ms; 2016-01-04T02:17:23.898394Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.01324; 1.79795837402; corr +tutorial_template_1.ms; 2016-01-04T03:24:01.398394Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.36414; 1.75337219238; corr +tutorial_template_1.ms; 2016-01-04T09:21:42.998388Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 3.41602; 1.7763671875; corr +tutorial_template_2.ms; 2016-01-04T09:22:14.048388Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 3.2688; 1.78466796875; corr +tutorial_template_2.ms; 2016-01-04T12:57:19.298393Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.57898; 1.7080078125; corr +tutorial_template_1.ms; 2016-01-04T14:01:08.798396Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.84357; 1.6630859375; corr +tutorial_template_0.ms; 2016-01-04T16:29:43.298396Z; 5; [(u'BFZ', u'HZ'), (u'DVHZ', u'EZ'), (u'HOWZ', u'EZ'), (u'POWZ', u'EZ'), (u'CPWZ', u'EZ')]; 2.41583; 2.072265625; corr diff --git a/eqcorrscan/tests/test_data/known_alignment.csv b/eqcorrscan/tests/test_data/known_alignment.csv new file mode 100644 index 000000000..fa1905db1 --- /dev/null +++ b/eqcorrscan/tests/test_data/known_alignment.csv @@ -0,0 +1,23 @@ +-0.87, 0.0980298 +0.0, 1.0 +0.08, 0.221233 +0.785, -0.0750443 +0.345, 0.0558192 +-0.985, 0.0683634 +0.25, 0.067003 +0.45, 0.0658191 +0.435, -0.0566037 +0.735, -0.059616 +0.9, 0.0425985 +0.11, 0.0425395 +0.03, 0.0513985 +0.835, -0.0596345 +0.35, 0.0620863 +0.57, 0.063744 +0.865, -0.0469746 +0.745, -0.0427076 +-0.715, -0.0545739 +-0.485, 0.0668622 +0.5, -0.0743665 +0.885, -0.0618835 +-0.515, -0.0489677 diff --git a/eqcorrscan/tests/test_data/known_positive_alignment.csv b/eqcorrscan/tests/test_data/known_positive_alignment.csv new file mode 100644 index 000000000..d6020d60a --- /dev/null +++ b/eqcorrscan/tests/test_data/known_positive_alignment.csv @@ -0,0 +1,23 @@ +-0.87, 0.0980298 +0.0, 1.0 +0.08, 0.221233 +0.68, 0.0577464 +0.345, 0.0558192 +-0.985, 0.0683634 +0.25, 0.067003 +0.45, 0.0658191 +0.975, 0.0458894 +-0.845, 0.0461639 +0.9, 0.0425985 +0.11, 0.0425395 +0.03, 0.0513985 +0.735, 0.059203 +0.35, 0.0620863 +0.57, 0.063744 +0.485, 0.0376159 +0.685, 0.038656 +-0.625, 0.0519133 +-0.485, 0.0668622 +0.46, 0.0651429 +0.245, 0.0553979 +0.5, 0.0394219 diff --git a/eqcorrscan/tests/test_data/random_spiked.ms b/eqcorrscan/tests/test_data/random_spiked.ms new file mode 100644 index 000000000..b0b89b45c Binary files /dev/null and b/eqcorrscan/tests/test_data/random_spiked.ms differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector.h5 b/eqcorrscan/tests/test_data/subspace/master_detector.h5 new file mode 100644 index 000000000..afe06e762 Binary files /dev/null and b/eqcorrscan/tests/test_data/subspace/master_detector.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector_multi.h5 b/eqcorrscan/tests/test_data/subspace/master_detector_multi.h5 new file mode 100644 index 000000000..29276c730 Binary files /dev/null and b/eqcorrscan/tests/test_data/subspace/master_detector_multi.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector_multi_unaligned.h5 b/eqcorrscan/tests/test_data/subspace/master_detector_multi_unaligned.h5 new file mode 100644 index 000000000..4f877ac3b Binary files 
/dev/null and b/eqcorrscan/tests/test_data/subspace/master_detector_multi_unaligned.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/master_detector_unaligned.h5 b/eqcorrscan/tests/test_data/subspace/master_detector_unaligned.h5 new file mode 100644 index 000000000..4fa8adfc0 Binary files /dev/null and b/eqcorrscan/tests/test_data/subspace/master_detector_unaligned.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/stat_test_detector.h5 b/eqcorrscan/tests/test_data/subspace/stat_test_detector.h5 new file mode 100644 index 000000000..a75da60d5 Binary files /dev/null and b/eqcorrscan/tests/test_data/subspace/stat_test_detector.h5 differ diff --git a/eqcorrscan/tests/test_data/subspace/test_trace.ms b/eqcorrscan/tests/test_data/subspace/test_trace.ms new file mode 100644 index 000000000..3fb4fe327 Binary files /dev/null and b/eqcorrscan/tests/test_data/subspace/test_trace.ms differ diff --git a/eqcorrscan/detections/.run_start_20150513T0442.swp b/eqcorrscan/tests/test_data/temp1.ms similarity index 63% rename from eqcorrscan/detections/.run_start_20150513T0442.swp rename to eqcorrscan/tests/test_data/temp1.ms index fd31f337f..4cca6635f 100644 Binary files a/eqcorrscan/detections/.run_start_20150513T0442.swp and b/eqcorrscan/tests/test_data/temp1.ms differ diff --git a/eqcorrscan/tests/test_data/temp2.ms b/eqcorrscan/tests/test_data/temp2.ms new file mode 100644 index 000000000..e55390824 Binary files /dev/null and b/eqcorrscan/tests/test_data/temp2.ms differ diff --git a/eqcorrscan/tests/test_data/tutorial_template_0.ms b/eqcorrscan/tests/test_data/tutorial_template_0.ms index 597b6e8bf..e012b87ef 100644 Binary files a/eqcorrscan/tests/test_data/tutorial_template_0.ms and b/eqcorrscan/tests/test_data/tutorial_template_0.ms differ diff --git a/eqcorrscan/tests/test_data/tutorial_template_1.ms b/eqcorrscan/tests/test_data/tutorial_template_1.ms index 0c3bfe03a..267bf914c 100644 Binary files a/eqcorrscan/tests/test_data/tutorial_template_1.ms and b/eqcorrscan/tests/test_data/tutorial_template_1.ms differ diff --git a/eqcorrscan/tests/test_data/tutorial_template_2.ms b/eqcorrscan/tests/test_data/tutorial_template_2.ms index 2edae22a9..89b5a6d60 100644 Binary files a/eqcorrscan/tests/test_data/tutorial_template_2.ms and b/eqcorrscan/tests/test_data/tutorial_template_2.ms differ diff --git a/eqcorrscan/tests/test_data/tutorial_template_3.ms b/eqcorrscan/tests/test_data/tutorial_template_3.ms index d0c7a1087..24cfa7a00 100644 Binary files a/eqcorrscan/tests/test_data/tutorial_template_3.ms and b/eqcorrscan/tests/test_data/tutorial_template_3.ms differ diff --git a/eqcorrscan/tests/tutorials_test.py b/eqcorrscan/tests/tutorials_test.py index 8cafe33af..6b5e20c33 100644 --- a/eqcorrscan/tests/tutorials_test.py +++ b/eqcorrscan/tests/tutorials_test.py @@ -46,10 +46,6 @@ def test_match_filter(self): self.assertIn(detection.detect_time, expected_times, msg='Detection at %s is not in expected detections' % detection.detect_time) - # self.assertIn(round(detection.detect_val, 4), - # expected_correlations, - # msg='Detection with cross-correlation value %s not' + - # ' in expected detections' % detection.detect_val) if len(expected_detections) > len(tutorial_detections): # This is a fail but we are trying to debug actual_times = [tutorial_detection.detect_time @@ -58,12 +54,48 @@ def test_match_filter(self): self.assertIn(detection.detect_time, actual_times, msg='Expected detection at %s was not made' % detection.detect_time) - self.assertEqual(len(tutorial_detections), 23) + 
self.assertEqual(len(tutorial_detections), 22) # Cleanup the templates templates = glob.glob('tutorial_template_?.ms') for template in templates: os.remove(template) + def test_lag_calc(self): + """Test the lag calculation tutorial.""" + from eqcorrscan.tutorials.lag_calc import run_tutorial + + shift_len = 0.2 + min_mag = 4 + detections, picked_catalog, templates, template_names = \ + run_tutorial(min_magnitude=min_mag, shift_len=shift_len) + + self.assertEqual(len(picked_catalog), len(detections)) + self.assertEqual(len(detections), 8) + for event, detection in zip(picked_catalog, detections): + template = [t[0] for t in zip(templates, template_names) + if t[1] == detection.template_name][0] + template_stachans = [(tr.stats.station, tr.stats.channel) + for tr in template] + for pick in event.picks: + # First check that there is a template for the pick + stachan = (pick.waveform_id.station_code, + pick.waveform_id.channel_code) + self.assertTrue(stachan in template_stachans) + # Now check that the pick time is within +/- shift_len of + # The template + tr = template.select(station=stachan[0], channel=stachan[1])[0] + delay = tr.stats.starttime - \ + template.sort(['starttime'])[0].stats.starttime + re_picked_delay = pick.time - (detection.detect_time + delay) + self.assertTrue(abs(re_picked_delay) < shift_len) + + def test_subspace(self): + """Test the subspace tutorial.""" + from eqcorrscan.tutorials.subspace import run_tutorial + + detections = run_tutorial(plot=False) + self.assertEqual(len(detections), 2) + if __name__ == '__main__': """ Run tutorial tests diff --git a/eqcorrscan/tutorials/get_geonet_events.py b/eqcorrscan/tutorials/get_geonet_events.py new file mode 100644 index 000000000..ffd7671f2 --- /dev/null +++ b/eqcorrscan/tutorials/get_geonet_events.py @@ -0,0 +1,175 @@ +""" +Functions to aid downloading of GeoNet events into obspy catalog objects. + +:copyright: + Calum Chamberlain +:licence: + GNU Lesser General Public License, Version 3 + (https://www.gnu.org/copyleft/lesser.html) +""" + + +def get_geonet_ids(minlat, maxlat, minlon, maxlon, mindepth=None, + maxdepth=None, minmag=None, maxmag=None, + startdate=None, enddate=None): + """Generate quakesearch URL query and extract publicID from returned csv. + + :type minlat: float + :param minlat: Southern edge of bounding box. + :type maxlat: float + :param maxlat: Northern edge of bounding box. + :type minlon: float + :param minlon: Western edge of bounding box. + :type maxlon: float + :param maxlon: Eastern edge of bounding box. + :type mindepth: float + :param mindepth: Minimum depth for events (depth is positive down). + :type maxdepth: float + :param maxdepth: Maximum depth for events (depth is positive down). + :type minmag: float + :param minmag: Minimum magnitude to extract. + :type maxmag: float + :param maxmag: Maximum magnitude to extract. + :type startdate: obspy.core.UTCDateTime + :param startdate: Start to search. + :type enddate: obspy.core.UTCDateTime + :param enddate: End of search. + + :returns: list of str of event ids + """ + import csv + import sys + import io + if sys.version_info.major == 2: + from urllib2 import urlopen + else: + from urllib.request import urlopen + + base_url = "http://quakesearch.geonet.org.nz/services/1.0.0/csv?" 
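For orientation, this is the shape of query URL that `get_geonet_ids` is assembling at this point; the bounding box and date below are illustrative values, not taken from the source:

```python
from obspy import UTCDateTime

base_url = "http://quakesearch.geonet.org.nz/services/1.0.0/csv?"
# bbox is ordered minlon, minlat, maxlon, maxlat, as in the function.
bbox = ','.join(['175.4', '-40.98', '175.5', '-40.85'])
start = UTCDateTime(2016, 5, 1)
url = (base_url + "bbox=" + bbox +
       "&startdate=" + start.strftime('%Y-%m-%dT%H:%M:%S'))
print(url)
# http://quakesearch.geonet.org.nz/services/1.0.0/csv?bbox=175.4,-40.98,175.5,-40.85&startdate=2016-05-01T00:00:00
```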
+    bbox_url = "bbox=" + ','.join([str(minlon), str(minlat),
+                                   str(maxlon), str(maxlat)])
+    url = base_url + bbox_url
+    if mindepth:
+        url += "&mindepth=" + str(mindepth)
+    if maxdepth:
+        url += "&maxdepth=" + str(maxdepth)
+    if minmag:
+        url += "&minmag=" + str(minmag)
+    if maxmag:
+        url += "&maxmag=" + str(maxmag)
+    if startdate:
+        startdate_url = "&startdate=" + startdate.strftime('%Y-%m-%dT%H:%M:%S')
+        url += startdate_url
+    if enddate:
+        enddate_url = "&enddate=" + enddate.strftime('%Y-%m-%dT%H:%M:%S')
+        url += enddate_url
+    print("Downloading info from:")
+    print(url)
+    response = urlopen(url)
+    if sys.version_info.major == 3:
+        quake_search = csv.reader(io.TextIOWrapper(response))
+    else:
+        quake_search = csv.reader(response)
+
+    header = next(quake_search)
+    # Usually publicID is the first column, error if not true
+    if not header[0] == 'publicid':
+        raise IOError('Unexpected format, first column is not publicid')
+    event_ids = [row[0] for row in quake_search]
+    return event_ids
+
+
+def _get_geonet_pubids(publicids, parallel=True):
+    """
+    Get GeoNet events by public ID while GeoNet's FDSN service does not
+    support get_events.
+
+    :type publicids: list
+    :param publicids: List of public id numbers for events wanted.
+
+    :returns: Catalog of events
+    :rtype: obspy.core.event.Catalog
+    """
+    import obspy
+    if int(obspy.__version__.split('.')[0]) > 0:
+        from obspy.clients.fdsn import Client
+    else:
+        from obspy.fdsn import Client
+    from obspy.core.event import Catalog
+    from multiprocessing import Pool, cpu_count
+
+    client = Client('GEONET')
+    catalog = Catalog()
+    # Multi-process this bad-boy
+    if not parallel:
+        for publicid in publicids:
+            catalog += _inner_get_event(publicid=publicid, client=client)
+    else:
+        pool = Pool(processes=cpu_count())
+        results = [pool.apply_async(_inner_get_event, args=(publicid, client))
+                   for publicid in publicids]
+        pool.close()
+        cat_list = [p.get() for p in results]
+        pool.join()
+        for ev in cat_list:
+            catalog += ev
+    return catalog
+
+
+def _inner_get_event(publicid, client):
+    """
+    Inner loop for parallel processing.
+
+    :type publicid: str
+    :param publicid: GeoNet public ID
+    :return: catalog
+    """
+    import warnings
+    from obspy.clients.fdsn.header import FDSNException
+    from obspy import read_events
+    from obspy.core.event import Catalog
+    try:
+        data_stream = client._download('http://quakeml.geonet.org.nz/' +
+                                       'quakeml/1.2/' + publicid)
+        data_stream.seek(0, 0)
+        catalog = read_events(data_stream, format="quakeml")
+        data_stream.close()
+    except FDSNException:
+        warnings.warn('Unable to download event: ' + publicid)
+        # Return an empty catalog rather than raising a NameError below.
+        catalog = Catalog()
+    return catalog
+
+
+def get_geonet_events(minlat, maxlat, minlon, maxlon, mindepth=None,
+                      maxdepth=None, minmag=None, maxmag=None,
+                      startdate=None, enddate=None):
+    """Get GeoNet events within the given bounds as an obspy catalog.
+
+    :type minlat: float
+    :param minlat: Southern edge of bounding box.
+    :type maxlat: float
+    :param maxlat: Northern edge of bounding box.
+    :type minlon: float
+    :param minlon: Western edge of bounding box.
+    :type maxlon: float
+    :param maxlon: Eastern edge of bounding box.
+    :type mindepth: float
+    :param mindepth: Minimum depth for events (depth is positive down).
+    :type maxdepth: float
+    :param maxdepth: Maximum depth for events (depth is positive down).
+    :type minmag: float
+    :param minmag: Minimum magnitude to extract.
+    :type maxmag: float
+    :param maxmag: Maximum magnitude to extract.
+    :type startdate: obspy.core.UTCDateTime
+    :param startdate: Start to search.
+    :type enddate: obspy.core.UTCDateTime
+    :param enddate: End of search. 
+ + :returns: catalog of events + :rtype: obspy.core.event.Catalog + """ + pubids = get_geonet_ids(minlat=minlat, maxlat=maxlat, minlon=minlon, + maxlon=maxlon, mindepth=mindepth, + maxdepth=maxdepth, minmag=minmag, + maxmag=maxmag, startdate=startdate, + enddate=enddate) + catalog = _get_geonet_pubids(pubids) + return catalog diff --git a/eqcorrscan/tutorials/lag_calc.py b/eqcorrscan/tutorials/lag_calc.py new file mode 100644 index 000000000..dbad2b4cb --- /dev/null +++ b/eqcorrscan/tutorials/lag_calc.py @@ -0,0 +1,96 @@ +"""Tutorial to illustrate the lag_calc usage.""" + + +def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4): + import obspy + if int(obspy.__version__.split('.')[0]) >= 1: + from obspy.clients.fdsn import Client + else: + from obspy.fdsn import Client + from obspy.core.event import Catalog + from obspy import UTCDateTime + from eqcorrscan.core import template_gen, match_filter, lag_calc + from eqcorrscan.utils import pre_processing, catalog_utils + + client = Client('NCEDC') + t1 = UTCDateTime(2004, 9, 28) + t2 = t1 + 86400 + print('Downloading catalog') + catalog = client.get_events(starttime=t1, endtime=t2, + minmagnitude=min_magnitude, + minlatitude=35.7, maxlatitude=36.1, + minlongitude=-120.6, maxlongitude=-120.2, + includearrivals=True) + # We don't need all the picks, lets take the information from the + # five most used stations - note that this is done to reduce computational + # costs. + catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'], + top_n_picks=5) + print('Generating templates') + templates = template_gen.from_client(catalog=catalog, client_id='NCEDC', + lowcut=2.0, highcut=9.0, + samp_rate=50.0, filt_order=4, + length=3.0, prepick=0.15, + swin='all', process_len=3600) + start_time = UTCDateTime(2004, 9, 28, 17) + end_time = UTCDateTime(2004, 9, 28, 20) + process_len = 1800 + chunks = [] + chunk_start = start_time + while chunk_start < end_time: + chunk_end = chunk_start + process_len + if chunk_end > end_time: + chunk_end = end_time + chunks.append((chunk_start, chunk_end)) + chunk_start += process_len + + all_detections = [] + picked_catalog = Catalog() + template_names = [str(template[0].stats.starttime) + for template in templates] + for t1, t2 in chunks: + print('Downloading and processing for start-time: %s' % t1) + # Download and process the data + bulk_info = [(tr.stats.network, tr.stats.station, '*', + tr.stats.channel[0] + 'H' + tr.stats.channel[1], + t1, t2) for tr in templates[0]] + # Just downloading a chunk of data + st = client.get_waveforms_bulk(bulk_info) + st.merge(fill_value='interpolate') + st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, + filt_order=4, samp_rate=50.0, + debug=0, num_cores=num_cores) + detections = match_filter.match_filter(template_names=template_names, + template_list=templates, + st=st, threshold=8.0, + threshold_type='MAD', + trig_int=6.0, plotvar=False, + plotdir='.', cores=num_cores) + # Extract unique detections from set. 
+ unique_detections = [] + for master in detections: + keep = True + for slave in detections: + if not master == slave and\ + abs(master.detect_time - slave.detect_time) <= 1.0: + # If the events are within 1s of each other then test which + # was the 'best' match, strongest detection + if not master.detect_val > slave.detect_val: + keep = False + break + if keep: + unique_detections.append(master) + all_detections += unique_detections + + picked_catalog += lag_calc.lag_calc(detections=unique_detections, + detect_data=st, + template_names=template_names, + templates=templates, + shift_len=shift_len, min_cc=0.5, + interpolate=True, plot=False) + # Return all of this so that we can use this function for testing. + return all_detections, picked_catalog, templates, template_names + +if __name__ == '__main__': + from multiprocessing import cpu_count + run_tutorial(min_magnitude=4, num_cores=cpu_count()) diff --git a/eqcorrscan/tutorials/match_filter.py b/eqcorrscan/tutorials/match_filter.py index 853bea4e4..a5a8b2a08 100644 --- a/eqcorrscan/tutorials/match_filter.py +++ b/eqcorrscan/tutorials/match_filter.py @@ -12,6 +12,7 @@ def run_tutorial(plot=False): from eqcorrscan.utils import plotting from eqcorrscan.core import match_filter import glob + from multiprocessing import cpu_count # This import section copes with namespace changes between obspy versions import obspy @@ -38,40 +39,47 @@ def run_tutorial(plot=False): # Get a unique list of stations stations = list(set(stations)) - # We are going to look for detections on the day of our template, however, to - # generalize, we will write a loop through the days between our templates, in - # this case that is only one day. - - template_days = [] - for template in templates: - template_days.append(template[0].stats.starttime.date) - template_days = sorted(template_days) - kdays = (template_days[-1] - template_days[0]).days + 1 + # We will loop through the data chunks at a time, these chunks can be any + # size, in general we have used 1 day as our standard, but this can be + # as short as five minutes (for MAD thresholds) or shorter for other + # threshold metrics. However the chunk size should be the same as your + # template process_len. + + # You should test different parameters!!! + start_time = UTCDateTime(2016, 1, 4) + end_time = UTCDateTime(2016, 1, 5) + process_len = 3600 + chunks = [] + chunk_start = start_time + while chunk_start < end_time: + chunk_end = chunk_start + process_len + if chunk_end > end_time: + chunk_end = end_time + chunks.append((chunk_start, chunk_end)) + chunk_start += process_len unique_detections = [] + detections = [] - for i in range(kdays): - t1 = UTCDateTime(template_days[0]) + (86400 * i) - t2 = t1 + 86400 + # Set up a client to access the GeoNet database + client = Client("GEONET") + # Note that these chunks do not rely on each other, and could be paralleled + # on multiple nodes of a distributed cluster, see the SLURM tutorial for + # an example of this. + for t1, t2 in chunks: # Generate the bulk information to query the GeoNet database bulk_info = [] for station in stations: bulk_info.append(('NZ', station[0], '*', station[1][0] + 'H' + station[1][-1], t1, t2)) - # Set up a client to access the GeoNet database - client = Client("GEONET") - # Note this will take a little while. 
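The de-duplication loop above, repeated in the match_filter tutorial below, keeps only the strongest of any detections falling within 1 s of each other. A standalone sketch of the rule, using a hypothetical stand-in for the eqcorrscan Detection class and plain seconds for detect times:

```python
from collections import namedtuple

# Hypothetical stand-in for eqcorrscan Detection objects.
Detection = namedtuple('Detection', ['detect_time', 'detect_val'])

def unique_detections(detections, tolerance=1.0):
    """Keep the strongest detection within any tolerance-second window."""
    unique = []
    for master in detections:
        keep = True
        for slave in detections:
            if master != slave and \
               abs(master.detect_time - slave.detect_time) <= tolerance and \
               not master.detect_val > slave.detect_val:
                # A stronger (or equal) detection exists nearby: drop master.
                keep = False
                break
        if keep:
            unique.append(master)
    return unique

print(unique_detections([Detection(0.0, 8.5), Detection(0.4, 9.1),
                         Detection(30.0, 8.2)]))
# Drops the 8.5 at t=0.0 in favour of the 9.1 at t=0.4; keeps t=30.0.
```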
print('Downloading seismic data, this may take a while') st = client.get_waveforms_bulk(bulk_info) # Merge the stream, it will be downloaded in chunks st.merge(fill_value='interpolate') - # Work out what data we actually have to cope with possible lost data - stations = list(set([tr.stats.station for tr in st])) - # Set how many cores we want to parallel across, we will set this to four # as this is the number of templates, if your machine has fewer than four # cores/CPUs the multiprocessing will wait until there is a free core. @@ -79,42 +87,46 @@ def run_tutorial(plot=False): # increase in speed as only detections for each template are computed in # parallel. It may also slow your processing by using more memory than # needed, to the extent that swap may be filled. - ncores = 4 + if cpu_count() < 4: + ncores = cpu_count() + else: + ncores = 4 # Pre-process the data to set frequency band and sampling rate # Note that this is, and MUST BE the same as the parameters used for the # template creation. print('Processing the seismic data') - st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0, - filt_order=4, samp_rate=20.0, - debug=0, starttime=t1, num_cores=ncores) + st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, + filt_order=4, samp_rate=20.0, + debug=2, num_cores=ncores, starttime=t1, + endtime=t2) # Convert from list to stream st = Stream(st) # Now we can conduct the matched-filter detection - detections = match_filter.match_filter(template_names=template_names, - template_list=templates, - st=st, threshold=8.0, - threshold_type='MAD', - trig_int=6.0, plotvar=plot, - plotdir='.', cores=ncores, - tempdir=False, debug=1, - plot_format='jpg') - - # Now lets try and work out how many unique events we have just to compare - # with the GeoNet catalog of 20 events on this day in this sequence - for master in detections: - keep = True - for slave in detections: - if not master == slave and\ - abs(master.detect_time - slave.detect_time) <= 1.0: - # If the events are within 1s of each other then test which - # was the 'best' match, strongest detection - if not master.detect_val > slave.detect_val: - keep = False - break - if keep: - unique_detections.append(master) + detections += match_filter.match_filter(template_names=template_names, + template_list=templates, + st=st, threshold=8.0, + threshold_type='MAD', + trig_int=6.0, plotvar=plot, + plotdir='.', cores=ncores, + tempdir=False, debug=1, + plot_format='jpg') + + # Now lets try and work out how many unique events we have just to compare + # with the GeoNet catalog of 20 events on this day in this sequence + for master in detections: + keep = True + for slave in detections: + if not master == slave and\ + abs(master.detect_time - slave.detect_time) <= 1.0: + # If the events are within 1s of each other then test which + # was the 'best' match, strongest detection + if not master.detect_val > slave.detect_val: + keep = False + break + if keep: + unique_detections.append(master) print('We made a total of ' + str(len(unique_detections)) + ' detections') diff --git a/eqcorrscan/tutorials/subspace.py b/eqcorrscan/tutorials/subspace.py new file mode 100644 index 000000000..73b22e90c --- /dev/null +++ b/eqcorrscan/tutorials/subspace.py @@ -0,0 +1,98 @@ +""" +Advanced subspace tutorial to show some of the capabilities of the method. + +This example uses waveforms from a known earthquake sequence (in the Wairarapa +region north of Wellington, New Zealand). 
The catalogue locations etc. can
+be downloaded from this link:
+
+http://quakesearch.geonet.org.nz/services/1.0.0/csv?bbox=175.37956,-40.97912,175.53097,-40.84628&startdate=2015-7-18T2:00:00&enddate=2016-7-18T3:00:00
+
+"""
+
+
+def run_tutorial(plot=False, multiplex=True, return_streams=False):
+    """
+    Run the tutorial.
+
+    :return: detections
+    """
+    # We are going to use data from the GeoNet (New Zealand) catalogue. GeoNet
+    # do not implement the full FDSN system yet, so we have a hack to get
+    # around this. It is not strictly part of EQcorrscan, so we haven't
+    # included it here, but you can find it in the tutorials directory of the
+    # github repository
+    import obspy
+    if int(obspy.__version__.split('.')[0]) >= 1:
+        from obspy.clients.fdsn import Client
+    else:
+        from obspy.fdsn import Client
+    from eqcorrscan.tutorials.get_geonet_events import get_geonet_events
+    from obspy import UTCDateTime
+    from eqcorrscan.utils.catalog_utils import filter_picks
+    from eqcorrscan.utils.clustering import space_cluster
+    from eqcorrscan.core import subspace
+
+    cat = get_geonet_events(minlat=-40.98, maxlat=-40.85, minlon=175.4,
+                            maxlon=175.5, startdate=UTCDateTime(2016, 5, 1),
+                            enddate=UTCDateTime(2016, 5, 20))
+    # This gives us a catalog of events - it takes a while to download all
+    # the information, so give it a bit!
+    # We will generate a five station, multi-channel detector.
+    cat = filter_picks(catalog=cat, top_n_picks=5)
+    stachans = list(set([(pick.waveform_id.station_code,
+                          pick.waveform_id.channel_code) for event in cat
+                         for pick in event.picks]))
+    # In this tutorial we will only work on one cluster, defined spatially.
+    # You can work on multiple clusters, or try the whole set.
+    clusters = space_cluster(catalog=cat, d_thresh=2, show=False)
+    # We will work on the largest cluster
+    cluster = sorted(clusters, key=lambda c: len(c))[-1]
+    # This cluster contains 32 events; we will now download and trim the
+    # waveforms. Note that each channel must start at the same time and be
+    # the same length for multiplexing. If not multiplexing, EQcorrscan will
+    # maintain the individual differences in time between channels and delay
+    # the detection statistics by that amount before stacking and detection.
+    client = Client('GEONET')
+    design_set = []
+    for event in cluster:
+        t1 = event.origins[0].time
+        t2 = t1 + 25
+        bulk_info = []
+        for station, channel in stachans:
+            bulk_info.append(('NZ', station, '*', channel[0:2] + '?', t1, t2))
+        st = client.get_waveforms_bulk(bulk=bulk_info)
+        st.trim(t1, t2)
+        design_set.append(st)
+    # Construction of the detector will process the traces, then align them,
+    # before multiplexing.
+    detector = subspace.Detector()
+    detector.construct(streams=design_set, lowcut=2.0, highcut=9.0,
+                       filt_order=4, sampling_rate=20, multiplex=multiplex,
+                       name='Wairarapa1', align=True, reject=0.2,
+                       shift_len=6, plot=plot).partition(9)
+    if plot:
+        detector.plot()
+    # We also want the continuous stream to detect in.
+    t1 = UTCDateTime(2016, 5, 11, 19)
+    t2 = UTCDateTime(2016, 5, 11, 20)
+    # We are going to look in a single hour just to minimize cost, but you
+    # can run for much longer.
+    bulk_info = [('NZ', stachan[0], '*',
+                  stachan[1][0] + '?' 
+ stachan[1][-1], + t1, t2) for stachan in detector.stachans] + st = client.get_waveforms_bulk(bulk_info) + st.merge().detrend('simple').trim(starttime=t1, endtime=t2) + # We set a very low threshold because the detector is not that great, we + # haven't aligned it particularly well - however, at this threshold we make + # two real detections. + detections, det_streams = detector.detect(st=st, threshold=0.005, + trig_int=2, + extract_detections=True) + if return_streams: + return detections, det_streams + else: + return detections + + +if __name__ == '__main__': + run_tutorial() diff --git a/eqcorrscan/tutorials/template_creation.py b/eqcorrscan/tutorials/template_creation.py index 12d237687..668653431 100644 --- a/eqcorrscan/tutorials/template_creation.py +++ b/eqcorrscan/tutorials/template_creation.py @@ -9,7 +9,7 @@ def mktemplates(network_code='GEONET', '2016p008194'], plot=True): """Functional wrapper to make templates""" - from collections import Counter + from eqcorrscan.utils.catalog_utils import filter_picks from eqcorrscan.core import template_gen # This import section copes with namespace changes between obspy versions @@ -49,31 +49,7 @@ def mktemplates(network_code='GEONET', # We don't need all the picks, lets take the information from the # five most used stations - note that this is done to reduce computational # costs. - all_picks = [] - for event in catalog: - all_picks += [(pick.waveform_id.station_code, - pick.waveform_id.channel_code) for pick in event.picks] - # Python 3.x and python 2.7 do not sort in the same way, this is a cludge - # to work around that... - counted = Counter(all_picks).most_common() - # Going to take an initial set that all have atleast 1 less pick than the - # highest pick-count... - all_picks = [] - for i in range(counted[0][1]): - highest = [item[0] for item in counted if item[1] >= counted[0][1] - i] - # Sort them by alphabetical order in station - highest = sorted(highest, key=lambda tup: tup[0]) - all_picks += highest - if len(all_picks) > 5: - all_picks = all_picks[0:5] - break - - for event in catalog: - if len(event.picks) == 0: - raise IOError('No picks found') - event.picks = [pick for pick in event.picks - if (pick.waveform_id.station_code, - pick.waveform_id.channel_code) in all_picks] + catalog = filter_picks(catalog, top_n_picks=5) # Now we can generate the templates templates = template_gen.from_client(catalog=catalog, @@ -81,9 +57,10 @@ def mktemplates(network_code='GEONET', lowcut=2.0, highcut=9.0, samp_rate=20.0, filt_order=4, length=3.0, prepick=0.15, - swin='all', debug=0, plot=plot) + swin='all', process_len=3600, + debug=0, plot=plot) - # We now have a series of templates! Using Obspys Stream.write() method we + # We now have a series of templates! Using Obspy's Stream.write() method we # can save these to disk for later use. We will do that now for use in the # following tutorials. 
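The subspace tutorial above notes that every channel must share a start time and length when multiplexing. Multiplexing here means interleaving the channels sample-by-sample into one vector; a concept sketch (not EQcorrscan's internal routine):

```python
import numpy as np

# Two channels sampled at the same instants; multiplexing interleaves
# them sample-by-sample, which is why a common start time and length
# are required when multiplex=True.
chan_a = np.array([1.0, 2.0, 3.0])
chan_b = np.array([10.0, 20.0, 30.0])
multiplexed = np.vstack([chan_a, chan_b]).T.flatten()
print(multiplexed)  # [ 1. 10.  2. 20.  3. 30.]
```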
 for i, template in enumerate(templates):
diff --git a/eqcorrscan/utils/__init__.py b/eqcorrscan/utils/__init__.py
index b8f112968..f52207536 100644
--- a/eqcorrscan/utils/__init__.py
+++ b/eqcorrscan/utils/__init__.py
@@ -32,9 +32,11 @@
 
 import warnings
 
-__all__ = ['sfile_util', 'pre_processing', 'findpeaks', 'plotting',
-           'mag_calc', 'catalog_to_dd', 'clustering',
-           'seismo_logs', 'stacking', 'synth_seis', 'timer']
+__all__ = ['archive_read', 'catalog_to_dd', 'catalog_utils',
+           'clustering', 'despike', 'findpeaks', 'mag_calc',
+           'parameters', 'picker', 'plotting', 'pre_processing',
+           'sac_util', 'seismo_logs', 'sfile_util', 'stacking',
+           'synth_seis', 'timer', 'trigger']
 
 # Cope with changes to name-space to remove most of the camel-case
 _import_map = {
diff --git a/eqcorrscan/utils/archive_read.py b/eqcorrscan/utils/archive_read.py
index 8e1bc5ee1..80c60aa89 100644
--- a/eqcorrscan/utils/archive_read.py
+++ b/eqcorrscan/utils/archive_read.py
@@ -19,7 +19,7 @@
 from __future__ import unicode_literals
 
 
-def read_data(archive, arc_type, day, stachans):
+def read_data(archive, arc_type, day, stachans, length=86400):
     """
     Function to read the appropriate data from your archive for your selected \
     day.
@@ -27,14 +27,17 @@
     :type archive: str
     :param archive: The archive source - if arc_type is seishub, this should \
         be a url, if the arc_type is FDSN then this can be either a url or a \
-        known obspy client.
+        known obspy client. If arc_type is day_vols, then this is the path \
+        to the top directory.
     :type arc_type: str
     :param arc_type: The type of archive, can be: seishub, FDSN, day_vols
     :type day: datetime.date
     :param day: Date to retrieve data for
     :type stachans: list
     :param stachans: List of tuples of Stations and channels to try and get,
         will not fail if stations are not available, but will warn.
+    :type length: float
+    :param length: Data length to extract in seconds, defaults to 1 day.
 
     :returns: obspy.core.stream.Stream
 
@@ -70,8 +73,22 @@ def read_data(archive, arc_type, day, stachans):
     1 Trace(s) in Stream:
     NZ.FOZ.10.HHZ | 2012-03-25T23:59:57.018393Z - 2012-03-27T00:00:00.688393Z | 100.0 Hz, 8640368 samples
+
+    .. rubric:: Example, local day-volumes
+
+    >>> from eqcorrscan.utils.archive_read import read_data
+    >>> from obspy import UTCDateTime
+    >>> t1 = UTCDateTime(2012, 3, 26)
+    >>> stachans = [('WHYM', 'SHZ'), ('EORO', 'SHZ')]
+    >>> st = read_data('eqcorrscan/tests/test_data/day_vols', 'day_vols',
+    ... 
t1, stachans) + >>> print(st) + 2 Trace(s) in Stream: + AF.WHYM..SHZ | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.000000Z | 1.0 Hz, 86400 samples + AF.EORO..SHZ | 2012-03-26T00:00:00.000000Z - 2012-03-26T23:59:59.000000Z | 1.0 Hz, 86400 samples """ import obspy + import os from obspy.clients.fdsn.header import FDSNException if arc_type.lower() == 'seishub': if int(obspy.__version__.split('.')[0]) >= 1: @@ -99,7 +116,7 @@ def read_data(archive, arc_type, day, stachans): available_stations_map = available_stations if station_map not in available_stations_map: msg = ' '.join([station[0], station_map[1], 'is not available for', - day.strftime('%d/%m/%Y')]) + day.strftime('%Y/%m/%d')]) warnings.warn(msg) continue if arc_type.lower() in ['seishub', 'fdsn']: @@ -109,7 +126,7 @@ def read_data(archive, arc_type, day, stachans): location='*', channel=station_map[1], starttime=UTCDateTime(day), - endtime=UTCDateTime(day) + 86400) + endtime=UTCDateTime(day) + length) except FDSNException: warnings.warn('No data on server despite station being ' + 'available...') @@ -121,7 +138,7 @@ def read_data(archive, arc_type, day, stachans): 'R%j.01')), station_map[0], station_map[1]) for wavfile in wavfiles: - st += read(wavfile) + st += read(wavfile, starttime=day, endtime=day + length) st = obspy.Stream(st) return st @@ -142,7 +159,6 @@ def _get_station_file(path_name, station, channel, debug=0): from multiprocessing import Pool, cpu_count pool = Pool(processes=cpu_count()) wavfiles = glob.glob(path_name + os.sep + '*') - out_files = [] results = [pool.apply_async(_parallel_checking_loop, args=(wavfile, station, channel, debug)) @@ -184,12 +200,15 @@ def _check_available_data(archive, arc_type, day): ..note:: Currently the seishub options are untested. """ from obspy import read, UTCDateTime + import glob + import os + available_stations = [] if arc_type.lower() == 'day_vols': - wavefiles = glob.glob(os.path.join(archive, day.strftime('%Y'), - day.strftime('%j.01'), '*')) + wavefiles = glob.glob(os.path.join(archive, day.strftime('Y%Y'), + day.strftime('R%j.01'), '*')) for wavefile in wavefiles: - header = read(wavfile, headonly=True) + header = read(wavefile, headonly=True) available_stations.append((header[0].stats.station, header[0].stats.channel)) elif arc_type.lower() == 'seishub': @@ -214,5 +233,5 @@ def _check_available_data(archive, arc_type, day): if __name__ == '__main__': - import doctest - doctest.testmod() \ No newline at end of file + import doctest + doctest.testmod() \ No newline at end of file diff --git a/eqcorrscan/utils/catalog_to_dd.py b/eqcorrscan/utils/catalog_to_dd.py index 2a5a58404..2def3ee0a 100644 --- a/eqcorrscan/utils/catalog_to_dd.py +++ b/eqcorrscan/utils/catalog_to_dd.py @@ -66,14 +66,20 @@ def _av_weight(W1, W2): :returns: str + .. 
rubric:: Example >>> _av_weight(1, 4) '0.3750' >>> _av_weight(0, 0) '1.0000' + >>> _av_weight(' ', ' ') + '1.0000' + >>> _av_weight(-9, 0) + '0.5000' + >>> _av_weight(1, -9) + '0.3750' """ import warnings - # print('Weight 1: ' + str(W1) + ', weight 2: ' + str(W2)) if str(W1) in [' ', '']: W1 = 1 elif str(W1) in ['-9', '9', '9.0', '-9.0']: @@ -194,10 +200,11 @@ def write_event(catalog): for i, event in enumerate(catalog): evinfo = event.origins[0] Mag_1 = event.magnitudes[0].mag or ' ' - if event.origins[0].time_errors: - t_RMS = event.origins[0].time_errors.Time_Residual_RMS or ' ' + if 'time_errors' in event.origins[0]: + t_RMS = event.origins[0].time_errors.Time_Residual_RMS or 0.0 else: - t_RMS = ' ' + print('No time residual in header') + t_RMS = 0.0 f.write(str(evinfo.time.year) + str(evinfo.time.month).zfill(2) + str(evinfo.time.day).zfill(2) + ' ' + str(evinfo.time.hour).rjust(2) + @@ -372,9 +379,9 @@ def write_correlations(event_list, wavbase, extract_len, pre_pick, shift_len, :type lowcut: float :param lowcut: Lowcut in Hz - default=1.0 :type highcut: float - :param highcut: Highcut in Hz - deafult=10.0 + :param highcut: Highcut in Hz - default=10.0 :type max_sep: float - :param max_sep: Maximum seperation between event pairs in km + :param max_sep: Maximum separation between event pairs in km :type min_link: int :param min_link: Minimum links for an event to be paired :type cc_thresh: float @@ -394,6 +401,10 @@ def write_correlations(event_list, wavbase, extract_len, pre_pick, shift_len, unassociated event objects and wavefiles. As such if you have events \ with associated wavefiles you are advised to generate Sfiles for each \ event using the sfile_util module prior to this step. + + .. note:: There is no provision to taper waveforms within these functions, \ + if you desire this functionality, you should apply the taper before \ + calling this. Note the obspy.Trace.taper functions. """ import obspy if int(obspy.__version__.split('.')[0]) > 0: @@ -444,10 +455,8 @@ def write_correlations(event_list, wavbase, extract_len, pre_pick, shift_len, slave_event_id = event_list[j][0] slave_wavefiles = sfile_util.readwavename(slave_sfile) try: - # slavestream=read(wavbase+'/*/*/'+slave_wavefiles[0]) slavestream = read(wavbase + os.sep + slave_wavefiles[0]) except: - # print(slavestream) raise IOError('No wavefile found: ' + slave_wavefiles[0] + ' ' + slave_sfile) if len(slave_wavefiles) > 1: @@ -561,12 +570,10 @@ def write_correlations(event_list, wavbase, extract_len, pre_pick, shift_len, ' ' + pick.phase_hint + '\n' if debug > 3: print(event_text) - # links+=1 else: print('cc too low: %s' % cc) corr_list.append(cc * cc) except: - # Should warn here msg = "Couldn't compute correlation correction" warnings.warn(msg) continue diff --git a/eqcorrscan/utils/catalog_utils.py b/eqcorrscan/utils/catalog_utils.py new file mode 100644 index 000000000..acec29bdd --- /dev/null +++ b/eqcorrscan/utils/catalog_utils.py @@ -0,0 +1,146 @@ +""" +Helper functions for common handling tasks for catalog objects. + +.. note:: These functions are tools to aid simplification of general scripts, \ + they do not cover all use cases, however if you have a use case you want \ + to see here, then let the authors know, or implement it yourself and \ + contribute it back to the project. + +:copyright: + Calum Chamberlain, Chet Hopp. 
+ +:license: + GNU Lesser General Public License, Version 3 + (https://www.gnu.org/copyleft/lesser.html) +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +def filter_picks(catalog, stations=None, channels=None, networks=None, + locations=None, top_n_picks=None): + """ + Filter events in the catalog based on a number of parameters. + + :param catalog: Catalog to filter. + :type catalog: obspy.core.event.Catalog + :param stations: List for stations to keep picks from. + :type stations: list + :param channels: List of channels to keep picks from. + :type channels: list + :param networks: List of networks to keep picks from. + :type networks: list + :param locations: List of location codes to use + :type locations: list + :param top_n_picks: Filter only the top N most used station-channel pairs. + :type top_n_picks: int + + :return: Filtered Catalog - if events are left with no picks, they are \ + removed from the catalog. + :rtype: obspy.core.event.Catalog + + .. note:: Will filter first by station, then by channel, then by network, \ + if using top_n_picks, this will be done last, after the other filters \ + have been applied. + + .. note:: Doesn't work in place on the catalog, your input catalog will be \ + safe unless you overwrite it. + + .. note:: Doesn't expand wildcard characters. + + .. rubric:: Example + + >>> from obspy.clients.fdsn import Client + >>> from eqcorrscan.utils.catalog_utils import filter_picks + >>> from obspy import UTCDateTime + >>> client = Client('NCEDC') + >>> t1 = UTCDateTime(2004, 9, 28) + >>> t2 = t1 + 86400 + >>> catalog = client.get_events(starttime=t1, endtime=t2, minmagnitude=3, + ... minlatitude=35.7, maxlatitude=36.1, + ... minlongitude=-120.6, maxlongitude=-120.2, + ... includearrivals=True) + >>> print(len(catalog)) + 12 + >>> filtered_catalog = filter_picks(catalog, stations=['BMS', 'BAP', + ... 'PAG', 'PAN', + ... 'PBI', 'PKY', + ... 'YEG', 'WOF']) + >>> print(len(filtered_catalog)) + 12 + >>> stations = [] + >>> for event in filtered_catalog: + ... for pick in event.picks: + ... 
stations.append(pick.waveform_id.station_code) + >>> print(sorted(list(set(stations)))) + ['BAP', 'BMS', 'PAG', 'PAN', 'PBI', 'PKY', 'WOF', 'YEG'] + """ + from collections import Counter + from obspy.core.event import Catalog + + # Don't work in place on the catalog + filtered_catalog = catalog.copy() + + if stations: + for event in filtered_catalog: + if len(event.picks) == 0: + continue + event.picks = [pick for pick in event.picks + if pick.waveform_id.station_code in stations] + if channels: + for event in filtered_catalog: + if len(event.picks) == 0: + continue + event.picks = [pick for pick in event.picks + if pick.waveform_id.channel_code in channels] + if networks: + for event in filtered_catalog: + if len(event.picks) == 0: + continue + event.picks = [pick for pick in event.picks + if pick.waveform_id.network_code in networks] + if locations: + for event in filtered_catalog: + if len(event.picks) == 0: + continue + event.picks = [pick for pick in event.picks + if pick.waveform_id.location_code in locations] + if top_n_picks: + all_picks = [] + for event in filtered_catalog: + all_picks += [(pick.waveform_id.station_code, + pick.waveform_id.channel_code) + for pick in event.picks] + counted = Counter(all_picks).most_common() + all_picks = [] + # Hack around sorting the counter object: Py 2 does it differently to 3 + for i in range(counted[0][1]): + highest = [item[0] for item in counted + if item[1] >= counted[0][1] - i] + # Sort them by alphabetical order in station + highest = sorted(highest, key=lambda tup: tup[0]) + for stachan in highest: + if stachan not in all_picks: + all_picks.append(stachan) + if len(all_picks) > top_n_picks: + all_picks = all_picks[0:top_n_picks] + break + for event in filtered_catalog: + if len(event.picks) == 0: + continue + event.picks = [pick for pick in event.picks + if (pick.waveform_id.station_code, + pick.waveform_id.channel_code) in all_picks] + # Remove events without picks + tmp_catalog = Catalog() + for event in filtered_catalog: + if len(event.picks) > 0: + tmp_catalog.append(event) + + return tmp_catalog + + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/eqcorrscan/utils/clustering.py b/eqcorrscan/utils/clustering.py index b72174f28..cd1374a95 100644 --- a/eqcorrscan/utils/clustering.py +++ b/eqcorrscan/utils/clustering.py @@ -16,35 +16,48 @@ import warnings -def cross_chan_coherence(st1, st2, i=0): +def cross_chan_coherence(st1, st2, allow_shift=False, shift_len=0.2, i=0): """ Calculate cross-channel coherency. Determine the cross-channel coherency between two streams of \ multichannel seismic data. - :type st1: obspy Stream + :type st1: obspy.core.stream.Stream :param st1: Stream one - :type st2: obspy Stream + :type st2: obspy.core.stream.Stream :param st2: Stream two + :type allow_shift: bool + :param allow_shift: Allow shift? + :type shift_len: int + :param shift_len: Samples to shift :type i: int :param i: index used for parallel async processing, returned unaltered :returns: cross channel coherence, float - normalized by number of\ - channels, if i, returns tuple of (cccoh, i) where i is int, as intput. + channels, if i, returns tuple of (cccoh, i) where i is int, as input. 
""" from eqcorrscan.core.match_filter import normxcorr2 + from obspy.signal.cross_correlation import xcorr cccoh = 0.0 kchan = 0 - for tr in st1: - tr1 = tr.data - # Assume you only have one waveform for each channel - tr2 = st2.select(station=tr.stats.station, - channel=tr.stats.channel) - if tr2: - cccoh += normxcorr2(tr1, tr2[0].data)[0][0] - kchan += 1 + if allow_shift: + for tr in st1: + tr2 = st2.select(station=tr.stats.station, channel=tr.stats.channel) + if tr2: + index, corval = xcorr(tr, tr2[0], shift_len) + cccoh += corval + kchan += 1 + else: + for tr in st1: + tr1 = tr.data + # Assume you only have one waveform for each channel + tr2 = st2.select(station=tr.stats.station, + channel=tr.stats.channel) + if tr2: + cccoh += normxcorr2(tr1, tr2[0].data)[0][0] + kchan += 1 if kchan: cccoh = cccoh / kchan return (cccoh, i) @@ -53,7 +66,7 @@ def cross_chan_coherence(st1, st2, i=0): return (0, i) -def distance_matrix(stream_list, cores=1): +def distance_matrix(stream_list, allow_shift=False, shift_len=0, cores=1): """ Compute distance matrix for waveforms based on cross-correlations. @@ -64,6 +77,10 @@ def distance_matrix(stream_list, cores=1): :type stream_list: List of obspy.Streams :param stream_list: List of the streams to compute the distance matrix for + :type allow_shift: bool + :param allow_shift: To allow templates to shift or not? + :type shift_len: int + :param shift_len: How many samples for templates to shift in time :type cores: int :param cores: Number of cores to parallel process using, defaults to 1. @@ -80,6 +97,8 @@ def distance_matrix(stream_list, cores=1): # Parallel processing results = [pool.apply_async(cross_chan_coherence, args=(master, stream_list[j], + allow_shift, + shift_len, j)) for j in range(len(stream_list))] pool.close() @@ -102,7 +121,8 @@ def distance_matrix(stream_list, cores=1): return dist_mat -def cluster(template_list, show=True, corr_thresh=0.3, save_corrmat=False, +def cluster(template_list, show=True, corr_thresh=0.3, allow_shift=False, + shift_len=0, save_corrmat=False, cores='all', debug=1): """ Cluster template waveforms based on average correlations. @@ -124,11 +144,15 @@ def cluster(template_list, show=True, corr_thresh=0.3, save_corrmat=False, :param show: plot linkage on screen if True, defaults to True :type corr_thresh: float :param corr_thresh: Cross-channel correlation threshold for grouping + :type allow_shift: bool + :param allow_shift: Whether to allow the templates to shift when correlating + :type shift_len: int + :param shift_len: How many samples to allow the templates to shift in time :type save_corrmat: bool :param save_corrmat: If True will save the distance matrix to \ dist_mat.npy in the local directory. :type cores: int - :param cores: numebr of cores to use when computing the distance matrix, \ + :param cores: number of cores to use when computing the distance matrix, \ defaults to 'all' which will work out how many cpus are available \ and hog them. 
@@ -102,7 +121,8 @@ def distance_matrix(stream_list, cores=1):
     return dist_mat
 
 
-def cluster(template_list, show=True, corr_thresh=0.3, save_corrmat=False,
+def cluster(template_list, show=True, corr_thresh=0.3, allow_shift=False,
+            shift_len=0, save_corrmat=False,
             cores='all', debug=1):
     """
     Cluster template waveforms based on average correlations.
@@ -124,11 +144,15 @@ def cluster(template_list, show=True, corr_thresh=0.3, save_corrmat=False,
     :param show: plot linkage on screen if True, defaults to True
     :type corr_thresh: float
     :param corr_thresh: Cross-channel correlation threshold for grouping
+    :type allow_shift: bool
+    :param allow_shift: Whether to allow the templates to shift when \
+        correlating.
+    :type shift_len: int
+    :param shift_len: Maximum number of samples the templates may shift in \
+        time.
     :type save_corrmat: bool
     :param save_corrmat: If True will save the distance matrix to \
         dist_mat.npy in the local directory.
     :type cores: int
-    :param cores: numebr of cores to use when computing the distance matrix, \
+    :param cores: number of cores to use when computing the distance matrix, \
         defaults to 'all' which will work out how many cpus are available \
         and hog them.
     :type debug: int
@@ -151,7 +175,7 @@ def cluster(template_list, show=True, corr_thresh=0.3, save_corrmat=False,
     # Compute the distance matrix
     if debug >= 1:
         print('Computing the distance matrix using '+str(num_cores)+' cores')
-    dist_mat = distance_matrix(stream_list, cores=num_cores)
+    dist_mat = distance_matrix(stream_list, allow_shift, shift_len,
+                               cores=num_cores)
     if save_corrmat:
         np.save('dist_mat.npy', dist_mat)
     if debug >= 1:
@@ -274,24 +298,37 @@ def group_delays(stream_list):
     return groups
 
 
-def SVD(stream_list):
+def SVD(stream_list, full=False):
+    """
+    Deprecated. Use svd.
+    """
+    warnings.warn('Deprecated, use svd instead.')
+    return svd(stream_list=stream_list, full=full)
+
+
+def svd(stream_list, full=False):
     """
     Compute the SVD of a number of templates.
 
-    Returns the \
-    singular vectors and singular values of the templates.
+    Returns the singular vectors and singular values of the templates.
 
-    :type stream_list: List of Obspy.Stream
+    :type stream_list: list of obspy.core.stream.Stream
     :param stream_list: List of the templates to be analysed
+    :type full: bool
+    :param full: Whether to compute the full input vector matrix or not \
+        (passed to numpy.linalg.svd as full_matrices).
 
-    :return: SVector(list of ndarray), SValues(list) for each channel, \
-        Uvalues(list of ndarray) for each channel, \
+    :return: SVectors (list of ndarray) and SValues (list) for each \
+        channel, UVectors (list of ndarray) for each channel, and \
         stachans, List of String (station.channel)
 
     .. note:: We recommend that you align the data before computing the \
        SVD, e.g., the P-arrival on all templates for the same channel \
       should appear at the same time in the trace.  See the \
       stacking.align_traces function for a way to do this.
+
+    .. note:: Uses the numpy.linalg.svd function; numpy's U, s and v are \
+        mapped to UVectors, SValues and SVectors respectively. Note that \
+        numpy's v (and therefore ours) corresponds to V.H.
     """
     # Convert templates into ndarrays for each channel
     # First find all unique channels:
@@ -300,11 +337,11 @@ def SVD(stream_list):
     for tr in st:
         stachans.append(tr.stats.station+'.'+tr.stats.channel)
     stachans = list(set(stachans))
-    print(stachans)
+    stachans.sort()
     # Initialize a list for the output matrices, one matrix per-channel
-    SValues = []
-    SVectors = []
-    Uvectors = []
+    svalues = []
+    svectors = []
+    uvectors = []
     for stachan in stachans:
         lengths = []
         for st in stream_list:
@@ -313,7 +350,6 @@ def SVD(stream_list):
             if len(tr) > 0:
                 tr = tr[0]
             else:
-                print(st)
                 warnings.warn('Stream does not contain ' + stachan)
                 continue
             lengths.append(len(tr.data))
@@ -337,12 +373,12 @@ def SVD(stream_list):
             warnings.warn('Matrix of traces is less than 2D for %s' % stachan)
             continue
         chan_mat = np.asarray(chan_mat)
-        U, s, V = np.linalg.svd(chan_mat, full_matrices=False)
-        SValues.append(s)
-        SVectors.append(V)
-        Uvectors.append(U)
+        u, s, v = np.linalg.svd(chan_mat, full_matrices=full)
+        svalues.append(s)
+        svectors.append(v)
+        uvectors.append(u)
         del(chan_mat)
-    return SVectors, SValues, Uvectors, stachans
+    return svectors, svalues, uvectors, stachans
 
 
 def empirical_SVD(stream_list, linear=True):
@@ -422,10 +458,15 @@ def SVD_2_stream(SVectors, stachans, k, sampling_rate):
     for i in range(k):
         SVstream = []
         for j, stachan in enumerate(stachans):
-            SVstream.append(Trace(SVectors[j][i],
-                                  header={'station': stachan.split('.')[0],
-                                          'channel': stachan.split('.')[1],
-                                          'sampling_rate': sampling_rate}))
+            if len(SVectors[j]) <= k:
+                warnings.warn('Too few traces at %s for a %02d ' % (stachan, k) +
+                              'dimensional subspace.
Detector streams will' + + ' not include this stachan.') + else: + SVstream.append(Trace(SVectors[j][i], + header={'station': stachan.split('.')[0], + 'channel': stachan.split('.')[1], + 'sampling_rate': sampling_rate})) SVstreams.append(Stream(SVstream)) return SVstreams @@ -473,8 +514,9 @@ def corr_cluster(trace_list, thresh=0.9): return output -def extract_detections(detections, templates, contbase_list, extract_len=90.0, - outdir=None, extract_Z=True, additional_stations=[]): +def extract_detections(detections, templates, archive, arc_type, + extract_len=90.0, outdir=None, extract_Z=True, + additional_stations=[]): """ Extract waveforms associated with detections @@ -483,22 +525,20 @@ def extract_detections(detections, templates, contbase_list, extract_len=90.0, They will also be saved if outdir is set. The default is unset. The \ default extract_len is 90 seconds per channel. - :type detections: list tuple of of :class: datetime.datetime, string - :param detections: List of datetime objects, and their associated \ - template name. - :type templates: list of tuple of string and :class: obspy.Stream - :param templates: A list of the tuples of the template name and the \ + :type detections: list + :param detections: List of eqcorrscan.core.match_filter.DETECTION objects. + :type templates: list + :param templates: A list of tuples of the template name and the \ template Stream used to detect detections. - :type contbase_list: list of tuple of string - :param contbase_list: List of tuples of the form \ - ['path', 'type', 'network'] Where path is the path to the continuous \ - database, type is the directory structure, which can be either \ - Yyyyy/Rjjj.01, which is the standard IRIS Year, julian day structure, \ - or, yyyymmdd which is a single directory for every day. + :type archive: str + :param archive: Either name of archive or path to continuous data, see \ + eqcorrscan.utils.archive_read for details + :type arc_type: str + :param arc_type: Type of archive, either seishub, FDSN, day_vols :type extract_len: float :param extract_len: Length to extract around the detection (will be \ equally cut around the detection time) in seconds. Default is 90.0. - :type outdir: bool or str + :type outdir: str :param outdir: Default is None, with None set, no files will be saved, \ if set each detection will be saved into this directory with files \ named according to the detection time, NOT than the waveform \ @@ -507,32 +547,66 @@ def extract_detections(detections, templates, contbase_list, extract_len=90.0, :param extract_Z: Set to True to also extract Z channels for detections \ delays will be the same as horizontal channels, only applies if \ only horizontal channels were used in the template. - :type additional_stations: list of tuple - :param additional_stations: List of stations, chanels and networks to \ - also extract data for using an average delay. - - :returns: list of :class: obspy.Stream + :type additional_stations: list + :param additional_stations: List of tuples of (station, channel, network) \ + to also extract data for using an average delay. + + :returns: list of streams + :rtype: obspy.core.stream.Stream + + .. rubric: Example + + >>> from eqcorrscan.utils.clustering import extract_detections + >>> from eqcorrscan.core.match_filter import DETECTION + >>> from obspy import read, UTCDateTime + >>> import os + >>> # Use some dummy detections, you would use real one + >>> detections = [DETECTION('temp1', UTCDateTime(2012, 3, 26, 9, 15), 2, + ... 
['WHYM', 'EORO'], 2, 1.2, 'corr'), + ... DETECTION('temp2',UTCDateTime(2012, 3, 26, 18, 5), 2, + ... ['WHYM', 'EORO'], 2, 1.2, 'corr')] + >>> path_to_templates = os.path.join('eqcorrscan', 'tests', 'test_data') + >>> archive = os.path.join(path_to_templates, 'day_vols') + >>> template_files = [os.path.join(path_to_templates, 'temp1.ms'), + ... os.path.join(path_to_templates, 'temp2.ms')] + >>> templates = [('temp' + str(i), read(filename)) + ... for i, filename in enumerate(template_files)] + >>> extracted = extract_detections(detections, templates, + ... archive=archive, arc_type='day_vols') + Working on detections for day: 2012-03-26T00:00:00.000000Z + Cutting for detections at: 2012/03/26 09:15:00 + Cutting for detections at: 2012/03/26 18:05:00 + >>> print(extracted[0].sort()) + 2 Trace(s) in Stream: + AF.EORO..SHZ | 2012-03-26T09:14:15.000000Z - 2012-03-26T09:15:45.000000Z | 1.0 Hz, 91 samples + AF.WHYM..SHZ | 2012-03-26T09:14:15.000000Z - 2012-03-26T09:15:45.000000Z | 1.0 Hz, 91 samples + >>> print(extracted[1].sort()) + 2 Trace(s) in Stream: + AF.EORO..SHZ | 2012-03-26T18:04:15.000000Z - 2012-03-26T18:05:45.000000Z | 1.0 Hz, 91 samples + AF.WHYM..SHZ | 2012-03-26T18:04:15.000000Z - 2012-03-26T18:05:45.000000Z | 1.0 Hz, 91 samples """ - from obspy import read from obspy import UTCDateTime import os - # Sort the template according to starttimes, needed so that stachan[i] + from eqcorrscan.utils.archive_read import read_data + # Sort the template according to start-times, needed so that stachan[i] # corresponds to delays[i] all_delays = [] # List of tuples of template name, delays all_stachans = [] for template in templates: templatestream = template[1].sort(['starttime']) - stachans = [(tr.stats.station, tr.stats.channel, tr.stats.network) + stachans = [(tr.stats.station, tr.stats.channel) for tr in templatestream] mintime = templatestream[0].stats.starttime delays = [tr.stats.starttime - mintime for tr in templatestream] all_delays.append((template[0], delays)) all_stachans.append((template[0], stachans)) # Sort the detections and group by day - detections.sort() - detection_days = [detection[0].date() for detection in detections] + detections.sort(key=lambda d: d.detect_time) + detection_days = [detection.detect_time.date + for detection in detections] detection_days = list(set(detection_days)) detection_days.sort() + detection_days = [UTCDateTime(d) for d in detection_days] # Initialize output list detection_wavefiles = [] @@ -549,8 +623,7 @@ def extract_detections(detections, templates, contbase_list, extract_len=90.0, j = 0 for i, stachan in enumerate(stachans): if j == 1: - new_stachans.append((stachan[0], stachan[1][0]+'Z', - stachan[2])) + new_stachans.append((stachan[0], stachan[1][0]+'Z')) new_delays.append(delays[i]) new_stachans.append(stachan) new_delays.append(delays[i]) @@ -578,52 +651,33 @@ def extract_detections(detections, templates, contbase_list, extract_len=90.0, print('Working on detections for day: ' + str(detection_day)) stachans = list(set([stachans[1] for stachans in all_stachans][0])) # List of all unique stachans - read in all data - for stachan in stachans: - print('Extracting data for ' + '.'.join(stachan)) - contbase = [base for base in contbase_list - if base[2] == stachan[2]][0] - if contbase[1] == 'yyyymmdd': - dayfile = detection_day.strftime('%Y%m%d') + '/*' +\ - stachan[0] + '.' + stachan[1][0] + '?' 
+ stachan[1][-1] +\
-                    '.*'
-            elif contbase[1] == 'Yyyyy/Rjjj.01':
-                dayfile = detection_day.strftime('Y%Y/R%j.01')+'/'+stachan[0] +\
-                    '.*.'+stachan[1][0]+'?'+stachan[1][-1]+'.' +\
-                    detection_day.strftime('%Y.%j')
-            if 'st' not in locals():
-                try:
-                    st = read(contbase[0]+'/'+dayfile)
-                except:
-                    print('No data for '+contbase[0]+'/'+dayfile)
-            else:
-                try:
-                    st += read(contbase[0]+'/'+dayfile)
-                except:
-                    print('No data for '+contbase[0]+'/'+dayfile)
+        st = read_data(archive=archive, arc_type=arc_type, day=detection_day,
+                       stachans=stachans)
         st.merge(fill_value='interpolate')
         day_detections = [detection for detection in detections
-                          if detection[0].date() == detection_day]
+                          if UTCDateTime(detection.detect_time.date) ==
+                          detection_day]
         del stachans, delays
         for detection in day_detections:
-            template = detection[1]
-            t_stachans = [stachans[1] for stachans in all_stachans
-                          if stachans[0] == template][0]
-            t_delays = [delays[1] for delays in all_delays
-                        if delays[0] == template][0]
+            # Keep the template name here: it is used to build the output
+            # paths below, the template streams themselves are not needed.
+            template = detection.template_name
             print('Cutting for detections at: ' +
-                  detection[0].strftime('%Y/%m/%d %H:%M:%S'))
+                  detection.detect_time.strftime('%Y/%m/%d %H:%M:%S'))
             detect_wav = st.copy()
             for tr in detect_wav:
-                tr.trim(starttime=UTCDateTime(detection[0]) - extract_len / 2,
-                        endtime=UTCDateTime(detection[0]) + extract_len / 2)
+                tr.trim(starttime=UTCDateTime(detection.detect_time) -
+                        extract_len / 2,
+                        endtime=UTCDateTime(detection.detect_time) +
+                        extract_len / 2)
             if outdir:
                 if not os.path.isdir(outdir+'/'+template):
                     os.makedirs(outdir+'/'+template)
                 detect_wav.write(outdir+'/'+template+'/' +
-                                 detection[0].strftime('%Y-%m-%d_%H-%M-%S') +
+                                 detection.detect_time.
+                                 strftime('%Y-%m-%d_%H-%M-%S') +
                                  '.ms', format='MSEED', encoding='STEIM2')
                 print('Written file: '+outdir+'/'+template+'/' +
-                      detection[0].strftime('%Y-%m-%d_%H-%M-%S')+'.ms')
+                      detection.detect_time.strftime('%Y-%m-%d_%H-%M-%S') +
+                      '.ms')
             if not outdir:
                 detection_wavefiles.append(detect_wav)
                 del detect_wav
@@ -655,13 +709,21 @@ def dist_mat_km(catalog):
     # Calculate distance vector for each event
     for i, master in enumerate(catalog):
         mast_list = []
-        master_tup = (master.preferred_origin().latitude,
-                      master.preferred_origin().longitude,
-                      master.preferred_origin().depth // 1000)
+        if master.preferred_origin():
+            master_ori = master.preferred_origin()
+        else:
+            master_ori = master.origins[0]
+        master_tup = (master_ori.latitude,
+                      master_ori.longitude,
+                      master_ori.depth // 1000)
         for slave in catalog:
-            slave_tup = (slave.preferred_origin().latitude,
-                         slave.preferred_origin().longitude,
-                         slave.preferred_origin().depth // 1000)
+            if slave.preferred_origin():
+                slave_ori = slave.preferred_origin()
+            else:
+                slave_ori = slave.origins[0]
+            slave_tup = (slave_ori.latitude,
+                         slave_ori.longitude,
+                         slave_ori.depth // 1000)
             mast_list.append(dist_calc(master_tup, slave_tup))
         # Sort the list into the dist_mat structure
         for j in range(i, len(catalog)):
@@ -679,7 +741,7 @@ def space_cluster(catalog, d_thresh, show=True):
 
     Will compute the\
     matrix of physical distances between events and utilize the\
-    scipy.clusering.hierarchy module to perform the clustering.
+    scipy.cluster.hierarchy module to perform the clustering.
 
     :type catalog: obspy.Catalog
     :param catalog: Catalog of events to clustered
@@ -777,22 +839,33 @@ def re_thresh_csv(path, old_thresh, new_thresh, chan_thresh):
     :param chan_thresh: Minimum number of channels for a detection
 
     :returns: List of detections
+
+    ..
rubric:: Example + + >>> from eqcorrscan.utils.clustering import re_thresh_csv + >>> import os + >>> det_file = os.path.join('eqcorrscan', 'tests', 'test_data', + ... 'expected_tutorial_detections.txt') + >>> detections = re_thresh_csv(path=det_file, old_thresh=8, new_thresh=10, + ... chan_thresh=3) + Read in 22 detections + Left with 17 detections """ - f = open(path, 'r') + from eqcorrscan.core.match_filter import read_detections + old_detections = read_detections(path) old_thresh = float(old_thresh) new_thresh = float(new_thresh) # Be nice, ensure that the thresholds are float detections = [] detections_in = 0 detections_out = 0 - for line in f: - if not line.split(', ')[0] == 'template' and len(line) > 2: - detections_in += 1 - if abs(float(line.split(', ')[3])) >=\ - (new_thresh / old_thresh) * float(line.split(', ')[2]) and\ - int(line.split(', ')[4]) >= chan_thresh: - detections_out += 1 - detections.append(line.split(', ')) + for detection in old_detections: + detections_in += 1 + if abs(detection.detect_val) >=\ + (new_thresh / old_thresh) * detection.threshold and\ + detection.no_chans >= chan_thresh: + detections_out += 1 + detections.append(detection) print('Read in '+str(detections_in)+' detections') print('Left with '+str(detections_out)+' detections') return detections diff --git a/eqcorrscan/utils/despike.py b/eqcorrscan/utils/despike.py index 5776b1e41..7be54a321 100644 --- a/eqcorrscan/utils/despike.py +++ b/eqcorrscan/utils/despike.py @@ -206,4 +206,4 @@ def template_remove(tr, template, cc_thresh, windowlength, if __name__ == '__main__': import doctest - doctest.modtest() + doctest.testmod() diff --git a/eqcorrscan/utils/findpeaks.py b/eqcorrscan/utils/findpeaks.py index 895e01037..c67002fd0 100644 --- a/eqcorrscan/utils/findpeaks.py +++ b/eqcorrscan/utils/findpeaks.py @@ -34,154 +34,19 @@ def is_prime(number): """ import random ''' if number != 1 ''' - if (number > 1): + if number > 1: ''' repeat the test few times ''' for time in range(3): ''' Draw a RANDOM number in range of number ( Z_number ) ''' - randomNumber = random.randint(2, number)-1 + randomNumber = random.randint(2, number - 1) ''' Test if a^(n-1) = 1 mod n ''' - if (pow(randomNumber, number-1, number) != 1): + if pow(randomNumber, number-1, number) != 1: return False return True else: ''' case number == 1 ''' return False -# Note this function doesn't give the expected results. -# def find_peaks2(arr, thresh, trig_int, debug=0, maxwidth=10, -# starttime=False, samp_rate=1.0): -# r"""Function to determine peaks in an array of data using scipy \ -# find_peaks_cwt, works fast in certain cases, but for match_filter cccsum \ -# peak finding, find_peaks2_short works better. Test it out and see which \ -# works best for your application. -# -# :type arr: ndarray -# :param arr: 1-D numpy array is required -# :type thresh: float -# :param thresh: The threshold below which will be considered noise and \ -# peaks will not be found in. -# :type trig_int: int -# :param trig_int: The minimum difference in samples between triggers, \ -# if multiple peaks within this window this code will find the highest. -# :type debug: int -# :param debug: Optional, debug level 0-5 -# :type maxwidth: int -# :param maxwidth: Maximum peak width to look for in samples -# :type starttime: osbpy.UTCDateTime -# :param starttime: Starttime for plotting, only used if debug > 2. -# :type samp_rate: float -# :param samp_rate: Sampling rate in Hz, only used for plotting if debug > 2. 
-# -# :return: peaks: Lists of tuples of peak values and locations. -# -# >>> import numpy as np -# >>> arr = np.random.randn(100) -# >>> threshold = arr.max() -# >>> arr[40] = 20 -# >>> arr[60] = 100 -# >>> find_peaks2(arr, threshold, 3) -# Finding peaks -# sorting peaks -# [(20.0, 40), (100.0, 60)] -# """ -# from scipy.signal import find_peaks_cwt -# import numpy as np -# from obspy import UTCDateTime -# if not starttime: -# starttime = UTCDateTime(0) -# # Set everything below the threshold to zero -# image = np.copy(arr) -# image = np.abs(image) -# image[image < thresh] = thresh -# # We need to check if the number of samples in the image is prime, if it -# # is this method will be really slow, so we add a pad to the end to make -# # it not of prime length! -# if is_prime(len(image)): -# image = np.append(image, 0.0) -# print('Input array has a prime number of samples, appending a zero') -# print(len(image)) -# if len(image[image > thresh]) == 0: -# print('No values over threshold found') -# return [] -# if debug > 0: -# msg = ' '.join(['Found', str(len(image[image > thresh])), -# 'samples above the threshold']) -# print(msg) -# initial_peaks = [] -# peaks = [] -# # Find the peaks -# print('Finding peaks') -# peakinds = find_peaks_cwt(image, np.arange(1, maxwidth)) -# initial_peaks = [(image[peakind], peakind) for peakind in peakinds] -# # Sort initial peaks according to amplitude -# print('sorting peaks') -# peaks_sort = sorted(initial_peaks, key=lambda amplitude: amplitude[0], -# reverse=True) -# if debug >= 4: -# for peak in initial_peaks: -# print(peak) -# if initial_peaks: -# peaks.append(peaks_sort[0]) # Definitely take the biggest peak -# if debug > 3: -# msg = ' '.join(['Added the biggest peak of', str(peaks[0][0]), -# 'at sample', str(peaks[0][1])]) -# print(msg) -# if len(initial_peaks) > 1: -# if debug > 3: -# msg = ' '.join(['Multiple peaks found, checking them', -# 'now to see if they overlap']) -# print(msg) -# for next_peak in peaks_sort: -# # i in range(1,len(peaks_sort)): -# # Loop through the amplitude sorted peaks -# # if the next highest amplitude peak is within trig_int of any -# # peak already in peaks then we don't want it, else, add it -# # next_peak = peaks_sort[i] -# if debug > 3: -# print(next_peak) -# for peak in peaks: -# add = False -# # Use add as a switch for whether or not to append -# # next peak to peaks, if once gone through all the peaks -# # it is True, then we will add it, otherwise we won't! 
-# if abs(next_peak[1] - peak[1]) < trig_int: -# if debug > 3: -# msg = ' '.join(['Difference in time is', -# str(next_peak[1] - peak[1]), '\n' -# 'Which is less than', -# str(trig_int)]) -# print(msg) -# add = False -# # Need to exit the loop here if false -# break -# else: -# add = True -# if add: -# if debug > 3: -# msg = ' '.join(['Adding peak of', str(next_peak[0]), -# 'at sample', str(next_peak[1])]) -# print(msg) -# peaks.append(next_peak) -# elif debug > 3: -# msg = ' '.join(['I did not add peak of', -# str(next_peak[0]), 'at sample', -# str(next_peak[1])]) -# print(msg) -# -# if debug >= 3: -# from eqcorrscan.utils import plotting -# _fname = ''.join(['peaks_', -# starttime.datetime.strftime('%Y-%m-%d'), -# '.pdf']) -# print(' '.join(['Saving plot to', _fname])) -# plotting.peaks_plot(image, starttime, samp_rate, True, -# peaks, _fname) -# peaks = sorted(peaks, key=lambda time: time[1], reverse=False) -# return peaks -# else: -# print('No peaks for you!') -# return peaks - def find_peaks2_short(arr, thresh, trig_int, debug=0, starttime=False, samp_rate=1.0): @@ -267,7 +132,6 @@ def find_peaks2_short(arr, thresh, trig_int, debug=0, starttime=False, if debug > 3: print(next_peak) for peak in peaks: - add = False # Use add as a switch for whether or not to append # next peak to peaks, if once gone through all the peaks # it is True, then we will add it, otherwise we won't! @@ -392,6 +256,78 @@ def find_peaks_dep(arr, thresh, trig_int, debug=0, starttime=False, return peaks +def coin_trig(peaks, stachans, samp_rate, moveout, min_trig, trig_int): + """ + Find network coincidence triggers within peaks of detection statistics. + + :type peaks: list + :param peaks: List of lists of tuples of (peak, index) for each \ + station-channel. Index should be in samples. + :type stachans: list + :param stachans: List of tuples of (station, channel) in the order of \ + peaks. + :type samp_rate: float + :param samp_rate: Sampling rate in Hz + :type moveout: float + :param moveout: Allowable network moveout in seconds. + :type min_trig: int + :param min_trig: Minimum station-channels required to declare a trigger. + :type trig_int: float + :param trig_int: Minimum allowable time between network triggers in seconds. + + :return: List of tuples of (peak, index), for the earliest detected station. + :rtype: list + + .. 
rubric:: Example + + >>> peaks = [[(0.5, 100), (0.3, 800)], [(0.4, 120), (0.7, 850)]] + >>> triggers = coin_trig(peaks, [('a', 'Z'), ('b', 'Z')], 10, 3, 2, 1) + >>> print(triggers) + [(0.45, 100)] + """ + triggers = [] + for stachan, _peaks in zip(stachans, peaks): + for peak in _peaks: + trigger = (peak[1], peak[0], '.'.join(stachan)) + triggers.append(trigger) + coincidence_triggers = [] + for i, master in enumerate(triggers): + slaves = triggers[i+1:] + coincidence = 1 + trig_time = master[0] + trig_val = master[1] + for slave in slaves: + if abs(slave[0] - master[0]) <= (moveout * samp_rate) and \ + slave[2] != master[2]: + coincidence += 1 + if slave[0] < master[0]: + trig_time = slave[0] + trig_val += slave[1] + if coincidence >= min_trig: + coincidence_triggers.append((trig_val / coincidence, + trig_time)) + # Sort by trigger-value, largest to smallest - remove duplicate detections + if coincidence_triggers: + coincidence_triggers.sort(key=lambda tup: tup[0], reverse=True) + output = [coincidence_triggers[0]] + for coincidence_trigger in coincidence_triggers[1:]: + add = True + for peak in output: + # If the event occurs within the trig_int time then do not add + # it, and break out of the inner loop. + if abs(coincidence_trigger[1] - peak[1]) < (trig_int * + samp_rate): + add = False + break + if add: + output.append((coincidence_trigger[0], + coincidence_trigger[1])) + output.sort(key=lambda tup: tup[1]) + return output + else: + return [] + + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/eqcorrscan/utils/parameters.py b/eqcorrscan/utils/parameters.py index 312df4470..c4a53e5b6 100644 --- a/eqcorrscan/utils/parameters.py +++ b/eqcorrscan/utils/parameters.py @@ -143,8 +143,6 @@ def read_parameters(infile='../parameters/EQcorrscan_parameters.txt'): :returns: parameters as EQcorrscanParameters """ - import glob - from obspy import UTCDateTime try: import ConfigParser except ImportError: @@ -194,3 +192,8 @@ def read_parameters(infile='../parameters/EQcorrscan_parameters.txt'): ) return parameters + + +if __name__ == '__main__': + import doctest + doctest.testmod() \ No newline at end of file diff --git a/eqcorrscan/utils/picker.py b/eqcorrscan/utils/picker.py index dc805bd28..d81a6cc4a 100644 --- a/eqcorrscan/utils/picker.py +++ b/eqcorrscan/utils/picker.py @@ -16,72 +16,72 @@ from __future__ import unicode_literals -def synth_compare(stream, stream_list, cores=4, debug=0): - """ - Find best matching template or earthquake for a given stream. - Compare a specific stream to a list of synthetic templates, or \ - earthquakes of known source and find the best matching event. - - This can be used to assign the event to a family, which has a known \ - location. - - :type stream: :class: obspy.Stream - :param stream: Stream to be compared to streams with known locations. 
- :type stream_list: list - :param stream_list: List of streams with known locations - :type cores: int - :param cores: Number of cores to parallel over - :type debug: int - :param debug: Debug level, high is more debug - - :returns: int, float: index of best match and cross-correlation sum - :rtype: tuple - """ - from eqcorrscan.core.match_filter import _channel_loop - import numpy as np - import copy - from obspy import Trace - - stream_copy = stream.copy() - templates = copy.deepcopy(stream_list) - # Need to fill the stream_list - template - channels - template_stachan = [] - for template in templates: - for tr in template: - template_stachan += [(tr.stats.station, tr.stats.channel)] - template_stachan = list(set(template_stachan)) - - for stachan in template_stachan: - if not stream_copy.select(station=stachan[0], channel=stachan[1]): - # Remove template traces rather than adding NaN data - for template in templates: - if template.select(station=stachan[0], channel=stachan[1]): - for tr in template.select(station=stachan[0], - channel=stachan[1]): - template.remove(tr) - # Remove un-needed channels - for tr in stream_copy: - if not (tr.stats.station, tr.stats.channel) in template_stachan: - stream_copy.remove(tr) - # Also pad out templates to have all channels - for template in templates: - for stachan in template_stachan: - if not template.select(station=stachan[0], channel=stachan[1]): - nulltrace = Trace() - nulltrace.stats.station = stachan[0] - nulltrace.stats.channel = stachan[1] - nulltrace.stats.sampling_rate = template[0].stats.sampling_rate - nulltrace.stats.starttime = template[0].stats.starttime - nulltrace.data = np.array([np.NaN] * len(template[0].data), - dtype=np.float32) - template += nulltrace - # Hand off cross-correaltion to _channel_loop, which runs in parallel - [cccsums, no_chans] = _channel_loop(templates, stream_copy, cores, debug) - cccsums = [np.max(cccsum) for cccsum in cccsums] - # Find the maximum cccsum and index thereof - index = np.argmax(cccsums) - cccsum = cccsums[index] - return index, cccsum +# def synth_compare(stream, stream_list, cores=4, debug=0): +# """ +# Find best matching template or earthquake for a given stream. +# Compare a specific stream to a list of synthetic templates, or \ +# earthquakes of known source and find the best matching event. +# +# This can be used to assign the event to a family, which has a known \ +# location. +# +# :type stream: :class: obspy.Stream +# :param stream: Stream to be compared to streams with known locations. 
+# :type stream_list: list +# :param stream_list: List of streams with known locations +# :type cores: int +# :param cores: Number of cores to parallel over +# :type debug: int +# :param debug: Debug level, high is more debug +# +# :returns: int, float: index of best match and cross-correlation sum +# :rtype: tuple +# """ +# from eqcorrscan.core.match_filter import _channel_loop +# import numpy as np +# import copy +# from obspy import Trace +# +# stream_copy = stream.copy() +# templates = copy.deepcopy(stream_list) +# # Need to fill the stream_list - template - channels +# template_stachan = [] +# for template in templates: +# for tr in template: +# template_stachan += [(tr.stats.station, tr.stats.channel)] +# template_stachan = list(set(template_stachan)) +# +# for stachan in template_stachan: +# if not stream_copy.select(station=stachan[0], channel=stachan[1]): +# # Remove template traces rather than adding NaN data +# for template in templates: +# if template.select(station=stachan[0], channel=stachan[1]): +# for tr in template.select(station=stachan[0], +# channel=stachan[1]): +# template.remove(tr) +# # Remove un-needed channels +# for tr in stream_copy: +# if not (tr.stats.station, tr.stats.channel) in template_stachan: +# stream_copy.remove(tr) +# # Also pad out templates to have all channels +# for template in templates: +# for stachan in template_stachan: +# if not template.select(station=stachan[0], channel=stachan[1]): +# nulltrace = Trace() +# nulltrace.stats.station = stachan[0] +# nulltrace.stats.channel = stachan[1] +# nulltrace.stats.sampling_rate = template[0].stats.sampling_rate +# nulltrace.stats.starttime = template[0].stats.starttime +# nulltrace.data = np.array([np.NaN] * len(template[0].data), +# dtype=np.float32) +# template += nulltrace +# # Hand off cross-correaltion to _channel_loop, which runs in parallel +# [cccsums, no_chans] = _channel_loop(templates, stream_copy, cores, debug) +# cccsums = [np.max(cccsum) for cccsum in cccsums] +# # Find the maximum cccsum and index thereof +# index = np.argmax(cccsums) +# cccsum = cccsums[index] +# return index, cccsum def cross_net(stream, env=False, debug=0, master=False): diff --git a/eqcorrscan/utils/plotting.py b/eqcorrscan/utils/plotting.py index b66adbdc2..84d60dce1 100644 --- a/eqcorrscan/utils/plotting.py +++ b/eqcorrscan/utils/plotting.py @@ -14,6 +14,7 @@ from __future__ import unicode_literals import numpy as np import matplotlib.pylab as plt +import warnings def _check_save_args(save, savefile): @@ -75,6 +76,48 @@ def chunk_data(tr, samp_rate, state='mean'): return trout +def xcorr_plot(template, image, shift=None, cc=None, cc_vec=None, save=False, + savefile=None): + """ + Plot a template overlying an image aligned by correlation. + + :type template: numpy.ndarray + :param template: Short template image + :type image: numpy.ndarray + :param image: Long master image + :type shift: int + :param shift: Shift to apply to template relative to image, in samples + :type cc: float + :param cc: Cross-correlation at shift + :type cc_vec: numpy.ndarray + :param cc_vec: Cross-correlation vector. + + .. rubric:: Example + + >>> from obspy import read + >>> from eqcorrscan.utils.plotting import xcorr_plot + >>> from eqcorrscan.utils.stacking import align_traces + >>> st = read().detrend('simple').filter('bandpass', freqmin=2, freqmax=15) + >>> shifts, ccs = align_traces([st[0], st[1]], 40) + >>> xcorr_plot(template=st[1].data, image=st[0].data, shift=shifts[1], + ... cc=ccs[1]) # doctest: +SKIP + + .. 
image:: ../../plots/xcorr_plot.png
+    """
+    _check_save_args(save, savefile)
+    if cc is None or shift is None:
+        if cc_vec is None:
+            raise IOError('Must provide either cc_vec, or cc and shift')
+        shift = np.abs(cc_vec).argmax()
+        cc = cc_vec[shift]
+    x = np.arange(len(image))
+    plt.plot(x, image / abs(image).max(), 'k', lw=1.3, label='Image')
+    x = np.arange(len(template)) + shift
+    plt.plot(x, template / abs(template).max(), 'r', lw=1.1, label='Template')
+    plt.title('Shift=%s, Correlation=%s' % (shift, cc))
+    if not save:
+        plt.show()
+    else:
+        plt.savefig(savefile)
+        plt.close()
+
+
 def triple_plot(cccsum, cccsum_hist, trace, threshold, save=False,
                 savefile=None):
     r"""Plot a day-long seismogram, correlogram and histogram.
@@ -94,6 +137,23 @@ def triple_plot(cccsum, cccsum_hist, trace, threshold, save=False,
     :param savefile: Path to save figure to, only required if save=True
 
     :returns: matplotlib.figure
+
+    .. rubric:: Example
+
+    >>> from obspy import read
+    >>> from eqcorrscan.core.match_filter import normxcorr2
+    >>> from eqcorrscan.utils.plotting import triple_plot
+    >>> st = read()
+    >>> template = st[0].copy().trim(st[0].stats.starttime + 8,
+    ...                              st[0].stats.starttime + 12)
+    >>> tr = st[0]
+    >>> ccc = normxcorr2(template=template.data, image=tr.data)
+    >>> tr.data = tr.data[0:len(ccc[0])]
+    >>> triple_plot(cccsum=ccc, cccsum_hist=ccc, trace=tr,
+    ...             threshold=0.8)  # doctest: +SKIP
+
+    .. image:: ../../plots/triple_plot.png
     """
     _check_save_args(save, savefile)
     if len(cccsum) != len(trace.data):
@@ -211,19 +271,26 @@ def peaks_plot(data, starttime, samp_rate, save=False, peaks=[(0, 0)],
     return fig
 
 
-def cumulative_detections(dates, template_names, show=True, plot_legend=True,
+def cumulative_detections(dates=None, template_names=None, detections=None,
+                          plot_grouped=False, show=True, plot_legend=True,
                           save=False, savefile=None):
     r"""Plot cumulative detections in time.
 
     Simple plotting function to take a list of datetime objects and plot \
     a cumulative detections list.  Can take dates as a list of lists and will \
-    plot each list seperately, e.g. if you have dates from more than one \
+    plot each list separately, e.g. if you have dates from more than one \
     template it will overlay them in different colours.
 
     :type dates: list
     :param dates: Must be a list of lists of datetime.datetime objects
     :type template_names: list
     :param template_names: List of the template names in order of the dates
+    :type detections: list
+    :param detections: List of eqcorrscan.core.match_filter.DETECTION objects
+    :type plot_grouped: bool
+    :param plot_grouped: If True, group all detections into a single \
+        cumulative curve; if False (the default), plot each template's \
+        detections individually.
     :type show: bool
     :param show: Whether or not to show the plot, defaults to True.
     :type plot_legend: bool
@@ -236,6 +303,9 @@ def cumulative_detections(dates, template_names, show=True, plot_legend=True,
 
     :returns: :class: matplotlib.figure
 
+    .. note:: Can either take lists of DETECTION objects directly, or two \
+        lists of dates and template names - either/or, not both.
+
     .. rubric:: Example
 
     >>> import datetime as dt
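A minimal sketch of the new DETECTION-based interface, reusing the dummy-detection constructor arguments from the extract_detections example earlier in this changeset (real detections would come from the matched-filter routines):

    from obspy import UTCDateTime
    from eqcorrscan.core.match_filter import DETECTION
    from eqcorrscan.utils.plotting import cumulative_detections

    # Dummy detections, as in the extract_detections doctest
    detections = [DETECTION('temp1', UTCDateTime(2012, 3, 26, 9, 15), 2,
                            ['WHYM', 'EORO'], 2, 1.2, 'corr'),
                  DETECTION('temp2', UTCDateTime(2012, 3, 26, 18, 5), 2,
                            ['WHYM', 'EORO'], 2, 1.2, 'corr')]
    # plot_grouped=True collapses all templates into one cumulative curve
    cumulative_detections(detections=detections, plot_grouped=True)
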
@@ -261,14 +331,40 @@ def cumulative_detections(dates, template_names, show=True, plot_legend=True,
     """
     import matplotlib.dates as mdates
     from copy import deepcopy
+    from eqcorrscan.core.match_filter import DETECTION
     _check_save_args(save, savefile)
     # Set up a default series of parameters for lines
     colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black',
               'firebrick', 'purple', 'darkgoldenrod', 'gray']
     linestyles = ['-', '-.', '--', ':']
     # Check that dates is a list of lists
-    if type(dates[0]) != list:
-        dates = [dates]
+    if not detections:
+        if type(dates[0]) != list:
+            dates = [dates]
+    else:
+        dates = []
+        template_names = []
+        for detection in detections:
+            if not isinstance(detection, DETECTION):
+                msg = 'detection not of type: ' +\
+                      'eqcorrscan.core.match_filter.DETECTION'
+                raise IOError(msg)
+            dates.append(detection.detect_time.datetime)
+            template_names.append(detection.template_name)
+        _dates = []
+        _template_names = []
+        for template_name in list(set(template_names)):
+            _template_names.append(template_name)
+            _dates.append([date for i, date in enumerate(dates)
+                           if template_names[i] == template_name])
+        dates = _dates
+        template_names = _template_names
+    if plot_grouped:
+        _dates = []
+        for template_dates in dates:
+            _dates += template_dates
+        dates = [_dates]
+        template_names = ['all']
     i = 0
     j = 0
     # This is an ugly way of looping through colours and linestyles, it would
@@ -279,7 +375,6 @@ def cumulative_detections(dates, template_names, show=True, plot_legend=True,
         template_dates.sort()
         plot_dates = deepcopy(template_dates)
         plot_dates.insert(0, min_date)
-        print(plot_dates)
         counts = np.arange(-1, len(template_dates))
         ax1.step(plot_dates, counts, linestyles[j],
                  color=colors[i], label=template_names[k],
@@ -305,28 +400,25 @@ def cumulative_detections(dates, template_names, show=True, plot_legend=True,
         if min(date_list) < min_date:
             min_date = min(date_list)
     timedif = max_date - min_date
-    if timedif.total_seconds() >= 10800 and timedif.total_seconds() <= 25200:
-        print('Using quarter of an hour stamps')
+    if 10800 <= timedif.total_seconds() <= 25200:
+        hours = mdates.MinuteLocator(byminute=[0, 30])
+        mins = mdates.MinuteLocator(byminute=range(0, 60, 10))
+    elif 7200 <= timedif.total_seconds() < 10800:
         hours = mdates.MinuteLocator(byminute=[0, 15, 30, 45])
-        mins = mdates.HourLocator(byminute=range(0, 60, 5))
+        mins = mdates.MinuteLocator(byminute=range(0, 60, 5))
     elif timedif.total_seconds() <= 1200:
-        print('Using 2 min stamps')
         hours = mdates.MinuteLocator(byminute=range(0, 60, 2))
-        mins = mdates.HourLocator(byminute=range(0, 60, 0.5))
+        # range() cannot step by 0.5 minutes; use 30 s second-ticks instead
+        mins = mdates.SecondLocator(bysecond=[0, 30])
-    elif timedif.total_seconds > 25200 and timedif.total_seconds() <= 86400:
-        print('Using hour stamps')
+    elif 25200 < timedif.total_seconds() <= 86400:
         hours = mdates.HourLocator(byhour=range(0, 24, 3))
         mins = mdates.HourLocator(byhour=range(0, 24, 1))
-    elif timedif.total_seconds > 86400 and timedif.total_seconds() <= 172800:
-        print('Using hour stamps')
+    elif 86400 < timedif.total_seconds() <= 172800:
         hours = mdates.HourLocator(byhour=range(0, 24, 6))
         mins = mdates.HourLocator(byhour=range(0, 24, 1))
     elif timedif.total_seconds() > 172800:
-        print('Using day stamps')
         hours = mdates.AutoDateLocator()
         mins = mdates.HourLocator(byhour=range(0, 24, 3))
     else:
-        print('Using 5 min stamps')
         hours = mdates.MinuteLocator(byminute=range(0, 60, 5))
     hrFMT = mdates.DateFormatter('%Y/%m/%d %H:%M:%S')
     ax1.xaxis.set_major_locator(hours)
@@ -361,6
+453,18 @@ def threeD_gridplot(nodes, save=False, savefile=None): :param savefile: required if save=True, path to save figure to. :returns: :class: matplotlib.figure + + .. rubric:: Example + + >>> from eqcorrscan.utils.plotting import threeD_gridplot + >>> nodes = [(-43.5, 170.4, 4), (-43.3, 170.8, 12), (-43.4, 170.3, 8)] + >>> threeD_gridplot(nodes=nodes) # doctest: +SKIP + + .. plot:: + + from eqcorrscan.utils.plotting import threeD_gridplot + nodes = [(-43.5, 170.4, 4), (-43.3, 170.8, 12), (-43.4, 170.3, 8)] + threeD_gridplot(nodes=nodes) """ _check_save_args(save, savefile) lats = [] @@ -391,7 +495,8 @@ def multi_event_singlechan(streams, catalog, station, channel, freqmin=False, freqmax=False, realign=False, cut=(-3.0, 5.0), PWS=False, title=False, save=False, savefile=None): - r"""Plot data from a single channel for multiple events. + """ + Plot data from a single channel for multiple events. Data will be aligned by their pick-time given in the \ picks. Requires an individual stream for each event you want to plot, @@ -438,33 +543,67 @@ def multi_event_singlechan(streams, catalog, station, channel, :returns: new picks :rtype: list :returns: matplotlib.figure + + .. rubric:: Example + + >>> from obspy import read, Catalog + >>> from eqcorrscan.utils.sfile_util import read_event, readwavename + >>> from eqcorrscan.utils.plotting import multi_event_singlechan + >>> import glob + >>> sfiles = glob.glob('eqcorrscan/tests/test_data/REA/TEST_/*') + >>> catalog = Catalog() + >>> streams = [] + >>> for sfile in sfiles: + ... catalog.append(read_event(sfile)) + ... wavfile = readwavename(sfile)[0] + ... stream_path = 'eqcorrscan/tests/test_data/WAV/TEST_/' + wavfile + ... stream = read(stream_path) + ... # Annoying coping with seisan 2 letter channels + ... for tr in stream: + ... tr.stats.channel = tr.stats.channel[0] + tr.stats.channel[-1] + ... streams.append(stream) + >>> multi_event_singlechan(streams=streams, catalog=catalog, + ... station='GCSZ', channel='EZ') # doctest: +SKIP + + .. image:: ../../plots/multi_event_singlechan.png """ _check_save_args(save, savefile) from eqcorrscan.utils import stacking import copy - from eqcorrscan.core.match_filter import normxcorr2 - from obspy import Stream + from obspy import Stream, Catalog import warnings - fig, axes = plt.subplots(len(catalog) + 1, 1, sharex=True, figsize=(7, 12)) - if len(catalog) > 1: - axes = axes.ravel() + # Work out how many picks we should have... 
+    short_cat = Catalog()
+    short_streams = []
+    for i, event in enumerate(catalog):
+        event_stachans = [(pick.waveform_id.station_code,
+                           pick.waveform_id.channel_code)
+                          for pick in event.picks]
+        if (station, channel) in event_stachans:
+            short_cat.append(event)
+            short_streams.append(streams[i])
+    if len(short_cat) == 0:
+        raise IOError('No picks for ' + station + ' ' + channel)
     traces = []
     al_traces = []
-    # Keep input safe
-    clist = copy.deepcopy(catalog)
-    if isinstance(streams, Stream):
-        streams = [streams]
-    st_list = copy.deepcopy(streams)
-    for i, event in enumerate(clist):
+    if isinstance(short_streams, Stream):
+        short_streams = [short_streams]
+    st_list = copy.deepcopy(short_streams)
+    for i, event in enumerate(short_cat):
         # Extract the appropriate pick
         _pick = [pick for pick in event.picks if
                  pick.waveform_id.station_code == station and
-                 pick.waveform_id.channel_code == channel][0]
+                 pick.waveform_id.channel_code == channel]
+        if len(_pick) == 0:
+            print('No pick for ' + station + '.' + channel)
+            continue
+        else:
+            _pick = _pick[0]
         if st_list[i].select(station=station, channel=channel):
             tr = st_list[i].select(station=station, channel=channel)[0]
         else:
-            print('No data for ' + _pick.waveform_id)
+            print('No data for ' + _pick.waveform_id.station_code)
             continue
         tr.detrend('linear')
         if freqmin:
@@ -502,11 +641,62 @@ def multi_event_singlechan(streams, catalog, station, channel,
         shifts = stacking.align_traces(al_traces, shift_len)
         for i in xrange(len(shifts)):
             print('Shifting by ' + str(shifts[i]) + ' seconds')
-            event.picks[0].time -= shifts[i]
+            _pick.time -= shifts[i]
             traces[i].trim(_pick.time - pre_pick,
                            _pick.time + clip - pre_pick,
                            nearest_sample=False)
     # We now have a list of traces
+    if PWS:
+        stack = 'PWS'
+    else:
+        stack = 'linstack'
+    fig = multi_trace_plot(traces=traces, corr=True, stack=stack)
+    if title:
+        fig.suptitle(title)
+    plt.subplots_adjust(hspace=0)
+    if not save:
+        plt.show()
+    else:
+        plt.savefig(savefile)
+        plt.close()
+    return traces, short_cat, fig
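The new multi_trace_plot function below ships without a doctest; a minimal usage sketch on the obspy demo data (three channels of one event merely exercise the plot, they are not a realistic multi-event input):

    from obspy import read
    from eqcorrscan.utils.plotting import multi_trace_plot

    st = read()  # obspy demo stream: three traces from one station
    multi_trace_plot(traces=list(st), corr=True, stack='linstack',
                     show=True)
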
+
+
+def multi_trace_plot(traces, corr=True, stack='linstack', size=(7, 12),
+                     show=True, title=None):
+    """
+    Plot multiple traces (usually from the same station) on the same plot.
+
+    Differs from obspy's Stream.plot in that only relative time within \
+    traces is considered; it will not merge traces together.
+
+    :type traces: list
+    :param traces: List of obspy.core.Trace
+    :type corr: bool
+    :param corr: To calculate the correlation or not; if True, will add \
+        this to the axes.
+    :type stack: str
+    :param stack: To plot the stack as the first trace or not, select type \
+        of stack: 'linstack' or 'PWS', or None.
+    :type size: tuple
+    :param size: Size of figure.
+    :type show: bool
+    :param show: Whether to plot the figure to screen or not.
+    :type title: str
+    :param title: Title to plot
+    """
+    from obspy import Stream
+    from eqcorrscan.utils import stacking
+    from eqcorrscan.core.match_filter import normxcorr2
+
+    if stack in ['linstack', 'PWS']:
+        fig, axes = plt.subplots(len(traces) + 1, 1, sharex=True,
+                                 figsize=size)
+    else:
+        fig, axes = plt.subplots(len(traces), 1, sharex=True,
+                                 figsize=size)
+    if len(traces) > 1:
+        axes = axes.ravel()
     traces = [(trace, trace.stats.starttime.datetime) for trace in traces]
     traces.sort(key=lambda tup: tup[1])
     traces = [trace[0] for trace in traces]
@@ -515,44 +705,52 @@ def multi_event_singlechan(streams, catalog, station, channel,
         y = tr.data
         x = np.arange(len(y))
         x = x / tr.stats.sampling_rate  # convert to seconds
-        axes[i + 1].plot(x, y, 'k', linewidth=1.1)
-        axes[i + 1].yaxis.set_ticks([])
+        if not stack:
+            ind = i
+        else:
+            ind = i + 1
+        axes[ind].plot(x, y, 'k', linewidth=1.1)
+        axes[ind].yaxis.set_ticks([])
     traces = [Stream(trace) for trace in traces]
-    if PWS:
+    if stack == 'PWS':
         linstack = stacking.PWS_stack(traces)
-    else:
+    elif stack == 'linstack':
         linstack = stacking.linstack(traces)
-    tr = linstack.select(station=station, channel=channel)[0]
-    y = tr.data
-    x = np.arange(len(y))
-    x = x / tr.stats.sampling_rate
-    axes[0].plot(x, y, 'r', linewidth=2.0)
-    axes[0].set_ylabel('Stack', rotation=0)
-    axes[0].yaxis.set_ticks([])
+    if stack in ['linstack', 'PWS']:
+        tr = linstack[0]
+        y = tr.data
+        x = np.arange(len(y))
+        x = x / tr.stats.sampling_rate
+        axes[0].plot(x, y, 'r', linewidth=2.0)
+        axes[0].set_ylabel('Stack', rotation=0)
+        axes[0].yaxis.set_ticks([])
     for i, slave in enumerate(traces):
-        cc = normxcorr2(tr.data, slave[0].data)
-        axes[i + 1].set_ylabel('cc=' + str(round(np.max(cc), 2)), rotation=0)
-        axes[i + 1].text(0.9, 0.15, str(round(np.max(slave[0].data))),
-                         bbox=dict(facecolor='white', alpha=0.95),
-                         transform=axes[i + 1].transAxes)
-        axes[i + 1].text(0.7, 0.85, slave[0].stats.starttime.datetime.
-                         strftime('%Y/%m/%d %H:%M:%S'),
-                         bbox=dict(facecolor='white', alpha=0.95),
-                         transform=axes[i + 1].transAxes)
+        if not stack:
+            ind = i
+        else:
+            ind = i + 1
+        if corr:
+            cc = normxcorr2(tr.data, slave[0].data)
+            axes[ind].set_ylabel('cc=' + str(round(np.max(cc), 2)),
+                                 rotation=0)
+        axes[ind].text(0.9, 0.15, str(round(np.max(slave[0].data))),
+                       bbox=dict(facecolor='white', alpha=0.95),
+                       transform=axes[ind].transAxes)
+        axes[ind].text(0.7, 0.85, slave[0].stats.starttime.datetime.
+                       strftime('%Y/%m/%d %H:%M:%S'),
+                       bbox=dict(facecolor='white', alpha=0.95),
+                       transform=axes[ind].transAxes)
     axes[-1].set_xlabel('Time (s)')
     if title:
-        axes[0].set_title(title)
-    plt.subplots_adjust(hspace=0)
-    if not save:
+        fig.suptitle(title)
+    if show:
         plt.show()
-    else:
-        plt.savefig(savefile)
-        plt.close()
-    return traces, clist, fig
+    return fig
 
 
 def detection_multiplot(stream, template, times, streamcolour='k',
-                        templatecolour='r', save=False, savefile=None):
+                        templatecolour='r', save=False, savefile=None,
+                        size=(10.5, 7.5)):
     r"""Plot a stream of data with a template on top of it at detection times.
 
     :type stream: obspy.core.stream.Stream
@@ -571,8 +769,15 @@ def detection_multiplot(stream, template, times, streamcolour='k',
         to screen.
     :type savefile: str
     :param savefile: Filename to save to, required for save=True
+    :type size: tuple
+    :param size: Figure size.
 
     :returns: :class: matplotlib.figure
+
+
+    ..
image:: ../../plots/detection_multiplot.png + + """ _check_save_args(save, savefile) import datetime as dt @@ -580,17 +785,21 @@ def detection_multiplot(stream, template, times, streamcolour='k', # Sort before plotting template = template.sort() # Only take traces that match in both - template_stachans = [(tr.stats.station, tr.stats.channel) for tr in template] + template_stachans = [(tr.stats.station, tr.stats.channel) + for tr in template] stream = Stream([tr for tr in stream - if (tr.stats.station, tr.stats.channel) in template_stachans]) + if (tr.stats.station, + tr.stats.channel) in template_stachans]) ntraces = min(len(template), len(stream)) - print('Only plotting %s traces' % str(ntraces)) - fig, axes = plt.subplots(ntraces, 1, sharex=True) + fig, axes = plt.subplots(ntraces, 1, sharex=True, figsize=size) if len(template) > 1: axes = axes.ravel() mintime = min([tr.stats.starttime for tr in template]) - i = 0 - for template_tr in template: + for i, template_tr in enumerate(template): + if len(template) > 1: + axis = axes[i] + else: + axis = axes image = stream.select(station=template_tr.stats.station, channel='*'+template_tr.stats.channel[-1]) if not image: @@ -607,8 +816,8 @@ def detection_multiplot(stream, template, times, streamcolour='k', image_times = [image.stats.starttime.datetime + dt.timedelta((j * image.stats.delta) / 86400) for j in range(len(image.data))] - axes[i].plot(image_times, image.data / max(image.data), - streamcolour, linewidth=1.2) + axis.plot(image_times, image.data / max(image.data), + streamcolour, linewidth=1.2) for k, time in enumerate(times): lagged_time = UTCDateTime(time) + (template_tr.stats.starttime - mintime) @@ -617,17 +826,28 @@ def detection_multiplot(stream, template, times, streamcolour='k', dt.timedelta((j * template_tr.stats.delta) / 86400) for j in range(len(template_tr.data))] - axes[i].plot(template_times, - template_tr.data / max(template_tr.data), - templatecolour, linewidth=1.2) + # Normalize the template according to the data detected in + normalizer = max(image.data[int((template_times[0] - + image_times[0]).total_seconds() / + image.stats.delta): + int((template_times[-1] - + image_times[0]).total_seconds() / + image.stats.delta)] / + max(image.data)) + axis.plot(template_times, + template_tr.data * normalizer, + templatecolour, linewidth=1.2) ylab = '.'.join([template_tr.stats.station, template_tr.stats.channel]) - axes[i].set_ylabel(ylab, rotation=0, - horizontalalignment='right') - axes[i].yaxis.set_ticks([]) - i += 1 - axes[len(axes) - 1].set_xlabel('Time') + axis.set_ylabel(ylab, rotation=0, + horizontalalignment='right') + axis.yaxis.set_ticks([]) + if len(template) > 1: + axes[len(axes) - 1].set_xlabel('Time') + else: + axis.set_xlabel('Time') plt.subplots_adjust(hspace=0, left=0.175, right=0.95, bottom=0.07) + plt.xticks(rotation=10) if not save: plt.show() else: @@ -636,7 +856,7 @@ def detection_multiplot(stream, template, times, streamcolour='k', return fig -def interev_mag_sfiles(sfiles, save=False, savefile=None): +def interev_mag_sfiles(sfiles, save=False, savefile=None, size=(10.5, 7.5)): r"""Plot inter-event time versus magnitude for series of events. **thin** Wrapper for interev_mag. @@ -648,20 +868,55 @@ def interev_mag_sfiles(sfiles, save=False, savefile=None): to screen. :type savefile: str :param savefile: Filename to save to, required for save=True + :type size: tuple + :param size: Size of figure in inches. :returns: :class: matplotlib.figure + + .. 
rubric:: Example + + >>> import glob + >>> from eqcorrscan.utils.plotting import interev_mag_sfiles + >>> sfiles = glob.glob('eqcorrscan/tests/test_data/REA/TEST_/*') + >>> interev_mag_sfiles(sfiles=sfiles) # doctest: +SKIP + + .. plot:: + + import glob, os + from eqcorrscan.utils.plotting import interev_mag_sfiles + sfiles = glob.glob(os.path. + realpath('../../../tests/test_data/REA/TEST_') + + os.sep + '*') + print(sfiles) + interev_mag_sfiles(sfiles=sfiles) """ _check_save_args(save, savefile) from eqcorrscan.utils import sfile_util - times = [sfile_util.readheader(sfile)[0].origins[0].time - for sfile in sfiles] - mags = [sfile_util.readheader(sfile)[0].magnitudes[0].mag - for sfile in sfiles] - fig = interev_mag(times, mags, save, savefile) + times = [] + mags = [] + for sfile in sfiles: + head = sfile_util.readheader(sfile) + if head.preferred_origin(): + origin = head.preferred_origin() + elif len(head.origins) > 0: + origin = head.origins[0] + else: + origin = False + if head.preferred_magnitude(): + magnitude = head.preferred_magnitude() + elif len(head.magnitudes) > 0: + magnitude = head.magnitudes[0] + else: + magnitude = False + if origin and magnitude: + times.append(origin.time) + mags.append(magnitude.mag) + fig = interev_mag(times=times, mags=mags, save=save, savefile=savefile, + size=size) return fig -def interev_mag(times, mags, save=False, savefile=None): +def interev_mag(times, mags, save=False, savefile=None, size=(10.5, 7.5)): r"""Plot inter-event times against magnitude. :type times: list @@ -673,8 +928,10 @@ def interev_mag(times, mags, save=False, savefile=None): to screen. :type savefile: str :param savefile: Filename to save to, required for save=True + :type size: tuple + :param size: Size of figure in inches. - :returns: :class: matplotlib.figure + :returns: matplotlib.figure .. rubric:: Example @@ -708,7 +965,7 @@ def interev_mag(times, mags, save=False, savefile=None): times = [x[0] for x in l] mags = [x[1] for x in l] # Make two subplots next to each other of time before and time after - fig, axes = plt.subplots(1, 2, sharey=True) + fig, axes = plt.subplots(1, 2, sharey=True, figsize=size) axes = axes.ravel() pre_times = [] post_times = [] @@ -734,8 +991,10 @@ def interev_mag(times, mags, save=False, savefile=None): return fig -def obspy_3d_plot(inventory, catalog, save=False, savefile=None): - r"""Plot obspy.Inventory and obspy.Catalog classes in three dimensions. +def obspy_3d_plot(inventory, catalog, save=False, savefile=None, + size=(10.5, 7.5)): + """ + Plot obspy Inventory and obspy Catalog classes in three dimensions. :type inventory: obspy.core.inventory.inventory.Inventory :param inventory: Obspy inventory class containing station metadata @@ -746,27 +1005,67 @@ def obspy_3d_plot(inventory, catalog, save=False, savefile=None): to screen. :type savefile: str :param savefile: Filename to save to, required for save=True + :type size: tuple + :param size: Size of figure in inches. :returns: :class: matplotlib.figure + + .. rubric:: Example: + + >>> from obspy.clients.fdsn import Client + >>> from obspy import UTCDateTime + >>> from eqcorrscan.utils.plotting import obspy_3d_plot + >>> client = Client('IRIS') + >>> t1 = UTCDateTime(2012, 3, 26) + >>> t2 = t1 + 86400 + >>> catalog = client.get_events(starttime=t1, endtime=t2, latitude=-43, + ... longitude=170, maxradius=5) + >>> inventory = client.get_stations(starttime=t1, endtime=t2, latitude=-43, + ... 
+    ...                                 longitude=170, maxradius=10)
+    >>> obspy_3d_plot(inventory=inventory, catalog=catalog)  # doctest: +SKIP
+
+    .. plot::
+
+        from obspy.clients.fdsn import Client
+        from obspy import UTCDateTime
+        from eqcorrscan.utils.plotting import obspy_3d_plot
+        client = Client('IRIS')
+        t1 = UTCDateTime(2012, 3, 26)
+        t2 = t1 + 86400
+        catalog = client.get_events(starttime=t1, endtime=t2, latitude=-43,
+                                    longitude=170, maxradius=5)
+        inventory = client.get_stations(starttime=t1, endtime=t2, latitude=-43,
+                                        longitude=170, maxradius=10)
+        obspy_3d_plot(inventory=inventory, catalog=catalog)
     """
     _check_save_args(save, savefile)
-    from eqcorrscan.utils.plotting import threeD_seismplot
-    nodes = [(ev.preferred_origin().latitude,
-              ev.preferred_origin().longitude,
-              ev.preferred_origin().depth / 1000) for ev in catalog]
+    nodes = []
+    for ev in catalog:
+        nodes.append((ev.preferred_origin().latitude,
+                      ev.preferred_origin().longitude,
+                      ev.preferred_origin().depth / 1000))
     # Will plot borehole instruments at elevation - depth if provided
     all_stas = []
     for net in inventory:
-        stations = [(sta.latitude, sta.longitude,
-                     sta.elevation / 1000 - sta.channels[0].depth / 1000)
-                    for sta in net]
-        all_stas += stations
-    fig = threeD_seismplot(all_stas, nodes, save, savefile)
+        for sta in net:
+            if len(sta.channels) > 0:
+                all_stas.append((sta.latitude, sta.longitude,
+                                 sta.elevation / 1000 -
+                                 sta.channels[0].depth / 1000))
+            else:
+                warnings.warn('No channel information attached, '
+                              'setting elevation without depth')
+                all_stas.append((sta.latitude, sta.longitude,
+                                 sta.elevation / 1000))
+    fig = threeD_seismplot(stations=all_stas, nodes=nodes, save=save,
+                           savefile=savefile, size=size)
     return fig


-def threeD_seismplot(stations, nodes, save=False, savefile=None):
-    r"""Plot seismicity and stations in a 3D, movable, zoomable space.
+def threeD_seismplot(stations, nodes, save=False, savefile=None,
+                     size=(10.5, 7.5)):
+    """
+    Plot seismicity and stations in a 3D, movable, zoomable space.

     Uses matplotlibs Axes3D package.
@@ -781,22 +1080,44 @@
         to screen.
     :type savefile: str
     :param savefile: Filename to save to, required for save=True
+    :type size: tuple
+    :param size: Size of figure in inches.

     :returns: :class: matplotlib.figure
+
+    .. Note:: See obspy_3d_plot for example output.
     """
     _check_save_args(save, savefile)
     stalats, stalongs, staelevs = zip(*stations)
     evlats, evlongs, evdepths = zip(*nodes)
+    # Cope with +/-180 longitudes...
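The comment closing the hunk above flags a real plotting pitfall: matplotlib treats longitude as a plain number, so stations and events on opposite sides of the +/-180 degree dateline would plot roughly 360 degrees apart. The loops that follow shift negative longitudes into the 0-360 range; the same idea as a standalone sketch (hypothetical helper name, not part of the module):

    def unwrap_longitudes(longitudes):
        # Shift negative longitudes into 0-360 so dateline-straddling
        # networks plot together rather than at opposite ends of the axis.
        return [lon + 360 if lon < 0 else float(lon) for lon in longitudes]

    assert unwrap_longitudes([179.5, -179.5]) == [179.5, 180.5]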
+    _evlongs = []
+    for evlong in evlongs:
+        if evlong < 0:
+            evlong = float(evlong)
+            evlong += 360
+        _evlongs.append(evlong)
+    evlongs = _evlongs
+    _stalongs = []
+    for stalong in stalongs:
+        if stalong < 0:
+            stalong = float(stalong)
+            stalong += 360
+        _stalongs.append(stalong)
+    stalongs = _stalongs
     evdepths = [-1 * depth for depth in evdepths]
-    fig = plt.figure()
+    fig = plt.figure(figsize=size)
     ax = fig.add_subplot(111, projection='3d')
-    ax.scatter(evlats, evlongs, evdepths, marker="x", c="k")
-    ax.scatter(stalats, stalongs, staelevs, marker="v", c="r")
-    ax.set_ylabel("Latitude (deg)")
-    ax.set_xlabel("Longitude (deg)")
-    ax.set_zlabel("Depth(km)")
+    ax.scatter(evlats, evlongs, evdepths, marker="x", c="k",
+               label='Hypocenters')
+    ax.scatter(stalats, stalongs, staelevs, marker="v", c="r",
+               label='Stations')
+    ax.set_ylabel("Longitude (deg)")
+    ax.set_xlabel("Latitude (deg)")
+    ax.set_zlabel("Elevation (km)")
     ax.get_xaxis().get_major_formatter().set_scientific(False)
     ax.get_yaxis().get_major_formatter().set_scientific(False)
+    plt.legend()
     if not save:
         plt.show()
     else:
@@ -883,7 +1204,7 @@ def pretty_template_plot(template, size=(10.5, 7.5), save=False,
     lines = []
     labels = []
     for i, tr in enumerate(template):
-        # Cope with a singe channel template case.
+        # Cope with a single channel template case.
         if len(template) > 1:
             axis = axes[i]
         else:
@@ -925,7 +1246,10 @@ def pretty_template_plot(template, size=(10.5, 7.5), save=False,
                     pick.waveform_id.channel_code[-1] ==
                     tr.stats.channel[0] + tr.stats.channel[-1]]
         for pick in tr_picks:
-            if 'P' in pick.phase_hint.upper():
+            if not pick.phase_hint:
+                pcolor = 'k'
+                label = 'Unknown pick'
+            elif 'P' in pick.phase_hint.upper():
                 pcolor = 'red'
                 label = 'P-pick'
             elif 'S' in pick.phase_hint.upper():
                 pcolor = 'blue'
                 label = 'S-pick'
@@ -966,9 +1290,145 @@
     return fig


+def plot_repicked(template, picks, det_stream, size=(10.5, 7.5), save=False,
+                  savefile=None, title=False):
+    """
+    Plot a template over a detected stream, with picks corrected by lag-calc.
+
+    :param template: Template used to make the detection, will be aligned \
+        according to picks.
+    :type template: obspy.core.stream.Stream
+    :param picks: list of corrected picks.
+    :type picks: list
+    :param det_stream: Stream to plot in the background, should be the \
+        detection; data should encompass the time of the picks.
+    :type det_stream: obspy.core.stream.Stream
+    :param size: tuple of plot size.
+    :type size: tuple
+    :param save: To save figure or not, if false, will show to screen.
+    :type save: bool
+    :param savefile: File name to save file, required if save==True.
+    :type savefile: str
+    :param title: Title for plot, defaults to False.
+    :type title: str
+
+    :return: Figure handle which can be edited.
+    :rtype: matplotlib.pyplot.figure
+
+    .. note:: Called by lag_calc, hence no example, though it can be called directly.
+
+    .. image:: ../../plots/plot_repicked.png
+    """
+    _check_save_args(save, savefile)
+    fig, axes = plt.subplots(len(template), 1, sharex=True, figsize=size)
+    if len(template) > 1:
+        axes = axes.ravel()
+    mintime = det_stream.sort(['starttime'])[0].stats.starttime
+    template.sort(['network', 'station', 'starttime'])
+    lengths = []
+    lines = []
+    labels = []
+    n_templates_plotted = 0
+    for i, tr in enumerate(template.sort(['starttime'])):
+        # Cope with a single channel template case.
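The single-channel special case that this comment introduces recurs throughout the patch because plt.subplots returns a bare Axes object, not an array, when only one subplot is requested. A short, self-contained illustration of the difference (matplotlib and numpy only):

    import matplotlib.pyplot as plt
    import numpy as np

    fig, axes = plt.subplots(3, 1)
    assert isinstance(axes, np.ndarray)      # an array of Axes, safe to index
    fig, axes = plt.subplots(1, 1)
    assert not isinstance(axes, np.ndarray)  # a single Axes, not indexable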
+        if len(template) > 1:
+            axis = axes[i]
+        else:
+            axis = axes
+        tr_picks = [pick for pick in picks if
+                    pick.waveform_id.station_code == tr.stats.station and
+                    pick.waveform_id.channel_code[0] +
+                    pick.waveform_id.channel_code[-1] ==
+                    tr.stats.channel[0] + tr.stats.channel[-1]]
+        if len(tr_picks) > 1:
+            msg = 'Multiple picks on channel %s' % tr.stats.station + ', ' + \
+                tr.stats.channel
+            raise NotImplementedError(msg)
+        if len(tr_picks) == 0:
+            msg = 'No pick for channel %s' % tr.stats.station + ', ' + \
+                tr.stats.channel
+            print(msg)
+        else:
+            pick = tr_picks[0]
+            delay = pick.time - mintime
+            y = tr.data
+            # Normalise
+            y /= max(y)
+            x = np.linspace(0, (len(y) - 1) * tr.stats.delta, len(y))
+            x += delay
+        btr = det_stream.select(station=tr.stats.station,
+                                channel=tr.stats.channel)[0]
+        bdelay = btr.stats.starttime - mintime
+        by = btr.data
+        if len(tr_picks) > 0:
+            by /= max(by[int(delay * btr.stats.sampling_rate):
+                         int(delay * btr.stats.sampling_rate) + len(x)])
+        else:
+            by /= max(by)
+        bx = np.linspace(0, (len(by) - 1) * btr.stats.delta, len(by))
+        bx += bdelay
+        axis.plot(bx, by, 'k', linewidth=1.5)
+        if len(tr_picks) > 0:
+            template_line, = axis.plot(x, y, 'r', linewidth=1.6,
+                                       label='Template')
+            if not pick.phase_hint:
+                pcolor = 'k'
+                label = 'Unknown pick'
+            elif 'P' in pick.phase_hint.upper():
+                pcolor = 'red'
+                label = 'P-pick'
+            elif 'S' in pick.phase_hint.upper():
+                pcolor = 'blue'
+                label = 'S-pick'
+            else:
+                pcolor = 'k'
+                label = 'Unknown pick'
+            pdelay = pick.time - mintime
+            line = axis.axvline(x=pdelay, color=pcolor, linewidth=2,
+                                linestyle='--', label=label)
+            if label not in labels:
+                lines.append(line)
+                labels.append(label)
+            if n_templates_plotted == 0:
+                lines.append(template_line)
+                labels.append('Template')
+                n_templates_plotted += 1
+            lengths.append(max(bx[-1], x[-1]))
+        else:
+            lengths.append(bx[-1])
+        axis.set_ylabel('.'.join([tr.stats.station, tr.stats.channel]),
+                        rotation=0, horizontalalignment='right')
+        axis.yaxis.set_ticks([])
+    axis.set_xlim([0, max(lengths)])
+    if len(template) > 1:
+        axis = axes[len(template) - 1]
+    else:
+        axis = axes
+    axis.set_xlabel('Time (s) from %s' %
+                    mintime.datetime.strftime('%Y/%m/%d %H:%M:%S.%f'))
+    plt.figlegend(lines, labels, 'upper right')
+    if title:
+        if len(template) > 1:
+            axes[0].set_title(title)
+        else:
+            axes.set_title(title)
+    else:
+        plt.subplots_adjust(top=0.98)
+    plt.tight_layout()
+    plt.subplots_adjust(hspace=0)
+    if not save:
+        plt.show()
+        plt.close()
+    else:
+        plt.savefig(savefile)
+        plt.close()
+    return fig


 def NR_plot(stream, NR_stream, detections, false_detections=False,
             size=(18.5, 10), save=False, savefile=None, title=False):
-    r"""Plot Network response alongside the streams used.
+    """
+    Plot Network response alongside the stream used.

     Highlights detection times in the network response.
@@ -990,6 +1449,8 @@
     :param title: String for the title of the plot, set to False

     :returns: :class: matplotlib.figure
+
+    .. Note:: Called by bright_lights, not a general use plot (hence no example)
     """
     _check_save_args(save, savefile)
     import datetime as dt
@@ -1068,7 +1529,8 @@

 def SVD_plot(SVStreams, SValues, stachans, title=False, save=False,
              savefile=None):
-    r"""Plot singular vectors from the clustering routines.
+    """
+    Plot singular vectors from the clustering routines.

     One plot for each stachan.
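For reference, the phase-dependent colouring added to pretty_template_plot and plot_repicked above follows a single convention: P picks red, S picks blue, anything else (including missing phase hints) black. A sketch of that mapping pulled out as a standalone helper (hypothetical name, not in the module):

    def pick_colour_and_label(phase_hint):
        # Map a phase hint to (colour, legend label); falls back to black
        # for empty or unrecognised hints, mirroring the plotting code above.
        if not phase_hint:
            return 'k', 'Unknown pick'
        if 'P' in phase_hint.upper():
            return 'red', 'P-pick'
        if 'S' in phase_hint.upper():
            return 'blue', 'S-pick'
        return 'k', 'Unknown pick'

    assert pick_colour_and_label('Pn') == ('red', 'P-pick')
    assert pick_colour_and_label(None) == ('k', 'Unknown pick')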
@@ -1088,9 +1550,50 @@ def SVD_plot(SVStreams, SValues, stachans, title=False, save=False,
         additionally according to station and channel.

     :returns: :class: matplotlib.figure
+
+    .. rubric:: Example
+
+    >>> from obspy import read
+    >>> import glob
+    >>> from eqcorrscan.utils.plotting import SVD_plot
+    >>> from eqcorrscan.utils.clustering import svd, SVD_2_stream
+    >>> wavefiles = glob.glob('eqcorrscan/tests/test_data/WAV/TEST_/*')
+    >>> streams = [read(w) for w in wavefiles[1:10]]
+    >>> stream_list = []
+    >>> for st in streams:
+    ...     tr = st.select(station='GCSZ', channel='EHZ')
+    ...     tr = tr.detrend('simple').resample(100).filter('bandpass', freqmin=2,
+    ...                                                    freqmax=8)
+    ...     stream_list.append(tr)
+    >>> svec, sval, uvec, stachans = svd(stream_list=stream_list)
+    >>> SVstreams = SVD_2_stream(SVectors=svec, stachans=stachans, k=3,
+    ...                          sampling_rate=100)
+    >>> SVD_plot(SVStreams=SVstreams, SValues=sval,
+    ...          stachans=stachans)  # doctest: +SKIP
+
+    .. plot::
+
+        from obspy import read
+        import glob, os
+        from eqcorrscan.utils.plotting import SVD_plot
+        from eqcorrscan.utils.clustering import svd, SVD_2_stream
+        wavefiles = glob.glob(os.path.realpath('../../..') +
+                              '/tests/test_data/WAV/TEST_/*')
+        streams = [read(w) for w in wavefiles[1:10]]
+        stream_list = []
+        for st in streams:
+            tr = st.select(station='GCSZ', channel='EHZ')
+            tr = tr.detrend('simple').resample(100).filter('bandpass', freqmin=2,
+                                                           freqmax=8)
+            stream_list.append(tr)
+        svec, sval, uvec, stachans = svd(stream_list=stream_list)
+        SVstreams = SVD_2_stream(SVectors=svec, stachans=stachans, k=3,
+                                 sampling_rate=100)
+        SVD_plot(SVStreams=SVstreams, SValues=sval,
+                 stachans=stachans)
     """
     _check_save_args(save, savefile)
-    for stachan in stachans:
+    for sval, stachan in zip(SValues, stachans):
         print(stachan)
         plot_traces = [SVStream.select(station=stachan.split('.')[0],
                                        channel=stachan.split('.')[1])[0]
@@ -1102,7 +1605,7 @@
             y = tr.data
             x = np.linspace(0, len(y) * tr.stats.delta, len(y))
             axes[i].plot(x, y, 'k', linewidth=1.1)
-            ylab = 'SV '+str(i+1)+'='+str(round(SValues[i] / len(SValues), 2))
+            ylab = 'SV %s = %s' % (i+1, round(sval[i] / len(sval), 2))
             axes[i].set_ylabel(ylab, rotation=0)
             axes[i].yaxis.set_ticks([])
             print(i)
@@ -1123,14 +1626,15 @@

 def plot_synth_real(real_template, synthetic, channels=False, save=False,
                     savefile=None):
-    r"""Plot multiple channels of data for real data and synthetic.
+    """
+    Plot multiple channels of data for real data and synthetic.

     :type real_template: obspy.core.stream.Stream
     :param real_template: Stream of the real template
     :type synthetic: obspy.core.stream.Stream
     :param synthetic: Stream of synthetic template
     :type channels: list
-    :param channels: List of tuples of (station, channel) to plot, default is\
+    :param channels: List of tuples of (station, channel) to plot, default is \
         False, which plots all.
     :type save: bool
     :param save: False will plot to screen, true will save plot and not show \
         to screen.
     :type savefile: str
     :param savefile: Filename to save to, required for save=True

     :returns: :class: matplotlib.figure
+
+    >>> from obspy import read, Stream, Trace
+    >>> from eqcorrscan.utils.synth_seis import seis_sim
+    >>> from eqcorrscan.utils.plotting import plot_synth_real
+    >>> real = read()
+    >>> synth = Stream(Trace(seis_sim(SP=100, flength=200)))
+    >>> synth[0].stats.station = 'RJOB'
+    >>> synth[0].stats.channel = 'EHZ'
+    >>> synth[0].stats.sampling_rate = 100
+    >>> synth = synth.filter('bandpass', freqmin=2, freqmax=8)
+    >>> real = real.select(station='RJOB',
+    ...                    channel='EHZ').detrend('simple').filter('bandpass',
+    ...                                                            freqmin=2,
+    ...                                                            freqmax=8)
+    >>> real = real.trim(starttime=real[0].stats.starttime + 4.9,
+    ...                  endtime=real[0].stats.starttime + 6.9).detrend('simple')
+    >>> plot_synth_real(real_template=real, synthetic=synth)  # doctest: +SKIP
+
+    .. plot::
+
+        from eqcorrscan.utils.plotting import plot_synth_real
+        from obspy import read, Stream, Trace
+        from eqcorrscan.utils.synth_seis import seis_sim
+        import os
+        real = read()
+        synth = Stream(Trace(seis_sim(SP=100, flength=200)))
+        synth[0].stats.station = 'RJOB'
+        synth[0].stats.channel = 'EHZ'
+        synth[0].stats.sampling_rate = 100
+        synth.filter('bandpass', freqmin=2, freqmax=8)
+        real = real.select(station='RJOB', channel='EHZ').detrend('simple').filter('bandpass', freqmin=2, freqmax=8)
+        real.trim(starttime=real[0].stats.starttime + 4.9,
+                  endtime=real[0].stats.starttime + 6.9).detrend('simple')
+        plot_synth_real(real_template=real, synthetic=synth)
     """
     _check_save_args(save, savefile)
     from obspy.signal.cross_correlation import xcorr
@@ -1163,6 +1701,10 @@
     if len(stachans) > 1:
         axes = axes.ravel()
     for i, stachan in enumerate(stachans):
+        if len(stachans) > 1:
+            axis = axes[i]
+        else:
+            axis = axes
         real_tr = real_template.select(station=stachan[0],
                                        channel=stachan[1])[0]
         synth_tr = synthetic.select(station=stachan[0],
@@ -1182,13 +1724,16 @@
             y = tr.data
             y = y / float(max(abs(y)))
             x = np.linspace(0, len(y) * tr.stats.delta, len(y))
-            axes[i].plot(x, y, colours[j], linewidth=2.0, label=labels[j])
-            axes[i].get_yaxis().set_ticks([])
+            axis.plot(x, y, colours[j], linewidth=2.0, label=labels[j])
+            axis.get_yaxis().set_ticks([])
         ylab = stachan[0]+'.'+stachan[1]+' cc='+str(round(corr, 2))
-        axes[i].set_ylabel(ylab, rotation=0)
+        axis.set_ylabel(ylab, rotation=0)
     plt.subplots_adjust(hspace=0)
     # axes[0].legend()
-    axes[-1].set_xlabel('Time (s)')
+    if len(stachans) > 1:
+        axes[-1].set_xlabel('Time (s)')
+    else:
+        axis.set_xlabel('Time (s)')
     if not save:
         plt.show()
     else:
@@ -1345,18 +1890,20 @@ def spec_trace(traces, cmap=None, wlen=0.4, log=False, trc='k',

     >>> st = read()
     >>> spec_trace(st, trc='white')  # doctest: +SKIP
+
     .. plot::

         from obspy import read
         from eqcorrscan.utils.plotting import spec_trace
         st = read()
         spec_trace(st, trc='white')
+
     """
     from obspy import Stream
     if isinstance(traces, Stream):
         traces.sort(['station', 'channel'])
     if not Fig:
-        Fig = plt.figure(figsize=size)
+        Fig = plt.figure()
     for i, tr in enumerate(traces):
         if i == 0:
             ax = Fig.add_subplot(len(traces), 1, i+1)
         else:
             ax = Fig.add_subplot(len(traces), 1, i+1, sharex=ax)
         ax1, ax2 = _spec_trace(tr, wlen=wlen, log=log, trc=trc,
                                tralpha=tralpha, axes=ax)
-        ax2.set_yticks([])
+        ax.set_yticks([])
         if i < len(traces) - 1:
             plt.setp(ax1.get_xticklabels(), visible=False)
         if type(traces) == list:
-            ax2.text(0.005, 0.85, tr.stats.starttime.datetime.
+            ax.text(0.005, 0.85, tr.stats.starttime.datetime.
                     strftime('%Y/%m/%d %H:%M:%S'),
                     bbox=dict(facecolor='white', alpha=0.8),
                     transform=ax2.transAxes)
         else:
-            ax2.text(0.005, 0.85, '.'.join([tr.stats.station,
+            ax.text(0.005, 0.85, '.'.join([tr.stats.station,
                                            tr.stats.channel]),
                     bbox=dict(facecolor='white', alpha=0.8),
                     transform=ax2.transAxes)
-        ax2.text(0.005, 0.02, str(np.max(tr.data).round(1)),
+        ax.text(0.005, 0.02, str(np.max(tr.data).round(1)),
                 bbox=dict(facecolor='white', alpha=0.95),
                 transform=ax2.transAxes)
-    ax1.set_xlabel('Time (s)')
+    ax.set_xlabel('Time (s)')
     Fig.subplots_adjust(hspace=0)
+    Fig.set_size_inches(w=size[0], h=size[1], forward=True)
     Fig.text(0.04, 0.5, 'Frequency (Hz)', va='center', rotation='vertical')
     if title:
         plt.suptitle(title)
     if show:
         plt.show()
-        plt.close()
     else:
         return Fig


 def _spec_trace(trace, cmap=None, wlen=0.4, log=False, trc='k',
                 tralpha=0.9, size=(10, 2.5), axes=None, title=None):
-    r"""Function to plot a trace over that traces spectrogram.
+    """
+    Function to plot a trace over that trace's spectrogram.

     Uses obspy's spectrogram routine.
@@ -1439,7 +1987,7 @@
     if not axes:
         Fig.set_size_inches(size)
         Fig.show()
-        Fig.close()
+        # Fig.close()
     else:
         return ax1, ax2
diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py
index 4762b2503..473237558 100644
--- a/eqcorrscan/utils/pre_processing.py
+++ b/eqcorrscan/utils/pre_processing.py
@@ -46,7 +46,7 @@ def _check_daylong(tr):

 def shortproc(st, lowcut, highcut, filt_order, samp_rate, debug=0,
-              parallel=False, num_cores=False):
+              parallel=False, num_cores=False, starttime=None, endtime=None):
     r"""Basic function to bandpass and downsample.

     Works in place on data. This is employed to ensure all parts of the \
@@ -71,6 +71,12 @@
     :type num_cores: int
     :param num_cores: Control the number of cores for parallel processing, \
         if set to False then this will use all the cores.
+    :type starttime: obspy.core.UTCDateTime
+    :param starttime: Desired data start time, will trim to this before \
+        processing
+    :type endtime: obspy.core.UTCDateTime
+    :param endtime: Desired data end time, will trim to this before \
+        processing

     :return: obspy.Stream
@@ -125,6 +131,20 @@
     # Add sanity check for filter
     if highcut and highcut >= 0.5 * samp_rate:
         raise IOError('Highcut must be lower than the nyquist')
+    if debug > 4:
+        parallel = False
+    if starttime and endtime:
+        for tr in st:
+            tr.trim(starttime, endtime)
+            if len(tr.data) == ((endtime - starttime) *
+                                tr.stats.sampling_rate) + 1:
+                tr.data = tr.data[1:len(tr.data)]
+    elif starttime:
+        for tr in st:
+            tr.trim(starttime=starttime)
+    elif endtime:
+        for tr in st:
+            tr.trim(endtime=endtime)
     if parallel:
         if not num_cores:
             num_cores = cpu_count()
@@ -197,7 +218,8 @@
     >>> client = Client('GEONET')
     >>> t1 = UTCDateTime(2012, 3, 26)
     >>> t2 = t1 + 86400
-    >>> bulk_info = [('NZ', 'FOZ', '10', 'HH*', t1, t2)]
+    >>> bulk_info = [('NZ', 'FOZ', '10', 'HHE', t1, t2),
+    ...              ('NZ', 'FOZ', '10', 'HHE', t1, t2)]
     >>> st = client.get_waveforms_bulk(bulk_info)
     >>> st = dayproc(st=st, lowcut=2, highcut=9, filt_order=3, samp_rate=20,
     ...              starttime=t1, debug=0, parallel=True, num_cores=2)
@@ -217,7 +239,8 @@
     >>> client = Client('GEONET')
     >>> t1 = UTCDateTime(2012, 3, 26)
     >>> t2 = t1 + 86400
-    >>> bulk_info = [('NZ', 'FOZ', '10', 'HH*', t1, t2)]
+    >>> bulk_info = [('NZ', 'FOZ', '10', 'HHE', t1, t2),
+    ...              ('NZ', 'FOZ', '10', 'HHE', t1, t2)]
     >>> st = client.get_waveforms_bulk(bulk_info)
     >>> st = dayproc(st=st, lowcut=None, highcut=9, filt_order=3, samp_rate=20,
     ...              starttime=t1, debug=0, parallel=True, num_cores=2)
@@ -236,7 +259,8 @@
     >>> client = Client('GEONET')
     >>> t1 = UTCDateTime(2012, 3, 26)
     >>> t2 = t1 + 86400
-    >>> bulk_info = [('NZ', 'FOZ', '10', 'HH*', t1, t2)]
+    >>> bulk_info = [('NZ', 'FOZ', '10', 'HHE', t1, t2),
+    ...              ('NZ', 'FOZ', '10', 'HHE', t1, t2)]
     >>> st = client.get_waveforms_bulk(bulk_info)
     >>> st = dayproc(st=st, lowcut=2, highcut=None, filt_order=3, samp_rate=20,
     ...              starttime=t1, debug=0, parallel=True, num_cores=2)
@@ -253,6 +277,8 @@
     tracein = False
     if highcut and highcut >= 0.5 * samp_rate:
         raise IOError('Highcut must be lower than the nyquist')
+    if debug > 4:
+        parallel = False
     if parallel:
         if not num_cores:
             num_cores = cpu_count()
@@ -281,7 +307,7 @@

 def process(tr, lowcut, highcut, filt_order, samp_rate, debug,
-            starttime=False, full_day=False):
+            starttime=False, full_day=False, seisan=True):
     r"""Basic function to process data, usually called by dayproc or shortproc.

     Functionally, this will bandpass, downsample and check headers and length \
@@ -310,6 +336,9 @@
     :param starttime: Desired start of trace
     :type full_day: bool
     :param full_day: Whether to expect, and enforce a full day of data or not.
+    :type seisan: bool
+    :param seisan: Whether channels are named like seisan channels (which are \
+        two letters rather than three) - defaults to True.
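The trimming added to shortproc above has to handle an off-by-one: when the requested window lines up exactly with sample times, obspy's trim keeps both endpoints, leaving one sample more than (endtime - starttime) * sampling_rate. A self-contained sketch of that edge case (obspy and numpy only; the data are synthetic):

    import numpy as np
    from obspy import Trace, UTCDateTime

    tr = Trace(data=np.arange(2001, dtype=np.float32))  # 20 s at 100 Hz
    tr.stats.sampling_rate = 100.0
    tr.stats.starttime = UTCDateTime(2012, 3, 26)

    start = tr.stats.starttime
    tr.trim(start, start + 10)           # 10 s window, sample-aligned
    assert len(tr.data) == 10 * 100 + 1  # both endpoints kept: 1001 samples
    tr.data = tr.data[1:]                # drop one, as shortproc does
    assert len(tr.data) == 10 * 100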
     :return: obspy.Stream
@@ -318,7 +347,7 @@
     import warnings
     from obspy.signal.filter import bandpass, lowpass, highpass
     # Add sanity check
-    if highcut and highcut >= 0.5*samp_rate:
+    if highcut and highcut >= 0.5 * samp_rate:
         raise IOError('Highcut must be lower than the nyquist')
     # Define the start-time
     if starttime:
@@ -334,16 +363,10 @@
     qual = _check_daylong(tr)
     if not qual:
         msg = ("Data have more zeros than actual data, please check the raw",
-               " data set-up and manually sort it")
+               " data set-up and manually sort it: " + tr.stats.station + "." +
+               tr.stats.channel)
         raise ValueError(msg)
     tr = tr.detrend('simple')    # Detrend data before filtering
-
-    # If there is one sample too many remove the first sample - this occurs
-    # at station FOZ where the first sample is zero when it shouldn't be,
-    # Not real sample: generated during data download
-    # if full_day:
-    #     if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
-    #         tr.data = tr.data[1:len(tr.data)]
     if debug > 0:
         print('I have '+str(len(tr.data))+' data points for ' +
               tr.stats.station+'.'+tr.stats.channel+' before processing')
@@ -353,15 +376,11 @@
             and full_day:
         if debug >= 2:
             print('Data for '+tr.stats.station+'.'+tr.stats.channel +
-                  ' is not of daylong length, will zero pad')
+                  ' are not of daylong length, will zero pad')
         # Use obspy's trim function with zero padding
-        tr = tr.trim(starttime, starttime+86400, pad=True, fill_value=0,
+        tr = tr.trim(starttime, starttime + 86400, pad=True, fill_value=0,
                      nearest_sample=True)
-        # If there is one sample too many after this remove the last one
+        # If there is one sample too many after this remove the first one
         # by convention
         if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
             tr.data = tr.data[1:len(tr.data)]
@@ -399,10 +418,11 @@
         warnings.warn('No filters applied')
     # Account for two letter channel names in s-files and therefore templates
-    tr.stats.channel = tr.stats.channel[0]+tr.stats.channel[-1]
+    if seisan:
+        tr.stats.channel = tr.stats.channel[0]+tr.stats.channel[-1]

     # Sanity check the time header
-    if tr.stats.starttime.day != day != day and full_day:
+    if tr.stats.starttime.day != day and full_day:
         warnings.warn("Time headers do not match expected date: " +
                       str(tr.stats.starttime))
@@ -418,11 +438,11 @@
         # by convention
         if len(tr.data) == (86400 * tr.stats.sampling_rate) + 1:
             tr.data = tr.data[1:len(tr.data)]
-        if not tr.stats.sampling_rate*86400 == tr.stats.npts:
+        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
             raise ValueError('Data are not daylong for '+tr.stats.station +
                              '.'+tr.stats.channel)
     # Final visual check for debug
-    if debug >= 4:
+    if debug > 4:
         tr.plot()
     return tr
diff --git a/eqcorrscan/utils/stacking.py b/eqcorrscan/utils/stacking.py
index 2a8c46429..3648f8333 100644
--- a/eqcorrscan/utils/stacking.py
+++ b/eqcorrscan/utils/stacking.py
@@ -93,7 +93,8 @@ def PWS_stack(streams, weight=2, normalize=True):
     return Phasestack


-def align_traces(trace_list, shift_len, master=False):
+def align_traces(trace_list, shift_len, master=False, positive=False,
+                 plot=False):
     """
     Align traces relative to each other based on their cross-correlation value.

     Uses the obspy.signal.cross_correlation.xcorr function to find the optimum
@@ -114,11 +115,17 @@
     :type master: obspy.core.trace.Trace
     :param master: Master trace to align to, if set to False will align to \
         the largest amplitude trace (default)
+    :type positive: bool
+    :param positive: Return the maximum positive cross-correlation, or the \
+        absolute maximum, defaults to False (absolute maximum).
+    :type plot: bool
+    :param plot: If True, will plot each trace aligned with the master.

-    :returns: list of shifts for best alignment in seconds
+    :returns: list of shifts (in seconds) and cross-correlation values for \
+        the best alignment
     """
-    from obspy.signal.cross_correlation import xcorr
+    from eqcorrscan.core.match_filter import normxcorr2
     from copy import deepcopy
+    from eqcorrscan.utils.plotting import xcorr_plot
     traces = deepcopy(trace_list)
     if not master:
         # Use trace with largest MAD amplitude as master
@@ -135,7 +142,21 @@
     for i in range(len(traces)):
         if not master.stats.sampling_rate == traces[i].stats.sampling_rate:
             raise ValueError('Sampling rates not the same')
-        shift, cc = xcorr(master, traces[i], shift_len)
+        cc_vec = normxcorr2(template=traces[i].data.
+                            astype(np.float32)[shift_len:-shift_len],
+                            image=master.data.astype(np.float32))
+        cc_vec = cc_vec[0]
+        shift = np.abs(cc_vec).argmax()
+        cc = cc_vec[shift]
+        if plot:
+            xcorr_plot(template=traces[i].data.
+                       astype(np.float32)[shift_len:-shift_len],
+                       image=master.data.astype(np.float32), shift=shift,
+                       cc=cc)
+        shift -= shift_len
+        if cc < 0 and positive:
+            cc = cc_vec.max()
+            shift = cc_vec.argmax() - shift_len
         shifts.append(shift / master.stats.sampling_rate)
         ccs.append(cc)
     return shifts, ccs
diff --git a/setup.cfg b/setup.cfg
index e403b1e4a..1eff3a7f6 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,6 +3,6 @@
 #    3. If at all possible, it is good practice to do this. If you cannot, you
 #       will need to generate wheels for each Python version that you support.
 [metadata]
-description-file = REAME.md
+description-file = README.md
 [aliases]
 test = pytest
diff --git a/setup.py b/setup.py
index be8267e37..a4b93c91c 100644
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,9 @@
 from os import path
 import warnings
 import glob
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+import numpy as np
 try:
     from pypandoc import convert
     read_md = lambda f: convert(f, 'rst')
@@ -62,21 +65,21 @@
         install_requires = ['numpy>=1.8.0', 'obspy>=1.0.0',
                             'matplotlib>=1.3.0', 'joblib>=0.8.4',
                             'scipy>=0.14', 'multiprocessing',
-                            'LatLon']
+                            'LatLon', 'h5py', 'cython']
     else:
         install_requires = ['numpy>=1.8.0', 'obspy>=1.0.0',
                             'matplotlib>=1.3.0', 'joblib>=0.8.4',
                             'multiprocessing',
-                            'LatLon']
+                            'LatLon', 'h5py', 'cython']
 else:
     if not READ_THE_DOCS:
         install_requires = ['numpy>=1.8.0', 'obspy>=0.10.2',
                             'matplotlib>=1.3.0', 'joblib>=0.8.4',
-                            'scipy>=0.14', 'LatLon']
+                            'scipy>=0.14', 'LatLon', 'h5py', 'cython']
     else:
         install_requires = ['numpy>=1.8.0', 'obspy>=0.10.2',
                             'matplotlib>=1.3.0', 'joblib>=0.8.4',
-                            'LatLon']
+                            'LatLon', 'h5py', 'cython']
 # install_requires.append('ConfigParser')
 setup(
     name='EQcorrscan',
@@ -118,6 +121,7 @@
         # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
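Stepping back to the align_traces change in stacking.py above, a minimal usage sketch with synthetic data (the function and its signature are taken from the diff; the traces here are made up):

    import numpy as np
    from obspy import Trace
    from eqcorrscan.utils.stacking import align_traces

    # Two copies of one wavelet, the second delayed by five samples
    wavelet = (np.sin(np.linspace(0, 10 * np.pi, 200)) *
               np.hanning(200)).astype(np.float32)
    tr1 = Trace(data=wavelet)
    tr2 = Trace(data=np.roll(wavelet, 5))
    for tr in (tr1, tr2):
        tr.stats.sampling_rate = 100.0

    # shift_len is in samples; the returned shifts are in seconds
    shifts, ccs = align_traces(trace_list=[tr1, tr2], shift_len=10)

With positive=True the function prefers the best positively-correlated alignment rather than the absolute maximum, which matters when some traces may be polarity-reversed.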
         'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3.5',
     ],

     # What does your project relate to?
@@ -139,7 +143,16 @@
     # Test requirements for using pytest
     setup_requires=['pytest-runner'],
-    tests_require=['pytest', 'pytest-flake8', 'pytest-cov', 'pytest-xdist'],
+    tests_require=['pytest', 'pytest-cov'],
+
+    # Build our extension for subspace detection
+    cmdclass={'build_ext': build_ext},
+    ext_modules=[Extension("eqcorrscan.core.subspace_statistic",
+                           ["eqcorrscan/core/subspace_statistic.pyx"],
+                           include_dirs=[np.get_include()])],
+    # Extension("eqcorrscan.core.sliding_normxcorr",
+    #           ["eqcorrscan/core/sliding_normxcorr.pyx"],
+    #           include_dirs=[np.get_include()])]
     # List additional groups of dependencies here (e.g. development
     # dependencies). You can install these using the following syntax,
     # for example: