diff --git a/tests/test_peakdetection.py b/tests/test_eventdetection.py similarity index 71% rename from tests/test_peakdetection.py rename to tests/test_eventdetection.py index c2dd9236..eb927a5c 100644 --- a/tests/test_peakdetection.py +++ b/tests/test_eventdetection.py @@ -1,6 +1,6 @@ from nose.tools import assert_true, assert_equal, assert_almost_equal, assert_raises import numpy as np -import thunderfish.peakdetection as pd +import thunderfish.eventdetection as ed def test_detect_peaks(): @@ -36,24 +36,16 @@ def test_detect_peaks(): threshold = 0.5 min_thresh = 0.3 - assert_raises(ValueError, pd.detect_peaks, data, 0.0) + assert_raises(ValueError, ed.detect_peaks, data, 0.0) - assert_raises(ValueError, pd.detect_peaks, data, -1.0) + assert_raises(ValueError, ed.detect_peaks, data, -1.0) - assert_raises(IndexError, pd.detect_peaks, data, threshold, time[:len(time)//2]) - - peaks, troughs = pd.detect_peaks(data, threshold) + peaks, troughs = ed.detect_peaks(data, threshold) assert_true(np.all(peaks == peak_indices), "detect_peaks(data, threshold) did not correctly detect peaks") assert_true(np.all(troughs == trough_indices), "detect_peaks(data, threshold) did not correctly detect troughs") - peaks, troughs = pd.detect_peaks(data, threshold, time) - assert_true(np.all(peaks == peak_times), - "detect_peaks(data, threshold, time) did not correctly detect peaks") - assert_true(np.all(troughs == trough_times), - "detect_peaks(data, threshold, time) did not correctly detect troughs") - def test_detect_dynamic_peaks(): # generate data: @@ -88,36 +80,36 @@ def test_detect_dynamic_peaks(): threshold = 0.5 min_thresh = 0.3 - assert_raises(ValueError, pd.detect_dynamic_peaks, data, 0.0, min_thresh, 0.5, time, - pd.accept_peak_size_threshold) + assert_raises(ValueError, ed.detect_dynamic_peaks, data, 0.0, min_thresh, 0.5, time, + ed.accept_peak_size_threshold) - assert_raises(ValueError, pd.detect_dynamic_peaks, data, -1.0, min_thresh, 0.5, time, - pd.accept_peak_size_threshold) + assert_raises(ValueError, ed.detect_dynamic_peaks, data, -1.0, min_thresh, 0.5, time, + ed.accept_peak_size_threshold) - assert_raises(ValueError, pd.detect_dynamic_peaks, data, threshold, 0.0, 0.5, time, - pd.accept_peak_size_threshold) + assert_raises(ValueError, ed.detect_dynamic_peaks, data, threshold, 0.0, 0.5, time, + ed.accept_peak_size_threshold) - assert_raises(ValueError, pd.detect_dynamic_peaks, data, threshold, -1.0, 0.5, time, - pd.accept_peak_size_threshold) + assert_raises(ValueError, ed.detect_dynamic_peaks, data, threshold, -1.0, 0.5, time, + ed.accept_peak_size_threshold) - assert_raises(ValueError, pd.detect_dynamic_peaks, data, threshold, min_thresh, 0.0, time, - pd.accept_peak_size_threshold) + assert_raises(ValueError, ed.detect_dynamic_peaks, data, threshold, min_thresh, 0.0, time, + ed.accept_peak_size_threshold) - assert_raises(ValueError, pd.detect_dynamic_peaks, data, threshold, min_thresh, -1.0, time, - pd.accept_peak_size_threshold) + assert_raises(ValueError, ed.detect_dynamic_peaks, data, threshold, min_thresh, -1.0, time, + ed.accept_peak_size_threshold) - assert_raises(IndexError, pd.detect_dynamic_peaks, data, threshold, min_thresh, 0.5, time[:len(time)//2], - pd.accept_peak_size_threshold) + assert_raises(IndexError, ed.detect_dynamic_peaks, data, threshold, min_thresh, 0.5, time[:len(time)//2], + ed.accept_peak_size_threshold) - peaks, troughs = pd.detect_dynamic_peaks(data, threshold, min_thresh, 0.5, time, - pd.accept_peak_size_threshold) + peaks, troughs = ed.detect_dynamic_peaks(data, threshold, min_thresh, 0.5, time, + ed.accept_peak_size_threshold) assert_true(np.all(peaks == peak_times), "detect_dynamic_peaks(data, threshold, time, accept_peak_size_threshold) did not correctly detect peaks") assert_true(np.all(troughs == trough_times), "detect_dynamic_peaks(data, threshold, time, accept_peak_size_threshold) did not correctly detect troughs") - peaks, troughs = pd.detect_dynamic_peaks(data, threshold, min_thresh, 0.5, None, - pd.accept_peak_size_threshold, + peaks, troughs = ed.detect_dynamic_peaks(data, threshold, min_thresh, 0.5, None, + ed.accept_peak_size_threshold, thresh_ampl_fac=0.9, thresh_weight=0.1) assert_true(np.all(peaks == peak_indices), "detect_dynamic_peaks(data, threshold, time, accept_peak_size_threshold) did not correctly detect peaks") @@ -125,18 +117,55 @@ def test_detect_dynamic_peaks(): "detect_dynamic_peaks(data, threshold, time, accept_peak_size_threshold) did not correctly detect troughs") +def test_threshold_crossings(): + # generate data: + time = np.arange(0.0, 10.0, 0.01) + data = np.zeros(time.shape) + pt_indices = np.random.randint(5, len(data) - 10, size=40) + pt_indices.sort() + while np.any(np.diff(pt_indices).min() < 5): + pt_indices = np.random.randint(5, len(data) - 10, size=40) + pt_indices.sort() + up_indices = pt_indices[0::2] + down_indices = pt_indices[1::2] + up = True + for i in range(len(pt_indices) - 1): + if up: + data[pt_indices[i]:pt_indices[i + 1]] = 1.0 + else: + data[pt_indices[i]:pt_indices[i + 1]] = 0.0 + up = not up + if up: + data[pt_indices[-1]:] = 1.0 + + threshold = 0.5 + up, down = ed.threshold_crossings(data, threshold) + assert_true(np.all(up == up_indices-1), + "threshold_crossings(data, threshold) did not correctly detect up crossings") + assert_true(np.all(down == down_indices-1), + "threshold_crossings(data, threshold) did not correctly detect down crossings") + + threshold = 0.1 + 0.8/10.0*time + assert_raises(IndexError, ed.threshold_crossings, data, threshold[1:]) + up, down = ed.threshold_crossings(data, threshold) + assert_true(np.all(up == up_indices-1), + "threshold_crossings(data, threshold) did not correctly detect up crossings") + assert_true(np.all(down == down_indices-1), + "threshold_crossings(data, threshold) did not correctly detect down crossings") + + def test_thresholds(): # generate data: data = np.random.randn(10000) - std_th = pd.std_threshold(data, th_factor=1.0) - prc_th = pd.percentile_threshold(data, th_factor=1.0, percentile=16.0) + std_th = ed.std_threshold(data, th_factor=1.0) + prc_th = ed.percentile_threshold(data, th_factor=1.0, percentile=16.0) assert_almost_equal(std_th, 1.0, 1, 'std_threshold %g esimate failed' % std_th) assert_true(np.abs(prc_th-2.0) < 0.1, 'percentile_threshold %g esimate failed' % prc_th) time = np.arange(0.0, 10.0, 0.01) data = np.sin(2.0*np.pi*21.7*time) - mm_th = pd.minmax_threshold(data, th_factor=1.0) + mm_th = ed.minmax_threshold(data, th_factor=1.0) assert_almost_equal(mm_th, 2.0, 2, 'minmax_threshold %g esimate failed' % mm_th) - prc_th = pd.percentile_threshold(data, th_factor=1.0, percentile=0.1) + prc_th = ed.percentile_threshold(data, th_factor=1.0, percentile=0.1) assert_true(np.abs(prc_th-2.0) < 0.1, 'percentile_threshold %g esimate failed' % prc_th) @@ -148,28 +177,28 @@ def test_trim(): trough_indices = pt_indices[1:n:2] # peak first, same length: - p_inx, t_inx = pd.trim(peak_indices, trough_indices) + p_inx, t_inx = ed.trim(peak_indices, trough_indices) assert_true(len(p_inx) == len(peak_indices) and np.all(p_inx == peak_indices), "trim(peak_indices, trough_indices) failed on peaks") assert_true(len(t_inx) == len(trough_indices) and np.all(t_inx == trough_indices), "trim(peak_indices, trough_indices) failed on troughs") # trough first, same length: - p_inx, t_inx = pd.trim(peak_indices[1:], trough_indices[:-1]) + p_inx, t_inx = ed.trim(peak_indices[1:], trough_indices[:-1]) assert_true(len(p_inx) == len(peak_indices[1:]) and np.all(p_inx == peak_indices[1:]), "trim(peak_indices[1:], trough_indices[:-1]) failed on peaks") assert_true(len(t_inx) == len(trough_indices[:-1]) and np.all(t_inx == trough_indices[:-1]), "trim(peak_indices[1:], trough_indices[:-1]) failed on troughs") # peak first, more peaks: - p_inx, t_inx = pd.trim(peak_indices, trough_indices[:-2]) + p_inx, t_inx = ed.trim(peak_indices, trough_indices[:-2]) assert_true(len(p_inx) == len(peak_indices[:-2]) and np.all(p_inx == peak_indices[:-2]), "trim(peak_indices, trough_indices[:-2]) failed on peaks") assert_true(len(t_inx) == len(trough_indices[:-2]) and np.all(t_inx == trough_indices[:-2]), "trim(peak_indices, trough_indices[:-2]) failed on troughs") # trough first, more troughs: - p_inx, t_inx = pd.trim(peak_indices[1:-2], trough_indices) + p_inx, t_inx = ed.trim(peak_indices[1:-2], trough_indices) assert_true(len(p_inx) == len(peak_indices[1:-2]) and np.all(p_inx == peak_indices[1:-2]), "trim(peak_indices[1:-2], trough_indices) failed on peaks") assert_true(len(t_inx) == len(trough_indices[:-3]) and np.all(t_inx == trough_indices[:-3]), @@ -184,28 +213,28 @@ def test_trim_to_peak(): trough_indices = pt_indices[1:n:2] # peak first, same length: - p_inx, t_inx = pd.trim_to_peak(peak_indices, trough_indices) + p_inx, t_inx = ed.trim_to_peak(peak_indices, trough_indices) assert_true(len(p_inx) == len(peak_indices) and np.all(p_inx == peak_indices), "trim_to_peak(peak_indices, trough_indices) failed on peaks") assert_true(len(t_inx) == len(trough_indices) and np.all(t_inx == trough_indices), "trim_to_peak(peak_indices, trough_indices) failed on troughs") # trough first, same length: - p_inx, t_inx = pd.trim_to_peak(peak_indices[1:], trough_indices[:-1]) + p_inx, t_inx = ed.trim_to_peak(peak_indices[1:], trough_indices[:-1]) assert_true(len(p_inx) == len(peak_indices[1:-1]) and np.all(p_inx == peak_indices[1:-1]), "trim_to_peak(peak_indices[1:], trough_indices[:-1]) failed on peaks") assert_true(len(t_inx) == len(trough_indices[1:-1]) and np.all(t_inx == trough_indices[1:-1]), "trim_to_peak(peak_indices[1:], trough_indices[:-1]) failed on troughs") # peak first, more peaks: - p_inx, t_inx = pd.trim_to_peak(peak_indices, trough_indices[:-2]) + p_inx, t_inx = ed.trim_to_peak(peak_indices, trough_indices[:-2]) assert_true(len(p_inx) == len(peak_indices[:-2]) and np.all(p_inx == peak_indices[:-2]), "trim_to_peak(peak_indices, trough_indices[:-2]) failed on peaks") assert_true(len(t_inx) == len(trough_indices[:-2]) and np.all(t_inx == trough_indices[:-2]), "trim_to_peak(peak_indices, trough_indices[:-2]) failed on troughs") # trough first, more troughs: - p_inx, t_inx = pd.trim_to_peak(peak_indices[1:-2], trough_indices) + p_inx, t_inx = ed.trim_to_peak(peak_indices[1:-2], trough_indices) assert_true(len(p_inx) == len(peak_indices[1:-2]) and np.all(p_inx == peak_indices[1:-2]), "trim_to_peak(peak_indices[1:-2], trough_indices) failed on peaks") assert_true(len(t_inx) == len(trough_indices[1:-2]) and np.all(t_inx == trough_indices[1:-2]), @@ -220,25 +249,25 @@ def test_trim_closest(): trough_indices = pt_indices[1:n:2] trough_indices = peak_indices - np.random.randint(1, 5, size=len(peak_indices)) - p_inx, t_inx = pd.trim_closest(peak_indices, trough_indices) + p_inx, t_inx = ed.trim_closest(peak_indices, trough_indices) assert_true(len(p_inx) == len(peak_indices) and np.all(p_inx == peak_indices), "trim_closest(peak_indices, peak_indices-5) failed on peaks") assert_true(len(t_inx) == len(trough_indices) and np.all(t_inx == trough_indices), "trim_closest(peak_indices, peak_indices-5) failed on troughs") - p_inx, t_inx = pd.trim_closest(peak_indices[1:], trough_indices[:-1]) + p_inx, t_inx = ed.trim_closest(peak_indices[1:], trough_indices[:-1]) assert_true(len(p_inx) == len(peak_indices[1:-1]) and np.all(p_inx == peak_indices[1:-1]), "trim_closest(peak_indices[1:], peak_indices-5) failed on peaks") assert_true(len(t_inx) == len(trough_indices[1:-1]) and np.all(t_inx == trough_indices[1:-1]), "trim_closest(peak_indices[1:], peak_indices-5) failed on troughs") trough_indices = peak_indices + np.random.randint(1, 5, size=len(peak_indices)) - p_inx, t_inx = pd.trim_closest(peak_indices, trough_indices) + p_inx, t_inx = ed.trim_closest(peak_indices, trough_indices) assert_true(len(p_inx) == len(peak_indices) and np.all(p_inx == peak_indices), "trim_closest(peak_indices, peak_indices+5) failed on peaks") assert_true(len(t_inx) == len(trough_indices) and np.all(t_inx == trough_indices), "trim_closest(peak_indices, peak_indices+5) failed on troughs") - p_inx, t_inx = pd.trim_closest(np.array([]), np.array([])) + p_inx, t_inx = ed.trim_closest(np.array([]), np.array([])) assert_equal(len(p_inx), 0, "trim_closest([], []) failed on peaks") assert_equal(len(t_inx), 0, "trim_closest([], []) failed on troughs") diff --git a/tests/test_harmonicgroups.py b/tests/test_harmonicgroups.py index c6c5d36a..43db55ec 100644 --- a/tests/test_harmonicgroups.py +++ b/tests/test_harmonicgroups.py @@ -18,18 +18,31 @@ def test_harmonic_groups(): amplitudes=[1.0, 0.7, 0.2, 0.1], phases=[0.0, 0.0, 0.0, 0.0]) fish3 = ff.generate_wavefish(eodfs[2], samplerate, duration=8.0, noise_std=0.0, - amplitudes=[10.0, 5.0, 1.0], - phases=[0.0, 0.0, 0.0]) + amplitudes=[10.0, 5.0, 1.0, 0.1], + phases=[0.0, 0.0, 0.0, 0.0]) fish4 = ff.generate_wavefish(eodfs[3], samplerate, duration=8.0, noise_std=0.0, - amplitudes=[6.0, 3.0, 1.0], - phases=[0.0, 0.0, 0.0]) + amplitudes=[6.0, 3.0, 1.0, 0.1], + phases=[0.0, 0.0, 0.0, 0.0]) data = fish1 + fish2 + fish3 + fish4 # analyse: psd_data = ps.psd(data, samplerate, fresolution=df) groups = hg.harmonic_groups(psd_data[1], psd_data[0])[0] fundamentals = hg.fundamental_freqs(groups) - + fdbs = hg.fundamental_freqs_and_power(groups) # check: assert_true(np.all(np.abs(eodfs-fundamentals) < df), 'harmonic_groups() did not correctly detect all fundamental frequencies') + + fundamentals = hg.fundamental_freqs([groups, [groups[1], groups[3]]]) + fdbs = hg.fundamental_freqs_and_power([groups, [groups[1], groups[3]]], 3) + # check: + assert_true(np.all(np.abs(eodfs-fundamentals[0]) < df), + 'harmonic_groups() did not correctly detect all fundamental frequencies') + + fundamentals = hg.fundamental_freqs([[groups, [groups[1], groups[3]]]]) + fdbs = hg.fundamental_freqs_and_power([[groups, [groups[1], groups[3]]]], 10, True) + # check: + assert_true(np.all(np.abs(eodfs-fundamentals[0][0]) < df), + 'harmonic_groups() did not correctly detect all fundamental frequencies') + diff --git a/tests/test_powerspectrum.py b/tests/test_powerspectrum.py index 8743cec6..9949079c 100644 --- a/tests/test_powerspectrum.py +++ b/tests/test_powerspectrum.py @@ -1,8 +1,9 @@ -from nose.tools import assert_equal +from nose.tools import assert_equal, assert_true import numpy as np import thunderfish.powerspectrum as ps # run this with "nosetests tests/test_powerspectrum.py" in the first thunderfish folder. + def test_powerspectrum(): # generate data fundamental = 300. # Hz @@ -14,13 +15,34 @@ def test_powerspectrum(): psd_data = ps.multi_resolution_psd(data, samplerate, fresolution=[0.5, 1]) # test the results - assert_equal(round(psd_data[0][1][np.argmax(psd_data[0][0])]), fundamental, 'peak in PSD is not the fundamental ' - 'frequency given.') - assert_equal(round(psd_data[1][1][np.argmax(psd_data[1][0])]), fundamental, 'peak in PSD is not the fundamental ' - 'frequency given.') + assert_equal(round(psd_data[0][1][np.argmax(psd_data[0][0])]), fundamental, + 'peak in PSD is not the fundamental frequency given.') + assert_equal(round(psd_data[1][1][np.argmax(psd_data[1][0])]), fundamental, + 'peak in PSD is not the fundamental frequency given.') # run multi_resolution_psd with 1 fresolutions (float) psd_data = ps.multi_resolution_psd(data, samplerate, fresolution=0.5) # test the result - assert_equal(round(psd_data[1][np.argmax(psd_data[0])]), fundamental, 'peak in PSD is not the fundamental ' - 'frequency given.') + assert_equal(round(psd_data[1][np.argmax(psd_data[0])]), fundamental, + 'peak in PSD is not the fundamental frequency given.') + + +def test_peak_freqs(): + # generate data: + dt = 0.001 + time = np.arange(0.0, 10.0, dt) + data = np.zeros(len(time)) + w = len(data)//10 + freqs = 20.0 + 80.0*np.random.rand(10) + onsets = [] + offsets = [] + for k in range(10): + i0 = k*w + i1 = i0 + w + data[i0:i1] = np.sin(2.0*np.pi*freqs[k]*time[i0:i1]) + onsets.append(i0+w//10) + offsets.append(i1-w//10) + df = 0.5 + mfreqs = ps.peak_freqs(onsets, offsets, data, 1.0/dt, freq_resolution=df) + assert_true(np.all(np.abs(freqs - mfreqs) <= 2.0*df), "peak_freqs() failed") + diff --git a/thunderfish/__init__.py b/thunderfish/__init__.py index 9de8c42b..753f94f4 100644 --- a/thunderfish/__init__.py +++ b/thunderfish/__init__.py @@ -2,7 +2,7 @@ __all__ = ['dataloader', 'configfile', - 'peakdetection', + 'eventdetection', 'bestwindow', 'powerspectrum', 'harmonicgroups', diff --git a/thunderfish/bestwindow.py b/thunderfish/bestwindow.py index a28e273b..e67e3a4b 100644 --- a/thunderfish/bestwindow.py +++ b/thunderfish/bestwindow.py @@ -1,23 +1,27 @@ -"""Functions needed for selecting the region within a recording with the +""" +# Best-window selection +Functions needed for selecting the region within a recording with the most stable signal of largest amplitude that is not clipped. -Main functions: -clip_amplitudes(): estimated clipping amplitudes from the data. -best_window_indices(): select start- and end-indices of the best window -best_window_times(): select start end end-time of the best window -best_window(): return data of the best window - -add_clip_config(): add parameters for clip_amplitudes() to configuration. -clip_args(): retrieve parameters for clip_amplitudes() from configuration. -add_best_window_config(): add parameters for best_window() to configuration. -best_window_args(): retrieve parameters for best_window*() from configuration. - -plot_clipping(): visualization of the algorithm for detecting clipped amplitudes in clip_amplitudes(). -plot_best_window(): visualization of the algorithm used in best_window_indices(). +## Main functions: +- `clip_amplitudes()`: estimated clipping amplitudes from the data. +- `best_window_indices()`: select start- and end-indices of the best window +- `best_window_times()`: select start end end-time of the best window +- `best_window()`: return data of the best window + +## Configuration parameter +- `add_clip_config()`: add parameters for clip_amplitudes() to configuration. +- `clip_args()`: retrieve parameters for clip_amplitudes() from configuration. +- `add_best_window_config()`: add parameters for best_window() to configuration. +- `best_window_args()`: retrieve parameters for best_window*() from configuration. + +## Visualization of the algorithms +- `plot_clipping()`: visualization of the algorithm for detecting clipped amplitudes in clip_amplitudes(). +- `plot_best_window()`: visualization of the algorithm used in best_window_indices(). """ import numpy as np -from .peakdetection import percentile_threshold, detect_peaks, trim_to_peak +from .eventdetection import percentile_threshold, detect_peaks, trim_to_peak try: import matplotlib.pyplot as plt except ImportError: @@ -144,7 +148,7 @@ def clip_args(cfg, rate): def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0.1, - th_factor=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, + th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf, w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5, plot_data_func=None, **kwargs): """ Detect the best window of the data to be analyzed. The data have @@ -155,7 +159,7 @@ def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0. dynamic threshold. The threshold is computed in win_shift wide windows as thresh_ampl_fac times the interpercentile range at the percentile and 100.0-percentile percentile of the data - using the peakdetection.percentile_threshold() function. + using the eventdetection.percentile_threshold() function. Second, criteria for selecting the best window are computed for each window of width win_size shifted by win_shift trough the data. The @@ -186,8 +190,8 @@ def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0. :param single: (boolean). If true return only the single window with the smallest cost. If False return the largest window with the cost below the minimum cost plus tolerance. :param win_size: (float). Size of the best window in seconds. Choose it large enough for a minimum analysis. :param win_shift: (float). Time shift in seconds between windows. Should be smaller or equal to win_size and not smaller than about one tenth of win_shift. - :param percentile: (int). percentile parameter for the peakdetection.percentile_threshold() function used to estimate thresholds for detecting peaks in the data. - :param th_factor: (float). th_factor parameter for the peakdetection.percentile_threshold() function used to estimate thresholds for detecting peaks in the data. + :param percentile: (float). percentile parameter for the eventdetection.percentile_threshold() function used to estimate thresholds for detecting peaks in the data. + :param th_factor: (float). th_factor parameter for the eventdetection.percentile_threshold() function used to estimate thresholds for detecting peaks in the data. :param min_clip: (float). Minimum amplitude below which data are clipped. :param max_clip: (float). Maximum amplitude above which data are clipped. :param w_cv_interv: (float). Weight for the coefficient of variation of the intervals. @@ -224,7 +228,7 @@ def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0. # too little data: if len(data) / samplerate <= win_size: - raise UserWarning('no best window found: not enough data') + raise UserWarning('not enough data') # threshold for peak detection: threshold = percentile_threshold(data, samplerate, win_shift, @@ -233,7 +237,7 @@ def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0. # detect large peaks and troughs: peak_idx, trough_idx = detect_peaks(data, threshold) if len(peak_idx) == 0 or len(trough_idx) == 0: - raise UserWarning('best_window(): no peaks or troughs detected') + raise UserWarning('no peaks or troughs detected') # compute cv of intervals, mean peak amplitude and its cv: invalid_cv = 1000.0 @@ -324,7 +328,7 @@ def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0. def best_window_times(data, samplerate, single=True, win_size=1., win_shift=0.1, - th_factor=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, + th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf, w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5, plot_data_func=None, **kwargs): """Finds the window within data with the best data. See best_window_indices() for details. @@ -344,7 +348,7 @@ def best_window_times(data, samplerate, single=True, win_size=1., win_shift=0.1, def best_window(data, samplerate, single=True, win_size=1., win_shift=0.1, - th_factor=0.8, percentile=0.1, min_clip=-np.inf, max_clip=np.inf, + th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf, w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5, plot_data_func=None, **kwargs): """Finds the window within data with the best data. See best_window_indices() for details. @@ -416,7 +420,7 @@ def plot_best_window(data, rate, peak_idx, trough_idx, idx0, idx1, def add_best_window_config(cfg, single=True, win_size=1., win_shift=0.1, - th_factor=0.8, percentile=0.1, + th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf, w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5): @@ -515,7 +519,7 @@ def best_window_args(cfg): # compute best window: print("call bestwindow() function...") best_window_indices(data, rate, single=False, - win_size=1.0, win_shift=0.5, th_factor=0.8, percentile=0.1, + win_size=4.0, win_shift=0.5, th_factor=0.8, percentile=10.0, min_clip=min_clip, max_clip=max_clip, w_cv_ampl=10.0, tolerance=0.5, plot_data_func=plot_best_window, ax=ax) diff --git a/thunderfish/checkpulse.py b/thunderfish/checkpulse.py index 3db468c5..1160ab49 100644 --- a/thunderfish/checkpulse.py +++ b/thunderfish/checkpulse.py @@ -1,12 +1,26 @@ """ -Functions for checking whether a pulse-type or a wave-type is present in a recording. +# Check for pulse-type weakly electric fish +Functions for checking whether a pulse-type or a wave-type weakly electric fish +is present in a recording. + +# Main functions +- `check_pulse_width()`: checks for a pulse-type fish based on the width of detected peaks. +- `check_pulse_psd()`: checks for pulse-type fish based on its signature on the power sepctrum. + +## Visualization +- `plot_width_period_ratio()`: visualizes the check_pulse_width() algorithm. +- `plot_psd_proportion()`: visualizes the check_pulse_psd() algorithm. + +## Configuration parameter +- `add_check_pulse_width_config()': add parameters for check_pulse_width() to configuration. +- `check_pulse_width_args()`: retrieve parameters for check_pulse_width() from configuration. +- `add_check_pulse_psd_config()`: add parameters for check_pulse_psd() to configuration. +- `check_pulse_psd_args()`: retrieve parameters for check_pulse_psd() from configuration. -check_pulse_width(): checks for a pulse-type fish based on the width of detected peaks. -check_pulse_psd(): checks for puls_type fish based on its signature on the power sepctrum. """ import numpy as np -from .peakdetection import percentile_threshold, detect_peaks, trim_to_peak +from .eventdetection import percentile_threshold, detect_peaks, trim_to_peak from .powerspectrum import decibel try: from matplotlib.patches import Rectangle @@ -14,108 +28,154 @@ pass -def check_pulse_width(data, samplerate, th_factor=0.6, percentile=0.1, - pulse_thres=0.1, verbose=0, plot_data_func=None, **kwargs): +def check_pulse_width(data, samplerate, th_factor=2.0, percentile=25.0, + pulse_thresh=0.1, verbose=0, plot_data_func=None, **kwargs): """Detects if a fish is pulse- or wave-type based on the proportion of the time distance between a peak and its following trough, relative to the time between consecutive peaks. - WARNING! This function does not (yet) detect monophasic pulse-fishes - such as Brachyhypopomus bennetti or Electrophorus. - - :param data: (1-D array). The data to be analyzed (Usually the best window already) - :param samplerate: (float). Sampling rate of the data in Hz - :param percentile: (float between 0. and 50.). The inter-percentile range TODO - :param th_factor: (float). The threshold for peak detection is the inter-percentile-range multiplied by this factor. - :param pulse_thres: (float). a positive number setting the minimum distance between peaks and troughs - :param verbose: (int). if > 1, print information in the command line. - :param plot_data_func: Function for plotting the data with detected peaks and troughs, an inset and the distribution - of peak-ratios. - plot_data_func(data, samplerate, peak_idx, trough_idx, peakdet_th, pulse_th, pulse_fish, pvt_dist, tvp_dist) - :param data: (array). the raw data. - :param samplerate: (float). the sampling rate of the data. - :param peak_idx: (array). indices into raw data indicating detected peaks. - :param trough_idx: (array). indices into raw data indicating detected troughs. - :param peakdet_th: (float). a positive number setting the minimum distance between peaks and troughs. - :param pulse_th: (float). If the median r-value is smaller than this threshold, the fish is pulse-; else wave-type - :param pulse_fish: (bool). True if algorithm suggests a pulse-type fish. - :param pvt_dist: (array-like). distribution of r-values [pk2tr/pk2pk] - :param tvp_dist: (array-like). distribution of r-values [tr2pk/tr2tr] - :return pulse_fish: (bool). True if algorithm suggests a pulse-type fish. - :return: peak_ratio: (float). Returns a float between 0. and 1. which gives the proportion of peak-2-trough, - from peak-2-peak time distance. (Wave-type fish should have larger values than pulse-type fish) + + Parameters + ---------- + data: 1-D array + The data to be analyzed (Usually the best window already). + samplerate: float + Sampling rate of the data in Hertz. + percentile: float + The interpercentile range is computed at percentile and 100.0-percentile. + th_factor: float + The threshold for peak detection is the inter-percentile-range + multiplied by this factor. + pulse_thresh: float + A positive number setting the minimum distance between peaks and troughs. + verbose: int + If > 1, print information in the command line. + plot_data_func(data, samplerate, peak_idx, trough_idx, peakdet_th, pulse_th, pulse_fish, pvt_dist, tvp_dist): + data: array + The raw data. + samplerate: float + The sampling rate of the data. + peak_idx: array + Indices into raw data indicating detected peaks. + trough_idx: array + Indices into raw data indicating detected troughs. + peakdet_th: float + A positive number setting the minimum distance between peaks and troughs. + pulse_th: float + If the median r-value is smaller than this threshold, the fish is pulse-; else wave-type + pulse_fish: bool + True if algorithm suggests a pulse-type fish. + pvt_dist: array-like + Distribution of r-values [pk2tr/pk2pk] + tvp_dist: array-like + Distribution of r-values [tr2pk/tr2tr] + + Returns + ------- + pulse_fish: bool + True if algorithm suggests a pulse-type fish. + peak_ratio: float + Returns a float between 0. and 1. which gives the proportion of peak-2-trough, + from peak-2-peak time distance, i.e. pulse width relative to pulse interval. + interval: float + The mean inter-pulse-interval. """ def ratio(peak_idx, trough_idx): """ Calculates (peak-trough) / (peak-peak) - :return: peak-ratio (float). The median of (peak-trough) / (peak-peak) - :return: r_tr (array). The distribution of (peak-trough) / (peak-peak) + Returns + ------- + + peak-ratio: float + The median of (peak-trough) / (peak-peak) + r_tr: array + The distribution of (peak-trough) / (peak-peak) """ peaks, troughs = trim_to_peak(peak_idx, trough_idx) + if len(peaks) < 2: + return 1.0, np.array([]) - # get times of peaks and troughs, pk_times need to be floats! - pk_times = peaks / float(samplerate) # Actually there is no need to divide by samplerate. - tr_times = troughs / float(samplerate) # Time differences can be calculated using indices only. - - pk_2_pk = np.diff(pk_times) - pk_2_tr = (tr_times - pk_times)[:-1] + pk_2_pk = np.diff(peaks).astype(np.float) + pk_2_tr = (troughs - peaks)[:-1].astype(np.float) # get the proportion of peak-2-trough, from peak-2-peak time distance r_tr = pk_2_tr / pk_2_pk - r_tr[r_tr > 0.5] = 1 - r_tr[r_tr > 0.5] # fix for cases where trough of eod comes before peak + r_tr[r_tr > 0.5] = 1.0 - r_tr[r_tr > 0.5] # fix for cases where trough of eod comes before peak peak_ratio = np.median(r_tr) return peak_ratio, r_tr - if verbose > 1: - print('Analyzing Fish-Type...') - # threshold for peak detection: threshold = percentile_threshold(data, th_factor=th_factor, percentile=percentile) - + if verbose > 0: + print('check_pulse_width: threshold for pulse detection is %g' % threshold) + # detect large peaks and troughs: peak_idx, trough_idx = detect_peaks(data, threshold) pr_pvt, pvt_dist = ratio(peak_idx, trough_idx) pr_tvp, tvp_dist = ratio(trough_idx, peak_idx) - + peak_ratio = np.mean([pr_pvt, pr_tvp]) - pulse_fish = peak_ratio < pulse_thres + pulse_fish = peak_ratio < pulse_thresh + if len(peak_idx) > 1: + interval = np.mean(np.diff(peak_idx)/samplerate) + else: + interval = -1.0 + if plot_data_func: - plot_data_func(data, samplerate, peak_idx, trough_idx, threshold, pulse_thres, + plot_data_func(data, samplerate, peak_idx, trough_idx, threshold, pulse_thresh, pulse_fish, pr_pvt, pr_tvp, **kwargs) if verbose > 0: f_type = 'pulse' if pulse_fish else 'wave' - print('Fish-type is %s. r-value = %.3f' % (f_type, peak_ratio)) + print(' fish-type is %s. pulse-width-ratio is %.3f' % (f_type, peak_ratio)) - return pulse_fish, peak_ratio + return pulse_fish, peak_ratio, interval -def check_pulse_psd(power, freqs, proportion_th=0.27, freq_bins=125, max_freq=3000, - outer_percentile=1, inner_percentile=25, verbose=0, +def check_pulse_psd(power, freqs, proportion_th=0.27, freq_bin_width=125.0, max_freq=3000.0, + outer_percentile=1.0, inner_percentile=25.0, verbose=0, plot_data_func=None, **kwargs): """Detects if a fish is pulse- or wave-type based on the inter-quartile range relative to the inter-percentile range in the power-spectrum. - :param power: (1-D array) power array of a power spectrum. - :param freqs: (1-D array) frequency array of a power spectrum. - :param proportion_th: (float) Proportion of the data that defines if the psd belongs to a wave or a pulsefish. - :param freq_bins: (float) width of frequency bins in which the psd shall be divided (Hz). - :param max_freq: (float) maximum frequency that shall be provided in the separated power array. - :param outer_percentile:(float) ((100-outer_percentile) - outer_percentile) / ((100-inner_percentile) - inner_percentile) - is the proportion that leeds to the decision if the psd belongs to a wave or pulsetype fish. - :param inner_percentile:(float) ((100-outer_percentile) - outer_percentile) / ((100-inner_percentile) - inner_percentile) - is the proportion that leeds to the decision if the psd belongs to a wave or pulsetype fish. - :param verbose: (int) when the value is 1 you get additional shell output. - :param plot_data_func: (function) function (psdtypeplot()) that is used to create a axis for later plotting about the process of psd - type detection. - :param **kwargs: additional arguments that are passed to the plot_data_func(). - :return pulse_fish: (bool). True if algorithm suggests a pulse-type fish. - :return proportions: (1-D array) proportions of the single psd bins. + Parameters + ---------- + power: 1-D array + Power array of a power spectrum. + freqs: 1-D array + Frequency array of a power spectrum. + proportion_th: float + Proportion of the data that defines if the psd belongs to a wave or a pulsefish. + freq_bin_width: float + Width of frequency bins in which the psd shall be divided (Hz). + max_freq: float + Maximum frequency that shall be provided in the separated power array. + outer_percentile: float + ((100-outer_percentile) - outer_percentile) / ((100-inner_percentile) - inner_percentile) + is the proportion that leeds to the decision if the psd belongs to a wave + or pulsetype fish. + inner_percentile: float + ((100-outer_percentile) - outer_percentile) / ((100-inner_percentile) - inner_percentile) + is the proportion that leeds to the decision if the psd belongs to a wave + or pulsetype fish. + verbose: int + When the value is 1 you get additional shell output. + plot_data_func: function + Visualize the algorithm. + **kwargs: + + + Returns + ------- + pulse_fish: bool + True if algorithm suggests a pulse-type fish. + proportions: 1-D array + Proportions of the single psd bins. """ if verbose >= 1: @@ -125,8 +185,8 @@ def check_pulse_psd(power, freqs, proportion_th=0.27, freq_bins=125, max_freq=30 # Take a 1-D array of powers (from powerspectrums), transforms it into dB and divides it into several bins. proportions = [] all_percentiles = [] - for trial in range(int(max_freq / freq_bins)): - tmp_power_db = decibel(power[trial * int(freq_bins / res): (trial + 1) * int(freq_bins / res)]) + for trial in range(int(max_freq / freq_bin_width)): + tmp_power_db = decibel(power[trial * int(freq_bin_width / res): (trial + 1) * int(freq_bin_width / res)]) # calculates 4 percentiles for each powerbin percentiles = np.percentile(tmp_power_db, [outer_percentile, inner_percentile, 100 - inner_percentile, @@ -138,7 +198,7 @@ def check_pulse_psd(power, freqs, proportion_th=0.27, freq_bins=125, max_freq=30 pulse_fish = percentile_ratio > proportion_th - if verbose >= 1: + if verbose > 0: f_type = 'pulse' if pulse_fish else 'wave' print ('PSD-type is %s. proportion = %.3f' % (f_type, float(np.mean(proportions)))) @@ -150,18 +210,30 @@ def check_pulse_psd(power, freqs, proportion_th=0.27, freq_bins=125, max_freq=30 def plot_width_period_ratio(data, samplerate, peak_idx, trough_idx, peakdet_th, pulse_th, pulse_fish, pvt_dist, tvp_dist, ax, fs=14): - """Plots the data, a zoomed index of it and the peak-width versus peak-period rations + """Plots the data, a zoomed index of it and the peak-width versus peak-period ratios that determine whether fish is pulse or wave-type. - :param data: (array-like). amplitude data - :param samplerate: (float). sampling rate in Hz - :param peak_idx: (array of int). Array with peak indices - :param trough_idx: (array of int). Array with trough indices - :param peakdet_th: (float). threshold for peak detection - :param pulse_th: (float). If the median r-value is smaller than this threshold, the fish is pulse-; else wave-type - :param pulse_fish: (bool). Fish-type suggested by the algorithm (True for pulse-type) - :param pvt_dist: (array-like). Array with r-values (peak2trough / peak2peak) - :param tvp_dist: (array-like). Array with r-values (trough2peak/ trough2trough) + Parameters + ---------- + data: array-like + Amplitude data. + samplerate: float + Sampling rate in Hz. + peak_idx: array of int + Array with peak indices. + trough_idx: array of int + Array with trough indices. + peakdet_th: float + Threshold for peak detection. + pulse_th: float + If the median r-value is smaller than this threshold, the fish is pulse-; + else wave-type. + pulse_fish: bool + Fish-type suggested by the algorithm (True for pulse-type). + pvt_dist: array-like + Array with r-values (peak2trough / peak2peak). + tvp_dist: array-like + Array with r-values (trough2peak/ trough2trough). """ def plot_peak_trough_hist(vals, ax, hist_color, plot_label, label_size): @@ -229,17 +301,27 @@ def plot_psd_proportion(freqs, power, proportions, percentiles, pulse_fish, ax, fs, max_freq = 3000): """Visualizes the check_pulse_psd() algorithm. - This function takes the frequency and power array of a powerspectrum as well as the calculated percentiles array - of the frequency bins (see: get_bin_percentiles()) and the proportions array calculated from these percentiles. + This function takes the frequency and power array of a powerspectrum + as well as the calculated percentiles array of the frequency bins + and the proportions array calculated from these percentiles. - :param freqs: (1-D array) frequency array of a psd. - :param power: (1-D array) power array of a psd. - :param proportions: (1-D array) proportions of the single psd bins. - :param percentiles: (2-D array) for every bin four values are calulated and stored in separate lists. These four - values are percentiles of the respective bins. - :param ax: (axis for plot) empty axis that is filled with content in the function. - :param fs: (int) fontsize for the plot. - :param max_freq: (float) maximum frequency that shall appear in the plot. + Parameters + ---------- + freqs: 1-D array + Frequency array of a psd. + power: 1-D array + Power array of a psd. + proportions: 1-D array + Proportions of the single psd bins. + percentiles: 2-D array + For every bin four values are calulated and stored in separate lists. These four + values are percentiles of the respective bins. + ax: axis for plot + Empty axis that is filled with content in the function. + fs: int + Fontsize for the plot. + max_freq: float + Maximum frequency that shall appear in the plot. """ f_type = 'pulse' if pulse_fish else 'wave' ax.set_title('Suggested fish-type is %s' % f_type, fontsize=fs + 2) @@ -256,6 +338,90 @@ def plot_psd_proportion(freqs, power, proportions, percentiles, pulse_fish, ax.set_xlabel('Frequency', fontsize=fs) ax.set_ylabel('Power [dB]', fontsize=fs) + +def add_check_pulse_width_config(cfg, th_factor=2.0, percentile=25.0, pulse_thresh=0.1): + """ Add parameter needed for check_pulse_width() as + a new section to a configuration. + + Parameters + ---------- + cfg: ConfigFile + The configuration. + See check_pulse_width() for details on the remaining arguments. + """ + + cfg.add_section('Check pulse widths:') + cfg.add('pulseWidthPercentile', percentile, '%', 'The variance of the data is measured as the interpercentile range.') + cfg.add('pulseWidthThresholdFactor', th_factor, '', 'The threshold for peak detection is this factor multiplied with the interpercentile range.') + cfg.add('pulseWidthThresholdRatio', pulse_thresh, '', 'A pulsefish is detected if the width of the pulses relative to the intervals is smaller than this threshold.') + + +def check_pulse_width_args(cfg): + """ Translates a configuration to the + respective parameter names of the function check_pulse_width(). + The return value can then be passed as key-word arguments to this function. + + Parameters + ---------- + cfg: ConfigFile + The configuration. + + Returns + ------- + a: dict + Dictionary with names of arguments of the check_pulse_width() function + and their values as supplied by `cfg`. + """ + a = cfg.map({'th_factor': 'pulseWidthThresholdFactor', + 'percentile': 'pulseWidthPercentile', + 'pulse_thresh': 'pulseWidthThresholdRatio'}) + return a + + +def add_check_pulse_psd_config(cfg, proportion_th=0.27, freq_bin_width=125.0, max_freq=3000.0, + outer_percentile=1.0, inner_percentile=25.0): + """ Add parameter needed for check_pulse_psd() as + a new section to a configuration. + + Parameters + ---------- + cfg: ConfigFile + The configuration. + See check_pulse_psd() for details on the remaining arguments. + """ + + cfg.add_section('Check for pulse fish in power spectrum:') + cfg.add('pulsePSDProportionThreshold', proportion_th, '', 'Proportion of the data that defines if the power spectrum is the one of a pulsefish.') + cfg.add('pulsePSDFrequencyBinWidth', freq_bin_width, 'Hz', 'Width of frequency bins used for analyzing power distribution.') + cfg.add('pulsePSDMaximumFrequency', max_freq, 'Hz', 'Maximum frequency up to which the power spectrum is analyzed.') + cfg.add('pulsePSDOuterPercentile', outer_percentile, '%', 'The interpercentile range of powers used as reference.') + cfg.add('pulsePSDInnerPercentile', inner_percentile, '%', 'The interpercentile range of powers that is divided by the out percentile range.') + + +def check_pulse_psd_args(cfg): + """ Translates a configuration to the + respective parameter names of the function check_pulse_psd(). + The return value can then be passed as key-word arguments to this function. + + Parameters + ---------- + cfg: ConfigFile + The configuration. + + Returns + ------- + a: dict + Dictionary with names of arguments of the check_pulse_psd() function + and their values as supplied by `cfg`. + """ + a = cfg.map({'proportion_th': 'pulsePSDProportionThreshold', + 'freq_bin_width': 'pulsePSDFrequencyBins', + 'max_freq': 'pulsePSDMaximumFrequency', + 'outer_percentile': 'pulsePSDOuterPercentile', + 'inner_percentile': 'pulsePSDInnerPercentile'}) + return a + + if __name__ == "__main__": import sys import matplotlib.pyplot as plt diff --git a/thunderfish/chirp.py b/thunderfish/chirp.py index c19012fd..284f4034 100644 --- a/thunderfish/chirp.py +++ b/thunderfish/chirp.py @@ -9,7 +9,7 @@ import numpy as np from .harmonicgroups import harmonic_groups from .powerspectrum import spectrogram -from .peakdetection import std_threshold, detect_peaks, trim_to_peak +from .eventdetection import std_threshold, detect_peaks, trim_to_peak try: # TODO remove plt usage from algorithms! import matplotlib.pyplot as plt @@ -102,7 +102,7 @@ def chirp_detection(spectrum, freqs, time, fishlist=None, fundamentals=None, min # calculate the slope by calculating the difference in the power power_diff = np.diff(power) - # peakdetection in the power_diff to detect drops in power indicating chrips + # peak detection in the power_diff to detect drops in power indicating chrips threshold = std_threshold(power_diff) peaks, troughs = detect_peaks(power_diff, threshold) troughs, peaks = trim_to_peak(troughs, peaks) # reversed troughs and peaks in output and input to get trim_to_troughs diff --git a/thunderfish/configfile.py b/thunderfish/configfile.py index faa00bc0..412b1a92 100644 --- a/thunderfish/configfile.py +++ b/thunderfish/configfile.py @@ -219,7 +219,7 @@ def write_comment(stream, comment, maxline, cs): differs = False if hasattr(v, '__len__') and (not isinstance(v, str)): val = v[0] - if len(v) > 1: + if len(v) > 1 and len(v[1]) > 0: unit = ' ' + v[1] if len(v) > 2: comment = v[2] diff --git a/thunderfish/consistentfishes.py b/thunderfish/consistentfishes.py index af8875b9..31208987 100644 --- a/thunderfish/consistentfishes.py +++ b/thunderfish/consistentfishes.py @@ -3,13 +3,14 @@ fishes present in all fishlists. consistent_fishes(): Compares a list of fishlists and builds a consistent fishlist. +consistent_fishes_plot(): Visualize the algorithm of consisten_fishes(). """ import numpy as np from .harmonicgroups import fundamental_freqs -def find_consistency(fundamentals, df_th=1.): +def find_consistency(fundamentals, df_th=1.0): """ Compares lists of floats to find these values consistent in every list. (with a certain threshold) @@ -22,13 +23,22 @@ def find_consistency(fundamentals, df_th=1.): are returned. - :param fundamentals: (2-D array) list of lists containing the fundamentals of a fishlist. - fundamentals = [ [f1, f1, ..., f1, f1], [f2, f2, ..., f2, f2], ..., [fn, fn, ..., fn, fn] ] - :param df_th: (float) Frequency threshold for the comparison of different fishlists. If the fundamental - frequencies of two fishes from different fishlists vary less than this threshold they are - assigned as the same fish. - :return consistent_fundamentals: (1-D array) List containing all values that are available in all given lists. - :return index: (1-D array) Indices of the values that are in every list relating to the fist list in fishlists. + Parameters + ---------- + fundamentals: 2-D array + List of lists containing the fundamentals of a fishlist. + fundamentals = [ [f1, f1, ..., f1, f1], [f2, f2, ..., f2, f2], ..., [fn, fn, ..., fn, fn] ] + df_th: float + Frequency threshold for the comparison of different fishlists in Hertz. If the fundamental + frequencies of two fishes from different fishlists vary less than this threshold they are + assigned as the same fish. + + Returns + ------- + consistent_fundamentals: 1-D array + List containing all values that are available in all given lists. + index: 1-D array + Indices of the values that are in every list relating to the fist list in fishlists. """ consistency_help = np.ones(len(fundamentals[0]), dtype=int) @@ -43,22 +53,35 @@ def find_consistency(fundamentals, df_th=1.): return consistent_fundamentals, index -def consistent_fishes(fishlists, verbose=0, plot_data_func=None, **kwargs): +def consistent_fishes(fishlists, verbose=0, plot_data_func=None, df_th=1.0, **kwargs): """ Compares several fishlists to create a fishlist only containing these fishes present in all these fishlists. Therefore several functions are used to first extract the fundamental frequencies of every fish in each fishlist, before comparing them and building a fishlist only containing these fishes present in all fishlists. - :param fishlists: (4-D array) List of fishlists with harmonics and each frequency and power. - fishlists[fishlist][fish][harmonic][frequency, power] - :param plot_data_func: (function) function (consistentfishesplot()) that is used to create a axis for later plotting containing a figure to - visualice what the modul did. - :param verbose: (int) when the value is 1 you get additional shell output. - :param **kwargs: additional arguments that are passed to the plot_data_func(). - :return filtered_fishlist:(3-D array) New fishlist with the same structure as a fishlist in fishlists only - containing these fishes that are available in every fishlist in fishlists. - fishlist[fish][harmonic][frequency, power] + Parameters + ---------- + fishlists: 4-D array + List of fishlists with harmonics and each frequency and power. + fishlists[fishlist][fish][harmonic][frequency, power] + plot_data_func: + Function that visualizes what consistent_fishes() did. + verbose: int + When the value larger than one you get additional shell output. + df_th: float + Frequency threshold for the comparison of different fishlists in Hertz. If the fundamental + frequencies of two fishes from different fishlists vary less than this threshold they are + assigned as the same fish. + **kwargs: dict + Passed on to plot function. + + Returns + ------- + filtered_fishlist: 3-D array + New fishlist with the same structure as a fishlist in fishlists only + containing these fishes that are available in every fishlist in fishlists. + fishlist[fish][harmonic][frequency, power] """ if verbose >= 1: print('Finding consistent fishes out of %d fishlists ...' % len(fishlists)) @@ -84,13 +107,19 @@ def consistent_fishes_plot(fishlists, filtered_fishlist, ax, fs): """ Creates an axis for plotting all lists and the consistent values marked with a bar. - :param filtered_fishlist: (3-D array) Contains power and frequency of these fishes that hve been detected in - several powerspectra using different resolutions. - :param fishlists: (4-D array or 3-D array) List of or single fishlists with harmonics and each frequency and power. - fishlists[fishlist][fish][harmonic][frequency, power] - fishlists[fish][harmonic][frequency, power] - :param ax: (axis for plot) empty axis that is filled with content in the function. - :param fs: (int) fontsize for the plot. + Parameter + --------- + filtered_fishlist: 3-D array + Contains power and frequency of these fishes that hve been detected in + several powerspectra using different resolutions. + fishlists: 4-D array or 3-D array + List of or single fishlists with harmonics and each frequency and power. + fishlists[fishlist][fish][harmonic][frequency, power] + fishlists[fish][harmonic][frequency, power] + ax: axis for plot + Empty axis that is filled with content in the function. + fs: int + Fontsize for the plot. """ for list in np.arange(len(fishlists)): for fish in np.arange(len(fishlists[list])): diff --git a/thunderfish/csvmaker.py b/thunderfish/csvmaker.py index d6a6fe34..435a842f 100644 --- a/thunderfish/csvmaker.py +++ b/thunderfish/csvmaker.py @@ -1,7 +1,7 @@ import numpy as np -def write_csv(filename, csv_header, data_matrix, format_str='%.3f'): +def write_csv(filename, csv_header, data_matrix, format_str='%.6g'): """ Write matrix to csv-file. @@ -19,7 +19,7 @@ def write_csv(filename, csv_header, data_matrix, format_str='%.3f'): """ # Check if header has same row length as data_matrix - if len(csv_header) != len(data_matrix[0]): + if len(data_matrix) > 0 and len(csv_header) != len(data_matrix[0]): raise ValueError('The length of the header does not match the length of the data matrix!') with open(filename, 'wb') as fin: diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py index 93fbc122..0845835b 100644 --- a/thunderfish/eodanalysis.py +++ b/thunderfish/eodanalysis.py @@ -1,46 +1,73 @@ """ -Detects EODs in a given dataset and computes their mean waveform. +# Analysis of EOD waveforms of weakly-electric fish. -eod_waveform(): calculates a mean EOD of a given dataset. +## Main functions +- `eod_waveform()`: compute an averaged EOD waveform. + +## Visualization +- `eod_waveform_plot()`: plot the averaged waveform with standard deviation. """ import numpy as np -from .peakdetection import percentile_threshold, detect_peaks, snippets - - -def eod_waveform(data, samplerate, th_factor=0.6, percentile=0.1, start=None, stop=None): - """Detects EODs in the given data, extracts data snippets around each EOD and computes a mean waveform with standard - deviation. - - :param data: (1-D array) the data to be analysed. - :param samplerate: (float) samplerate of the data in Hertz. - :param percentile: (int). percentile parameter for the peakdetection.percentile_threshold() function used to - estimate thresholds for detecting EOD peaks in the data. - :param th_factor: (float). th_factor parameter for the peakdetection.percentile_threshold() function used to - estimate thresholds for detecting EOD peaks in the data. - :param start: (float or None) start time of EOD snippets relative to peak. - :param stop: (float or None) stop time of EOD snippets relative to peak. - :return mean_eod (1-D array) Average of the EOD snippets. - :return std_eod (1-D array) Standard deviation of the averaged snippets. - :return time (1-D array) Time axis for mean_eod and std_eod. - :return eod_times (1-D array) Times of EOD peaks in seconds. +from .eventdetection import percentile_threshold, detect_peaks, snippets + + +def eod_waveform(data, samplerate, th_factor=0.6, percentile=0.1, + period=None, start=None, stop=None): + """Detect EODs in the given data, extract data snippets around each EOD, + and compute a mean waveform with standard deviation. + + Parameters + ---------- + data: 1-D array + The data to be analysed. + samplerate: float + Sampling rate of the data in Hertz. + percentile: int + Percentile parameter for the eventdetection.percentile_threshold() function used to + estimate thresholds for detecting EOD peaks in the data. + th_factor: float + th_factor parameter for the eventdetection.percentile_threshold() function used to + estimate thresholds for detecting EOD peaks in the data. + period: float or None + Average waveforms with this period instead of peak times. + start: float or None + Start time of EOD snippets relative to peak. + stop: float or None + Stop time of EOD snippets relative to peak. + + Returns + ------- + mean_eod: 2-D array + Average of the EOD snippets. First column is time in seconds, + second column the mean eod, third column the standard deviation + eod_times: 1-D array + Times of EOD peaks in seconds. """ - # threshold for peak detection: - threshold = percentile_threshold(data, th_factor=th_factor, percentile=percentile) + if period is None: + # threshold for peak detection: + threshold = percentile_threshold(data, th_factor=th_factor, percentile=percentile) - # detect peaks: - eod_idx, _ = detect_peaks(data, threshold) + # detect peaks: + eod_idx, _ = detect_peaks(data, threshold) + if len(eod_idx) == 0: + return np.array([]), np.array([]), np.array([]), np.array([]) - # eod times: - eod_times = eod_idx / samplerate + # eod indices and times: + eod_times = eod_idx / samplerate + else: + eod_times = np.arange(0.0, len(data)/samplerate, period) + eod_idx = np.asarray(eod_times * samplerate, dtype=int) # start and stop times: if start is None or stop is None: - period = np.mean(np.diff(eod_times)) + tmp_period = period + if tmp_period is None: + tmp_period = np.mean(np.diff(eod_times)) if start is None: - start = -period + start = -1.5*tmp_period if stop is None: - stop = period + stop = 1.5*tmp_period # start and stop indices: start_inx = int(start * samplerate) stop_inx = int(stop * samplerate) @@ -48,26 +75,47 @@ def eod_waveform(data, samplerate, th_factor=0.6, percentile=0.1, start=None, st # extract snippets: eod_snippets = snippets(data, eod_idx, start_inx, stop_inx) - # mean and std of snippets: - mean_eod = np.mean(eod_snippets, axis=0) - std_eod = np.std(eod_snippets, axis=0, ddof=1) + # mean and std of snippets: + mean_eod = np.zeros((len(eod_snippets[0]), 3)) + mean_eod[:,1] = np.mean(eod_snippets, axis=0) + mean_eod[:,2] = np.std(eod_snippets, axis=0, ddof=1) # time axis: time = (np.arange(len(mean_eod)) + start_inx) / samplerate - - return mean_eod, std_eod, time, eod_times + if period is not None: + # move peak of waveform to zero: + offs = len(mean_eod[:,1])//4 + time -= time[offs+np.argmax(mean_eod[offs:3*offs,1])] + mean_eod[:,0] = time + + return mean_eod, eod_times -def eod_waveform_plot(time, mean_eod, std_eod, ax, unit='a.u.'): +def eod_waveform_plot(time, mean_eod, std_eod, ax, unit='a.u.', **kwargs): """Plot mean eod and its standard deviation. - :param time: (1-D array) Time of the mean EOD. - :param mean_eod: (1-D array) Mean EOD waveform. - :param std_eod: (1-D array) Sandard deviation of EOD waveform. - :param ax: (axis for plot). - :param unit: (string) Unit of the data. + Parameters + ---------- + time: 1-D array + Time of the mean EOD. + mean_eod: 1-D array + Mean EOD waveform. + std_eod: 1-D array + Standard deviation of EOD waveform. + ax: + Axis for plot + unit: string + Unit of the data. + kwargs: dict + Arguments passed on to the plot command for the mean eod. """ - ax.plot(1000.0*time, mean_eod, lw=2, color='r') + if not 'lw' in kwargs: + kwargs['lw'] = 2 + if not 'color' in kwargs: + kwargs['color'] = 'r' + ax.autoscale(True) + ax.plot(1000.0*time, mean_eod, **kwargs) + ax.autoscale(False) ax.fill_between(1000.0*time, mean_eod + std_eod, mean_eod - std_eod, color='grey', alpha=0.3) ax.set_xlabel('Time [msec]') diff --git a/thunderfish/peakdetection.py b/thunderfish/eventdetection.py similarity index 71% rename from thunderfish/peakdetection.py rename to thunderfish/eventdetection.py index 38d6a850..0a9445ec 100644 --- a/thunderfish/peakdetection.py +++ b/thunderfish/eventdetection.py @@ -1,29 +1,41 @@ """ -Detecting and handling peaks and troughs in data arrays. +# Event detection +Detecting and handling peaks and troughs and threshold crossings in data arrays. -detect_peaks(): peak and trough detection with a relative threshold. +## Peak detection +- `detect_peaks()`: peak and trough detection with a relative threshold. +- `peak_size_width()`: compute for each peak its size and width. -peak_size_width(): compute for each peak its size and width. +## Threshold crossings +- `threshold_crossings()`: detect crossings of an absolute threshold. -trim(): make the list of peaks and troughs returned by detect_peaks() the same length. -trim_to_peak(): ensure that the peak is first. -trim_closest(): ensure that peaks minus troughs is smallest. +## Event manipulation +- `trim()`: make the list of peaks and troughs the same length. +- `trim_to_peak()`: ensure that the peak is first. +- `trim_closest()`: ensure that peaks minus troughs is smallest. -std_threshold(): estimate detection threshold based on the standard deviation. -hist_threshold(): esimate detection threshold based on a histogram of the data. -minmax_threshold(): estimate detection threshold based on maximum minus minimum value. -percentile_threshold(): estimate detection threshold based on interpercentile range. +- `merge_events()`: Merge events if they are closer than a minimum distance. +- `remove_events()`: Remove events that are too short or too long. +- `widen_events()`: Enlarge events on both sides without overlap. -snippets(): cut out data snippets around a list of indices. +## Threshold estimation +- `std_threshold()`: estimate detection threshold based on the standard deviation. +- `hist_threshold()`: esimate detection threshold based on a histogram of the data. +- `minmax_threshold()`: estimate detection threshold based on maximum minus minimum value. +- `percentile_threshold()`: estimate detection threshold based on interpercentile range. -detect_dynamic_peaks(): peak and trough detection with a dynamically adapted threshold. -accept_peak_size_threshold(): adapt the dection threshold to the size of the detected peaks. +## Snippets +- `snippets(): cut out data snippets around a list of indices. + +## Peak detection with dynamic threshold: +- `detect_dynamic_peaks()`: peak and trough detection with a dynamically adapted threshold. +- `accept_peak_size_threshold()`: adapt the dection threshold to the size of the detected peaks. """ import numpy as np -def detect_peaks(data, threshold, time=None): +def detect_peaks(data, threshold): """ Detect peaks and troughs using a fixed, relative threshold according to Bryan S. Todd and David C. Andrews (1999): The identification of peaks in physiological signals. @@ -36,22 +48,18 @@ def detect_peaks(data, threshold, time=None): threshold: float or array A positive number or array of numbers setting the detection threshold, i.e. the minimum distance between peaks and troughs. - time: array - The (optional) 1-D array with the time corresponding to the data values. Returns ------- - peak_array: array - A list of peaks. - trough_array: array - A list of troughs. - If time is `None` then these are arrays of the indices where the peaks/troughs occur. - If time is given then these are arrays of the times where the peaks/troughs occur. + peak_array: array of ints + A list of indices of detected peaks. + trough_array: array of ints + A list of indices of detected troughs. Raises ------ ValueError: If `threshold <= 0`. - IndexError: If `data`, `time`, and `threshold` arrays differ in length. + IndexError: If `data` and `threshold` arrays differ in length. """ thresh_array = True @@ -63,8 +71,6 @@ def detect_peaks(data, threshold, time=None): raise ValueError('input argument threshold must be positive!') elif len(data) != len(threshold): raise IndexError('input arrays data and threshold must have same length!') - if time is not None and len(data) != len(time): - raise IndexError('input arrays time and data must have same length!') peaks_list = list() troughs_list = list() @@ -93,11 +99,7 @@ def detect_peaks(data, threshold, time=None): # the maximum is a peak! elif max_value >= value + thresh: # this is a peak: - if time is None: - peaks_list.append(max_inx) - else: - peaks_list.append(time[max_inx]) - + peaks_list.append(max_inx) # change direction: min_inx = index # minimum element min_value = value @@ -111,11 +113,7 @@ def detect_peaks(data, threshold, time=None): elif value >= min_value + thresh: # there was a trough: - if time is None: - troughs_list.append(min_inx) - else: - troughs_list.append(time[min_inx]) - + troughs_list.append(min_inx) # change direction: max_inx = index # maximum element max_value = value @@ -201,6 +199,41 @@ def peak_size_width(time, data, peak_indices, trough_indices, pfac=0.75): return peaks +def threshold_crossings(data, threshold): + """ + Detect crossings of a threshold with positive and negative slope. + + Parameters + ---------- + data: array + An 1-D array of input data where threshold crossings are detected. + threshold: float or array + A number or array of numbers setting the threshold + that needs to be crossed. + + Returns + ------- + up_array: array of ints + A list of indices where the threshold is crossed with positive slope. + down_array: array of ints + A list of indices where the threshold is crossed with negative slope. + + Raises + ------ + IndexError: If `data` and `threshold` arrays differ in length. + """ + + if np.isscalar(threshold): + up_array = np.nonzero((data[1:]>threshold) & (data[:-1]<=threshold))[0] + down_array = np.nonzero((data[1:]<=threshold) & (data[:-1]>threshold))[0] + else: + if len(data) != len(threshold): + raise IndexError('input arrays data and threshold must have same length!') + up_array = np.nonzero((data[1:]>threshold[1:]) & (data[:-1]<=threshold[:-1]))[0] + down_array = np.nonzero((data[1:]<=threshold[1:]) & (data[:-1]>threshold[:-1]))[0] + return up_array, down_array + + def trim(peaks, troughs): """ Trims the peaks and troughs arrays such that they have the same length. @@ -298,6 +331,152 @@ def trim_closest(peaks, troughs): return peaks[pidx:pidx + nn], troughs[tidx:tidx + nn] +def merge_events(onsets, offsets, min_distance): + """Merge events if they are closer than a minimum distance. + + If the beginning of an event (onset, peak, or positive threshold crossing, + is too close to the end of the previous event (offset, trough, or negative + threshold crossing) the two events are merged into a single one that begins + with the first one and ends with the second one. + + Parameters + ---------- + onsets: 1-D array + The onsets (peaks, or positive threshold crossings) of the events + as indices or times. + offsets: 1-D array + The offsets (troughs, or negative threshold crossings) of the events + as indices or times. + min_distance: int or float + The minimum distance between events. If the beginning of an event is separated + from the end of the previous event by less than this distance then the two events + are merged into one. If the event onsets and offsets are given in indices than + min_distance is also in indices. + + Returns + ------- + merged_onsets: 1-D array + The onsets (peaks, or positive threshold crossings) of the merged events + as indices or times according to onsets. + merged_offsets: 1-D array + The offsets (troughs, or negative threshold crossings) of the merged events + as indices or times according to offsets. + """ + onsets, offsets = trim_to_peak(onsets, offsets) + if len(onsets) == 0 or len(offsets) == 0: + return np.array([]), np.array([]) + else: + diff = onsets[1:] - offsets[:-1] + indices = diff > min_distance + merged_onsets = onsets[np.hstack([True, indices])] + merged_offsets = offsets[np.hstack([indices, True])] + return merged_onsets, merged_offsets + + +def remove_events(onsets, offsets, min_duration, max_duration=None): + """Remove events that are too short or too long. + + If the length of an event, i.e. `offset` (offset, trough, or negative + threshold crossing) minus `onset` (onset, peak, or positive threshold crossing), + is shorter than `min_duration` or longer than `max_duration`, then this event is + removed. + + Parameters + ---------- + onsets: 1-D array + The onsets (peaks, or positive threshold crossings) of the events + as indices or times. + offsets: 1-D array + The offsets (troughs, or negative threshold crossings) of the events + as indices or times. + min_duration: int, float, or None + The minimum duration of events. If the event offset minus the event onset + is less than `min_duration`, then the event is removed from the lists. + If the event onsets and offsets are given in indices than + `min_duration` is also in indices. If `None` then this test is skipped. + max_duration: int, float, or None + The maximum duration of events. If the event offset minus the event onset + is larger than `max_duration`, then the event is removed from the lists. + If the event onsets and offsets are given in indices than + `max_duration` is also in indices. If `None` then this test is skipped. + + Returns + ------- + onsets: 1-D array + The onsets (peaks, or positive threshold crossings) of the events + with too short and too long events removed as indices or times according to onsets. + offsets: 1-D array + The offsets (troughs, or negative threshold crossings) of the events + with too short and too long events removed as indices or times according to offsets. + """ + onsets, offsets = trim_to_peak(onsets, offsets) + if len(onsets) == 0 or len(offsets) == 0: + return np.array([]), np.array([]) + elif min_duration is not None or max_duration is not None: + diff = offsets - onsets + if min_duration is not None and max_duration is not None: + indices = (diff > min_duration) & (diff < max_duration) + elif min_duration is not None: + indices = diff > min_duration + else: + indices = diff < max_duration + onsets = onsets[indices] + offsets = offsets[indices] + return onsets, offsets + + +def widen_events(onsets, offsets, max_time, duration): + """Enlarge events on both sides without overlap. + + Subtracts `duration` from the `onsets` and adds `duration` to the offsets. + If two succeeding events are separated by less than two times the `duration`, + then the offset of the previous event and the onset of the following event are + set at the center between the two events. + + Parameters + ---------- + onsets: 1-D array + The onsets (peaks, or positive threshold crossings) of the events + as indices or times. + offsets: 1-D array + The offsets (troughs, or negative threshold crossings) of the events + as indices or times. + max_time: int or float + The maximum value for the end of the last event. + If the event onsets and offsets are given in indices than + max_time is the maximum possible index, i.e. the len of the + data array on which the events where detected. + duration: int or float + The number of indices or the time by which the events should be enlarged. + If the event onsets and offsets are given in indices than + duration is also in indices. + + Returns + ------- + onsets: 1-D array + The onsets (peaks, or positive threshold crossings) of the enlarged events. + offsets: 1-D array + The offsets (troughs, or negative threshold crossings) of the enlarged events. + """ + new_onsets = [] + new_offsets = [] + if len(onsets) > 0: + on_idx = onsets[0] + new_onsets.append( on_idx - duration if on_idx >= duration else 0 ) + for off_idx, on_idx in zip(offsets[:-1], onsets[1:]): + if on_idx - off_idx < 2*duration: + mid_idx = (on_idx + off_idx)//2 + new_offsets.append(mid_idx) + new_onsets.append(mid_idx) + else: + new_offsets.append(off_idx + duration) + new_onsets.append(on_idx - duration) + if len(offsets) > 0: + off_idx = offsets[-1] + new_offsets.append(off_idx + duration if off_idx + duration < max_time else max_time) + return new_onsets, new_offsets + + def std_threshold(data, samplerate=None, win_size=None, th_factor=5.): """Esimates a threshold for detect_peaks() based on the standard deviation of the data. @@ -382,22 +561,26 @@ def hist_threshold(data, samplerate=None, win_size=None, th_factor=5., for inx0 in range(0, len(data), win_size_indices): inx1 = inx0 + win_size_indices - hist, bins = np.histogram(data[inx0:inx1], nbins, density=True) + std, center = hist_threshold(data[inx0:inx1], samplerate=None, win_size=None, + th_factor=th_factor, nbins=nbins, + hist_height=hist_height) + threshold[inx0:inx1] = std + centers[inx0:inx1] = center + return threshold, centers + else: + maxd = np.max(data) + mind = np.min(data) + contrast = np.abs((maxd - mind)/(maxd + mind)) + if contrast > 1e-8: + hist, bins = np.histogram(data, nbins, density=False) inx = hist > np.max(hist) * hist_height lower = bins[0:-1][inx][0] upper = bins[1:][inx][-1] # needs to return the next bin center = 0.5 * (lower + upper) std = 0.5 * (upper - lower) - threshold[inx0:inx1] = std * th_factor - centers[inx0:inx1] = center - return threshold, centers - else: - hist, bins = np.histogram(data, nbins, density=True) - inx = hist > np.max(hist) * hist_height - lower = bins[0:-1][inx][0] - upper = bins[1:][inx][-1] # needs to return the next bin - center = 0.5 * (lower + upper) - std = 0.5 * (upper - lower) + else: + std = np.std(data) + center = np.mean(data) return std * th_factor, center @@ -447,7 +630,7 @@ def minmax_threshold(data, samplerate=None, win_size=None, th_factor=0.8): return (np.max(data) - np.min(data)) * th_factor -def percentile_threshold(data, samplerate=None, win_size=None, th_factor=0.8, percentile=0.1): +def percentile_threshold(data, samplerate=None, win_size=None, th_factor=0.8, percentile=10.0): """Esimate a threshold for detect_peaks() based on an inter-percentile range of the data. The threshold is computed as the range between the percentile and @@ -474,7 +657,7 @@ def percentile_threshold(data, samplerate=None, win_size=None, th_factor=0.8, pe Sampling rate of the data in Hz. win_size: float or None Size of window in which a threshold value is computed. - percentile: int + percentile: float The interpercentile range is computed at percentile and 100.0-percentile. th_factor: float Factor by which the inter-percentile range of the data is multiplied to set the threshold. @@ -773,10 +956,11 @@ def accept_peak_size_threshold(time, data, event_inx, index, min_inx, threshold, if __name__ == "__main__": import matplotlib.pyplot as plt - print("Checking peakdetection module ...") + print("Checking eventetection module ...") print('') # generate data: - time = np.arange(0.0, 10.0, 0.01) + dt = 0.001 + time = np.arange(0.0, 10.0, dt) f = 2.0 data = (0.5 * np.sin(2.0 * np.pi * f * time) + 0.5) ** 4.0 data += -0.1 * time * (time - 10.0) @@ -786,18 +970,8 @@ def accept_peak_size_threshold(time, data, event_inx, index, min_inx, threshold, plt.plot(time, data) print('') - print('check detect_peaks(data, 0.5, time)...') - peaks, troughs = detect_peaks(data, 0.5, time) - # print peaks: - print('detected %d peaks with period %g that differs from the real frequency by %g' % ( - len(peaks), np.mean(np.diff(peaks)), f - 1.0 / np.mean(np.diff(peaks)))) - # print troughs: - print('detected %d troughs with period %g that differs from the real frequency by %g' % ( - len(troughs), np.mean(np.diff(troughs)), f - 1.0 / np.mean(np.diff(troughs)))) - - print('') - print('check detect_peaks(data, 0.5)...') - peaks, troughs = detect_peaks(data, 0.5) + print('check detect_peaks(data, 1.0)...') + peaks, troughs = detect_peaks(data, 1.0) # print peaks: print('detected %d peaks with period %g that differs from the real frequency by %g' % ( len(peaks), np.mean(np.diff(peaks)), f - 1.0 / np.mean(np.diff(peaks)) / np.mean(np.diff(time)))) @@ -809,5 +983,12 @@ def accept_peak_size_threshold(time, data, event_inx, index, min_inx, threshold, plt.plot(time[peaks], data[peaks], '.r', ms=20) plt.plot(time[troughs], data[troughs], '.g', ms=20) + # detect threshold crossings: + onsets, offsets = threshold_crossings(data, 3.0) + onsets, offsets = merge_events(onsets, offsets, int(0.5/f/dt)) + plt.plot(time, 3.0*np.ones(len(time)), 'k') + plt.plot(time[onsets], data[onsets], '.c', ms=20) + plt.plot(time[offsets], data[offsets], '.b', ms=20) + plt.ylim(-0.5, 4.0) plt.show() diff --git a/thunderfish/fishfinder.py b/thunderfish/fishfinder.py index 054adfc7..a2235596 100644 --- a/thunderfish/fishfinder.py +++ b/thunderfish/fishfinder.py @@ -38,9 +38,9 @@ def __init__(self, data, samplingrate, unit, filename, channel, verbose, cfg): self.fmin = 0.0 self.fmax = 0.0 self.decibel = True - self.fresolution = self.cfg['frequencyResolution'][0] + self.fresolution = self.cfg.value('frequencyResolution') self.deltaf = 1.0 - self.mains_freq = self.cfg['mainsFreq'][0] + self.mains_freq = self.cfg.value('mainsFreq') self.power_label = None self.all_peaks_artis = None self.good_peaks_artist = None @@ -49,15 +49,15 @@ def __init__(self, data, samplingrate, unit, filename, channel, verbose, cfg): self.peak_artists = [] self.legend = True self.legendhandle = None - self.help = self.cfg['displayHelp'][0] + self.help = self.cfg.value('displayHelp') self.helptext = [] self.allpeaks = [] self.fishlist = [] self.mains = [] self.peak_specmarker = [] self.peak_annotation = [] - self.min_clip = self.cfg['minClipAmplitude'][0] - self.max_clip = self.cfg['maxClipAmplitude'][0] + self.min_clip = self.cfg.value('minClipAmplitude') + self.max_clip = self.cfg.value('maxClipAmplitude') self.colorrange, self.markerrange = colors_markers() # audio output: @@ -155,15 +155,15 @@ def annotate_peak(self, peak, harmonics=-1, inx=-1): # annotation: fwidth = self.fmax - self.fmin pl = [] - if self.cfg['labelFrequency'][0]: + if self.cfg.value('labelFrequency'): pl.append(r'$f=${:.1f} Hz'.format(peak[0])) - if self.cfg['labelHarmonic'][0] and harmonics >= 0: + if self.cfg.value('labelHarmonic') and harmonics >= 0: pl.append(r'$h=${:d}'.format(harmonics)) - if self.cfg['labelPower'][0]: + if self.cfg.value('labelPower'): pl.append(r'$p=${:g}'.format(peak[1])) - if self.cfg['labelWidth'][0]: + if self.cfg.value('labelWidth'): pl.append(r'$\Delta f=${:.2f} Hz'.format(peak[3])) - if self.cfg['labelDoubleUse'][0]: + if self.cfg.value('labelDoubleUse'): pl.append(r'dc={:.0f}'.format(peak[4])) self.peak_annotation.append(self.axp.annotate('\n'.join(pl), xy=(peak[0], peak[1]), xytext=(peak[0] + 0.03 * fwidth, peak[1]), @@ -194,14 +194,11 @@ def update_plots(self, draw=True): self.axt.set_ylim(self.ymin, self.ymax) # compute power spectrum: - nfft = int(np.round(2 ** (np.floor(np.log(self.samplerate / self.fresolution) / np.log(2.0)) + 1.0))) - if nfft < 16: - nfft = 16 nfft, noverlap = nfft_noverlap(self.fresolution, self.samplerate, 0.5, 16) t00 = t0 t11 = t1 w = t11 - t00 - minw = nfft * (self.cfg['minPSDAverages'][0] + 1) // 2 + minw = nfft * (self.cfg.value('minPSDAverages') + 1) // 2 if t11 - t00 < minw: w = minw t11 = t00 + w @@ -212,6 +209,7 @@ def update_plots(self, draw=True): t00 = 0 t11 = w power, freqs = ml.psd(self.data[t00:t11], NFFT=nfft, noverlap=noverlap, Fs=self.samplerate, detrend=ml.detrend_mean) + power = np.squeeze(power) # squeeze is necessary when nfft is to large with respect to the data self.deltaf = freqs[1] - freqs[0] # detect fish: h_kwargs = psd_peak_detection_args(self.cfg) @@ -256,8 +254,8 @@ def update_plots(self, draw=True): tws = '%.3gs' % (tw) a = 2 * w // nfft - 1 # number of ffts m = '' - if self.cfg['mainsFreq'][0] > 0.0: - m = ', mains=%.0fHz' % self.cfg['mainsFreq'][0] + if self.cfg.value('mainsFreq') > 0.0: + m = ', mains=%.0fHz' % self.cfg.value('mainsFreq') if self.power_frequency_label == None: self.power_frequency_label = self.axp.set_xlabel( r'Frequency [Hz] (nfft={:d}, $\Delta f$={:s}: T={:s}/{:d}{:s})'.format(nfft, dfs, tws, a, m)) @@ -323,7 +321,7 @@ def update_plots(self, draw=True): fishpoints, = self.axp.plot(fpeaks[:len(fpeakinx)], power[fpeakinx], linestyle='None', marker='.', color='k', ms=10, mec=None, mew=0.0, zorder=2) self.peak_artists.append(fishpoints) - labels.append('%3.0f Hz mains' % self.cfg['mainsFreq'][0]) + labels.append('%3.0f Hz mains' % self.cfg.value('mainsFreq')) ncol = (len(labels)-1) // 8 + 1 self.legendhandle = self.axs.legend(self.peak_artists[:len(labels)], labels, loc='upper right', ncol=ncol) self.legenddict = dict() @@ -369,6 +367,7 @@ def keypress(self, event): if idx1 > 0: self.toffset = idx0 / self.samplerate self.twindow = (idx1 - idx0) / self.samplerate + self.twindow *= 2.0/(self.cfg.value('numberPSDWindows')+1.0) self.update_plots() except UserWarning as e: if self.verbose > 0: @@ -503,26 +502,26 @@ def keypress(self, event): self.decibel = not self.decibel self.update_plots() elif event.key in 'm': - if self.cfg['mainsFreq'][0] == 0.0: - self.cfg['mainsFreq'][0] = self.mains_freq + if self.cfg.value('mainsFreq') == 0.0: + self.cfg.set('mainsFreq', self.mains_freq) else: - self.cfg['mainsFreq'][0] = 0.0 + self.cfg.set('mainsFreq', 0.0) self.update_plots() elif event.key in 't': - t_diff = self.cfg['highThresholdFactor'][0] - self.cfg['lowThresholdFactor'][0] - self.cfg['lowThresholdFactor'][0] -= 0.1 - if self.cfg['lowThresholdFactor'][0] < 0.1: - self.cfg['lowThresholdFactor'][0] = 0.1 - self.cfg['highThresholdFactor'][0] = self.cfg['lowThresholdFactor'][0] + t_diff - print('lowThresholdFactor =', self.cfg['lowThresholdFactor'][0]) + t_diff = self.cfg.value('highThresholdFactor') - self.cfg.value('lowThresholdFactor') + self.cfg.set('lowThresholdFactor', self.cfg.value('lowThresholdFactor') - 0.1) + if self.cfg.value('lowThresholdFactor') < 0.1: + self.cfg.set('lowThresholdFactor', 0.1) + self.cfg.set('highThresholdFactor', self.cfg.value('lowThresholdFactor') + t_diff) + print('lowThresholdFactor =', self.cfg.value('lowThresholdFactor')) self.update_plots() elif event.key in 'T': - t_diff = self.cfg['highThresholdFactor'][0] - self.cfg['lowThresholdFactor'][0] - self.cfg['lowThresholdFactor'][0] += 0.1 - if self.cfg['lowThresholdFactor'][0] > 20.0: - self.cfg['lowThresholdFactor'][0] = 20.0 - self.cfg['highThresholdFactor'][0] = self.cfg['lowThresholdFactor'][0] + t_diff - print('lowThresholdFactor =', self.cfg['lowThresholdFactor'][0]) + t_diff = self.cfg.value('highThresholdFactor') - self.cfg.value('lowThresholdFactor') + self.cfg.set('lowThresholdFactor', self.cfg.value('lowThresholdFactor') + 0.1) + if self.cfg.value('lowThresholdFactor') > 20.0: + self.cfg.set('lowThresholdFactor', 20.0) + self.cfg.set('highThresholdFactor', self.cfg.value('lowThresholdFactor') + t_diff) + print('lowThresholdFactor =', self.cfg.value('lowThresholdFactor')) self.update_plots() elif event.key == 'escape': self.remove_peak_annotation() @@ -716,6 +715,7 @@ def main(): cfg.add_section('Power spectrum estimation:') cfg.add('frequencyResolution', 0.5, 'Hz', 'Frequency resolution of the power spectrum.') cfg.add('minPSDAverages', 3, '', 'Minimum number of fft averages for estimating the power spectrum.') + cfg.add('numberPSDWindows', 1, '', 'Number of windows on which power spectra are computed.') cfg.add_section('Items to display:') cfg.add('displayHelp', False, '', 'Display help on key bindings') @@ -754,7 +754,7 @@ def main(): with open_data(filepath, channel, 60.0, 10.0, verbose) as data: # plot: ## if len(data) < 10**8: - ## # data[:].copy() makes bestwindow much faster (it's slow in peakdetection): + ## # data[:].copy() makes bestwindow much faster (it's slow in eventdetection): ## SignalPlot(data[:].copy(), data.samplerate, data.unit, filename, channel) ## else: SignalPlot(data, data.samplerate, data.unit, filename, channel, verbose, cfg) diff --git a/thunderfish/harmonicgroups.py b/thunderfish/harmonicgroups.py index e5a9f20c..15e88869 100644 --- a/thunderfish/harmonicgroups.py +++ b/thunderfish/harmonicgroups.py @@ -1,22 +1,37 @@ -"""Functions for extracting harmonic groups from a power spectrum. - -harmonic_groups(): detect peaks in a power spectrum and groups them - according to their harmonic structure. - -extract_fundamentals(): collect harmonic groups from lists of power spectrum peaks. -threshold_estimate(): estimates thresholds for peak detection in a power spectrum. - -fundamental_freqs(): extract the fundamental frequencies from lists of harmonic groups - as returned by harmonic_groups(). -colors_markers(): Generate a list of colors and markers for plotting. -plot_harmonic_groups(): Mark decibel power of fundamentals and their harmonics. -plot_psd_harmonic_groups(): Plot decibel power-spectrum with detected peaks, harmonic groups, and mains frequencies. +""" +# Harmonic group detection +Functions for extracting harmonic groups from a power spectrum. + +## Harmonic group extraction +- `harmonic_groups()`: detect peaks in a power spectrum and groups them + according to their harmonic structure. +- `extract_fundamentals()`: collect harmonic groups from lists of power spectrum peaks. +- `threshold_estimate()`: estimates thresholds for peak detection in a power spectrum. + +## Handling of lists of harmonic groups +- `fundamental_freqs()`: extract the fundamental frequencies from lists of harmonic groups. +- `fundamental_freqs_and_power()`: + +## Visualization +- `colors_markers()`: Generate a list of colors and markers for plotting. +- `plot_harmonic_groups()`: Mark decibel power of fundamentals and their harmonics. +- `plot_psd_harmonic_groups()`: Plot decibel power-spectrum with detected peaks, harmonic groups, and mains frequencies. + +## Configuration parameter +- `add_psd_peak_detection_config()`: add parameters for the detection of peaks in + power spectra to configuration. +- `psd_peak_detection_args()`: retrieve parameters for the detection of peaks in + power spectra from configuration. +- `add_harmonic_groups_config()`: add parameters for the detection of harmonic groups + to configuration. +- `harmonic_groups_args()`: retrieve parameters for the detection of harmonic groups + from configuration. """ from __future__ import print_function import numpy as np import scipy.signal as sig -from .peakdetection import detect_peaks, peak_size_width, hist_threshold +from .eventdetection import detect_peaks, peak_size_width, hist_threshold from .powerspectrum import decibel, power, plot_decibel_psd try: import matplotlib.cm as cm @@ -26,7 +41,7 @@ def build_harmonic_group(freqs, more_freqs, deltaf, verbose=0, min_freq=20.0, max_freq=2000.0, - freq_tol_fac=0.7, max_divisor=4, max_upper_fill=1, + freq_tol_fac=1.0, max_divisor=4, max_upper_fill=1, max_double_use_harmonics=8, max_double_use_count=1, max_fill_ratio=0.25, power_n_harmonics=10, **kwargs): """Find all the harmonics belonging to the largest peak in a list of frequency peaks. @@ -431,12 +446,12 @@ def build_harmonic_group(freqs, more_freqs, deltaf, verbose=0, min_freq=20.0, ma return freqs, more_freqs, group, best_fzero_harmonics, fmax -def extract_fundamentals(good_freqs, all_freqs, deltaf, verbose=0, freq_tol_fac=0.7, +def extract_fundamentals(good_freqs, all_freqs, deltaf, verbose=0, freq_tol_fac=1.0, mains_freq=60.0, min_freq=0.0, max_freq=2000.0, max_divisor=4, max_upper_fill=1, max_double_use_harmonics=8, max_double_use_count=1, max_fill_ratio=0.25, power_n_harmonics=10, - min_group_size=3, max_harmonics=0, max_groups=0, **kwargs): + min_group_size=4, max_harmonics=0, max_groups=0, **kwargs): """Extract fundamental frequencies from power-spectrum peaks. Parameters @@ -468,12 +483,11 @@ def extract_fundamentals(good_freqs, all_freqs, deltaf, verbose=0, freq_tol_fac= max_double_use_count: int Maximum number of harmonic groups a single peak can be part of. max_fill_ratio: float - Maximum allowed fraction of filled in frequencies.. + Maximum allowed fraction of filled in frequencies. power_n_harmonics: int Maximum number of harmonics over which the total power of the signal is computed. min_group_size: int - Minimum required number of harmonics that are not filled in and - are not part of other, so far detected, harmonics groups. + Within min_group_size no harmonics are allowed to be filled in. max_harmonics: int Maximum number of harmonics to be returned for each group. max_groups: int @@ -533,10 +547,8 @@ def extract_fundamentals(good_freqs, all_freqs, deltaf, verbose=0, freq_tol_fac= print('Nothing found for fmax=%.2fHz' % fmax) continue - # count number of harmonics which have been detected, are not fill-ins, - # and are not doubly used: - group_size = np.sum((harm_group[:, 1] > 0.0) & (harm_group[:, 4] < 2.0)) - group_size_ok = (group_size >= min_group_size) + # within min_group_size we do not want fill ins: + group_size_ok = np.sum(harm_group[:min_group_size, 1] > 0.0) == min_group_size # check frequency range of fundamental: fundamental_ok = (harm_group[0, 0] >= min_freq and @@ -558,7 +570,7 @@ def extract_fundamentals(good_freqs, all_freqs, deltaf, verbose=0, freq_tol_fac= if verbose > 0: print('Discarded harmonic group: {:.2f}Hz p={:10.8f} g={:d} f={:} m={:}'.format( harm_group[0, 0], np.sum(harm_group[:, 1]), - group_size, fundamental_ok, mains_ok)) + group_size_ok, fundamental_ok, mains_ok)) # do not save more than n harmonics: if max_harmonics > 0: @@ -641,7 +653,9 @@ def threshold_estimate(psd_data, low_thresh_factor=6.0, high_thresh_factor=10.0, """ n = len(psd_data) psd_data_seg = psd_data[n//2:n*3//4] - psd_data_seg = np.mean(psd_data_seg) + sig.detrend(psd_data_seg, type='linear') + psd_data_seg = psd_data_seg[~np.isnan(psd_data_seg)] + psd_data_seg = np.mean(psd_data_seg) + \ + sig.detrend(psd_data_seg, type='linear') noise_std, center = hist_threshold(psd_data_seg, th_factor=1.0, nbins=nbins) low_threshold = noise_std * low_thresh_factor high_threshold = noise_std * high_thresh_factor @@ -651,11 +665,11 @@ def threshold_estimate(psd_data, low_thresh_factor=6.0, high_thresh_factor=10.0, def harmonic_groups(psd_freqs, psd, verbose=0, low_threshold=0.0, high_threshold=0.0, thresh_bins=100, low_thresh_factor=6.0, high_thresh_factor=10.0, max_peak_width_fac=10.0, min_peak_width=1.0, - freq_tol_fac=0.7, mains_freq=60.0, min_freq=0.0, max_freq=2000.0, + freq_tol_fac=1.0, mains_freq=60.0, min_freq=0.0, max_freq=2000.0, max_work_freq=4000.0, max_divisor=4, max_upper_fill=1, max_double_use_harmonics=8, max_double_use_count=1, max_fill_ratio=0.25, power_n_harmonics=10, - min_group_size=3, max_harmonics=0, max_groups=0, **kwargs): + min_group_size=4, max_harmonics=0, max_groups=0, **kwargs): """Detect peaks in power spectrum and extract fundamentals of harmonic groups. Parameters @@ -709,8 +723,7 @@ def harmonic_groups(psd_freqs, psd, verbose=0, low_threshold=0.0, high_threshold power_n_harmonics: int Maximum number of harmonics over which the total power of the signal is computed. min_group_size: int - Minimum required number of harmonics that are not filled in and - are not part of other, so far detected, harmonics groups. + Within min_group_size no harmonics are allowed to be filled in. max_harmonics: int Maximum number of harmonics to be returned for each group. max_groups: int @@ -751,10 +764,13 @@ def harmonic_groups(psd_freqs, psd, verbose=0, low_threshold=0.0, high_threshold # thresholds: center = np.NaN if low_threshold <= 0.0 or high_threshold <= 0.0: - low_threshold, high_threshold, center = threshold_estimate(log_psd, low_thresh_factor, - high_thresh_factor, - thresh_bins) - + low_th, high_th, center = threshold_estimate(log_psd, low_thresh_factor, + high_thresh_factor, + thresh_bins) + if low_threshold <= 0.0: + low_threshold = low_th + if high_threshold <= 0.0: + high_threshold = high_th if verbose > 1: print('') print('low_threshold =%g center+low_threshold =%g' % (low_threshold, center + low_threshold)) @@ -800,72 +816,99 @@ def fundamental_freqs(group_list): """ Extract the fundamental frequencies from lists of harmonic groups. + The inner list of 2-D arrays of the input argument is transformed into + a 1-D array containig the fundamental frequencies extracted from the 2-D arrays. + Parameters ---------- - group_list: list of 2-D arrays or list of list of 2-D arrays - Lists of harmonic groups as returned by extract_fundamentals() and - harmonic_groups() with the element [0][0] of the + group_list: (list of (list of ...)) list of 2-D arrays + Arbitrarily nested lists of harmonic groups as returned by extract_fundamentals() + and harmonic_groups() with the element [0, 0] of the harmonic groups being the fundamental frequency. Returns ------- - fundamentals: 1-D array or list of 1-D array - Single array or list of arrays (corresponding to the input group_list) - of the fundamental frequencies. + fundamentals: (list of (list of ...)) 1-D array + Nested list (corresponding to `group_list`) of 1-D arrays + with the fundamental frequencies extracted from the harmonic groups. """ if len(group_list) == 0: return np.array([]) - # check whether group_list is list of fishlists: - list_of_list = False - for groups in group_list: - for harmonic_group in groups: - if len(np.shape(harmonic_group)) > 1: - list_of_list = True - break - if list_of_list: + # check whether group_list is list of harmonic groups: + list_of_groups = True + for group in group_list: + if not ( hasattr(group, 'shape') and len(np.shape(group)) == 2 ): + list_of_groups = False + break + + if list_of_groups: + fundamentals = np.array([group[0, 0] for group in group_list if len(group) > 0]) + else: fundamentals = [] for groups in group_list: - fundamentals.append(np.array([harmonic_group[0][0] for harmonic_group in groups])) - else: - fundamentals = np.array([harmonic_group[0][0] for harmonic_group in group_list if len(harmonic_group) > 0]) + f = fundamental_freqs(groups) + fundamentals.append(f) return fundamentals -def fundamental_freqs_and_db(group_list): - +def fundamental_freqs_and_power(group_list, n_harmonics=1, power=False, + ref_power=1.0, min_power=1e-20): """ Extract the fundamental frequencies and their power in dB from lists of harmonic groups. + The inner list of 2-D arrays of the input argument is transformed + into a 2-D array containig for each fish (1st dimension) the + fundamental frequencies and powers extracted from the 2-D arrays. + Parameters ---------- - group_list: list of 2-D arrays or list of list of 2-D arrays - Lists of harmonic groups as returned by extract_fundamentals() and - harmonic_groups() with the element [0][0] of the harmonic groups - being the fundamental frequency, - and element[0][1] being the corresponding power. + group_list: (list of (list of ...)) list of 2-D arrays + Arbitrarily nested lists of harmonic groups as returned by extract_fundamentals() + and harmonic_groups() with the element [0, 0] of the + harmonic groups being the fundamental frequency and the elements [:,1] being + the powers of each harmonics. + n_harmonics: int + For the power sum over the first `n_harmonmics` harmonics (including the + fundamental frequency). The default value of 1 returns the power of the + fundamental frequency only. + power: boolean + If `False` convert the power into decibel using the powerspectrum.decibel() function. + ref_power: float + Reference power for computing decibel. If set to `None` the maximum power is used. + min_power: float + Power values smaller than `min_power` are set to `np.nan`. Returns ------- - eodf_db_matrix: 2-D array or list of 2-D arrays - Matrix with fundamental frequencies in first column and - corresponding power in dB in second column. + fundamentals: (list of (list of ...)) 2-D array + Nested list (corresponding to `group_list`) of 2-D arrays + with fundamental frequencies in first column and + corresponding power in second column. """ if len(group_list) == 0: - eodf_db_matrix = np.array([]) - elif hasattr(group_list[0][0][0], '__len__'): - eodf_db_matrix = [] - for groups in group_list: - f = [np.array([harmonic_group[0][0], harmonic_group[0][1]]) for harmonic_group in group_list] - f[:, 1] = decibel(f[:, 1]) # calculate decibel using 1 as reference power - eodf_db_matrix.append(f) - else: - eodf_db_matrix = np.array([np.array([harmonic_group[0][0], harmonic_group[0][1]]) - for harmonic_group in group_list]) - eodf_db_matrix[:, 1] = decibel(eodf_db_matrix[:, 1]) # calculate decibel using 1 as reference power + return np.array([]) - return eodf_db_matrix + # check whether group_list is list of harmonic groups: + list_of_groups = True + for group in group_list: + if not ( hasattr(group, 'shape') and len(np.shape(group)) == 2 ): + list_of_groups = False + break + + if list_of_groups: + fundamentals = np.array([[group[0, 0], np.sum(group[0:n_harmonics, 1])] + for group in group_list if len(group) > 0]) + if not power: + fundamentals[:, 1] = decibel(fundamentals[:, 1], ref_power, min_power) + else: + fundamentals = [] + for groups in group_list: + f = fundamental_freqs_and_power(groups, n_harmonics, power, + ref_power, min_power) + fundamentals.append(f) + return fundamentals def colors_markers(): @@ -913,6 +956,7 @@ def colors_markers(): def plot_harmonic_groups(ax, group_list, max_freq=2000.0, max_groups=0, sort_by_freq=True, + power_n_harmonics=10, label_power=False, colors=None, markers=None, legend_rows=8, **kwargs): """ Mark decibel power of fundamentals and their harmonics in a plot. @@ -931,6 +975,10 @@ def plot_harmonic_groups(ax, group_list, max_freq=2000.0, max_groups=0, sort_by_ If not zero plot only the max_groups most powerful groups. sort_by_freq: boolean If True sort legend by frequency, otherwise by power. + power_n_harmonics: int + Maximum number of harmonics over which the total power of the signal is computed. + label_power: boolean + If `True` put the power in decibel in addition to the frequency into the legend. colors: list of colors or None If not None list of colors for plotting each group markers: list of markers or None @@ -945,7 +993,7 @@ def plot_harmonic_groups(ax, group_list, max_freq=2000.0, max_groups=0, sort_by_ return # sort by power: - powers = np.array([np.sum(fish[:10, 1]) for fish in group_list]) + powers = np.array([np.sum(fish[:power_n_harmonics, 1]) for fish in group_list]) max_power = np.max(powers) idx_maxpower = np.argsort(powers) if max_groups > 0 and len(idx_maxpower > max_groups): @@ -969,13 +1017,17 @@ def plot_harmonic_groups(ax, group_list, max_freq=2000.0, max_groups=0, sort_by_ color_kwargs = {} if colors is not None: color_kwargs = {'color': colors[k%len(colors)]} + label = '%6.1f Hz' % group[0, 0] + if label_power: + label += ' %6.1f dB' % decibel(np.array([np.sum(group[:power_n_harmonics, 1])]))[0] if markers is None: - ax.plot(x, decibel(y), 'o', ms=msize, label='%.1f Hz' % group[0, 0], **color_kwargs) + ax.plot(x, decibel(y), 'o', ms=msize, label=label, + clip_on=False, **color_kwargs) else: if k >= len(markers): break ax.plot(x, decibel(y), linestyle='None', marker=markers[k], mec=None, mew=0.0, - ms=msize, label='%.1f Hz' % group[0, 0], **color_kwargs) + ms=msize, label=label, clip_on=False, **color_kwargs) # legend: if legend_rows > 0: @@ -1089,10 +1141,10 @@ def psd_peak_detection_args(cfg): 'min_peak_width': 'minPeakWidth'}) -def add_harmonic_groups_config(cfg, mains_freq=60.0, max_divisor=4, freq_tol_fac=0.7, +def add_harmonic_groups_config(cfg, mains_freq=60.0, max_divisor=4, freq_tol_fac=1.0, max_upper_fill=1, max_fill_ratio=0.25, max_double_use_harmonics=8, max_double_use_count=1, - power_n_harmonics=10, min_group_size=3, + power_n_harmonics=10, min_group_size=4, min_freq=20.0, max_freq=2000.0, max_work_freq=4000.0, max_harmonics=0, max_groups=0): """ Add parameter needed for detection of harmonic groups as @@ -1115,12 +1167,11 @@ def add_harmonic_groups_config(cfg, mains_freq=60.0, max_divisor=4, freq_tol_fac 'Maximum fraction of filled in harmonics allowed (usefull values are smaller than 0.5)') cfg.add('maxDoubleUseHarmonics', max_double_use_harmonics, '', 'Maximum harmonics up to which double uses are penalized.') cfg.add('maxDoubleUseCount', max_double_use_count, '', 'Maximum overall double use count allowed.') - cfg.add('powerNHarmonics', power_n_harmonics, '', 'Compute total power over the first # harmonics.') + cfg.add('powerNHarmonics', power_n_harmonics, '', 'Compute total power over the first powerNHarmonics harmonics.') cfg.add_section('Acceptance of best harmonic groups:') cfg.add('minimumGroupSize', min_group_size, '', -'Minimum required number of harmonics (inclusively fundamental) that are not filled in and are not used by other groups.') - # cfg['minimumGroupSize'][0] = 2 # panama +'The number of harmonics (inclusively fundamental) that are allowed do be filled in.') cfg.add('minimumFrequency', min_freq, 'Hz', 'Minimum frequency allowed for the fundamental.') cfg.add('maximumFrequency', max_freq, 'Hz', 'Maximum frequency allowed for the fundamental.') cfg.add('maximumWorkingFrequency', max_work_freq, 'Hz', diff --git a/thunderfish/powerspectrum.py b/thunderfish/powerspectrum.py index 149aff27..0ee401d2 100644 --- a/thunderfish/powerspectrum.py +++ b/thunderfish/powerspectrum.py @@ -14,11 +14,12 @@ """ import numpy as np -import scipy.signal as scps +import scipy.signal as sig try: import matplotlib.mlab as mlab except ImportError: pass +from .eventdetection import detect_peaks def next_power_of_two(n): @@ -101,11 +102,12 @@ def decibel(power, ref_power=1.0, min_power=1e-20): ``` decibel = 10 * log10(power/ref_power) ``` + Power values smaller than `min_power` are set to `np.nan`. Parameters ---------- - power: array - Power values of the power spectrum or spectrogram. + power: float or array + Power values, for example from a power spectrum or spectrogram. ref_power: float Reference power for computing decibel. If set to `None` the maximum power is used. min_power: float @@ -114,14 +116,22 @@ def decibel(power, ref_power=1.0, min_power=1e-20): Returns ------- decibel_psd: array - Power values in decibel. + Power values in decibel relative to `ref_power`. """ + if hasattr(power, '__len__'): + tmp_power = power + decibel_psd = power.copy() + else: + tmp_power = np.array([power]) + decibel_psd = np.array([power]) if ref_power is None: - ref_power = np.max(power) - decibel_psd = power.copy() - decibel_psd[power < min_power] = np.nan - decibel_psd[power >= min_power] = 10.0 * np.log10(decibel_psd[power >= min_power]/ref_power) - return decibel_psd + ref_power = np.max(decibel_psd) + decibel_psd[tmp_power <= min_power] = np.nan + decibel_psd[tmp_power > min_power] = 10.0 * np.log10(decibel_psd[tmp_power > min_power]/ref_power) + if hasattr(power, '__len__'): + return decibel_psd + else: + return decibel_psd[0] def power(decibel, ref_power=1.0): @@ -261,6 +271,53 @@ def spectrogram(data, samplerate, fresolution=0.5, detrend=mlab.detrend_none, wi return spectrum, freqs, time +def peak_freqs(onsets, offsets, data, rate, freq_resolution=1.0, min_nfft=16, thresh=None): + """Peak frequencies computed for each of the data snippets. + + Parameters + ---------- + onsets: array of ints + Array of indices indicating the onsets of the snippets in `data`. + offsets: array of ints + Array of indices indicating the offsets of the snippets in `data`. + data: 1-D array + Data array that contains the data snippets defined by `onsets` and `offsets`. + rate: float + Samplerate of data in Hertz. + freq_resolution: float + Desired frequency resolution of the computed power spectra in Hertz. + min_nfft: int + The smallest value of nfft to be used. + thresh: None or float + If not None than this is the threshold required for the minimum hight of the peak + in the power spectrum. If the peak is too small than the peak frequency of + that snippet is set to NaN. + + Returns + ------- + freqs: array of floats + For each data snippet the frequency of the maximum power. + """ + freqs = [] + # for all events: + for i0, i1 in zip(onsets, offsets): + nfft, _ = nfft_noverlap(freq_resolution, rate, 0.5, min_nfft) + if nfft > i1 - i0 : + nfft = next_power_of_two((i1 - i0)/2) + f, Pxx = sig.welch(data[i0:i1], fs=rate, nperseg=nfft, noverlap=nfft//2, nfft=None) + if thresh is None: + fpeak = f[np.argmax(Pxx)] + else: + p, _ = detect_peaks(decibel(Pxx, None), thresh) + if len(p) > 0: + ipeak = np.argmax(Pxx[p]) + fpeak = f[p[ipeak]] + else: + fpeak = float('NaN') + freqs.append(fpeak) + return np.array(freqs) + + if __name__ == '__main__': import matplotlib.pyplot as plt diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py index 91444c35..ab6d9260 100644 --- a/thunderfish/thunderfish.py +++ b/thunderfish/thunderfish.py @@ -17,17 +17,18 @@ from .bestwindow import add_clip_config, add_best_window_config, clip_args, best_window_args from .dataloader import load_data from .bestwindow import clip_amplitudes, best_window_indices -from .checkpulse import check_pulse_width, check_pulse_psd -from .powerspectrum import plot_decibel_psd, multi_resolution_psd -from .harmonicgroups import harmonic_groups, harmonic_groups_args, psd_peak_detection_args, fundamental_freqs_and_db, colors_markers, plot_harmonic_groups +from .checkpulse import check_pulse_width, add_check_pulse_width_config, check_pulse_width_args +from .powerspectrum import decibel, plot_decibel_psd, multi_resolution_psd +from .harmonicgroups import harmonic_groups, harmonic_groups_args, psd_peak_detection_args, fundamental_freqs, fundamental_freqs_and_power, colors_markers, plot_harmonic_groups from .consistentfishes import consistent_fishes from .eodanalysis import eod_waveform_plot, eod_waveform from .csvmaker import write_csv -def output_plot(base_name, pulse_fish_width, pulse_fish_psd, EOD_count, median_IPI, inter_eod_intervals, - raw_data, samplerate, idx0, idx1, filtered_fishlist, period, time_eod, mean_eod, std_eod, unit, - psd_data, output_folder, save_plot=False, show_plot=False): +def output_plot(base_name, pulse_fish, inter_eod_intervals, + raw_data, samplerate, idx0, idx1, fishlist, mean_eods, eod_props, + unit, psd_data, power_n_harmonics, label_power, max_freq=3000.0, + output_folder='', save_plot=False, show_plot=False): """ Creates an output plot for the Thunderfish program. @@ -40,14 +41,8 @@ def output_plot(base_name, pulse_fish_width, pulse_fish_psd, EOD_count, median_I ---------- base_name: string Basename of audio_file. - pulse_fish_width: bool + pulse_fish: bool True if a pulse-fish has been detected by analysis of the EODs. - pulse_fish_psd: bool - True if a pulse-fish has been detected by analysis of the PSD. - EOD_count: int - Number of detected EODs. - median_IPI: float - Mean inter EOD interval. inter_eod_intervals: array Time difference from one to another detected EOD. raw_data: array @@ -58,20 +53,20 @@ def output_plot(base_name, pulse_fish_width, pulse_fish_psd, EOD_count, median_I Index of the beginning of the analysis window in the dataset. idx1: float Index of the end of the analysis window in the dataset. - filtered_fishlist: array + fishlist: array Frequency and power of fundamental frequency/harmonics of several fish. - period: float - Mean EOD time difference. - time_eod: array - Time for the mean EOD plot. - mean_eod: array + mean_eods: list of 2-D arrays with time, mean and std. Mean trace for the mean EOD plot. - std_eod: array - Standard deviation for the mean EOD plot. + eod_props: list of dict + Properties for each waveform in mean_eods. unit: string Unit of the trace and the mean EOD. psd_data: array Power spectrum of the analysed data for different frequency resolutions. + power_n_harmonics: int + Maximum number of harmonics over which the total power of the signal is computed. + label_power: boolean + If `True` put the power in decibel in addition to the frequency into the legend. output_folder: string Path indicating where output-files will be saved. save_plot: bool @@ -81,33 +76,32 @@ def output_plot(base_name, pulse_fish_width, pulse_fish_psd, EOD_count, median_I """ fig = plt.figure(facecolor='white', figsize=(14., 10.)) - ax1 = fig.add_axes([0.05, 0.9, 0.9, 0.1]) # title - ax2 = fig.add_axes([0.075, 0.05, 0.9, 0.1]) # trace - ax3 = fig.add_axes([0.075, 0.6, 0.4, 0.3]) # psd + ax1 = fig.add_axes([0.02, 0.9, 0.96, 0.1]) # title + ax2 = fig.add_axes([0.075, 0.05, 0.9, 0.1]) # whole trace + ax3 = fig.add_axes([0.075, 0.6, 0.7, 0.3]) # psd ax4 = fig.add_axes([0.075, 0.2, 0.4, 0.3]) # mean eod - ax5 = fig.add_axes([0.575, 0.6, 0.4, 0.3]) # meta data - ax6 = fig.add_axes([0.575, 0.2, 0.4, 0.3]) # inter EOD histogram - - # count fish: - try: - dom_freq = \ - filtered_fishlist[np.argsort([filtered_fishlist[fish][0][1] for fish in range(len(filtered_fishlist))])[-1]][0][0] - fish_count = len(filtered_fishlist) - except IndexError: - dom_freq = 1. / period - fish_count = 1 + ax5 = fig.add_axes([0.575, 0.2, 0.4, 0.3]) # inter EOD histogram # plot title - if not pulse_fish_width and not pulse_fish_psd: - if fish_count > 1: - ax1.text(-0.05, .75, '%s --- Recording of wavefish.' % base_name, fontsize=20, color='grey') - else: - ax1.text(-0.05, .75, '%s --- Recording of a wavefish.' % base_name, fontsize=20, color='grey') - elif pulse_fish_width and pulse_fish_psd: - ax1.text(-0.02, .65, '%s --- Recording of a pulsefish.' % base_name, fontsize=20, color='grey') + wavetitle = "" + if len(fishlist) == 1: + wavetitle = "a wavefish" + elif len(fishlist) > 1: + wavetitle = "%d wavefish" % len(fishlist) + pulsetitle = "" + if pulse_fish: + pulsetitle = "a pulsefish" + if len(wavetitle)==0 and len(pulsetitle)==0: + ax1.text(0.0, .72, '%s Recording - no fish.' % base_name, fontsize=22) + elif len(wavetitle)>0 and len(pulsetitle)>0: + ax1.text(0.0, .72, '%s Recording of %s and %s.' % (base_name, pulsetitle, wavetitle), + fontsize=22) else: - ax1.text(-0.05, .75, '%s --- Recording of wave- and pulsefish.' % base_name, fontsize=20, color='grey') - ax1.text(0.83, .8, 'Thunderfish by Bendalab', fontsize=16, color='grey') + ax1.text(0.0, .72, '%s Recording of %s.' % (base_name, pulsetitle+wavetitle), + fontsize=22) + + ax1.text(1.0, .77, 'thunderfish by Benda-Lab', fontsize=16, ha='right') + ax1.text(1.0, .5, 'Version %s' % __version__, fontsize=16, ha='right') ax1.set_frame_on(False) ax1.get_xaxis().set_visible(False) ax1.get_yaxis().set_visible(False) @@ -126,78 +120,57 @@ def output_plot(base_name, pulse_fish_width, pulse_fish_psd, EOD_count, median_I ############ # plot psd - if not pulse_fish_width: # and not pulse_fish_psd: + if len(fishlist) > 0: colors, markers = colors_markers() - plot_harmonic_groups(ax3, filtered_fishlist, max_freq=3000, max_groups=0, sort_by_freq=True, - colors=colors, markers=markers, legend_rows=8, - frameon=False, bbox_to_anchor=(1.05, 1), loc='upper left') - plot_decibel_psd(ax3, psd_data[0][1], psd_data[0][0], max_freq=3000.0, color='blue') - ax3.set_title('Powerspectrum (%.0f detected fish)' % fish_count) + plot_harmonic_groups(ax3, fishlist, max_freq=max_freq, max_groups=12, sort_by_freq=True, + power_n_harmonics=power_n_harmonics, label_power=label_power, + colors=colors, markers=markers, legend_rows=12, + frameon=False, bbox_to_anchor=(1.0, 1.1), loc='upper left', + title='EOD frequencies') + plot_decibel_psd(ax3, psd_data[0][1], psd_data[0][0], max_freq=max_freq, color='blue') + ax3.set_title('Powerspectrum (%d detected wave-fish)' % len(fishlist), y=1.05) ########## # plot mean EOD - eod_waveform_plot(time_eod, mean_eod, std_eod, ax4, unit=unit) - if pulse_fish_width and pulse_fish_psd: - ax4.set_title('Mean EOD (%.0f EODs; Pulse frequency: ~%.1f Hz)' % (EOD_count, dom_freq), fontsize=14) - # No xlim needed for pulse-fish, because of defined start and end in eod_waveform function of eodanalysis.py - else: - ax4.set_title('Mean EOD (%.0f EODs; Dominant frequency: %.1f Hz)' % (EOD_count, dom_freq), fontsize=14) - ax4.set_xlim([-600 * period, 600 * period]) - ############### - - # plot meta data - ax5.set_frame_on(False) - ax5.get_xaxis().set_visible(False) - ax5.get_yaxis().set_visible(False) - - # fishtype = 'pulse' if pulse_fish_width and pulse_fish_psd else 'wave' - # ax5.text(0.1, 0.9, 'wave-/pulsefish detected:', fontsize=14) - # ax5.text(0.6, 0.9, '%s / %s' %('-' if pulse_fish_psd else '+', '+' if pulse_fish_width or pulse_fish_psd else '-'), - # fontsize=14) - - # ax5.text(0.1, 0.9, 'fishtype:', fontsize=14) - # ax5.text(0.6, 0.9, '%s' %fishtype, fontsize=14) - - # ax5.text(0.1, 0.7, '# detected fish:', fontsize=14) - # ax5.text(0.6, 0.7, '%.0f' % fish_count, fontsize=14) - # - # if fishtype is 'wave': - # ax5.text(0.1, 0.5, 'dominant frequency:', fontsize=14) - # ax5.text(0.6, 0.5, '%.1f Hz' % dom_freq, fontsize=14) - # else: - # ax5.text(0.1, 0.5, 'Mean pulse frequency:', fontsize=14) - # ax5.text(0.6, 0.5, '%.1f Hz' % dom_freq, fontsize=14) - # - # ax5.text(0.1, 0.3, '# detected EODs:', fontsize=14) - # ax5.text(0.6, 0.3, '%.0f' %EOD_count, fontsize=14) - # - # ax5.text(0.1, 0.1, 'median EOD interval:', fontsize=14) - # ax5.text(0.6, 0.1, '%.2f ms' % (median_IPI* 1000), fontsize=14) + eodaxes = [ax4, ax5] + for axeod, mean_eod, props in zip(eodaxes[:2], mean_eods[:2], eod_props[0:2]): + eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], axeod, unit=unit) + axeod.set_title('Average EOD of %.1f Hz %sfish (n=%d EODs)' + % (props['EODf'], props['type'], props['n']), fontsize=14, y=1.05) + if props['type'] == 'wave': + lim = 750.0/props['EODf'] + axeod.set_xlim([-lim, +lim]) + else: + break ################ # plot inter EOD interval histogram - tmp_period = 1000. / dom_freq - tmp_period = tmp_period - tmp_period % 0.05 - inter_eod_intervals *= 1000. # transform sec in msec - median_IPI *= 1000. # tranform sec in msec - n, edges = np.histogram(inter_eod_intervals, bins=np.arange(tmp_period - 5., tmp_period + 5., 0.05)) - - ax6.bar(edges[:-1], n, edges[1] - edges[0] - 0.001) - ax6.plot([median_IPI, median_IPI], [0, max(n)], '--', color='red', lw=2, label='median %.2f ms' % median_IPI) - ax6.set_xlabel('inter EOD interval [ms]') - ax6.set_ylabel('n') - ax6.set_title('Inter EOD interval histogram', fontsize=14) - - max_IPI = np.ceil(np.max(inter_eod_intervals)+0.5) - if max_IPI/median_IPI < 1.2: - max_IPI = np.ceil(1.2*median_IPI) - ax6.set_xlim(0.0, max_IPI) - ax6.legend(loc='upper right', frameon=False) + if len(inter_eod_intervals)>2: + tmp_period = 1000. * np.mean(inter_eod_intervals) + tmp_period = tmp_period - tmp_period % 0.05 + inter_eod_intervals *= 1000. # transform sec in msec + median_IPI = 1000. * eod_props[0]['medianinterval'] + n, edges = np.histogram(inter_eod_intervals, bins=np.arange(tmp_period - 5., tmp_period + 5., 0.05)) + + ax5.bar(edges[:-1], n, edges[1] - edges[0] - 0.001) + ax5.plot([median_IPI, median_IPI], [0, np.max(n)], '--', color='red', lw=2, label='median %.2f ms' % median_IPI) + ax5.set_xlabel('inter EOD interval [ms]') + ax5.set_ylabel('n') + ax5.set_title('Inter EOD interval histogram', fontsize=14, y=1.05) + + max_IPI = np.ceil(np.max(inter_eod_intervals)+0.5) + if max_IPI/median_IPI < 1.2: + max_IPI = np.ceil(1.2*median_IPI) + min_IPI = np.floor(np.min(inter_eod_intervals)-0.5) + if min_IPI/median_IPI > 0.8: + min_IPI = np.floor(0.8*median_IPI) + ax5.set_xlim(min_IPI, max_IPI) + ax5.legend(loc='upper right', frameon=False) # cosmetics - for ax in [ax2, ax3, ax4, ax6]: + for ax in [ax2, ax3, ax4, ax5]: ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.get_xaxis().tick_bottom() @@ -216,11 +189,16 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False, # configuration options: cfg = ConfigFile() cfg.add_section('Power spectrum estimation:') - cfg.add('frequencyResolution', 0.5, 'Hz', 'Frequency resolution of the power spectrum.') + cfg.add('frequencyResolution', 0.2, 'Hz', 'Frequency resolution of the power spectrum.') + cfg.add('numberPSDWindows', 2, '', 'Number of windows on which power spectra are computed.') + cfg.add('numberPSDResolutions', 1, '', 'Number of power spectra computed within each window with decreasing resolution.') + cfg.add('frequencyThreshold', 1.0, 'Hz', 'The fundamental frequency of each fish needs to be detected in each power spectrum within this threshold.') + # TODO: make this threshold dependent on frequency resolution! add_psd_peak_detection_config(cfg) add_harmonic_groups_config(cfg) add_clip_config(cfg) add_best_window_config(cfg, win_size=8.0, w_cv_ampl=10.0) + add_check_pulse_width_config(cfg) # load configuration from working directory and data directories: cfg.load_files(cfgfile, filename, 3, verbose) @@ -233,7 +211,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False, else: print('write configuration to %s ...' % save_config) cfg.dump(save_config) - return '' + return None # check data file: if len(filename) == 0: @@ -250,11 +228,15 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False, return 'invalid channel %d' % channel # load data: - raw_data, samplerate, unit = load_data(filename, channel) + try: + raw_data, samplerate, unit = load_data(filename, channel, verbose=verbose) + except IOError as e: + return 'failed to open file %s: %s' % (filename, str(e)) if len(raw_data) == 0: return 'empty data file %s' % filename # calculate best_window: + found_bestwindow = True min_clip = cfg.value('minClipAmplitude') max_clip = cfg.value('maxClipAmplitude') if min_clip == 0.0 or max_clip == 0.0: @@ -264,71 +246,146 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False, min_clip=min_clip, max_clip=max_clip, **best_window_args(cfg)) except UserWarning as e: - print(str(e)) - return '' + print('best_window: ' + str(e)) + found_bestwindow = False + idx0 = 0 + idx1 = len(raw_data) data = raw_data[idx0:idx1] # pulse-type fish? - # TODO: add configuration parameter for check_pulse()! - pulse_fish_width, pta_value = check_pulse_width(data, samplerate) + pulse_fish, pta_value, pulse_period = \ + check_pulse_width(data, samplerate, verbose=verbose, + **check_pulse_width_args(cfg)) - # calculate powerspectra with different frequency resolutions: + # calculate powerspectra within sequential windows and different frequency resolutions: + numpsdwindows = cfg.value('numberPSDWindows') + numpsdresolutions = cfg.value('numberPSDResolutions') minfres = cfg.value('frequencyResolution') - psd_data = multi_resolution_psd(data, samplerate, fresolution=[minfres, 2*minfres, 4*minfres]) - + fresolution=[minfres] + for i in range(1, numpsdresolutions): + fresolution.append(2*fresolution[-1]) + n_incr = len(data)//(numpsdwindows+1) # half overlapping + psd_data = [] + deltaf = minfres + for k in range(numpsdwindows): + mr_psd_data = multi_resolution_psd(data[k*n_incr:(k+2)*n_incr], samplerate, + fresolution=fresolution) + deltaf = mr_psd_data[0][1][1] - mr_psd_data[0][1][0] + psd_data.extend(mr_psd_data) + # find the fishes in the different powerspectra: + h_kwargs = psd_peak_detection_args(cfg) + h_kwargs.update(harmonic_groups_args(cfg)) fishlists = [] - for i in range(len(psd_data)): - h_kwargs = psd_peak_detection_args(cfg) - h_kwargs.update(harmonic_groups_args(cfg)) - fishlist = harmonic_groups(psd_data[i][1], psd_data[i][0], verbose, **h_kwargs)[0] + for i, psd in enumerate(psd_data): + fishlist = harmonic_groups(psd[1], psd[0], verbose-1, **h_kwargs)[0] + if verbose > 0: + print('fundamental frequencies detected in power spectrum of window %d at resolution %d:' + % (i//numpsdresolutions, i%numpsdresolutions)) + if len(fishlist) > 0: + print(' ' + ' '.join(['%.1f' % freq[0, 0] for freq in fishlist])) + else: + print(' none') fishlists.append(fishlist) - - # find the psd_type: - # TODO: add configuration parameter for check_pulse()! - pulse_fish_psd, proportion = check_pulse_psd(psd_data[0][0], psd_data[0][1]) - # filter the different fishlists to get a fishlist with consistent fishes: - if not pulse_fish_width: # and not pulse_fish_psd: - # TODO: add configuration parameter for consistent_fishes()! - filtered_fishlist = consistent_fishes(fishlists) - # analyse eod waveform: - # TODO: add configuration parameter for eod_waveform()! - mean_eod, std_eod, time, eod_times = eod_waveform(data, samplerate, th_factor=0.6) - if save_csvs: - # write csv file with main EODF and corresponding power in dB of detected fishes: - csv_matrix = fundamental_freqs_and_db(filtered_fishlist) - csv_name = os.path.join(output_folder, outfilename + '-wavefish_eodfs.csv') + filtered_fishlist = consistent_fishes(fishlists, df_th=cfg.value('frequencyThreshold')) + if verbose > 0: + if len(filtered_fishlist) > 0: + print('fundamental frequencies consistent in all power spectra:') + print(' ' + ' '.join(['%.1f' % freq[0, 0] for freq in filtered_fishlist])) + else: + print('no fundamental frequencies are consistent in all power spectra') + + # remove multiples of pulse fish frequency from fishlist: + if pulse_fish: + fishlist = [] + for fish in filtered_fishlist: + eodf = fish[0,0] + n = np.round(eodf*pulse_period) + pulse_eodf = n/pulse_period + if verbose > 0: + print('check wavefish at %.1f Hz versus %d-th harmonic of pulse frequency %.1f Hz at resolution of %.1f Hz' + % (eodf, n, pulse_eodf, 0.5*n*deltaf)) + # TODO: make the threshold a parameter! + if np.abs(eodf-pulse_eodf) > 0.5*n*deltaf: + fishlist.append(fish) + else: + if verbose > 0: + print('removed frequency %.1f Hz, because it is multiple of pulsefish %.1f' % (eodf, pulse_eodf)) + else: + fishlist = filtered_fishlist + + # save fish frequencies and amplitudes: + if save_csvs and found_bestwindow: + if len(fishlist) > 0: + # write csv file with main EODf and corresponding power in dB of detected fishes: + csv_matrix = fundamental_freqs_and_power(fishlist, cfg.value('powerNHarmonics')) + csv_name = os.path.join(output_folder, outfilename + '-wavefish-eodfs.csv') header = ['fundamental frequency (Hz)', 'power (dB)'] write_csv(csv_name, header, csv_matrix) - else: - filtered_fishlist = [] - # analyse eod waveform: + #if pulse_fish: + # TODO: write frequency amd parameter of pulse-fish to -pulsefish_eodfs.csv + + # analyse eod waveform: + eod_props = [] + mean_eods = [] + inter_eod_intervals = [] + if pulse_fish: mean_eod_window = 0.002 - # TODO: add configuration parameter for eod_waveform()! - mean_eod, std_eod, time, eod_times = eod_waveform(data, samplerate, th_factor=0.5, start=-mean_eod_window, - stop=mean_eod_window) - - # write mean EOD - if save_csvs: - header = ['time (ms)', 'mean', 'std'] - write_csv(os.path.join(output_folder, outfilename + '-mean_waveform.csv'), header, - np.column_stack((1000.0 * time, mean_eod, std_eod))) - - period = np.mean(np.diff(eod_times)) - - # analyze inter-peak intervals: - inter_peak_intervals = np.diff(eod_times) # in sec - lower_perc, upper_perc = np.percentile(inter_peak_intervals, [1, 100 - 1]) - inter_eod_intervals = inter_peak_intervals[(inter_peak_intervals > lower_perc) & - (inter_peak_intervals < upper_perc)] - median_IPI = np.median(inter_eod_intervals) - std_IPI = np.std(inter_eod_intervals, ddof=1) + mean_eod, eod_times = \ + eod_waveform(data, samplerate, + percentile=cfg.value('pulseWidthPercentile'), + th_factor=cfg.value('pulseWidthThresholdFactor'), + start=-mean_eod_window, stop=mean_eod_window) + mean_eods.append(mean_eod) + eod_props.append({'type': 'pulse', + 'n': len(eod_times), + 'EODf': 1.0/pulse_period, + 'period': pulse_period}) + # analyze inter-pulse intervals: + inter_pulse_intervals = np.diff(eod_times) # in sec + lower_perc, upper_perc = np.percentile(inter_pulse_intervals, [1, 100 - 1]) + inter_eod_intervals = inter_pulse_intervals[(inter_pulse_intervals > lower_perc) & + (inter_pulse_intervals < upper_perc)] + if len(inter_eod_intervals) > 2: + eod_props[-1]['medianinterval'] = np.median(inter_eod_intervals) + eod_props[-1]['stdinterval'] = np.std(inter_eod_intervals, ddof=1) + + # analyse EOD waveform of all wavefish: + powers = np.array([np.sum(fish[:cfg.value('powerNHarmonics'), 1]) + for fish in fishlist]) + for idx in np.argsort(-powers): + fish = fishlist[idx] + mean_eod, eod_times = \ + eod_waveform(data, samplerate, + percentile=cfg.value('pulseWidthPercentile'), + th_factor=cfg.value('pulseWidthThresholdFactor'), + period=1.0/fish[0,0]) + mean_eods.append(mean_eod) + eod_props.append({'type': 'wave', + 'n': len(eod_times), + 'EODf': fish[0,0], + 'power': decibel(np.sum(fish[:,1]))}) + + if not found_bestwindow: + pulsefish = False + fishlist = [] + eod_props = [] + mean_eods = [] + inter_eod_intervals = [] + + # write eod waveforms: + if save_csvs and found_bestwindow: + for i, mean_eod in enumerate(mean_eods): + header = ['time (s)', 'mean', 'std'] + write_csv(os.path.join(output_folder, outfilename + '-waveform-%d.csv' % i), + header, mean_eod) if save_plot or not save_csvs: - output_plot(outfilename, pulse_fish_width, pulse_fish_psd, len(eod_times), median_IPI, - inter_eod_intervals, raw_data, samplerate, idx0, idx1, filtered_fishlist, - period, time, mean_eod, std_eod, unit, psd_data, output_folder, + output_plot(outfilename, pulse_fish, + inter_eod_intervals, raw_data, samplerate, idx0, idx1, fishlist, + mean_eods, eod_props, unit, + psd_data, cfg.value('powerNHarmonics'), True, 3000.0, output_folder, save_plot=save_plot, show_plot=not save_csvs) @@ -339,7 +396,7 @@ def main(): # command line arguments: parser = argparse.ArgumentParser( description='Analyze EOD waveforms of weakly electric fish.', - epilog='by bendalab (2015-2017)') + epilog='by Benda-Lab (2015-2018)') parser.add_argument('--version', action='version', version=__version__) parser.add_argument('-v', action='count', dest='verbose', help='verbosity level') parser.add_argument('-c', '--save-config', nargs='?', default='', const=cfgfile, diff --git a/thunderfish/time_analysis.py b/thunderfish/time_analysis.py index 496ad4e4..33c90ba0 100644 --- a/thunderfish/time_analysis.py +++ b/thunderfish/time_analysis.py @@ -476,7 +476,7 @@ def plot_presence_time(all_presence_time, all_vanish_time, all_presence_freq, da _, dmf_p = scp.mannwhitneyu(male_day_presence_t, female_day_presence_t) _, nmf_p = scp.mannwhitneyu(male_night_presence_t, female_night_presence_t) - print ('\n Presence duration:') + print('\n Presence duration:') print('Day - Night - all: p=%.3f' % (dn_p * 5.)) print('Day - Night - male: p=%.3f' % (dnm_p * 5.)) print('Day - Night - female: p=%.3f' % (dnf_p * 5.)) @@ -1021,7 +1021,7 @@ def plot_clock_rise_counts(all_rise_clock_counts, all_m_rise_clock_counts, all_f r, p = scp.spearmanr(fish_counts, rise_rate) print('\n rise rate vs. fish count') - print ('a rise -- a #: p = %.3f; r = %.2f' % (p * 7., r)) + print('a rise -- a #: p = %.3f; r = %.2f' % (p * 7., r)) if p < 0.05: ax1.plot([xedges[0], xedges[-1]], interc + sl * np.array([xedges[0], xedges[-1]]), color='red') ax1.set_xlim([xedges[0], xedges[-1]]) @@ -1039,7 +1039,7 @@ def plot_clock_rise_counts(all_rise_clock_counts, all_m_rise_clock_counts, all_f sl, interc, r, p, _ = scp.linregress(male_counts, m_rise_rate) # r, p = scp.spearmanr(male_counts, male_r_counts / male_counts) r, p = scp.spearmanr(male_counts, m_rise_rate) - print ('m rise -- m #: p = %.3f; r = %.2f' % (p * 7., r)) + print('m rise -- m #: p = %.3f; r = %.2f' % (p * 7., r)) if p < 0.05: ax2.plot([xedges[0], xedges[-1]], interc + sl * np.array([xedges[0], xedges[-1]]), color='red') ax2.set_xlim([xedges[0], xedges[-1]]) @@ -1057,7 +1057,7 @@ def plot_clock_rise_counts(all_rise_clock_counts, all_m_rise_clock_counts, all_f sl, interc, r, p, _ = scp.linregress(female_counts, m_rise_rate) # r, p = scp.spearmanr(female_counts, male_r_counts / male_counts) r, p = scp.spearmanr(female_counts, m_rise_rate) - print ('m rise -- f #: p = %.3f; r = %.2f' % (p * 7., r)) + print('m rise -- f #: p = %.3f; r = %.2f' % (p * 7., r)) if p < 0.05: ax3.plot([xedges[0], xedges[-1]], interc + sl * np.array([xedges[0], xedges[-1]]), color='red') ax3.set_xlim([xedges[0], xedges[-1]]) @@ -1075,7 +1075,7 @@ def plot_clock_rise_counts(all_rise_clock_counts, all_m_rise_clock_counts, all_f sl, interc, r, p, _ = scp.linregress(fish_counts, m_rise_rate) # r, p = scp.spearmanr(fish_counts, male_r_counts / male_counts) r, p = scp.spearmanr(fish_counts, m_rise_rate) - print ('m rise -- a #: p = %.3f; r = %.2f' % (p * 7., r)) + print('m rise -- a #: p = %.3f; r = %.2f' % (p * 7., r)) if p < 0.05: ax4.plot([xedges[0], xedges[-1]], interc + sl * np.array([xedges[0], xedges[-1]]), color='red') ax4.set_xlim([xedges[0], xedges[-1]]) @@ -1093,7 +1093,7 @@ def plot_clock_rise_counts(all_rise_clock_counts, all_m_rise_clock_counts, all_f sl, interc, r, p, _ = scp.linregress(male_counts, f_rise_rate) # r, p = scp.spearmanr(male_counts, female_r_counts / female_counts) r, p = scp.spearmanr(male_counts, f_rise_rate) - print ('f rise -- m #: p = %.3f; r = %.2f' % (p * 7., r)) + print('f rise -- m #: p = %.3f; r = %.2f' % (p * 7., r)) if p < 0.05: ax5.plot([xedges[0], xedges[-1]], interc + sl * np.array([xedges[0], xedges[-1]]), color='red') ax5.set_xlim([xedges[0], xedges[-1]]) @@ -1111,7 +1111,7 @@ def plot_clock_rise_counts(all_rise_clock_counts, all_m_rise_clock_counts, all_f sl, interc, r, p, _ = scp.linregress(female_counts, f_rise_rate) # r, p = scp.spearmanr(female_counts, female_r_counts / female_counts) r, p = scp.spearmanr(female_counts, f_rise_rate) - print ('f rise -- f #: p = %.3f; r = %.2f' % (p * 7., r)) + print('f rise -- f #: p = %.3f; r = %.2f' % (p * 7., r)) if p < 0.05: ax6.plot([xedges[0], xedges[-1]], interc + sl * np.array([xedges[0], xedges[-1]]), color='red') ax6.set_xlim([xedges[0], xedges[-1]]) @@ -1129,7 +1129,7 @@ def plot_clock_rise_counts(all_rise_clock_counts, all_m_rise_clock_counts, all_f sl, interc, r, p, _ = scp.linregress(fish_counts, f_rise_rate) # r, p = scp.spearmanr(fish_counts, female_r_counts / female_counts) r, p = scp.spearmanr(fish_counts, f_rise_rate) - print ('f rise -- a #: p = %.3f; r = %.2f' % (p * 7., r)) + print('f rise -- a #: p = %.3f; r = %.2f' % (p * 7., r)) if p < 0.05: ax7.plot([xedges[0], xedges[-1]], interc + sl * np.array([xedges[0], xedges[-1]]), color='red') ax7.set_xlim([xedges[0], xedges[-1]]) diff --git a/thunderfish/version.py b/thunderfish/version.py index 6d127f2d..74d7938f 100644 --- a/thunderfish/version.py +++ b/thunderfish/version.py @@ -1 +1 @@ -__version__='0.5.0' # see http://semver.org/ +__version__='1.2.0' # see http://semver.org/