From 8a39152a50c656b9839f8b817619260ab46e9e10 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Thu, 1 Nov 2018 00:08:43 +0100
Subject: [PATCH 01/15] [eodanalysis] fit wavefish eods with fourier series

---
 thunderfish/eodanalysis.py | 128 +++++++++++++++++++++++++++++++++----
 thunderfish/thunderfish.py |   8 ++-
 2 files changed, 120 insertions(+), 16 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 0845835b..e3e45fea 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -3,12 +3,14 @@
 
 ## Main functions
 - `eod_waveform()`: compute an averaged EOD waveform.
+- `analyze_wave()`: analyze the EOD waveform of a wave-type fish.
 
 ## Visualization
 - `eod_waveform_plot()`: plot the averaged waveform with standard deviation.
 """
 
 import numpy as np
+from scipy.optimize import curve_fit
 from .eventdetection import percentile_threshold, detect_peaks, snippets
 
 
@@ -81,17 +83,101 @@ def eod_waveform(data, samplerate, th_factor=0.6, percentile=0.1,
     mean_eod[:,2] = np.std(eod_snippets, axis=0, ddof=1)
 
     # time axis:
-    time = (np.arange(len(mean_eod)) + start_inx) / samplerate
-    if period is not None:
-        # move peak of waveform to zero:
-        offs = len(mean_eod[:,1])//4
-        time -= time[offs+np.argmax(mean_eod[offs:3*offs,1])]
-    mean_eod[:,0] = time
+    mean_eod[:,0] = (np.arange(len(mean_eod)) + start_inx) / samplerate
     
     return mean_eod, eod_times
 
 
-def eod_waveform_plot(time, mean_eod, std_eod, ax, unit='a.u.', **kwargs):
+def sinewaves(t, freq, delay, ampl, *ap):
+    """
+    Parameters
+    ----------
+    t: float or array
+        Time.
+    freq: float
+        Fundamental frequency.
+    delay: float
+        Shift of sinewaves in time.
+    ampl: float
+        Amplitude of the sinewave with the fundamental frequency.
+    *ap: list of floats
+        The relative amplitudes and phases of the harmonics.
+        
+    Returns
+    -------
+    x: float or array
+        The Fourier series evaluated at times `t`.
+    """
+    tt = t - delay
+    omega = 2.0*np.pi*freq
+    x = np.sin(omega*tt)
+    for k, (a, p) in enumerate(zip(ap[0:-1:2], ap[1::2])):
+        x += a*np.sin((k+2)*omega*tt+p)
+    return ampl*x
+
+
+def analyze_wave(eod, freq, props={}, n_harm=6):
+    """
+    Analyze the EOD waveform of a wave-type fish.
+    
+    Parameters
+    ----------
+    eod: 2-D array
+        The eod waveform. First column is time in seconds, second column the eod waveform.
+        Further columns are optional but not used.
+    freq: float
+        The frequency of the EOD.
+    props: dict
+        Dictionary with properties of the analyzed EOD waveform.
+    n_harm: int
+        Maximum number of harmonics used for the fit.
+    
+    Returns
+    -------
+    meod: 2-D array
+        The eod waveform. First column is time in seconds, second column the eod waveform.
+        Further columns are kept from the input `eod`. And a column is added with the
+        fit of the fourier series to the waveform.
+    """
+    # storage:
+    meod = np.zeros((eod.shape[0], eod.shape[1]+1))
+    meod[:,:-1] = eod
+    
+    # subtract mean:
+    meod[:,1] -= np.mean(meod[:,1])
+
+    # flip:
+    if -np.min(meod[:,1]) > np.max(meod[:,1]):
+        meod[:,1] = -meod[:,1]
+    
+    # move peak of waveform to zero:
+    offs = len(meod[:,1])//4
+    meod[:,0] -= meod[offs+np.argmax(meod[offs:3*offs,1]),0]
+
+    # fit sine wave:
+    ampl = 0.5*(np.max(meod[:,1])-np.min(meod[:,1]))
+    params = [freq, -0.25/freq, ampl]
+    for i in range(1, n_harm):
+        params.extend([1.0/(i+1), 0.0])
+    popt, pcov = curve_fit(sinewaves, meod[:,0], meod[:,1], params)
+    for i in range(1, n_harm):
+        if popt[1+i*2] < 0.0:
+            popt[1+i*2] *= -1.0
+            popt[2+i*2] += np.pi
+        popt[2+i*2] %= 2.0*np.pi
+    meod[:,-1] = sinewaves(meod[:,0], *popt)
+
+    # store results:
+    mprops = dict(props)
+    mprops['amplitude'] = popt[2]
+    for i in range(1, n_harm):
+        mprops['frac%d' % i] = popt[1+i*2]
+        mprops['phase%d' % i] = popt[1+i*2]
+    
+    return meod, mprops
+
+
+def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, ax, unit='a.u.', **kwargs):
     """Plot mean eod and its standard deviation.
 
     Parameters
@@ -102,6 +188,8 @@ def eod_waveform_plot(time, mean_eod, std_eod, ax, unit='a.u.', **kwargs):
         Mean EOD waveform.
     std_eod: 1-D array
         Standard deviation of EOD waveform.
+    fit_eod: 1-D array
+        Fit of the EOD waveform.
     ax:
         Axis for plot
     unit: string
@@ -109,15 +197,29 @@ def eod_waveform_plot(time, mean_eod, std_eod, ax, unit='a.u.', **kwargs):
     kwargs: dict
         Arguments passed on to the plot command for the mean eod.
     """
-    if not 'lw' in kwargs:
-        kwargs['lw'] = 2
-    if not 'color' in kwargs:
-        kwargs['color'] = 'r'
+    fkwargs = {}
+    if 'flw' in kwargs:
+        fkwargs['lw'] = kwargs['flw']
+        del kwargs['flw']
+    else:
+        fkwargs['lw'] = 6
+    if 'fcolor' in kwargs:
+        fkwargs['color'] = kwargs['fcolor']
+        del kwargs['fcolor']
+    else:
+        fkwargs['color'] = 'steelblue'
     ax.autoscale(True)
-    ax.plot(1000.0*time, mean_eod, **kwargs)
+    if len(fit_eod)>0:
+        ax.plot(1000.0*time, fit_eod, zorder=2, **fkwargs)
+    ckwargs = dict(kwargs)
+    if not 'lw' in ckwargs:
+        ckwargs['lw'] = 2
+    if not 'color' in ckwargs:
+        ckwargs['color'] = 'r'
+    ax.plot(1000.0*time, mean_eod, zorder=3, **ckwargs)
     ax.autoscale(False)
     ax.fill_between(1000.0*time, mean_eod + std_eod, mean_eod - std_eod,
-                    color='grey', alpha=0.3)
+                    color='grey', alpha=0.3, zorder=1)
     ax.set_xlabel('Time [msec]')
     ax.set_ylabel('Amplitude [%s]' % unit)
 
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index ab6d9260..ae03469e 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -21,7 +21,7 @@
 from .powerspectrum import decibel, plot_decibel_psd, multi_resolution_psd
 from .harmonicgroups import harmonic_groups, harmonic_groups_args, psd_peak_detection_args, fundamental_freqs, fundamental_freqs_and_power, colors_markers, plot_harmonic_groups
 from .consistentfishes import consistent_fishes
-from .eodanalysis import eod_waveform_plot, eod_waveform
+from .eodanalysis import eod_waveform, analyze_wave, eod_waveform_plot
 from .csvmaker import write_csv
 
 
@@ -135,7 +135,8 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
     # plot mean EOD
     eodaxes = [ax4, ax5]
     for axeod, mean_eod, props in zip(eodaxes[:2], mean_eods[:2], eod_props[0:2]):
-        eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], axeod, unit=unit)
+        eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], mean_eod[:,3],
+                          axeod, unit=unit)
         axeod.set_title('Average EOD of %.1f Hz %sfish (n=%d EODs)'
                         % (props['EODf'], props['type'], props['n']), fontsize=14, y=1.05)
         if props['type'] == 'wave':
@@ -361,11 +362,12 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          percentile=cfg.value('pulseWidthPercentile'),
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          period=1.0/fish[0,0])
-        mean_eods.append(mean_eod)
         eod_props.append({'type': 'wave',
                           'n': len(eod_times),
                           'EODf': fish[0,0],
                           'power': decibel(np.sum(fish[:,1]))})
+        mean_eod, eod_props[-1] = analyze_wave(mean_eod, fish[0,0], eod_props[-1])
+        mean_eods.append(mean_eod)
         
     if not found_bestwindow:
         pulsefish = False

From 4903e37da4400e253fd3d08479c3aab6c37fa17a Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Thu, 1 Nov 2018 10:30:21 +0100
Subject: [PATCH 02/15] [eodanalysis] store spectral properties

---
 thunderfish/eodanalysis.py | 22 ++++++++++++++++------
 thunderfish/thunderfish.py | 12 +++++++++---
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index e3e45fea..9cc96cbb 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -134,10 +134,18 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
     
     Returns
     -------
-    meod: 2-D array
+    meod: 2-D array of floats
         The eod waveform. First column is time in seconds, second column the eod waveform.
         Further columns are kept from the input `eod`. And a column is added with the
         fit of the fourier series to the waveform.
+    props: dict
+        The `props` dictionary with added properties of the analyzed EOD waveform.
+    spec_data: 2-D array of floats
+        First column is the index of the harmonics, second column its frequency,
+        third column its relative amplitude, and fourth column the phase shift
+        relative to the fundamental. Rows are the harmonics,
+        first row is the fundamental frequency with index 0, amplitude of one, and
+        phase shift of zero.
     """
     # storage:
     meod = np.zeros((eod.shape[0], eod.shape[1]+1))
@@ -165,16 +173,18 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
             popt[1+i*2] *= -1.0
             popt[2+i*2] += np.pi
         popt[2+i*2] %= 2.0*np.pi
+        if popt[2+i*2] > np.pi:
+            popt[2+i*2] -= 2.0*np.pi
     meod[:,-1] = sinewaves(meod[:,0], *popt)
 
     # store results:
-    mprops = dict(props)
-    mprops['amplitude'] = popt[2]
+    props['amplitude'] = popt[2]
+    spec_data = np.zeros((n_harm, 4))
+    spec_data[0, :] = [1.0, freq, 1.0, 0.0]
     for i in range(1, n_harm):
-        mprops['frac%d' % i] = popt[1+i*2]
-        mprops['phase%d' % i] = popt[1+i*2]
+        spec_data[i, :] = [i+1, (i+1)*freq, popt[1+i*2], popt[1+i*2]]
     
-    return meod, mprops
+    return meod, props, spec_data
 
 
 def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, ax, unit='a.u.', **kwargs):
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index ae03469e..05251a60 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -330,6 +330,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
     # analyse eod waveform:
     eod_props = []
     mean_eods = []
+    spec_data = []
     inter_eod_intervals = []
     if pulse_fish:
         mean_eod_window = 0.002
@@ -339,6 +340,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          start=-mean_eod_window, stop=mean_eod_window)
         mean_eods.append(mean_eod)
+        spec_data.append([])
         eod_props.append({'type': 'pulse',
                           'n': len(eod_times),
                           'EODf': 1.0/pulse_period,
@@ -366,8 +368,9 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                           'n': len(eod_times),
                           'EODf': fish[0,0],
                           'power': decibel(np.sum(fish[:,1]))})
-        mean_eod, eod_props[-1] = analyze_wave(mean_eod, fish[0,0], eod_props[-1])
+        mean_eod, eod_props[-1], sdata = analyze_wave(mean_eod, fish[0,0], eod_props[-1])
         mean_eods.append(mean_eod)
+        spec_data.append(sdata)
         
     if not found_bestwindow:
         pulsefish = False
@@ -378,10 +381,13 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
 
     # write eod waveforms:
     if save_csvs and found_bestwindow:
-        for i, mean_eod in enumerate(mean_eods):
-            header = ['time (s)', 'mean', 'std']
+        for i, (mean_eod, sdata) in enumerate(zip(mean_eods, spec_data)):
+            header = ['time (s)', 'mean', 'std', 'fit']
             write_csv(os.path.join(output_folder, outfilename + '-waveform-%d.csv' % i),
                       header, mean_eod)
+            header = ['harmonics', 'frequency (Hz)', 'amplitude', 'phase']
+            write_csv(os.path.join(output_folder, outfilename + '-spectrum-%d.csv' % i),
+                      header, sdata)
 
     if save_plot or not save_csvs:
         output_plot(outfilename, pulse_fish,

From c66c8871e2686bb6579c5b5880d7dc0499f964cf Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Thu, 1 Nov 2018 23:37:18 +0100
Subject: [PATCH 03/15] [thunderfish] skip bad waveforms

---
 thunderfish/eodanalysis.py | 31 ++++++++++++++++++++++++-------
 thunderfish/thunderfish.py | 28 +++++++++++++++++++---------
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 9cc96cbb..4b8db1ab 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -139,13 +139,21 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
         Further columns are kept from the input `eod`. And a column is added with the
         fit of the fourier series to the waveform.
     props: dict
-        The `props` dictionary with added properties of the analyzed EOD waveform.
+        The `props` dictionary with added properties of the analyzed EOD waveform:
+        - p-p-amplitude: peak-to-peak amplitude of the Fourier fit.
+        - amplitude: amplitude factor of the Fourier fit.
+        - rmserror: root-mean-square error between Fourier-fit and EOD waveform relative to
+          the p-p amplitude. If larger than 0.05 the data are bad.
     spec_data: 2-D array of floats
         First column is the index of the harmonics, second column its frequency,
         third column its relative amplitude, and fourth column the phase shift
-        relative to the fundamental. Rows are the harmonics,
-        first row is the fundamental frequency with index 0, amplitude of one, and
-        phase shift of zero.
+        relative to the fundamental.
+        Rows are the harmonics, first row is the fundamental frequency with index 0,
+        amplitude of one, and phase shift of zero.
+        If the amplitude of the first harmonic (spec-data[1,3]) is larger than 2,
+        or the amplitude if the second harmonic (spec-data[2,3]) is larger than 0.2,
+        then the EOD waveform has the wrong waveform and
+        should not be used for further analysis.
     """
     # storage:
     meod = np.zeros((eod.shape[0], eod.shape[1]+1))
@@ -169,20 +177,29 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
         params.extend([1.0/(i+1), 0.0])
     popt, pcov = curve_fit(sinewaves, meod[:,0], meod[:,1], params)
     for i in range(1, n_harm):
+        # make all amplitudes positive:
         if popt[1+i*2] < 0.0:
             popt[1+i*2] *= -1.0
             popt[2+i*2] += np.pi
+        # all phases in the range 0 to 2 pi:
         popt[2+i*2] %= 2.0*np.pi
-        if popt[2+i*2] > np.pi:
+        # all phases except of 2nd harmonic in the range -pi to pi:
+        if popt[2+i*2] > np.pi and i != 2:
             popt[2+i*2] -= 2.0*np.pi
     meod[:,-1] = sinewaves(meod[:,0], *popt)
 
+    # fit error:
+    ppampl = np.max(meod[:,3]) - np.min(meod[:,3])
+    rmserror = np.sqrt(np.mean((meod[:,1] - meod[:,3])**2.0))/ppampl
+
     # store results:
+    props['p-p-amplitude'] = ppampl
     props['amplitude'] = popt[2]
+    props['rmserror'] = rmserror
     spec_data = np.zeros((n_harm, 4))
-    spec_data[0, :] = [1.0, freq, 1.0, 0.0]
+    spec_data[0, :] = [0.0, freq, 1.0, 0.0]
     for i in range(1, n_harm):
-        spec_data[i, :] = [i+1, (i+1)*freq, popt[1+i*2], popt[1+i*2]]
+        spec_data[i, :] = [i, (i+1)*freq, popt[1+i*2], popt[2+i*2]]
     
     return meod, props, spec_data
 
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index 05251a60..99ad840c 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -13,10 +13,10 @@
 import matplotlib.pyplot as plt
 from .version import __version__
 from .configfile import ConfigFile
-from .harmonicgroups import add_psd_peak_detection_config, add_harmonic_groups_config
-from .bestwindow import add_clip_config, add_best_window_config, clip_args, best_window_args
 from .dataloader import load_data
+from .bestwindow import add_clip_config, add_best_window_config, clip_args, best_window_args
 from .bestwindow import clip_amplitudes, best_window_indices
+from .harmonicgroups import add_psd_peak_detection_config, add_harmonic_groups_config
 from .checkpulse import check_pulse_width, add_check_pulse_width_config, check_pulse_width_args
 from .powerspectrum import decibel, plot_decibel_psd, multi_resolution_psd
 from .harmonicgroups import harmonic_groups, harmonic_groups_args, psd_peak_detection_args, fundamental_freqs, fundamental_freqs_and_power, colors_markers, plot_harmonic_groups
@@ -364,13 +364,23 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          percentile=cfg.value('pulseWidthPercentile'),
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          period=1.0/fish[0,0])
-        eod_props.append({'type': 'wave',
-                          'n': len(eod_times),
-                          'EODf': fish[0,0],
-                          'power': decibel(np.sum(fish[:,1]))})
-        mean_eod, eod_props[-1], sdata = analyze_wave(mean_eod, fish[0,0], eod_props[-1])
-        mean_eods.append(mean_eod)
-        spec_data.append(sdata)
+        props = {'type': 'wave',
+                 'n': len(eod_times),
+                 'EODf': fish[0,0],
+                 'power': decibel(np.sum(fish[:,1]))}
+        mean_eod, props, sdata = analyze_wave(mean_eod, fish[0,0], props)
+        # only add good waveforms:
+        if (idx > 0 or clipped < 0.01) and sdata[1,2] < 2.0 and sdata[2,2] < 0.2 and props['rmserror'] < 0.05:
+            eod_props.append(props)
+            mean_eods.append(mean_eod)
+            spec_data.append(sdata)
+            if verbose > 0:
+                print('%d take waveform of %6.1fHz fish: clipped=%4.2f ampl1=%4.2f ampl2=%4.2f ampl3=%4.2f rmserror=%4.3f'
+                      % (idx, fish[0,0], clipped, sdata[1,2], sdata[2,2], sdata[3,2], props['rmserror']))
+        else:
+            if verbose > 0:
+                print('%d skip waveform of %.1fHz fish: clipped=%4.2f ampl1=%.2f ampl2=%.2f rmserror=%.3f'
+                      % (idx, fish[0,0], clipped, sdata[1,2], sdata[2,2], props['rmserror']))
         
     if not found_bestwindow:
         pulsefish = False

From c52c60c485f03741b1c092b86ac2e50c9b32f9f7 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Fri, 2 Nov 2018 08:53:02 +0100
Subject: [PATCH 04/15] [thunderfish] configureable waveform checks

---
 thunderfish/thunderfish.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index 99ad840c..65959fcc 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -200,7 +200,12 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
     add_clip_config(cfg)
     add_best_window_config(cfg, win_size=8.0, w_cv_ampl=10.0)
     add_check_pulse_width_config(cfg)
-
+    cfg.add_section('Waveform selection:')
+    cfg.add('maximumClippedFraction', 0.01, '', 'Take waveform of the fish with the highest power only if the fraction of clipped signals is below this value.')
+    cfg.add('maximumFirstHarmonicAmplitude', 2.0, '', 'Skip waveform of wave-type fish if the ampltude of the first harmonic is higher than this factor times the amplitude of the fundamental.')
+    cfg.add('maximumSecondHarmonicAmplitude', 0.2, '', 'Skip waveform of wave-type fish if the ampltude of the second harmonic is higher than this factor times the amplitude of the fundamental. That is, the waveform appears to have twice the frequency than the fundamental.')
+    cfg.add('maximumRMSError', 0.05, '', 'Skip waveform of wave-type fish if the root-mean-squared error relative to the peak-to-peak amplitude is larger than this number.')
+    
     # load configuration from working directory and data directories:
     cfg.load_files(cfgfile, filename, 3, verbose)
 
@@ -357,7 +362,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
     # analyse EOD waveform of all wavefish:
     powers = np.array([np.sum(fish[:cfg.value('powerNHarmonics'), 1])
                        for fish in fishlist])
-    for idx in np.argsort(-powers):
+    for k, idx in enumerate(np.argsort(-powers)):
         fish = fishlist[idx]
         mean_eod, eod_times = \
             eod_waveform(data, samplerate,
@@ -369,8 +374,11 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                  'EODf': fish[0,0],
                  'power': decibel(np.sum(fish[:,1]))}
         mean_eod, props, sdata = analyze_wave(mean_eod, fish[0,0], props)
-        # only add good waveforms:
-        if (idx > 0 or clipped < 0.01) and sdata[1,2] < 2.0 and sdata[2,2] < 0.2 and props['rmserror'] < 0.05:
+        # add good waveforms only:
+        if (k > 0 or clipped < cfg.value('maximumClippedFraction')) and \
+            sdata[1,2] < cfg.value('maximumFirstHarmonicAmplitude') and \
+            sdata[2,2] < cfg.value('maximumSecondHarmonicAmplitude') and \
+            props['rmserror'] < cfg.value('maximumRMSError'):
             eod_props.append(props)
             mean_eods.append(mean_eod)
             spec_data.append(sdata)

From c713c2a96364500ad8960ced2c635d6983774212 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Fri, 2 Nov 2018 09:34:01 +0100
Subject: [PATCH 05/15] [bestwindow] updated documentation

---
 thunderfish/bestwindow.py | 306 ++++++++++++++++++++++++--------------
 1 file changed, 196 insertions(+), 110 deletions(-)

diff --git a/thunderfish/bestwindow.py b/thunderfish/bestwindow.py
index e67e3a4b..62241bbf 100644
--- a/thunderfish/bestwindow.py
+++ b/thunderfish/bestwindow.py
@@ -36,33 +36,51 @@ def clip_amplitudes(data, win_indices, min_fac=2.0, nbins=20,
     If the bins at the edges are more than min_fac times as large as
     the neighboring bins, clipping at the bin's amplitude is assumed.
 
-    Args:
-      data (array): 1-D array with the data.
-      win_indices (int): size of the analysis window in indices.
-      min_fac (float): if the first or the second bin is at least min_fac times
+    Parameters
+    ----------
+    data: 1-D array
+        The data.
+    win_indices: int
+        Size of the analysis window in indices.
+    min_fac: float
+        If the first or the second bin is at least `min_fac` times
         as large as the third bin, their upper bin edge is set as min_clip.
         Likewise for the last and next-to last bin.
-      nbins (int): number of bins used for computing a histogram within min_ampl and max_ampl
-      min_ampl (float): minimum to be expected amplitude of the data
-      max_ampl (float): maximum to be expected amplitude of the data
-      plot_hist_func(data, winx0, winx1, bins, h,
-                     min_clip, max_clip, min_ampl, max_ampl, kwargs):
-        function for visualizing the histograms, is called for every window.
-        plot_clipping() is a simple function that can be passed as plot_hist_func.
-        data: the full data array
-        winx0: the start index of the current window
-        winx1: the end index of the current window
-        bins: the bin edges of the histogram
-        h: the histogram, plot it with plt.bar(bins[:-1], h, width=np.mean(np.diff(bins)))
-        min_clip: the current value of the minimum clip amplitude
-        max_clip: the current value of the minimum clip amplitude
-        min_ampl: the minimum amplitude of the data
-        max_ampl: the maximum amplitude of the data
-        kwargs: further user supplied key-word arguments.
-
-    Returns:
-      min_clip : minimum amplitude that is not clipped.
-      max_clip : maximum amplitude that is not clipped.
+    nbins: int
+        Number of bins used for computing a histogram within `min_ampl` and `max_ampl`.
+    min_ampl: float
+        Minimum to be expected amplitude of the data.
+    max_ampl: float
+        Maximum to be expected amplitude of the data
+    plot_hist_func: function
+        Function for visualizing the histograms, is called for every window.
+        plot_clipping() is a simple function that can be passed as `plot_hist_func`
+        to quickly visualize what is going on in `clip_amplitudes()`.
+        Signature:
+        ```
+        plot_hist_func(data, winx0, winx1, bins, h,
+                       min_clip, max_clip, min_ampl, max_ampl, kwargs):
+        ```
+        - `data` (array): the full data array.
+        - `winx0` (int): the start index of the current window.
+        - `winx1` (int): the end index of the current window.
+        - `bins` (array): the bin edges of the histogram.
+        - `h` (array): the histogram, plot it with
+           ```
+           plt.bar(bins[:-1], h, width=np.mean(np.diff(bins)))
+           ```
+        - `min_clip` (float): the current value of the minimum clip amplitude.
+        - `max_clip` (float): the current value of the minimum clip amplitude.
+        - `min_ampl` (float): the minimum amplitude of the data.
+        - `max_ampl` (float): the maximum amplitude of the data.
+        - `kwargs` (dict): further user supplied key-word arguments.
+
+    Returns
+    -------
+      min_clip: float
+          Minimum amplitude that is not clipped.
+      max_clip: float
+          Maximum amplitude that is not clipped.
     """
 
     min_clipa = min_ampl
@@ -90,7 +108,8 @@ def clip_amplitudes(data, win_indices, min_fac=2.0, nbins=20,
 
 def plot_clipping(data, winx0, winx1, bins,
                   h, min_clip, max_clip, min_ampl, max_ampl):
-    """Visualize the data histograms and the detected clipping amplitudes in clip_amplitudes().
+    """Visualize the data histograms and the detected clipping amplitudes.
+    Pass this function as the `plot_hist_func` argument to `clip_amplitudes()`.
     """
     plt.subplot(2, 1, 1)
     plt.plot(data[winx0:winx1], 'b')
@@ -111,11 +130,16 @@ def add_clip_config(cfg, min_clip=0.0, max_clip=0.0,
     """ Add parameter needed for clip_amplitudes() as
     a new section to a configuration.
 
-    Args:
-      cfg (ConfigFile): the configuration
-      min_clip (float): default minimum clip amplitude.
-      max_clip (float): default maximum clip amplitude.
-      See clip_amplitudes() for details on the remaining arguments.
+    Parameters
+    ----------
+    cfg: ConfigFile
+        The configuration.
+    min_clip: float
+        Default minimum clip amplitude.
+    max_clip: float
+        Default maximum clip amplitude.
+        
+    See `clip_amplitudes()` for details on the remaining arguments.
     """
 
     cfg.add_section('Clipping amplitudes:')
@@ -134,15 +158,23 @@ def clip_args(cfg, rate):
     respective parameter names of the function clip_amplitudes().
     The return value can then be passed as key-word arguments to this function.
 
-    Args:
-      cfg (ConfigFile): the configuration
-      rate (float): the sampling rate of the data
-
-    Returns:
-      a (dict): dictionary with names of arguments of the clip_amplitudes() function and their values as supplied by cfg.
+    Parameters
+    ----------
+    cfg: ConfigFile
+        The configuration.
+    rate: float
+        The sampling rate of the data.
+
+    Returns
+    -------
+    a: dict
+        Dictionary with names of arguments of the `clip_amplitudes()` function
+        and their values as supplied by `cfg`.
     """
-    a = cfg.map({'min_fac': 'minClipFactor', 'nbins': 'clipBins',
-                 'min_ampl': 'minDataAmplitude', 'max_ampl': 'maxDataAmplitude'})
+    a = cfg.map({'min_fac': 'minClipFactor',
+                 'nbins': 'clipBins',
+                 'min_ampl': 'minDataAmplitude',
+                 'max_ampl': 'maxDataAmplitude'})
     a['win_indices'] = int(cfg.value('clipWindow') * rate)
     return a
 
@@ -151,79 +183,112 @@ def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0.
                         th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf,
                         w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5,
                         plot_data_func=None, **kwargs):
-    """ Detect the best window of the data to be analyzed. The data have
-    been sampled with rate Hz.
+    """Find the window within data most suitable for subsequent analysis.
     
     First, large peaks and troughs of the data are detected.  Peaks and
     troughs have to be separated in amplitude by at least the value of a
-    dynamic threshold.  The threshold is computed in win_shift wide
-    windows as thresh_ampl_fac times the interpercentile range at
-    the percentile and 100.0-percentile percentile of the data
-    using the eventdetection.percentile_threshold() function.
+    dynamic threshold.  The threshold is computed in `win_shift` wide
+    windows as `thresh_ampl_fac` times the interpercentile range at
+    the `percentile`-th and 100.0-`percentile`-th percentile of the data
+    using the `eventdetection.percentile_threshold()` function.
 
     Second, criteria for selecting the best window are computed for each
-    window of width win_size shifted by win_shift trough the data. The
+    window of width `win_size` shifted by `win_shift` trough the data. The
     three criteria are:
 
-    - the coefficient of variation of the inter-peak and inter-trough
-    intervals.
     - the mean peak-to-trough amplitude multiplied with the fraction of
-    non clipped peak and trough amplitudes.
+      non clipped peak and trough amplitudes.
     - the coefficient of variation of the peak-to-trough amplitude.
+    - the coefficient of variation of the inter-peak and inter-trough
+      intervals.
 
     Third, a cost function is computed as a weighted sum of the three
-    criteria (mean-amplitude is taken negatively). The weights are given
-    by w_cv_interv, w_ampl, and w_cv_ampl.
+    criteria (the mean amplitude is taken negatively). The respective
+    weights are given by `w_ampl`, `w_cv_ampl`, and `w_cv_interv`.
 
     Finally, a threshold is set to the minimum value of the cost
     function plus tolerance.  Then the largest region with the cost
     function below this threshold is selected as the best window.  If
-    single is True, then only the single window with smallest cost
+    `single` is `True`, then only the single window with smallest cost
     within the selected largest region is returned.
 
-    Data of the best window algorithm can be visualized by supplying the
-    function plot_data_func.  Additional arguments for this function can
-    be supplied via key-word arguments kwargs.
-
-    :param data: (1-D array). The data to be analyzed
-    :param samplerate: (float). Sampling rate of the data in Hz
-    :param single: (boolean). If true return only the single window with the smallest cost. If False return the largest window with the cost below the minimum cost plus tolerance.
-    :param win_size: (float). Size of the best window in seconds. Choose it large enough for a minimum analysis.
-    :param win_shift: (float). Time shift in seconds between windows. Should be smaller or equal to win_size and not smaller than about one tenth of win_shift.
-    :param percentile: (float). percentile parameter for the eventdetection.percentile_threshold() function used to estimate thresholds for detecting peaks in the data.
-    :param th_factor: (float). th_factor parameter for the eventdetection.percentile_threshold() function used to estimate thresholds for detecting peaks in the data.
-    :param min_clip: (float). Minimum amplitude below which data are clipped.
-    :param max_clip: (float). Maximum amplitude above which data are clipped.
-    :param w_cv_interv: (float). Weight for the coefficient of variation of the intervals.
-    :param w_ampl: (float). Weight for the mean peak-to-trough amplitude.
-    :param w_cv_ampl: (float). Weight for the coefficient of variation of the amplitudes.
-    :param tolerance: (float). Added to the minimum cost for selecting the region of best windows.
-    :param plot_data_func: Function for plotting the raw data, detected peaks and troughs, the criteria,
-    the cost function and the selected best window.
-    plot_best_window() is a simple function that can be passed as plot_data_func.
+    The data used by best window algorithm can be visualized by supplying the
+    function `plot_data_func`.  Additional arguments for this function can
+    be supplied via key-word arguments `kwargs`.
+
+    Parameters
+    ----------
+    data: 1-D array
+        The data to be analyzed.
+    samplerate: float
+        Sampling rate of the data in Hertz.
+    single: boolean
+        If `true` return only the single window with the smallest cost.
+        If `false` return the largest window with the cost below the minimum cost
+        plus tolerance.
+    win_size: float
+        Minimum size of the desired best window in seconds.
+        Choose it large enough for the subsequent analysis.
+    win_shift: float
+        Time shift in seconds between windows. Should be smaller or equal to `win_size`.
+    percentile: float
+        `percentile` parameter for the `eventdetection.percentile_threshold()` function
+        used to estimate thresholds for detecting peaks in the data.
+    th_factor: float
+        `th_factor` parameter for the eventdetection.percentile_threshold() function
+        used to estimate thresholds for detecting peaks in the data.
+    min_clip: float
+        Minimum amplitude below which data are clipped.
+    max_clip: float
+        Maximum amplitude above which data are clipped.
+    w_cv_interv: float
+        Weight for the coefficient of variation of the intervals between detected
+        peaks and throughs.
+    w_ampl: float
+        Weight for the mean peak-to-trough amplitude.
+    w_cv_ampl: float
+        Weight for the coefficient of variation of the amplitudes.
+    tolerance: float
+        Added to the minimum cost for expanding the region of the best window.
+    plot_data_func: function
+        Function for plotting the raw data, detected peaks and troughs, the criteria,
+        the cost function and the selected best window.
+        `plot_best_window()` is a simple function that can be passed as the `plot_data_func`
+        parameter to quickly visualize what is going on in selecting the best window.
+        Signature:
+        ````
         plot_data_func(data, rate, peak_idx, trough_idx, idx0, idx1,
                        win_start_times, cv_interv, mean_ampl, cv_ampl, clipped_frac, cost,
                        thresh, valid_wins, **kwargs)
-        :param data (array): the raw data.
-        :param rate (float): the sampling rate of the data.
-        :param peak_idx (array): indices into raw data indicating detected peaks.
-        :param trough_idx (array): indices into raw data indicating detected troughs.
-        :param idx0 (int): index of the start of the best window.
-        :param idx1 (int): index of the end of the best window.
-        :param win_start_times (array): the times of the analysis windows.
-        :param cv_interv (array): the coefficient of variation of the inter-peak and -trough intervals.
-        :param mean_ampl (array): the mean peak-to-trough amplitude.
-        :param cv_ampl (array): the coefficient of variation of the peak-to-trough amplitudes.
-        :param clipped_frac (array): the fraction of clipped peaks or troughs.
-        :param cost (array): the cost function.
-        :param thresh (float): the threshold for the cost function.
-        :param valid_wins (array): boolean array indicating the windows which fulfill all three criteria.
-        :param **kwargs: further user supplied key-word arguments.
-    :param kwargs: Keyword arguments passed to plot_data_func and plot_window_func. 
+        ```
+        - `data` (array): the raw data.
+        - `rate` (float): the sampling rate of the data.
+        - `peak_idx` (array): indices into raw data indicating detected peaks.
+        - `trough_idx` (array): indices into raw data indicating detected troughs.
+        - `idx0` (int): index of the start of the best window.
+        - `idx1` (int): index of the end of the best window.
+        - `win_start_times` (array): the times of the analysis windows.
+        - `cv_interv` (array): the coefficients of variation of the inter-peak and -trough
+           intervals.
+        - `mean_ampl` (array): the mean peak-to-trough amplitudes.
+        - `cv_ampl` (array): the coefficients of variation of the peak-to-trough amplitudes.
+        - `clipped_frac` (array): the fraction of clipped peaks or troughs.
+        - `cost` (array): the cost function.
+        - `thresh` (float): the threshold for the cost function.
+        - `valid_wins` (array): boolean array indicating the windows which fulfill
+          all three criteria.
+        - `**kwargs` (dict): further user supplied key-word arguments.
+    kwargs: dict
+        Keyword arguments passed to `plot_data_func`. 
     
-    :return start_index: int. Index of the start of the best window.
-    :return end_index: int. Index of the end of the best window.
-    :return clipped: float. The fraction of clipped peaks or troughs.
+    Returns
+    -------
+    start_index: int
+        Index of the start of the best window.
+    end_index: int
+        Index of the end of the best window.
+    clipped: float.
+        The fraction of clipped peaks or troughs.
     """
 
     # too little data:
@@ -331,12 +396,18 @@ def best_window_times(data, samplerate, single=True, win_size=1., win_shift=0.1,
                       th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf,
                       w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5,
                       plot_data_func=None, **kwargs):
-    """Finds the window within data with the best data. See best_window_indices() for details.
-
-    Returns:
-      start_time (float): Time of the start of the best window.
-      end_time (float): Time of the end of the best window.
-      clipped (float): The fraction of clipped peaks or troughs.
+    """Find the window within data most suitable for subsequent analysis.
+
+    See `best_window_indices()` for details.
+
+    Returns
+    -------
+    start_time: float
+        Time of the start of the best window.
+    end_time: float
+        Time of the end of the best window.
+    clipped: float
+        The fraction of clipped peaks or troughs.
     """
     start_inx, end_inx, clipped = best_window_indices(data, samplerate, single,
                                                       win_size, win_shift,
@@ -351,11 +422,16 @@ def best_window(data, samplerate, single=True, win_size=1., win_shift=0.1,
                 th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf,
                 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5,
                 plot_data_func=None, **kwargs):
-    """Finds the window within data with the best data. See best_window_indices() for details.
+    """Find the window within data most suitable for subsequent analysis.
+
+    See `best_window_indices()` for details.
 
-    Returns:
-      data (array): the data of the best window.
-      clipped (float): The fraction of clipped peaks or troughs.
+    Returns
+    -------
+    data: array
+        The data of the best window.
+    clipped: float
+        The fraction of clipped peaks or troughs.
     """
     start_inx, end_inx, clipped = best_window_indices(data, samplerate, single,
                                                       win_size, win_shift,
@@ -369,7 +445,9 @@ def best_window(data, samplerate, single=True, win_size=1., win_shift=0.1,
 def plot_best_window(data, rate, peak_idx, trough_idx, idx0, idx1,
                      win_times, cv_interv, mean_ampl, cv_ampl, clipped_frac,
                      cost, thresh, win_idx0, win_idx1, ax):
-    """Visualize the cost function of best_window_indices().
+    """Visualize the cost function of used for finding the best window for analysis.
+
+    Pass this function as the `plot_data_func` to the `best_window_*` functions.
     """
     # raw data:
     time = np.arange(0.0, len(data)) / rate
@@ -427,9 +505,12 @@ def add_best_window_config(cfg, single=True, win_size=1., win_shift=0.1,
     """ Add parameter needed for the best_window() functions as
     a new section to a configuration dictionary.
 
-    Args:
-      cfg (ConfigFile): the configuration
-      See best_window_indices() for details on the remaining arguments.
+    Parameters
+    ----------
+    cfg: ConfigFile
+        The configuration.
+        
+    See `best_window_indices()` for details on the remaining arguments.
     """
 
     cfg.add_section('Best window detection:')
@@ -454,14 +535,19 @@ def add_best_window_config(cfg, single=True, win_size=1., win_shift=0.1,
 
 def best_window_args(cfg):
     """ Translates a configuration to the
-    respective parameter names of the functions best_window*().
+    respective parameter names of the functions `best_window*()`.
     The return value can then be passed as key-word arguments to these functions.
 
-    Args:
-      cfg (ConfigFile): the configuration
+    Parameters
+    ----------
+    cfg: ConfigFile
+        The configuration.
 
-    Returns:
-      a (dict): dictionary with names of arguments of the best_window*() functions and their values as supplied by cfg.
+    Returns
+    -------
+    a: dict
+        Dictionary with names of arguments of the `best_window*()` functions
+        and their values as supplied by `cfg`.
     """
     return cfg.map({'win_size': 'bestWindowSize',
                     'win_shift': 'bestWindowShift',

From 7356d16462b0a22c95dd11f255a1b7933c382d9c Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Fri, 2 Nov 2018 21:38:59 +0100
Subject: [PATCH 06/15] [eodanalysis] started analyze_pulse()

---
 thunderfish/eodanalysis.py | 124 +++++++++++++++++++++++++++++++++----
 thunderfish/thunderfish.py |  56 ++++++++++-------
 2 files changed, 146 insertions(+), 34 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 4b8db1ab..399dbd1f 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -12,6 +12,7 @@
 import numpy as np
 from scipy.optimize import curve_fit
 from .eventdetection import percentile_threshold, detect_peaks, snippets
+from .powerspectrum import decibel
 
 
 def eod_waveform(data, samplerate, th_factor=0.6, percentile=0.1,
@@ -116,7 +117,7 @@ def sinewaves(t, freq, delay, ampl, *ap):
     return ampl*x
 
 
-def analyze_wave(eod, freq, props={}, n_harm=6):
+def analyze_wave(eod, freq, n_harm=6):
     """
     Analyze the EOD waveform of a wave-type fish.
     
@@ -125,10 +126,9 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
     eod: 2-D array
         The eod waveform. First column is time in seconds, second column the eod waveform.
         Further columns are optional but not used.
-    freq: float
-        The frequency of the EOD.
-    props: dict
-        Dictionary with properties of the analyzed EOD waveform.
+    freq: float or 2-D array
+        The frequency of the EOD or the list of harmonics (rows)
+        with frequency and peak height (columns) as returned from `harmonic_groups()`.
     n_harm: int
         Maximum number of harmonics used for the fit.
     
@@ -139,15 +139,22 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
         Further columns are kept from the input `eod`. And a column is added with the
         fit of the fourier series to the waveform.
     props: dict
-        The `props` dictionary with added properties of the analyzed EOD waveform:
+        A dictionary with properties of the analyzed EOD waveform.
+        - type: set to 'wave'.
+        - EODf: is set to the EOD fundamental frequency.
         - p-p-amplitude: peak-to-peak amplitude of the Fourier fit.
         - amplitude: amplitude factor of the Fourier fit.
         - rmserror: root-mean-square error between Fourier-fit and EOD waveform relative to
           the p-p amplitude. If larger than 0.05 the data are bad.
+        - power: if `freq` is list of harmonics then `power` is set to the summed power
+          of all harmonics and transformed to decibel.
     spec_data: 2-D array of floats
         First column is the index of the harmonics, second column its frequency,
         third column its relative amplitude, and fourth column the phase shift
         relative to the fundamental.
+        If `freq` is a list of harmonics, a fith column is added to `spec_data` that
+        contains the sqaure rooted powers of the harmonics
+        normalized to the one ofthe fundamental.
         Rows are the harmonics, first row is the fundamental frequency with index 0,
         amplitude of one, and phase shift of zero.
         If the amplitude of the first harmonic (spec-data[1,3]) is larger than 2,
@@ -155,6 +162,10 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
         then the EOD waveform has the wrong waveform and
         should not be used for further analysis.
     """
+    freq0 = freq
+    if hasattr(freq, 'shape'):
+        freq0 = freq[0][0]
+        
     # storage:
     meod = np.zeros((eod.shape[0], eod.shape[1]+1))
     meod[:,:-1] = eod
@@ -172,7 +183,7 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
 
     # fit sine wave:
     ampl = 0.5*(np.max(meod[:,1])-np.min(meod[:,1]))
-    params = [freq, -0.25/freq, ampl]
+    params = [freq0, -0.25/freq0, ampl]
     for i in range(1, n_harm):
         params.extend([1.0/(i+1), 0.0])
     popt, pcov = curve_fit(sinewaves, meod[:,0], meod[:,1], params)
@@ -193,17 +204,108 @@ def analyze_wave(eod, freq, props={}, n_harm=6):
     rmserror = np.sqrt(np.mean((meod[:,1] - meod[:,3])**2.0))/ppampl
 
     # store results:
+    props = {}
+    props['type'] = 'wave'
+    props['EODf'] = freq0
     props['p-p-amplitude'] = ppampl
     props['amplitude'] = popt[2]
     props['rmserror'] = rmserror
-    spec_data = np.zeros((n_harm, 4))
-    spec_data[0, :] = [0.0, freq, 1.0, 0.0]
+    ncols = 4
+    if hasattr(freq, 'shape'):
+        spec_data = np.zeros((n_harm, 5))
+        ampls = np.sqrt(freq[:n_harm, 1])
+        ampls /= ampls[0]
+        spec_data[:len(ampls),4] = ampls
+        props['power'] = decibel(np.sum(freq[:,1]))
+    else:
+        spec_data = np.zeros((n_harm, 4))
+    spec_data[0,:4] = [0.0, freq0, 1.0, 0.0]
     for i in range(1, n_harm):
-        spec_data[i, :] = [i, (i+1)*freq, popt[1+i*2], popt[2+i*2]]
+        spec_data[i,:4] = [i, (i+1)*freq0, popt[1+i*2], popt[2+i*2]]
     
     return meod, props, spec_data
 
 
+def analyze_pulse(eod, eod_times, percentile=1):
+    """
+    Analyze the EOD waveform of a pulse-type fish.
+    
+    Parameters
+    ----------
+    eod: 2-D array
+        The eod waveform. First column is time in seconds, second column the eod waveform.
+        Further columns are optional but not used.
+    eod_times: 1-D array
+        List of times of detected EOD peaks.
+    percentile: float
+        Remove extreme values of the inter-pulse intervals when computing interval statistics.
+        All intervals below the `percentile` and above the `100-percentile` percentile
+        are ignored. `percentile` is given in percent.
+    
+    Returns
+    -------
+    meod: 2-D array of floats
+        The eod waveform. First column is time in seconds, second column the eod waveform.
+        Further columns are kept from the input `eod`.
+    props: dict
+        A dictionary with properties of the analyzed EOD waveform.
+        - type: set to 'pulse'.
+        - EODf: the inverse of the mean interval between `eod_times`.
+        - period: the mean interval between `eod_times`.
+        - p-p-amplitude: peak-to-peak amplitude of the EOD waveform.
+        - n: number of pulses analyzed.
+        - medianinterval: the median interval between pulses after removal
+          of extrem interval values.
+        - meaninterval: the mean interval between pulses after removal
+          of extrem interval values.
+        - stdinterval: the standard deviation of the intervals between pulses
+          after removal of extrem interval values.
+    intervals: 1-D array
+        List of inter-EOD intervals with extreme values removed.
+    """
+        
+    # storage:
+    meod = np.zeros(eod.shape)
+    meod[:,:] = eod
+    
+    # subtract mean:
+    meod[:,1] -= np.mean(meod[:,1])
+
+    # flip:
+    #if -np.min(meod[:,1]) > np.max(meod[:,1]):
+    #    meod[:,1] = -meod[:,1]
+    
+    # move peak of waveform to zero:
+    offs = len(meod[:,1])//4
+    meod[:,0] -= meod[offs+np.argmax(meod[offs:3*offs,1]),0]
+
+    # amplitude:
+    ppampl = np.max(meod[:,1]) - np.min(meod[:,1])
+
+    # analyze pulse timing:
+    inter_pulse_intervals = np.diff(eod_times)
+    period = np.mean(inter_pulse_intervals)
+    
+    # store properties:
+    props = {}
+    props['type'] = 'pulse'
+    props['EODf'] = 1.0/period
+    props['period'] = period
+    props['p-p-amplitude'] = ppampl
+    props['n'] = len(eod_times)
+
+    # analyze central intervals:    
+    lower, upper = np.percentile(inter_pulse_intervals, [percentile, 100.0-percentile])
+    intervals = inter_pulse_intervals[(inter_pulse_intervals > lower) &
+                                      (inter_pulse_intervals < upper)]
+    if len(intervals) > 2:
+        props['medianinterval'] = np.median(intervals)
+        props['meaninterval'] = np.mean(intervals)
+        props['stdinterval'] = np.std(intervals, ddof=1)
+    
+    return meod, props, intervals
+
+
 def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, ax, unit='a.u.', **kwargs):
     """Plot mean eod and its standard deviation.
 
@@ -236,7 +338,7 @@ def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, ax, unit='a.u.', **kwarg
     else:
         fkwargs['color'] = 'steelblue'
     ax.autoscale(True)
-    if len(fit_eod)>0:
+    if fit_eod is not None and len(fit_eod)>0:
         ax.plot(1000.0*time, fit_eod, zorder=2, **fkwargs)
     ckwargs = dict(kwargs)
     if not 'lw' in ckwargs:
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index 65959fcc..6fc85850 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -10,6 +10,7 @@
 import os
 import argparse
 import numpy as np
+import matplotlib.patches as mpatches
 import matplotlib.pyplot as plt
 from .version import __version__
 from .configfile import ConfigFile
@@ -18,10 +19,10 @@
 from .bestwindow import clip_amplitudes, best_window_indices
 from .harmonicgroups import add_psd_peak_detection_config, add_harmonic_groups_config
 from .checkpulse import check_pulse_width, add_check_pulse_width_config, check_pulse_width_args
-from .powerspectrum import decibel, plot_decibel_psd, multi_resolution_psd
+from .powerspectrum import plot_decibel_psd, multi_resolution_psd
 from .harmonicgroups import harmonic_groups, harmonic_groups_args, psd_peak_detection_args, fundamental_freqs, fundamental_freqs_and_power, colors_markers, plot_harmonic_groups
 from .consistentfishes import consistent_fishes
-from .eodanalysis import eod_waveform, analyze_wave, eod_waveform_plot
+from .eodanalysis import eod_waveform, analyze_wave, analyze_pulse, eod_waveform_plot
 from .csvmaker import write_csv
 
 
@@ -133,9 +134,18 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
     ##########
 
     # plot mean EOD
+    usedax4 = False
+    usedax5 = False
     eodaxes = [ax4, ax5]
     for axeod, mean_eod, props in zip(eodaxes[:2], mean_eods[:2], eod_props[0:2]):
-        eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], mean_eod[:,3],
+        if axeod is ax4:
+            usedax4 = True
+        if axeod is ax5:
+            usedax5 = True
+        fit_eod = []
+        if mean_eod.shape[1] > 3:
+            fit_eod = mean_eod[:,3]
+        eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], fit_eod,
                           axeod, unit=unit)
         axeod.set_title('Average EOD of %.1f Hz %sfish (n=%d EODs)'
                         % (props['EODf'], props['type'], props['n']), fontsize=14, y=1.05)
@@ -149,17 +159,20 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
 
     # plot inter EOD interval histogram
     if len(inter_eod_intervals)>2:
+        usedax5 = True
         tmp_period = 1000. * np.mean(inter_eod_intervals)
         tmp_period = tmp_period - tmp_period % 0.05
         inter_eod_intervals *= 1000.  # transform sec in msec
         median_IPI = 1000. * eod_props[0]['medianinterval']
+        mean_IPI = 1000. * eod_props[0]['meaninterval']
+        std_IPI = 1000. * eod_props[0]['stdinterval']
         n, edges = np.histogram(inter_eod_intervals, bins=np.arange(tmp_period - 5., tmp_period + 5., 0.05))
 
         ax5.bar(edges[:-1], n, edges[1] - edges[0] - 0.001)
-        ax5.plot([median_IPI, median_IPI], [0, np.max(n)], '--', color='red', lw=2, label='median %.2f ms' % median_IPI)
+        median_line, = ax5.plot([median_IPI, median_IPI], [0, np.max(n)], '--', color='red', lw=2, label='median %.2f ms' % median_IPI)
         ax5.set_xlabel('inter EOD interval [ms]')
         ax5.set_ylabel('n')
-        ax5.set_title('Inter EOD interval histogram', fontsize=14, y=1.05)
+        ax5.set_title('Inter-EOD-interval histogram', fontsize=14, y=1.05)
 
         max_IPI = np.ceil(np.max(inter_eod_intervals)+0.5)
         if max_IPI/median_IPI < 1.2:
@@ -168,7 +181,11 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
         if min_IPI/median_IPI > 0.8:
             min_IPI = np.floor(0.8*median_IPI)
         ax5.set_xlim(min_IPI, max_IPI)
-        ax5.legend(loc='upper right', frameon=False)
+        nopatch = mpatches.Patch(color='white', alpha=0.0)
+        ax5.legend([median_line, nopatch],
+                   ['median %.2f ms' % median_IPI,
+                    '(%.2f +/- %.2f ms)' % (mean_IPI, std_IPI)],
+                    loc='upper right', frameon=False)
 
     # cosmetics
     for ax in [ax2, ax3, ax4, ax5]:
@@ -176,6 +193,10 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
         ax.spines['right'].set_visible(False)
         ax.get_xaxis().tick_bottom()
         ax.get_yaxis().tick_left()
+    if not usedax4:
+        ax4.set_visible(False)
+    if not usedax5:
+        ax5.set_visible(False)
 
     # save figure as pdf
     if save_plot:
@@ -344,20 +365,10 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          percentile=cfg.value('pulseWidthPercentile'),
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          start=-mean_eod_window, stop=mean_eod_window)
+        mean_eod, props, inter_eod_intervals = analyze_pulse(mean_eod, eod_times)
         mean_eods.append(mean_eod)
         spec_data.append([])
-        eod_props.append({'type': 'pulse',
-                          'n': len(eod_times),
-                          'EODf': 1.0/pulse_period,
-                          'period': pulse_period})
-        # analyze inter-pulse intervals:
-        inter_pulse_intervals = np.diff(eod_times)  # in sec
-        lower_perc, upper_perc = np.percentile(inter_pulse_intervals, [1, 100 - 1])
-        inter_eod_intervals = inter_pulse_intervals[(inter_pulse_intervals > lower_perc) &
-                                                   (inter_pulse_intervals < upper_perc)]
-        if len(inter_eod_intervals) > 2:
-            eod_props[-1]['medianinterval'] = np.median(inter_eod_intervals)
-            eod_props[-1]['stdinterval'] = np.std(inter_eod_intervals, ddof=1)
+        eod_props.append(props)
 
     # analyse EOD waveform of all wavefish:
     powers = np.array([np.sum(fish[:cfg.value('powerNHarmonics'), 1])
@@ -369,11 +380,8 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          percentile=cfg.value('pulseWidthPercentile'),
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          period=1.0/fish[0,0])
-        props = {'type': 'wave',
-                 'n': len(eod_times),
-                 'EODf': fish[0,0],
-                 'power': decibel(np.sum(fish[:,1]))}
-        mean_eod, props, sdata = analyze_wave(mean_eod, fish[0,0], props)
+        mean_eod, props, sdata = analyze_wave(mean_eod, fish)
+        props['n'] = len(eod_times)
         # add good waveforms only:
         if (k > 0 or clipped < cfg.value('maximumClippedFraction')) and \
             sdata[1,2] < cfg.value('maximumFirstHarmonicAmplitude') and \
@@ -404,6 +412,8 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
             write_csv(os.path.join(output_folder, outfilename + '-waveform-%d.csv' % i),
                       header, mean_eod)
             header = ['harmonics', 'frequency (Hz)', 'amplitude', 'phase']
+            if sdata.shape[1] > 4:
+                header.append('dataampl')
             write_csv(os.path.join(output_folder, outfilename + '-spectrum-%d.csv' % i),
                       header, sdata)
 

From 2713368f7a7a8a7e35f8e410812d871e53b5db92 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Fri, 2 Nov 2018 23:11:13 +0100
Subject: [PATCH 07/15] [eodanalysis] finished analyze_pulse()

---
 thunderfish/eodanalysis.py | 80 +++++++++++++++++++++++++++++++-------
 thunderfish/thunderfish.py | 37 ++++++++++++------
 2 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 399dbd1f..9cc86b11 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -226,7 +226,7 @@ def analyze_wave(eod, freq, n_harm=6):
     return meod, props, spec_data
 
 
-def analyze_pulse(eod, eod_times, percentile=1):
+def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
     """
     Analyze the EOD waveform of a pulse-type fish.
     
@@ -237,6 +237,8 @@ def analyze_pulse(eod, eod_times, percentile=1):
         Further columns are optional but not used.
     eod_times: 1-D array
         List of times of detected EOD peaks.
+    thresh_fac: float
+        Set the threshold for peak detection to the maximum pulse amplitude times this factor.
     percentile: float
         Remove extreme values of the inter-pulse intervals when computing interval statistics.
         All intervals below the `percentile` and above the `100-percentile` percentile
@@ -260,6 +262,11 @@ def analyze_pulse(eod, eod_times, percentile=1):
           of extrem interval values.
         - stdinterval: the standard deviation of the intervals between pulses
           after removal of extrem interval values.
+    peaks: 2-D array
+        For each peak and trough of the EOD waveform (first index) the
+        peak index (1 is P1, i.e. the largest positive peak),
+        time relative to largest positive peak, amplitude,
+        and amplitude normalized to largest postive peak.
     intervals: 1-D array
         List of inter-EOD intervals with extreme values removed.
     """
@@ -268,19 +275,49 @@ def analyze_pulse(eod, eod_times, percentile=1):
     meod = np.zeros(eod.shape)
     meod[:,:] = eod
     
-    # subtract mean:
-    meod[:,1] -= np.mean(meod[:,1])
-
-    # flip:
-    #if -np.min(meod[:,1]) > np.max(meod[:,1]):
-    #    meod[:,1] = -meod[:,1]
-    
+    # subtract mean at the ends of the snippet:
+    n = len(meod)//20
+    meod[:,1] -= 0.5*(np.mean(meod[:n,1]) + np.mean(meod[-n:,1]))
+
+    # largest positive and negative peak:
+    max_idx = np.argmax(meod[:,1])
+    max_ampl = np.abs(meod[max_idx,1])
+    min_idx = np.argmin(meod[:,1])
+    min_ampl = np.abs(meod[min_idx,1])
+    amplitude = np.max((max_ampl, min_ampl))
+    if max_ampl > 0.2*amplitude and min_ampl > 0.2*amplitude:
+        # two major peaks:
+        if min_idx < max_idx:
+            # flip:
+            meod[:,1] = -meod[:,1]
+            peak_idx = min_idx
+            min_idx = max_idx
+            max_idx = peak_idx
+    elif min_ampl > 0.2*amplitude:
+        # flip:
+        meod[:,1] = -meod[:,1]
+        peak_idx = min_idx
+        min_idx = max_idx
+        max_idx = peak_idx
+    max_ampl = np.abs(meod[max_idx,1])
+    min_ampl = np.abs(meod[min_idx,1])
+                
     # move peak of waveform to zero:
-    offs = len(meod[:,1])//4
-    meod[:,0] -= meod[offs+np.argmax(meod[offs:3*offs,1]),0]
+    meod[:,0] -= meod[max_idx,0]
 
     # amplitude:
-    ppampl = np.max(meod[:,1]) - np.min(meod[:,1])
+    ppampl = max_ampl + min_ampl
+
+    # find smaller peaks:
+    peak_idx, trough_idx = detect_peaks(meod[:,1], max_ampl*thresh_fac)
+    peak_list = np.sort(np.concatenate((peak_idx, trough_idx)))
+    p1i = np.nonzero(peak_list == max_idx)[0][0]
+    offs = 0 if p1i <= 2 else p1i - 2
+
+    # store:
+    peaks = np.zeros((len(peak_list)-offs,4))
+    for i, pi in enumerate(peak_list[offs:]):
+        peaks[i,:] = [i+1-p1i+offs, meod[pi,0], meod[pi,1], meod[pi,1]/max_ampl]
 
     # analyze pulse timing:
     inter_pulse_intervals = np.diff(eod_times)
@@ -303,10 +340,10 @@ def analyze_pulse(eod, eod_times, percentile=1):
         props['meaninterval'] = np.mean(intervals)
         props['stdinterval'] = np.std(intervals, ddof=1)
     
-    return meod, props, intervals
+    return meod, props, peaks, intervals
 
 
-def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, ax, unit='a.u.', **kwargs):
+def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, peaks, ax, unit='a.u.', **kwargs):
     """Plot mean eod and its standard deviation.
 
     Parameters
@@ -317,8 +354,10 @@ def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, ax, unit='a.u.', **kwarg
         Mean EOD waveform.
     std_eod: 1-D array
         Standard deviation of EOD waveform.
-    fit_eod: 1-D array
+    fit_eod: 1-D array or None
         Fit of the EOD waveform.
+    peaks: 2_D arrays or None
+        List of peak properties (index, time, and amplitude) of a EOD pulse.
     ax:
         Axis for plot
     unit: string
@@ -349,6 +388,19 @@ def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, ax, unit='a.u.', **kwarg
     ax.autoscale(False)
     ax.fill_between(1000.0*time, mean_eod + std_eod, mean_eod - std_eod,
                     color='grey', alpha=0.3, zorder=1)
+    if peaks is not None and len(peaks)>0:
+        maxa = np.max(peaks[:,2])
+        for p in peaks:
+            ax.scatter(1000.0*p[1], p[2], s=80,
+                       c=ckwargs['color'], edgecolors=ckwargs['color'])
+            label = 'P%d' % p[0]
+            if p[0] != 1:
+                label += '(%.0f%% @ %.0fus)' % (100.0*p[3], 1.0e6*p[1])
+            va = 'bottom' if p[2] > 0.0 else 'top'
+            if p[1] >= 0.0:
+                ax.text(1000.0*p[1]+0.1, p[2]+np.sign(p[2])*0.05*maxa, label, ha='left', va=va)
+            else:
+                ax.text(1000.0*p[1]-0.1, p[2]+np.sign(p[2])*0.05*maxa, label, ha='right', va=va)
     ax.set_xlabel('Time [msec]')
     ax.set_ylabel('Amplitude [%s]' % unit)
 
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index 6fc85850..77716dd1 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -27,7 +27,7 @@
 
 
 def output_plot(base_name, pulse_fish, inter_eod_intervals,
-                raw_data, samplerate, idx0, idx1, fishlist, mean_eods, eod_props,
+                raw_data, samplerate, idx0, idx1, fishlist, mean_eods, eod_props, peak_data,
                 unit, psd_data, power_n_harmonics, label_power, max_freq=3000.0,
                 output_folder='', save_plot=False, show_plot=False):
     """
@@ -60,6 +60,8 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
         Mean trace for the mean EOD plot.
     eod_props: list of dict
         Properties for each waveform in mean_eods.
+    peak_data: list of 2_D arrays
+        For each pulsefish a list of peak properties (index, time, and amplitude).
     unit: string
         Unit of the trace and the mean EOD.
     psd_data: array
@@ -137,7 +139,7 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
     usedax4 = False
     usedax5 = False
     eodaxes = [ax4, ax5]
-    for axeod, mean_eod, props in zip(eodaxes[:2], mean_eods[:2], eod_props[0:2]):
+    for axeod, mean_eod, props, peaks in zip(eodaxes[:2], mean_eods[:2], eod_props[0:2], peak_data[0:2]):
         if axeod is ax4:
             usedax4 = True
         if axeod is ax5:
@@ -146,7 +148,7 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
         if mean_eod.shape[1] > 3:
             fit_eod = mean_eod[:,3]
         eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], fit_eod,
-                          axeod, unit=unit)
+                          peaks, axeod, unit=unit)
         axeod.set_title('Average EOD of %.1f Hz %sfish (n=%d EODs)'
                         % (props['EODf'], props['type'], props['n']), fontsize=14, y=1.05)
         if props['type'] == 'wave':
@@ -357,6 +359,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
     eod_props = []
     mean_eods = []
     spec_data = []
+    peak_data = []
     inter_eod_intervals = []
     if pulse_fish:
         mean_eod_window = 0.002
@@ -365,9 +368,11 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          percentile=cfg.value('pulseWidthPercentile'),
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          start=-mean_eod_window, stop=mean_eod_window)
-        mean_eod, props, inter_eod_intervals = analyze_pulse(mean_eod, eod_times)
+        mean_eod, props, peaks, inter_eod_intervals = analyze_pulse(mean_eod, eod_times)
+        # TODO: add config parameter to analyze_pulse
         mean_eods.append(mean_eod)
         spec_data.append([])
+        peak_data.append(peaks)
         eod_props.append(props)
 
     # analyse EOD waveform of all wavefish:
@@ -390,6 +395,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
             eod_props.append(props)
             mean_eods.append(mean_eod)
             spec_data.append(sdata)
+            peak_data.append([])
             if verbose > 0:
                 print('%d take waveform of %6.1fHz fish: clipped=%4.2f ampl1=%4.2f ampl2=%4.2f ampl3=%4.2f rmserror=%4.3f'
                       % (idx, fish[0,0], clipped, sdata[1,2], sdata[2,2], sdata[3,2], props['rmserror']))
@@ -407,20 +413,27 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
 
     # write eod waveforms:
     if save_csvs and found_bestwindow:
-        for i, (mean_eod, sdata) in enumerate(zip(mean_eods, spec_data)):
-            header = ['time (s)', 'mean', 'std', 'fit']
+        for i, (mean_eod, sdata, pdata) in enumerate(zip(mean_eods, spec_data, peak_data)):
+            header = ['time (s)', 'mean', 'std']
+            if mean_eod.shape[1] > 3:
+                header.append('fit')
             write_csv(os.path.join(output_folder, outfilename + '-waveform-%d.csv' % i),
                       header, mean_eod)
-            header = ['harmonics', 'frequency (Hz)', 'amplitude', 'phase']
-            if sdata.shape[1] > 4:
-                header.append('dataampl')
-            write_csv(os.path.join(output_folder, outfilename + '-spectrum-%d.csv' % i),
-                      header, sdata)
+            if len(sdata)>0:
+                header = ['harmonics', 'frequency (Hz)', 'amplitude', 'phase']
+                if sdata.shape[1] > 4:
+                    header.append('dataampl')
+                write_csv(os.path.join(output_folder, outfilename + '-spectrum-%d.csv' % i),
+                          header, sdata)
+            if len(pdata)>0:
+                header = ['P', 'time (ms)', 'amplitude', 'relaampl']
+                write_csv(os.path.join(output_folder, outfilename + '-peaks-%d.csv' % i),
+                          header, pdata)
 
     if save_plot or not save_csvs:
         output_plot(outfilename, pulse_fish,
                     inter_eod_intervals, raw_data, samplerate, idx0, idx1, fishlist,
-                    mean_eods, eod_props, unit,
+                    mean_eods, eod_props, peak_data, unit,
                     psd_data, cfg.value('powerNHarmonics'), True, 3000.0, output_folder,
                     save_plot=save_plot, show_plot=not save_csvs)
 

From f0abf9129d3a13247b9cd64f43e1dfd345af39ac Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Sat, 3 Nov 2018 18:31:00 +0100
Subject: [PATCH 08/15] [eodanalysis] remove multiple peaks in analyze_pulse()

---
 thunderfish/eodanalysis.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 9cc86b11..782b25c1 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -310,9 +310,13 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
 
     # find smaller peaks:
     peak_idx, trough_idx = detect_peaks(meod[:,1], max_ampl*thresh_fac)
-    peak_list = np.sort(np.concatenate((peak_idx, trough_idx)))
-    p1i = np.nonzero(peak_list == max_idx)[0][0]
-    offs = 0 if p1i <= 2 else p1i - 2
+    peak_l = np.sort(np.concatenate((peak_idx, trough_idx)))
+    # remove mutliple peaks:
+    peak_list = [prange[np.argmax(np.abs(meod[prange,1]))]
+                 for prange in np.split(peak_l, np.where(np.diff(meod[peak_l,1]>0.0)!=0)[0]+1)]
+    # find P1:
+    p1i = np.where(peak_list == max_idx)[0][0]
+    offs = 0 #if p1i <= 2 else p1i - 2
 
     # store:
     peaks = np.zeros((len(peak_list)-offs,4))

From ef796acdbc3f53fd198163382d3a66307ed7e845 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Sat, 3 Nov 2018 23:24:56 +0100
Subject: [PATCH 09/15] [eodanalysis] mark flipped waveforms

---
 thunderfish/eodanalysis.py | 7 +++++++
 thunderfish/thunderfish.py | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 782b25c1..5b7eecb9 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -174,8 +174,10 @@ def analyze_wave(eod, freq, n_harm=6):
     meod[:,1] -= np.mean(meod[:,1])
 
     # flip:
+    flipped = False
     if -np.min(meod[:,1]) > np.max(meod[:,1]):
         meod[:,1] = -meod[:,1]
+        flipped = True
     
     # move peak of waveform to zero:
     offs = len(meod[:,1])//4
@@ -208,6 +210,7 @@ def analyze_wave(eod, freq, n_harm=6):
     props['type'] = 'wave'
     props['EODf'] = freq0
     props['p-p-amplitude'] = ppampl
+    props['flipped'] = flipped
     props['amplitude'] = popt[2]
     props['rmserror'] = rmserror
     ncols = 4
@@ -280,6 +283,7 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
     meod[:,1] -= 0.5*(np.mean(meod[:n,1]) + np.mean(meod[-n:,1]))
 
     # largest positive and negative peak:
+    flipped = False
     max_idx = np.argmax(meod[:,1])
     max_ampl = np.abs(meod[max_idx,1])
     min_idx = np.argmin(meod[:,1])
@@ -293,12 +297,14 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
             peak_idx = min_idx
             min_idx = max_idx
             max_idx = peak_idx
+            flipped = True
     elif min_ampl > 0.2*amplitude:
         # flip:
         meod[:,1] = -meod[:,1]
         peak_idx = min_idx
         min_idx = max_idx
         max_idx = peak_idx
+        flipped = True
     max_ampl = np.abs(meod[max_idx,1])
     min_ampl = np.abs(meod[min_idx,1])
                 
@@ -333,6 +339,7 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
     props['EODf'] = 1.0/period
     props['period'] = period
     props['p-p-amplitude'] = ppampl
+    props['flipped'] = flipped
     props['n'] = len(eod_times)
 
     # analyze central intervals:    
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index 77716dd1..4daa0063 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -149,6 +149,8 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
             fit_eod = mean_eod[:,3]
         eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], fit_eod,
                           peaks, axeod, unit=unit)
+        if props['flipped']:
+            axeod.text(0.03, 0.03, 'flipped', transform = axeod.transAxes, va='bottom')
         axeod.set_title('Average EOD of %.1f Hz %sfish (n=%d EODs)'
                         % (props['EODf'], props['type'], props['n']), fontsize=14, y=1.05)
         if props['type'] == 'wave':

From 72d4d452e04b00bde0f57647b1cd982c348f0fd8 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Sun, 4 Nov 2018 13:01:57 +0100
Subject: [PATCH 10/15] [eodanalysis] pimped up plotting

---
 thunderfish/eodanalysis.py | 83 ++++++++++++++++++++++++++------------
 thunderfish/thunderfish.py | 21 ++++------
 2 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 5b7eecb9..10851b7e 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -143,6 +143,7 @@ def analyze_wave(eod, freq, n_harm=6):
         - type: set to 'wave'.
         - EODf: is set to the EOD fundamental frequency.
         - p-p-amplitude: peak-to-peak amplitude of the Fourier fit.
+        - flipped: True if the waveform was flipped.
         - amplitude: amplitude factor of the Fourier fit.
         - rmserror: root-mean-square error between Fourier-fit and EOD waveform relative to
           the p-p amplitude. If larger than 0.05 the data are bad.
@@ -257,7 +258,10 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
         - type: set to 'pulse'.
         - EODf: the inverse of the mean interval between `eod_times`.
         - period: the mean interval between `eod_times`.
+        - max-amplitude: the amplitude of the largest positive peak (P1).
+        - min-amplitude: the amplitude of the largest negative peak (P1).
         - p-p-amplitude: peak-to-peak amplitude of the EOD waveform.
+        - flipped: True if the waveform was flipped.
         - n: number of pulses analyzed.
         - medianinterval: the median interval between pulses after removal
           of extrem interval values.
@@ -322,7 +326,7 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
                  for prange in np.split(peak_l, np.where(np.diff(meod[peak_l,1]>0.0)!=0)[0]+1)]
     # find P1:
     p1i = np.where(peak_list == max_idx)[0][0]
-    offs = 0 #if p1i <= 2 else p1i - 2
+    offs = 0 if p1i <= 2 else p1i - 2
 
     # store:
     peaks = np.zeros((len(peak_list)-offs,4))
@@ -338,6 +342,8 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
     props['type'] = 'pulse'
     props['EODf'] = 1.0/period
     props['period'] = period
+    props['max-amplitude'] = max_ampl
+    props['min-amplitude'] = min_ampl
     props['p-p-amplitude'] = ppampl
     props['flipped'] = flipped
     props['n'] = len(eod_times)
@@ -354,21 +360,22 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
     return meod, props, peaks, intervals
 
 
-def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, peaks, ax, unit='a.u.', **kwargs):
+def eod_waveform_plot(eod_waveform, peaks, props, ax, unit='a.u.', **kwargs):
     """Plot mean eod and its standard deviation.
 
     Parameters
     ----------
-    time: 1-D array
-        Time of the mean EOD.
-    mean_eod: 1-D array
-        Mean EOD waveform.
-    std_eod: 1-D array
-        Standard deviation of EOD waveform.
-    fit_eod: 1-D array or None
-        Fit of the EOD waveform.
+    eod_waveform: 2-D array
+        EOD waveform. First column is time in seconds,
+        second column the (mean) eod waveform. The optional third column is the
+        standard deviation and the optional fourth column is a fit on the waveform.
     peaks: 2_D arrays or None
         List of peak properties (index, time, and amplitude) of a EOD pulse.
+    props: dict or None
+        The `props` returned by `analyze_wave()` or `analyze_pulse()`. Used to
+        annotate the plot with wave- or pulse type, EODf, ampltitude,
+        mark the waveform as flipped, and to set the range for the x-axis in case
+        of wavefish. 
     ax:
         Axis for plot
     unit: string
@@ -376,6 +383,7 @@ def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, peaks, ax, unit='a.u.',
     kwargs: dict
         Arguments passed on to the plot command for the mean eod.
     """
+    # plot parameter for fit:
     fkwargs = {}
     if 'flw' in kwargs:
         fkwargs['lw'] = kwargs['flw']
@@ -387,23 +395,32 @@ def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, peaks, ax, unit='a.u.',
         del kwargs['fcolor']
     else:
         fkwargs['color'] = 'steelblue'
+    # plot parameter for mean eod waveform:
+    mkwargs = dict(kwargs)
+    if not 'lw' in mkwargs:
+        mkwargs['lw'] = 2
+    if not 'color' in mkwargs:
+        mkwargs['color'] = 'r'
+    # plot fit:
     ax.autoscale(True)
-    if fit_eod is not None and len(fit_eod)>0:
-        ax.plot(1000.0*time, fit_eod, zorder=2, **fkwargs)
-    ckwargs = dict(kwargs)
-    if not 'lw' in ckwargs:
-        ckwargs['lw'] = 2
-    if not 'color' in ckwargs:
-        ckwargs['color'] = 'r'
-    ax.plot(1000.0*time, mean_eod, zorder=3, **ckwargs)
+    time = 1000.0 * eod_waveform[:,0]
+    if eod_waveform.shape[1] > 3:
+        ax.plot(time, eod_waveform[:,3], zorder=2, **fkwargs)
+    # plot waveform:
+    mean_eod = eod_waveform[:,1]
+    ax.plot(time, mean_eod, zorder=3, **mkwargs)
+    # plot standard deviation:
     ax.autoscale(False)
-    ax.fill_between(1000.0*time, mean_eod + std_eod, mean_eod - std_eod,
-                    color='grey', alpha=0.3, zorder=1)
+    if eod_waveform.shape[1] > 2:
+        std_eod = eod_waveform[:,2]
+    ax.fill_between(time, mean_eod + std_eod, mean_eod - std_eod,
+                    color='grey', alpha=0.2, zorder=1)
+    # annotate peaks:
     if peaks is not None and len(peaks)>0:
         maxa = np.max(peaks[:,2])
         for p in peaks:
             ax.scatter(1000.0*p[1], p[2], s=80,
-                       c=ckwargs['color'], edgecolors=ckwargs['color'])
+                       c=mkwargs['color'], edgecolors=mkwargs['color'])
             label = 'P%d' % p[0]
             if p[0] != 1:
                 label += '(%.0f%% @ %.0fus)' % (100.0*p[3], 1.0e6*p[1])
@@ -412,6 +429,22 @@ def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, peaks, ax, unit='a.u.',
                 ax.text(1000.0*p[1]+0.1, p[2]+np.sign(p[2])*0.05*maxa, label, ha='left', va=va)
             else:
                 ax.text(1000.0*p[1]-0.1, p[2]+np.sign(p[2])*0.05*maxa, label, ha='right', va=va)
+    if props is not None:
+        label = ''
+        if 'type' in props:
+            label += '%s-type fish\n' % props['type']
+        if 'EODf' in props:
+            label += 'EODf = %.1f Hz\n' % props['EODf']
+        if 'p-p-amplitude' in props:
+            label += 'p-p amplitude = %.3g %s\n' % (props['p-p-amplitude'], unit)
+        if 'n' in props:
+            label += 'n = %d EODs\n' % props['n']
+        if props['flipped']:
+            label += 'flipped\n'
+        ax.text(0.03, 0.97, label, transform = ax.transAxes, va='top')
+        if 'type' in props and props['type'] == 'wave':
+            lim = 750.0/props['EODf']
+            ax.set_xlim([-lim, +lim])
     ax.set_xlabel('Time [msec]')
     ax.set_ylabel('Amplitude [%s]' % unit)
 
@@ -432,17 +465,17 @@ def eod_waveform_plot(time, mean_eod, std_eod, fit_eod, peaks, ax, unit='a.u.',
     # data:
     if len(sys.argv) <= 1:
         samplerate = 44100.0
-        data = generate_biphasic_pulses(80.0, samplerate, 4.0, noise_std=0.05)
+        data = generate_biphasic_pulses(80.0, samplerate, 5.0, noise_std=0.02)
         unit = 'mV'
     else:
         rawdata, samplerate, unit = load_data(sys.argv[1], 0)
         data, _ = best_window(rawdata, samplerate)
 
     # analyse EOD:
-    mean_eod, std_eod, time, eod_times = eod_waveform(data, samplerate,
-                                                      start=-0.002, stop=0.002)
+    mean_eod, eod_times = eod_waveform(data, samplerate, start=-0.002, stop=0.002)
+    mean_eod, props, peaks, inter_eod_intervals = analyze_pulse(mean_eod, eod_times)
 
     # plot:
     fig, ax = plt.subplots()
-    eod_waveform_plot(time, mean_eod, std_eod, ax, unit=unit)
+    eod_waveform_plot(mean_eod, peaks, props, ax, unit=unit)
     plt.show()
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index 4daa0063..0f5fd606 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -119,7 +119,6 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
     ax2.set_xlim(time[0], time[-1])
     ax2.set_xlabel('Time [sec]')
     ax2.set_ylabel('Amplitude [a.u.]')
-    #ax2.legend(bbox_to_anchor=(1.15, 1), frameon=False)
     ############
 
     # plot psd
@@ -144,19 +143,13 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
             usedax4 = True
         if axeod is ax5:
             usedax5 = True
-        fit_eod = []
-        if mean_eod.shape[1] > 3:
-            fit_eod = mean_eod[:,3]
-        eod_waveform_plot(mean_eod[:,0], mean_eod[:,1], mean_eod[:,2], fit_eod,
-                          peaks, axeod, unit=unit)
-        if props['flipped']:
-            axeod.text(0.03, 0.03, 'flipped', transform = axeod.transAxes, va='bottom')
-        axeod.set_title('Average EOD of %.1f Hz %sfish (n=%d EODs)'
-                        % (props['EODf'], props['type'], props['n']), fontsize=14, y=1.05)
-        if props['type'] == 'wave':
-            lim = 750.0/props['EODf']
-            axeod.set_xlim([-lim, +lim])
-        else:
+        pulse = True if props['type'] == 'pulse' else False
+        axeod.set_title('Average EOD of %.1f Hz %sfish'
+                        % (props['EODf'], props['type']), fontsize=14, y=1.05)
+        del props['type']
+        del props['EODf']
+        eod_waveform_plot(mean_eod, peaks, props, axeod, unit=unit)
+        if pulse:
             break
 
     ################

From c8dc5d56c4255b336af16b4c74a3138fcd900faf Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Sun, 4 Nov 2018 15:42:30 +0100
Subject: [PATCH 11/15] [eodanalysis] better plotting interface

---
 thunderfish/eodanalysis.py | 75 +++++++++++++-------------------------
 thunderfish/thunderfish.py | 21 +++++++----
 2 files changed, 40 insertions(+), 56 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 10851b7e..f01aab01 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -360,7 +360,10 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
     return meod, props, peaks, intervals
 
 
-def eod_waveform_plot(eod_waveform, peaks, props, ax, unit='a.u.', **kwargs):
+def eod_waveform_plot(eod_waveform, peaks, ax, unit=None,
+                      mkwargs={'lw': 2, 'color': 'red'},
+                      skwargs={'alpha': 0.2, 'color': 'grey'},
+                      fkwargs={'lw': 6, 'color': 'steelblue'}):
     """Plot mean eod and its standard deviation.
 
     Parameters
@@ -370,51 +373,33 @@ def eod_waveform_plot(eod_waveform, peaks, props, ax, unit='a.u.', **kwargs):
         second column the (mean) eod waveform. The optional third column is the
         standard deviation and the optional fourth column is a fit on the waveform.
     peaks: 2_D arrays or None
-        List of peak properties (index, time, and amplitude) of a EOD pulse.
-    props: dict or None
-        The `props` returned by `analyze_wave()` or `analyze_pulse()`. Used to
-        annotate the plot with wave- or pulse type, EODf, ampltitude,
-        mark the waveform as flipped, and to set the range for the x-axis in case
-        of wavefish. 
+        List of peak properties (index, time, and amplitude) of a EOD pulse
+        as returned by `analyze_pulse()`.
     ax:
         Axis for plot
     unit: string
-        Unit of the data.
-    kwargs: dict
+        Optional unit of the data used for y-label.
+    mkwargs: dict
         Arguments passed on to the plot command for the mean eod.
+    skwargs: dict
+        Arguments passed on to the fill_between command for the standard deviation of the eod.
+    fkwargs: dict
+        Arguments passed on to the plot command for the fitted eod.
     """
-    # plot parameter for fit:
-    fkwargs = {}
-    if 'flw' in kwargs:
-        fkwargs['lw'] = kwargs['flw']
-        del kwargs['flw']
-    else:
-        fkwargs['lw'] = 6
-    if 'fcolor' in kwargs:
-        fkwargs['color'] = kwargs['fcolor']
-        del kwargs['fcolor']
-    else:
-        fkwargs['color'] = 'steelblue'
-    # plot parameter for mean eod waveform:
-    mkwargs = dict(kwargs)
-    if not 'lw' in mkwargs:
-        mkwargs['lw'] = 2
-    if not 'color' in mkwargs:
-        mkwargs['color'] = 'r'
-    # plot fit:
     ax.autoscale(True)
     time = 1000.0 * eod_waveform[:,0]
+    # plot fit:
     if eod_waveform.shape[1] > 3:
         ax.plot(time, eod_waveform[:,3], zorder=2, **fkwargs)
     # plot waveform:
     mean_eod = eod_waveform[:,1]
     ax.plot(time, mean_eod, zorder=3, **mkwargs)
     # plot standard deviation:
-    ax.autoscale(False)
     if eod_waveform.shape[1] > 2:
+        ax.autoscale(False)
         std_eod = eod_waveform[:,2]
-    ax.fill_between(time, mean_eod + std_eod, mean_eod - std_eod,
-                    color='grey', alpha=0.2, zorder=1)
+        ax.fill_between(time, mean_eod + std_eod, mean_eod - std_eod,
+                        zorder=1, **skwargs)
     # annotate peaks:
     if peaks is not None and len(peaks)>0:
         maxa = np.max(peaks[:,2])
@@ -429,24 +414,11 @@ def eod_waveform_plot(eod_waveform, peaks, props, ax, unit='a.u.', **kwargs):
                 ax.text(1000.0*p[1]+0.1, p[2]+np.sign(p[2])*0.05*maxa, label, ha='left', va=va)
             else:
                 ax.text(1000.0*p[1]-0.1, p[2]+np.sign(p[2])*0.05*maxa, label, ha='right', va=va)
-    if props is not None:
-        label = ''
-        if 'type' in props:
-            label += '%s-type fish\n' % props['type']
-        if 'EODf' in props:
-            label += 'EODf = %.1f Hz\n' % props['EODf']
-        if 'p-p-amplitude' in props:
-            label += 'p-p amplitude = %.3g %s\n' % (props['p-p-amplitude'], unit)
-        if 'n' in props:
-            label += 'n = %d EODs\n' % props['n']
-        if props['flipped']:
-            label += 'flipped\n'
-        ax.text(0.03, 0.97, label, transform = ax.transAxes, va='top')
-        if 'type' in props and props['type'] == 'wave':
-            lim = 750.0/props['EODf']
-            ax.set_xlim([-lim, +lim])
     ax.set_xlabel('Time [msec]')
-    ax.set_ylabel('Amplitude [%s]' % unit)
+    if unit is not None and len(unit)>0:
+        ax.set_ylabel('Amplitude [%s]' % unit)
+    else:
+        ax.set_ylabel('Amplitude')
 
 
 if __name__ == '__main__':
@@ -477,5 +449,10 @@ def eod_waveform_plot(eod_waveform, peaks, props, ax, unit='a.u.', **kwargs):
 
     # plot:
     fig, ax = plt.subplots()
-    eod_waveform_plot(mean_eod, peaks, props, ax, unit=unit)
+    eod_waveform_plot(mean_eod, peaks, ax, unit=unit)
+    props['unit'] = unit
+    label = '{type}-type fish\nEODf = {EODf:.1f} Hz\np-p amplitude = {p-p-amplitude:.3g} {unit}\nn = {n} EODs\n'.format(**props)
+    if props['flipped']:
+        label += 'flipped\n'
+    plt.text(0.03, 0.97, label, transform = ax.transAxes, va='top')
     plt.show()
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index 0f5fd606..d990dec9 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -143,13 +143,20 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
             usedax4 = True
         if axeod is ax5:
             usedax5 = True
-        pulse = True if props['type'] == 'pulse' else False
-        axeod.set_title('Average EOD of %.1f Hz %sfish'
-                        % (props['EODf'], props['type']), fontsize=14, y=1.05)
-        del props['type']
-        del props['EODf']
-        eod_waveform_plot(mean_eod, peaks, props, axeod, unit=unit)
-        if pulse:
+        axeod.set_title('Average EOD of {EODf:.1f} Hz {type}-type fish'.format(**props),
+                        fontsize=14, y=1.05)
+        if unit == 'a.u.':
+            unit = ''
+        eod_waveform_plot(mean_eod, peaks, axeod, unit)
+        props['unit'] = unit
+        label = 'p-p amplitude = {p-p-amplitude:.3g} {unit}\nn = {n} EODs\n'.format(**props)
+        if props['flipped']:
+            label += 'flipped\n'
+        axeod.text(0.03, 0.97, label, transform = axeod.transAxes, va='top')
+        if props['type'] == 'wave':
+            lim = 750.0/props['EODf']
+            axeod.set_xlim([-lim, +lim])
+        else:
             break
 
     ################

From a2985120b2a2205b90e4d731c713b3648b7490d4 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Sun, 4 Nov 2018 19:05:51 +0100
Subject: [PATCH 12/15] [bestwindow] default values of percentile again

---
 thunderfish/bestwindow.py     | 12 ++++++------
 thunderfish/eodanalysis.py    |  4 ++--
 thunderfish/eventdetection.py |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/thunderfish/bestwindow.py b/thunderfish/bestwindow.py
index 62241bbf..3dd6398b 100644
--- a/thunderfish/bestwindow.py
+++ b/thunderfish/bestwindow.py
@@ -180,7 +180,7 @@ def clip_args(cfg, rate):
 
 
 def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0.1,
-                        th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf,
+                        th_factor=0.8, percentile=1.0, min_clip=-np.inf, max_clip=np.inf,
                         w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5,
                         plot_data_func=None, **kwargs):
     """Find the window within data most suitable for subsequent analysis.
@@ -393,7 +393,7 @@ def best_window_indices(data, samplerate, single=True, win_size=1., win_shift=0.
 
 
 def best_window_times(data, samplerate, single=True, win_size=1., win_shift=0.1,
-                      th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf,
+                      th_factor=0.8, percentile=1.0, min_clip=-np.inf, max_clip=np.inf,
                       w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5,
                       plot_data_func=None, **kwargs):
     """Find the window within data most suitable for subsequent analysis.
@@ -419,7 +419,7 @@ def best_window_times(data, samplerate, single=True, win_size=1., win_shift=0.1,
 
 
 def best_window(data, samplerate, single=True, win_size=1., win_shift=0.1,
-                th_factor=0.8, percentile=10.0, min_clip=-np.inf, max_clip=np.inf,
+                th_factor=0.8, percentile=1.0, min_clip=-np.inf, max_clip=np.inf,
                 w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0, tolerance=0.5,
                 plot_data_func=None, **kwargs):
     """Find the window within data most suitable for subsequent analysis.
@@ -498,7 +498,7 @@ def plot_best_window(data, rate, peak_idx, trough_idx, idx0, idx1,
 
     
 def add_best_window_config(cfg, single=True, win_size=1., win_shift=0.1,
-                           th_factor=0.8, percentile=10.0,
+                           th_factor=0.8, percentile=1.0,
                            min_clip=-np.inf, max_clip=np.inf,
                            w_cv_interv=1.0, w_ampl=1.0, w_cv_ampl=1.0,
                            tolerance=0.5):
@@ -604,8 +604,8 @@ def best_window_args(cfg):
 
     # compute best window:
     print("call bestwindow() function...")
-    best_window_indices(data, rate, single=False,
-                        win_size=4.0, win_shift=0.5, th_factor=0.8, percentile=10.0,
+    best_window_indices(data, rate, single=True,
+                        win_size=4.0, win_shift=0.5, th_factor=0.8, percentile=1.0,
                         min_clip=min_clip, max_clip=max_clip,
                         w_cv_ampl=10.0, tolerance=0.5,
                         plot_data_func=plot_best_window, ax=ax)
diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index f01aab01..a94aa560 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -15,7 +15,7 @@
 from .powerspectrum import decibel
 
 
-def eod_waveform(data, samplerate, th_factor=0.6, percentile=0.1,
+def eod_waveform(data, samplerate, th_factor=0.8, percentile=1.0,
                  period=None, start=None, stop=None):
     """Detect EODs in the given data, extract data snippets around each EOD,
     and compute a mean waveform with standard deviation.
@@ -230,7 +230,7 @@ def analyze_wave(eod, freq, n_harm=6):
     return meod, props, spec_data
 
 
-def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1):
+def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1.0):
     """
     Analyze the EOD waveform of a pulse-type fish.
     
diff --git a/thunderfish/eventdetection.py b/thunderfish/eventdetection.py
index 0a9445ec..b8071bd3 100644
--- a/thunderfish/eventdetection.py
+++ b/thunderfish/eventdetection.py
@@ -630,7 +630,7 @@ def minmax_threshold(data, samplerate=None, win_size=None, th_factor=0.8):
         return (np.max(data) - np.min(data)) * th_factor
 
 
-def percentile_threshold(data, samplerate=None, win_size=None, th_factor=0.8, percentile=10.0):
+def percentile_threshold(data, samplerate=None, win_size=None, th_factor=0.8, percentile=1.0):
     """Esimate a threshold for detect_peaks() based on an inter-percentile range of the data.
 
     The threshold is computed as the range between the percentile and

From d514d690a862b67890765e03939666d2872589c8 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Sun, 4 Nov 2018 21:42:08 +0100
Subject: [PATCH 13/15] [eventdetection] improved documentation of threshold
 estimators

---
 thunderfish/checkpulse.py     |  4 +--
 thunderfish/eventdetection.py | 68 ++++++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/thunderfish/checkpulse.py b/thunderfish/checkpulse.py
index 1160ab49..9f8bad37 100644
--- a/thunderfish/checkpulse.py
+++ b/thunderfish/checkpulse.py
@@ -28,7 +28,7 @@
     pass
 
 
-def check_pulse_width(data, samplerate, th_factor=2.0, percentile=25.0,
+def check_pulse_width(data, samplerate, th_factor=0.8, percentile=1.0,
                       pulse_thresh=0.1, verbose=0, plot_data_func=None, **kwargs):
     """Detects if a fish is pulse- or wave-type based on the proportion of the time distance
     between a peak and its following trough, relative to the time between consecutive peaks.
@@ -339,7 +339,7 @@ def plot_psd_proportion(freqs, power, proportions, percentiles, pulse_fish,
     ax.set_ylabel('Power [dB]', fontsize=fs)
 
 
-def add_check_pulse_width_config(cfg, th_factor=2.0, percentile=25.0, pulse_thresh=0.1):
+def add_check_pulse_width_config(cfg, th_factor=0.8, percentile=1.0, pulse_thresh=0.1):
     """ Add parameter needed for check_pulse_width() as
     a new section to a configuration.
 
diff --git a/thunderfish/eventdetection.py b/thunderfish/eventdetection.py
index b8071bd3..d63a6b45 100644
--- a/thunderfish/eventdetection.py
+++ b/thunderfish/eventdetection.py
@@ -478,14 +478,18 @@ def widen_events(onsets, offsets, max_time, duration):
 
     
 def std_threshold(data, samplerate=None, win_size=None, th_factor=5.):
-    """Esimates a threshold for detect_peaks() based on the standard deviation of the data.
+    """Esimates a threshold for `detect_peaks()` based on the standard deviation of the data.
 
-    The threshold is computed as the standard deviation of the data multiplied with th_factor.
+    The threshold is computed as the standard deviation of the data
+    multiplied with `th_factor`.
 
-    If samplerate and win_size is given, then the threshold is computed for
-    each non-overlapping window of duration win_size separately.
+    In case of Gaussian distributed data, setting `th_factor=2.0` (two standard deviations)
+    captures 68% of the data, `th_factor=4.0` captures 95%, and `th_factor=6.0` 99.7%.
+
+    If `samplerate` and `win_size` is given, then the threshold is computed for
+    each non-overlapping window of duration `win_size` separately.
     In this case the returned threshold is an array of the same size as data.
-    Without a samplerate and win_size a single threshold value determined from
+    Without a `samplerate` and `win_size` a single threshold value determined from
     the whole data array is returned.
 
     Parameters
@@ -520,13 +524,16 @@ def std_threshold(data, samplerate=None, win_size=None, th_factor=5.):
     
 def hist_threshold(data, samplerate=None, win_size=None, th_factor=5.,
                    nbins=100, hist_height=1.0/np.sqrt(np.e)):
-    """Esimate a threshold for detect_peaks() based on a histogram of the data.
+    """Esimate a threshold for `detect_peaks()` based on a histogram of the data.
+
+    The standard deviation of the data is estimated from half the
+    width of the histogram of the data at `hist_height` relative height.
+    This estimates the data's standard deviation by ignoring tails of the distribution.
 
-    The standard deviation of the data is estimated from the
-    width of the histogram of the data at hist_height relative height.
+    However, you need enough data to robustly estimate the histogram.
 
-    If samplerate and win_size is given, then the threshold is computed for
-    each non-overlapping window of duration win_size separately.
+    If `samplerate` and `win_size` is given, then the threshold is computed for
+    each non-overlapping window of duration `win_size` separately.
     In this case the returned threshold is an array of the same size as data.
     Without a samplerate and win_size a single threshold value determined from
     the whole data array is returned.
@@ -585,13 +592,13 @@ def hist_threshold(data, samplerate=None, win_size=None, th_factor=5.,
 
     
 def minmax_threshold(data, samplerate=None, win_size=None, th_factor=0.8):
-    """Esimate a threshold for detect_peaks() based on minimum and maximum values of the data.
+    """Esimate a threshold for `detect_peaks()` based on minimum and maximum values of the data.
 
     The threshold is computed as the difference between maximum and
-    minimum value of the data multiplied with th_factor.
+    minimum value of the data multiplied with `th_factor`.
 
-    If samplerate and win_size is given, then the threshold is computed for
-    each non-overlapping window of duration win_size separately.
+    If `samplerate` and `win_size` is given, then the threshold is computed for
+    each non-overlapping window of duration `win_size` separately.
     In this case the returned threshold is an array of the same size as data.
     Without a samplerate and win_size a single threshold value determined from
     the whole data array is returned.
@@ -619,10 +626,8 @@ def minmax_threshold(data, samplerate=None, win_size=None, th_factor=0.8):
 
         for inx0 in range(0, len(data), win_size_indices):
             inx1 = inx0 + win_size_indices
-
             window_min = np.min(data[inx0:inx1])
             window_max = np.max(data[inx0:inx1])
-
             threshold[inx0:inx1] = (window_max - window_min) * th_factor
         return threshold
 
@@ -630,21 +635,34 @@ def minmax_threshold(data, samplerate=None, win_size=None, th_factor=0.8):
         return (np.max(data) - np.min(data)) * th_factor
 
 
-def percentile_threshold(data, samplerate=None, win_size=None, th_factor=0.8, percentile=1.0):
-    """Esimate a threshold for detect_peaks() based on an inter-percentile range of the data.
+def percentile_threshold(data, samplerate=None, win_size=None, th_factor=1.0, percentile=1.0):
+    """Esimate a threshold for `detect_peaks()` based on an inter-percentile range of the data.
 
     The threshold is computed as the range between the percentile and
     100.0-percentile percentiles of the data multiplied with
     th_factor.
 
-    For very small values of percentile the estimated threshold
-    approaches the one returned by minmax_threshold() (for same values
-    of th_factor). For percentile=16.0 and Gaussian distributed data,
+    For very small values of `percentile` the estimated threshold
+    approaches the one returned by `minmax_threshold()` (for same values
+    of `th_factor`). For `percentile=16.0` and Gaussian distributed data,
     the returned theshold is twice the one returned by std_threshold()
-    (two times the standard deviation).
-
-    If samplerate and win_size is given, then the threshold is computed for
-    each non-overlapping window of duration win_size separately.
+    or `hist_threshold()`, i.e. twice the standard deviation.
+
+    If you have knowledge about how many data points are in the tails of
+    the distribution, then this method is preferred over
+    `hist_threhsold()`. For example, if you expect peaks that you want
+    to detect using `detect_peaks()` at an average rate of 10Hz and
+    these peaks are about 1ms wide, then you have a 1ms peak per 100ms
+    period, i.e. the peaks make up 1% of the distribution. So you might
+    want to set `percentile=1.0`. If you set it lower, then you might
+    choose `th_factor` slightly smaller than one to capture also smaller
+    peaks. Setting `percentile` slightly higher might not change the
+    estimated threshold too much, since you start incorporating the
+    noise floor with a large density, but you may want to set
+    `th_factor` larger than one to reduce false detections.
+
+    If `samplerate` and `win_size` is given, then the threshold is computed for
+    each non-overlapping window of duration `win_size` separately.
     In this case the returned threshold is an array of the same size as data.
     Without a samplerate and win_size a single threshold value determined from
     the whole data array is returned.

From 83c768db7b3899bdf3c207364563ec2ac9a88961 Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Tue, 6 Nov 2018 23:31:06 +0100
Subject: [PATCH 14/15] [eodanalysis] added single pulse power spectrum

---
 thunderfish/eodanalysis.py   | 24 ++++++++--
 thunderfish/powerspectrum.py |  2 +-
 thunderfish/thunderfish.py   | 90 ++++++++++++++++++++++--------------
 3 files changed, 77 insertions(+), 39 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index a94aa560..435f6761 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -12,7 +12,7 @@
 import numpy as np
 from scipy.optimize import curve_fit
 from .eventdetection import percentile_threshold, detect_peaks, snippets
-from .powerspectrum import decibel
+from .powerspectrum import psd, nfft_noverlap, decibel
 
 
 def eod_waveform(data, samplerate, th_factor=0.8, percentile=1.0,
@@ -230,7 +230,7 @@ def analyze_wave(eod, freq, n_harm=6):
     return meod, props, spec_data
 
 
-def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1.0):
+def analyze_pulse(eod, eod_times, thresh_fac=0.01, fresolution=1.0, percentile=1.0):
     """
     Analyze the EOD waveform of a pulse-type fish.
     
@@ -243,6 +243,8 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1.0):
         List of times of detected EOD peaks.
     thresh_fac: float
         Set the threshold for peak detection to the maximum pulse amplitude times this factor.
+    fresolution: float
+        The frequency resolution of the power spectrum of the single pulse.
     percentile: float
         Remove extreme values of the inter-pulse intervals when computing interval statistics.
         All intervals below the `percentile` and above the `100-percentile` percentile
@@ -274,6 +276,9 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1.0):
         peak index (1 is P1, i.e. the largest positive peak),
         time relative to largest positive peak, amplitude,
         and amplitude normalized to largest postive peak.
+    power: 2-D array
+        The power spectrum of a single pulse. First column are the frequencies,
+        second column the power in decibel scaled to the pulse rate.
     intervals: 1-D array
         List of inter-EOD intervals with extreme values removed.
     """
@@ -336,6 +341,19 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1.0):
     # analyze pulse timing:
     inter_pulse_intervals = np.diff(eod_times)
     period = np.mean(inter_pulse_intervals)
+
+    # power spectrum of single pulse:
+    samplerate = 1.0/(meod[1,0]-meod[0,0])
+    nfft, _ = nfft_noverlap(fresolution, samplerate)
+    n = len(meod)//4
+    nn = np.max([nfft, 2*n])
+    data = np.zeros(nn)
+    data[nn//2-n:nn//2+n] = meod[max_idx-n:max_idx+n,1]
+    power, freqs = psd(data, samplerate, fresolution)
+    # store and scale:
+    dbpower = np.zeros((len(freqs), 2))
+    dbpower[:,0] = freqs
+    dbpower[:,1] = decibel(power/period/period)
     
     # store properties:
     props = {}
@@ -357,7 +375,7 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, percentile=1.0):
         props['meaninterval'] = np.mean(intervals)
         props['stdinterval'] = np.std(intervals, ddof=1)
     
-    return meod, props, peaks, intervals
+    return meod, props, peaks, dbpower, intervals
 
 
 def eod_waveform_plot(eod_waveform, peaks, ax, unit=None,
diff --git a/thunderfish/powerspectrum.py b/thunderfish/powerspectrum.py
index 0ee401d2..66a7bde1 100644
--- a/thunderfish/powerspectrum.py
+++ b/thunderfish/powerspectrum.py
@@ -38,7 +38,7 @@ def next_power_of_two(n):
     return int(2 ** np.floor(np.log(n) / np.log(2.0) + 1.0-1e-8))
 
 
-def nfft_noverlap(freq_resolution, samplerate, overlap_frac, min_nfft=16):
+def nfft_noverlap(freq_resolution, samplerate, overlap_frac=0.5, min_nfft=16):
     """The required number of points for an FFT to achieve a minimum frequency resolution
     and the number of overlapping data points.
 
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index d990dec9..d5d71d39 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -28,7 +28,7 @@
 
 def output_plot(base_name, pulse_fish, inter_eod_intervals,
                 raw_data, samplerate, idx0, idx1, fishlist, mean_eods, eod_props, peak_data,
-                unit, psd_data, power_n_harmonics, label_power, max_freq=3000.0,
+                spec_data, unit, psd_data, power_n_harmonics, label_power, max_freq=3000.0,
                 output_folder='', save_plot=False, show_plot=False):
     """
     Creates an output plot for the Thunderfish program.
@@ -62,6 +62,9 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
         Properties for each waveform in mean_eods.
     peak_data: list of 2_D arrays
         For each pulsefish a list of peak properties (index, time, and amplitude).
+    spec_data: list of 2_D arrays
+        For each pulsefish a power spectrum of the single pulse and for each wavefish
+        the relative amplitudes and phases of the harmonics.
     unit: string
         Unit of the trace and the mean EOD.
     psd_data: array
@@ -130,6 +133,7 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
                              frameon=False, bbox_to_anchor=(1.0, 1.1), loc='upper left',
                              title='EOD frequencies')
     plot_decibel_psd(ax3, psd_data[0][1], psd_data[0][0], max_freq=max_freq, color='blue')
+    #ax3.plot(spec_data[0][:,0], spec_data[0][:,1], 'r', lw=2)
     ax3.set_title('Powerspectrum (%d detected wave-fish)' % len(fishlist), y=1.05)
 
     ##########
@@ -161,35 +165,47 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
 
     ################
 
-    # plot inter EOD interval histogram
-    if len(inter_eod_intervals)>2:
+    if not usedax5 and eod_props[0]['type'] == 'pulse':
         usedax5 = True
-        tmp_period = 1000. * np.mean(inter_eod_intervals)
-        tmp_period = tmp_period - tmp_period % 0.05
-        inter_eod_intervals *= 1000.  # transform sec in msec
-        median_IPI = 1000. * eod_props[0]['medianinterval']
-        mean_IPI = 1000. * eod_props[0]['meaninterval']
-        std_IPI = 1000. * eod_props[0]['stdinterval']
-        n, edges = np.histogram(inter_eod_intervals, bins=np.arange(tmp_period - 5., tmp_period + 5., 0.05))
-
-        ax5.bar(edges[:-1], n, edges[1] - edges[0] - 0.001)
-        median_line, = ax5.plot([median_IPI, median_IPI], [0, np.max(n)], '--', color='red', lw=2, label='median %.2f ms' % median_IPI)
-        ax5.set_xlabel('inter EOD interval [ms]')
-        ax5.set_ylabel('n')
-        ax5.set_title('Inter-EOD-interval histogram', fontsize=14, y=1.05)
-
-        max_IPI = np.ceil(np.max(inter_eod_intervals)+0.5)
-        if max_IPI/median_IPI < 1.2:
-            max_IPI = np.ceil(1.2*median_IPI)
-        min_IPI = np.floor(np.min(inter_eod_intervals)-0.5)
-        if min_IPI/median_IPI > 0.8:
-            min_IPI = np.floor(0.8*median_IPI)
-        ax5.set_xlim(min_IPI, max_IPI)
-        nopatch = mpatches.Patch(color='white', alpha=0.0)
-        ax5.legend([median_line, nopatch],
-                   ['median %.2f ms' % median_IPI,
-                    '(%.2f +/- %.2f ms)' % (mean_IPI, std_IPI)],
-                    loc='upper right', frameon=False)
+        smax = np.max(spec_data[0][:,1])
+        ax5.plot(spec_data[0][:,0], spec_data[0][:,1] - smax, 'b', lw=3)
+        ax5.set_xscale('log')
+        ax5.set_xlim(1.0, 10000.0)
+        ax5.set_ylim(-60.0, 1.0)
+        ax5.set_xlabel('Frequency [Hz]')
+        ax5.set_ylabel('Power [dB]')
+        ax5.set_title('Single-pulse spectrum', fontsize=14, y=1.05)
+        
+
+    ## # plot inter EOD interval histogram
+    ## if len(inter_eod_intervals)>2:
+    ##     usedax5 = True
+    ##     tmp_period = 1000. * np.mean(inter_eod_intervals)
+    ##     tmp_period = tmp_period - tmp_period % 0.05
+    ##     inter_eod_intervals *= 1000.  # transform sec in msec
+    ##     median_IPI = 1000. * eod_props[0]['medianinterval']
+    ##     mean_IPI = 1000. * eod_props[0]['meaninterval']
+    ##     std_IPI = 1000. * eod_props[0]['stdinterval']
+    ##     n, edges = np.histogram(inter_eod_intervals, bins=np.arange(tmp_period - 5., tmp_period + 5., 0.05))
+
+    ##     ax5.bar(edges[:-1], n, edges[1] - edges[0] - 0.001)
+    ##     median_line, = ax5.plot([median_IPI, median_IPI], [0, np.max(n)], '--', color='red', lw=2, label='median %.2f ms' % median_IPI)
+    ##     ax5.set_xlabel('inter EOD interval [ms]')
+    ##     ax5.set_ylabel('n')
+    ##     ax5.set_title('Inter-EOD-interval histogram', fontsize=14, y=1.05)
+
+    ##     max_IPI = np.ceil(np.max(inter_eod_intervals)+0.5)
+    ##     if max_IPI/median_IPI < 1.2:
+    ##         max_IPI = np.ceil(1.2*median_IPI)
+    ##     min_IPI = np.floor(np.min(inter_eod_intervals)-0.5)
+    ##     if min_IPI/median_IPI > 0.8:
+    ##         min_IPI = np.floor(0.8*median_IPI)
+    ##     ax5.set_xlim(min_IPI, max_IPI)
+    ##     nopatch = mpatches.Patch(color='white', alpha=0.0)
+    ##     ax5.legend([median_line, nopatch],
+    ##                ['median %.2f ms' % median_IPI,
+    ##                 '(%.2f +/- %.2f ms)' % (mean_IPI, std_IPI)],
+    ##                 loc='upper right', frameon=False)
 
     # cosmetics
     for ax in [ax2, ax3, ax4, ax5]:
@@ -370,10 +386,11 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          percentile=cfg.value('pulseWidthPercentile'),
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          start=-mean_eod_window, stop=mean_eod_window)
-        mean_eod, props, peaks, inter_eod_intervals = analyze_pulse(mean_eod, eod_times)
+        mean_eod, props, peaks, power, inter_eod_intervals = analyze_pulse(mean_eod, eod_times,
+                                                                    fresolution=minfres)
         # TODO: add config parameter to analyze_pulse
         mean_eods.append(mean_eod)
-        spec_data.append([])
+        spec_data.append(power)
         peak_data.append(peaks)
         eod_props.append(props)
 
@@ -422,9 +439,12 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
             write_csv(os.path.join(output_folder, outfilename + '-waveform-%d.csv' % i),
                       header, mean_eod)
             if len(sdata)>0:
-                header = ['harmonics', 'frequency (Hz)', 'amplitude', 'phase']
-                if sdata.shape[1] > 4:
-                    header.append('dataampl')
+                if sdata.shape[1] == 2:
+                    header = ['frequency (Hz)', 'power (dB)']
+                else:
+                    header = ['harmonics', 'frequency (Hz)', 'amplitude', 'phase']
+                    if sdata.shape[1] > 4:
+                        header.append('dataampl')
                 write_csv(os.path.join(output_folder, outfilename + '-spectrum-%d.csv' % i),
                           header, sdata)
             if len(pdata)>0:
@@ -435,7 +455,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
     if save_plot or not save_csvs:
         output_plot(outfilename, pulse_fish,
                     inter_eod_intervals, raw_data, samplerate, idx0, idx1, fishlist,
-                    mean_eods, eod_props, peak_data, unit,
+                    mean_eods, eod_props, peak_data, spec_data, unit,
                     psd_data, cfg.value('powerNHarmonics'), True, 3000.0, output_folder,
                     save_plot=save_plot, show_plot=not save_csvs)
 

From 523d48e690435a22fe9a86d7ed473b4dd09baf9b Mon Sep 17 00:00:00 2001
From: Jan Benda <jan.benda@uni-tuebingen.de>
Date: Thu, 8 Nov 2018 23:01:16 +0100
Subject: [PATCH 15/15] [thunderfish] remove wavefish based on pulse spectrum

---
 thunderfish/eodanalysis.py | 25 ++++++-----
 thunderfish/thunderfish.py | 92 ++++++++++++++++++--------------------
 thunderfish/version.py     |  2 +-
 3 files changed, 60 insertions(+), 59 deletions(-)

diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py
index 435f6761..b2754050 100644
--- a/thunderfish/eodanalysis.py
+++ b/thunderfish/eodanalysis.py
@@ -230,7 +230,8 @@ def analyze_wave(eod, freq, n_harm=6):
     return meod, props, spec_data
 
 
-def analyze_pulse(eod, eod_times, thresh_fac=0.01, fresolution=1.0, percentile=1.0):
+def analyze_pulse(eod, eod_times, thresh_fac=0.01, min_dist=50.0e-6,
+                  fresolution=1.0, percentile=1.0):
     """
     Analyze the EOD waveform of a pulse-type fish.
     
@@ -243,6 +244,8 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, fresolution=1.0, percentile=1
         List of times of detected EOD peaks.
     thresh_fac: float
         Set the threshold for peak detection to the maximum pulse amplitude times this factor.
+    min_dist: float
+        Minimum distance between peak and troughs of the pulse.
     fresolution: float
         The frequency resolution of the power spectrum of the single pulse.
     percentile: float
@@ -278,7 +281,7 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, fresolution=1.0, percentile=1
         and amplitude normalized to largest postive peak.
     power: 2-D array
         The power spectrum of a single pulse. First column are the frequencies,
-        second column the power in decibel scaled to the pulse rate.
+        second column the power.
     intervals: 1-D array
         List of inter-EOD intervals with extreme values removed.
     """
@@ -326,9 +329,12 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, fresolution=1.0, percentile=1
     # find smaller peaks:
     peak_idx, trough_idx = detect_peaks(meod[:,1], max_ampl*thresh_fac)
     peak_l = np.sort(np.concatenate((peak_idx, trough_idx)))
-    # remove mutliple peaks:
-    peak_list = [prange[np.argmax(np.abs(meod[prange,1]))]
-                 for prange in np.split(peak_l, np.where(np.diff(meod[peak_l,1]>0.0)!=0)[0]+1)]
+    # remove mutliple peaks that do not oszillate around zero:
+    peak_list = np.array([prange[np.argmax(np.abs(meod[prange,1]))]
+                 for prange in np.split(peak_l, np.where(np.diff(meod[peak_l,1]>0.0)!=0)[0]+1)])
+    # remove multiple peaks that are too close and too small:
+    ridx = [(k, k+1) for k in np.where((np.diff(meod[peak_list,0]) < min_dist) & (meod[peak_list[:-1],1]<0.1))]
+    peak_list = np.delete(peak_list, ridx)
     # find P1:
     p1i = np.where(peak_list == max_idx)[0][0]
     offs = 0 if p1i <= 2 else p1i - 2
@@ -350,10 +356,9 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, fresolution=1.0, percentile=1
     data = np.zeros(nn)
     data[nn//2-n:nn//2+n] = meod[max_idx-n:max_idx+n,1]
     power, freqs = psd(data, samplerate, fresolution)
-    # store and scale:
-    dbpower = np.zeros((len(freqs), 2))
-    dbpower[:,0] = freqs
-    dbpower[:,1] = decibel(power/period/period)
+    ppower = np.zeros((len(freqs), 2))
+    ppower[:,0] = freqs
+    ppower[:,1] = power
     
     # store properties:
     props = {}
@@ -375,7 +380,7 @@ def analyze_pulse(eod, eod_times, thresh_fac=0.01, fresolution=1.0, percentile=1
         props['meaninterval'] = np.mean(intervals)
         props['stdinterval'] = np.std(intervals, ddof=1)
     
-    return meod, props, peaks, dbpower, intervals
+    return meod, props, peaks, ppower, intervals
 
 
 def eod_waveform_plot(eod_waveform, peaks, ax, unit=None,
diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py
index d5d71d39..c43ce002 100644
--- a/thunderfish/thunderfish.py
+++ b/thunderfish/thunderfish.py
@@ -19,7 +19,7 @@
 from .bestwindow import clip_amplitudes, best_window_indices
 from .harmonicgroups import add_psd_peak_detection_config, add_harmonic_groups_config
 from .checkpulse import check_pulse_width, add_check_pulse_width_config, check_pulse_width_args
-from .powerspectrum import plot_decibel_psd, multi_resolution_psd
+from .powerspectrum import decibel, plot_decibel_psd, multi_resolution_psd
 from .harmonicgroups import harmonic_groups, harmonic_groups_args, psd_peak_detection_args, fundamental_freqs, fundamental_freqs_and_power, colors_markers, plot_harmonic_groups
 from .consistentfishes import consistent_fishes
 from .eodanalysis import eod_waveform, analyze_wave, analyze_pulse, eod_waveform_plot
@@ -125,6 +125,8 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
     ############
 
     # plot psd
+    if len(spec_data) > 0 and len(spec_data[0]) > 0.0:
+        ax3.plot(spec_data[0][:,0], decibel(5.0*eod_props[0]['EODf']**2.0*spec_data[0][:,1]), 'k', lw=1, alpha=0.2)
     if len(fishlist) > 0:
         colors, markers = colors_markers()
         plot_harmonic_groups(ax3, fishlist, max_freq=max_freq, max_groups=12, sort_by_freq=True,
@@ -133,7 +135,6 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
                              frameon=False, bbox_to_anchor=(1.0, 1.1), loc='upper left',
                              title='EOD frequencies')
     plot_decibel_psd(ax3, psd_data[0][1], psd_data[0][0], max_freq=max_freq, color='blue')
-    #ax3.plot(spec_data[0][:,0], spec_data[0][:,1], 'r', lw=2)
     ax3.set_title('Powerspectrum (%d detected wave-fish)' % len(fishlist), y=1.05)
 
     ##########
@@ -165,12 +166,13 @@ def output_plot(base_name, pulse_fish, inter_eod_intervals,
 
     ################
 
-    if not usedax5 and eod_props[0]['type'] == 'pulse':
+    if not usedax5 and len(eod_props) > 0 and eod_props[0]['type'] == 'pulse':
         usedax5 = True
-        smax = np.max(spec_data[0][:,1])
-        ax5.plot(spec_data[0][:,0], spec_data[0][:,1] - smax, 'b', lw=3)
-        ax5.set_xscale('log')
+        db = decibel(spec_data[0][:,1])
+        smax = np.nanmax(db)
+        ax5.plot(spec_data[0][:,0], db - smax, 'b', lw=3)
         ax5.set_xlim(1.0, 10000.0)
+        ax5.set_xscale('log')
         ax5.set_ylim(-60.0, 1.0)
         ax5.set_xlabel('Frequency [Hz]')
         ax5.set_ylabel('Power [dB]')
@@ -244,7 +246,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
     cfg.add_section('Waveform selection:')
     cfg.add('maximumClippedFraction', 0.01, '', 'Take waveform of the fish with the highest power only if the fraction of clipped signals is below this value.')
     cfg.add('maximumFirstHarmonicAmplitude', 2.0, '', 'Skip waveform of wave-type fish if the ampltude of the first harmonic is higher than this factor times the amplitude of the fundamental.')
-    cfg.add('maximumSecondHarmonicAmplitude', 0.2, '', 'Skip waveform of wave-type fish if the ampltude of the second harmonic is higher than this factor times the amplitude of the fundamental. That is, the waveform appears to have twice the frequency than the fundamental.')
+    cfg.add('maximumSecondHarmonicAmplitude', 0.8, '', 'Skip waveform of wave-type fish if the ampltude of the second harmonic is higher than this factor times the amplitude of the fundamental. That is, the waveform appears to have twice the frequency than the fundamental.')
     cfg.add('maximumRMSError', 0.05, '', 'Skip waveform of wave-type fish if the root-mean-squared error relative to the peak-to-peak amplitude is larger than this number.')
     
     # load configuration from working directory and data directories:
@@ -335,50 +337,23 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                 print('  none')
         fishlists.append(fishlist)
     # filter the different fishlists to get a fishlist with consistent fishes:
-    filtered_fishlist = consistent_fishes(fishlists, df_th=cfg.value('frequencyThreshold'))
+    fishlist = consistent_fishes(fishlists, df_th=cfg.value('frequencyThreshold'))
     if verbose > 0:
-        if len(filtered_fishlist) > 0:
+        if len(fishlist) > 0:
             print('fundamental frequencies consistent in all power spectra:')
-            print('  ' + ' '.join(['%.1f' % freq[0, 0] for freq in filtered_fishlist]))
+            print('  ' + ' '.join(['%.1f' % freq[0, 0] for freq in fishlist]))
         else:
             print('no fundamental frequencies are consistent in all power spectra')
 
-    # remove multiples of pulse fish frequency from fishlist:
-    if pulse_fish:
-        fishlist = []
-        for fish in filtered_fishlist:
-            eodf = fish[0,0]
-            n = np.round(eodf*pulse_period)
-            pulse_eodf = n/pulse_period
-            if verbose > 0:
-                print('check wavefish at %.1f Hz versus %d-th harmonic of pulse frequency %.1f Hz at resolution of %.1f Hz'
-                      % (eodf, n, pulse_eodf, 0.5*n*deltaf))
-            # TODO: make the threshold a parameter!
-            if np.abs(eodf-pulse_eodf) > 0.5*n*deltaf:
-                fishlist.append(fish)
-            else:
-                if verbose > 0:
-                    print('removed frequency %.1f Hz, because it is multiple of pulsefish %.1f' % (eodf, pulse_eodf))
-    else:
-        fishlist = filtered_fishlist
-
-    # save fish frequencies and amplitudes:
-    if save_csvs and found_bestwindow:
-        if len(fishlist) > 0:
-            # write csv file with main EODf and corresponding power in dB of detected fishes:
-            csv_matrix = fundamental_freqs_and_power(fishlist, cfg.value('powerNHarmonics'))
-            csv_name = os.path.join(output_folder, outfilename + '-wavefish-eodfs.csv')
-            header = ['fundamental frequency (Hz)', 'power (dB)']
-            write_csv(csv_name, header, csv_matrix)
-    #if pulse_fish:
-        # TODO: write frequency amd parameter of pulse-fish to -pulsefish_eodfs.csv
-        
-    # analyse eod waveform:
+    # analysis results:
     eod_props = []
     mean_eods = []
     spec_data = []
     peak_data = []
-    inter_eod_intervals = []
+    intervals = []
+    power_thresh = []
+    
+    # analyse eod waveform of pulse-fish:
     if pulse_fish:
         mean_eod_window = 0.002
         mean_eod, eod_times = \
@@ -386,13 +361,26 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                          percentile=cfg.value('pulseWidthPercentile'),
                          th_factor=cfg.value('pulseWidthThresholdFactor'),
                          start=-mean_eod_window, stop=mean_eod_window)
-        mean_eod, props, peaks, power, inter_eod_intervals = analyze_pulse(mean_eod, eod_times,
-                                                                    fresolution=minfres)
+        mean_eod, props, peaks, power, intervals = analyze_pulse(mean_eod, eod_times,
+                                                                 fresolution=minfres)
         # TODO: add config parameter to analyze_pulse
         mean_eods.append(mean_eod)
         spec_data.append(power)
         peak_data.append(peaks)
         eod_props.append(props)
+        power_thresh = np.zeros(power.shape)
+        power_thresh[:,0] = power[:,0]
+        power_thresh[:,1] = 5.0*props['EODf']**2.0 * power[:,1]
+
+    if len(power_thresh) > 0:
+        n = len(fishlist)
+        for k, fish in enumerate(reversed(fishlist)):
+            df = power_thresh[1,0] - power_thresh[0,0]
+            hfrac = float(np.sum(fish[:,1] < power_thresh[np.array(fish[:,0]//df, dtype=int),1]))/float(len(fish[:,1]))
+            if hfrac >= 0.5:
+                fishlist.pop(n-1-k)
+                if verbose > 0:
+                    print('removed frequency %.1f Hz, because %.0f%% of the harmonics where below pulsefish threshold' % (fish[0,0], 100.0*hfrac))        
 
     # analyse EOD waveform of all wavefish:
     powers = np.array([np.sum(fish[:cfg.value('powerNHarmonics'), 1])
@@ -428,10 +416,11 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
         fishlist = []
         eod_props = []
         mean_eods = []
-        inter_eod_intervals = []
+        intervals = []
 
-    # write eod waveforms:
+    # write results to files:
     if save_csvs and found_bestwindow:
+        # eod waveforms:
         for i, (mean_eod, sdata, pdata) in enumerate(zip(mean_eods, spec_data, peak_data)):
             header = ['time (s)', 'mean', 'std']
             if mean_eod.shape[1] > 3:
@@ -440,7 +429,7 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                       header, mean_eod)
             if len(sdata)>0:
                 if sdata.shape[1] == 2:
-                    header = ['frequency (Hz)', 'power (dB)']
+                    header = ['frequency (Hz)', 'power']
                 else:
                     header = ['harmonics', 'frequency (Hz)', 'amplitude', 'phase']
                     if sdata.shape[1] > 4:
@@ -451,10 +440,17 @@ def thunderfish(filename, channel=0, save_csvs=False, save_plot=False,
                 header = ['P', 'time (ms)', 'amplitude', 'relaampl']
                 write_csv(os.path.join(output_folder, outfilename + '-peaks-%d.csv' % i),
                           header, pdata)
+        # wavefish frequencies and amplitudes:
+        if len(fishlist) > 0:
+            # write csv file with main EODf and corresponding power in dB of detected fishes:
+            csv_matrix = fundamental_freqs_and_power(fishlist, cfg.value('powerNHarmonics'))
+            csv_name = os.path.join(output_folder, outfilename + '-wavefish-eodfs.csv')
+            header = ['fundamental frequency (Hz)', 'power (dB)']
+            write_csv(csv_name, header, csv_matrix)
 
     if save_plot or not save_csvs:
         output_plot(outfilename, pulse_fish,
-                    inter_eod_intervals, raw_data, samplerate, idx0, idx1, fishlist,
+                    intervals, raw_data, samplerate, idx0, idx1, fishlist,
                     mean_eods, eod_props, peak_data, spec_data, unit,
                     psd_data, cfg.value('powerNHarmonics'), True, 3000.0, output_folder,
                     save_plot=save_plot, show_plot=not save_csvs)
diff --git a/thunderfish/version.py b/thunderfish/version.py
index 74d7938f..c6f8db61 100644
--- a/thunderfish/version.py
+++ b/thunderfish/version.py
@@ -1 +1 @@
-__version__='1.2.0'     # see http://semver.org/
+__version__='1.4.0'     # see http://semver.org/