diff --git a/thunderfish/consistentfishes.py b/thunderfish/consistentfishes.py index f5024466..f27c881d 100644 --- a/thunderfish/consistentfishes.py +++ b/thunderfish/consistentfishes.py @@ -1,18 +1,25 @@ +""" +Functions to compare fishlists created by the harmonicgroups.py module in order to create a create a fishlist with only +these fishes available in all fishlists. + +consistent_fishes(): Compares a list of fishlists and builds a consistent fishlist. +""" + import numpy as np import harmonicgroups as hg def find_consistency(fundamentals, df_th=1.): """ - Compares several lists to find values that are, with a certain threshhold, available in every list. + Compares lists of floats to find these values consistent in every list. (with a certain threshold) - This function gets a list of lists conatining values as input. For every value in the first list this function is - looking in the other lists if there is a entry with the same value +- threshold. The vector consistancy_help - has the length of the first list of fundamentals and it's entries are all ones. When a value from the first list is - also available in another list this function adds 1 to that particular entry in the consostany_help vector. After - comparina all values from the first list with all values of the other lists the consostancy_help vector got values - between 1 and len(fundamentals). The indices where this vector is equal to len(fundamentals) are also the indices of - fishes in the first fishlist that are available in every fishlist. + Every value of the first list is compared to the values of the other lists. The consistency_help array consists in + the beginning of ones, has the same length as the first list and is used to keep the information about values that + are available in several lists. If the difference between a value of the first list and a value of another list is + below the threshold the entry of the consistency_help array is added 1 to at the index of the value in the value in + the first list. The indices of the consistency_help array that are equal to the amount of lists compared are the + indices of the values of the first list that are consistent in all lists. The consistent value array and the indices + are returned. :param fundamentals: (2-D array) list of lists containing the fundamentals of a fishlist. @@ -38,7 +45,7 @@ def find_consistency(fundamentals, df_th=1.): def consistent_fishes_plot(fishlists, filtered_fishlist, ax, fs): """ - Creates an axis for plotting to visualize what this modul did. + Creates an axis for plotting all lists and the consistent values marked with a bar. :param filtered_fishlist: (3-D array) Contains power and frequency of these fishes that hve been detected in several powerspectra using different resolutions. @@ -64,10 +71,11 @@ def consistent_fishes_plot(fishlists, filtered_fishlist, ax, fs): ax.set_xlabel('list no.', fontsize=fs) -def consistent_fishes_psd_plot(filtered_fishlist, ax): +def consistent_fishes_psd_plot(filtered_fishlist, ax, max_freq= 3000): """ - This function can be passed to the consistent fishes plot as plot_data_func. It's purpose is to plot the four most - powerfull fundamental frequencies into a powerspectrum plot. + Creates an axis for plotting the power of the four most powerful detected fishes with its fundamental and harmonics. + + The axis passed to this function should already contain the plotted power-spectrum in dB. :param filtered_fishlist: (3-D array) Contains power and frequency of these fishes that hve been detected in several powerspectra using different resolutions. @@ -83,14 +91,16 @@ def consistent_fishes_psd_plot(filtered_fishlist, ax): x = np.array([filtered_fishlist[fish][harmonic][0] for harmonic in range(len(filtered_fishlist[fish]))]) y = np.array([filtered_fishlist[fish][harmonic][1] for harmonic in range(len(filtered_fishlist[fish]))]) y[y < 1e-20] = np.nan - ax.plot(x, 10.0 * np.log10(y), 'o', markersize=8, label='%.1f' % filtered_fishlist[fish][0][0]) + ax.plot(x[x < max_freq], 10.0 * np.log10(y[x < max_freq]), 'o', markersize=8, label='%.1f' % filtered_fishlist[fish][0][0]) ax.legend(loc='upper right', frameon=False, numpoints=1) def consistent_fishes(fishlists, verbose=0, plot_data_func=None, **kwargs): """ - This function gets several fishlists, compares them, and gives back one fishlist that only contains these fishes - that are available in every given fishlist. This is the main function that calls the other functions in the code. + Compares several fishlists to create a fishlist only containing these fishes present in all these fishlists. + + Therefore several functions are used to first extract the fundamental frequencies of every fish in each fishlist, + before comparing them and building a fishlist only containing these fishes present in all fishlists. :param fishlists: (4-D array) List of fishlists with harmonics and each frequency and power. fishlists[fishlist][fish][harmonic][frequency, power] diff --git a/thunderfish/create_parallel_jobs.py b/thunderfish/create_parallel_jobs.py index 55934a05..c9f5108d 100644 --- a/thunderfish/create_parallel_jobs.py +++ b/thunderfish/create_parallel_jobs.py @@ -3,6 +3,7 @@ import numpy as np import glob import sys +import os if __name__ == '__main__': @@ -15,7 +16,7 @@ job_name = 'parallel_jobs_for_' + folders[-2] + '_' + folders[-1] + '.txt' rec_dir = sys.argv[1] - rec_files = glob.glob(os.path.join(rec_dir, '*') + rec_files = glob.glob(os.path.join(rec_dir, '*')) task_list = ['python thunderfish.py ' + e for e in rec_files] if len(task_list) > 0: diff --git a/thunderfish/eodanalysis.py b/thunderfish/eodanalysis.py index b1cb51cf..e7feb1ed 100644 --- a/thunderfish/eodanalysis.py +++ b/thunderfish/eodanalysis.py @@ -1,14 +1,23 @@ +""" +Detects EODs in a given dataset and computes their mean waveform. + +eod_waveform(): calculates a mean EOD of a given dataset. +""" + import numpy as np import peakdetection as pkd def eod_waveform(data, samplerate, th_factor=0.8, percentile=0.1, start=None, stop=None): - """Detects EODs in the given data and computes their mean waveform. + """Detects EODs in the given data, extracts data snippets around each EOD and computes a mean waveform with standard + deviation. :param data: (1-D array) the data to be analysed. :param samplerate: (float) samplerate of the data in Hertz. - :param percentile: (int). percentile parameter for the peakdetection.percentile_threshold() function used to estimate thresholds for detecting EOD peaks in the data. - :param th_factor: (float). th_factor parameter for the peakdetection.percentile_threshold() function used to estimate thresholds for detecting EOD peaks in the data. + :param percentile: (int). percentile parameter for the peakdetection.percentile_threshold() function used to + estimate thresholds for detecting EOD peaks in the data. + :param th_factor: (float). th_factor parameter for the peakdetection.percentile_threshold() function used to + estimate thresholds for detecting EOD peaks in the data. :param start: (float or None) start time of EOD snippets relative to peak. :param stop: (float or None) stop time of EOD snippets relative to peak. :return mean_eod (1-D array) Average of the EOD snippets. @@ -64,6 +73,7 @@ def eod_waveform_plot(time, mean_eod, std_eod, ax, unit='a.u.'): ax.set_xlabel('Time [msec]') ax.set_ylabel('Amplitude [%s]' % unit) ax.set_xlim(1000.0*min(time), max(1000.0*time)) + ax.legend(loc='upper right', frameon=False) if __name__ == '__main__': diff --git a/thunderfish/powerspectrum.py b/thunderfish/powerspectrum.py index 18862edb..bfe1d95f 100644 --- a/thunderfish/powerspectrum.py +++ b/thunderfish/powerspectrum.py @@ -1,5 +1,8 @@ """ +Functions to calculate a powerspectrum o n the basis of a given dataset, a given samplingrate and a given +frequencyresolution for the psd. +multi_resolution_psd(): Performs the steps to calculate a powerspectrum. """ import numpy as np @@ -15,16 +18,6 @@ def next_power_of_two(n): return int(2 ** np.floor(np.log(n) / np.log(2.0) + 1.0-1e-8)) -def nfft(freq_resolution, samplerate): - """The required number of points for an FFT to achieve a minimum frequency resolution. - - :param freq_resolution: (float) the minimum required frequency resolution in Hertz. - :param samplerate: (float) the sampling rate of the data in Hert. - :return nfft: (int) the number of FFT points - """ - return next_power_of_two(samplerate / fresolution) - - def nfft_noverlap(freq_resolution, samplerate, overlap_frac, min_nfft=0): """The required number of points for an FFT to achieve a minimum frequency resolution and the number of overlapping data points. @@ -48,8 +41,7 @@ def psd(data, samplerate, fresolution, detrend=mlab.detrend_none, sides='default', scale_by_freq=None): """Power spectrum density of a given frequency resolution. - From the requested frequency resolution and the samplerate - nfft is computed. + From the requested frequency resolution and the samplerate nfft is computed. :param data: (1-D array) data array you want to calculate a psd of. :param samplerate: (float) sampling rate of the data in Hertz. @@ -68,7 +60,7 @@ def psd(data, samplerate, fresolution, detrend=mlab.detrend_none, def plot_decibel_psd(power, freqs, ax, max_freq=3000, fs=12, color='blue', alpha=1.): """ - Plots a powerspectum in decibel. + Plots the powerspectum in decibel. :param power: (1-D array) power array of a psd. :param freqs: (1-D array) frequency array of a psd. @@ -81,19 +73,20 @@ def plot_decibel_psd(power, freqs, ax, max_freq=3000, fs=12, color='blue', alpha decibel_psd = power.copy() decibel_psd[decibel_psd < 1e-20] = np.nan decibel_psd[decibel_psd >= 1e-20] = 10.0 * np.log10(decibel_psd[decibel_psd >= 1e-20]) - ax.plot(freqs, decibel_psd, color=color, alpha=alpha) + ax.plot(freqs[freqs < max_freq], decibel_psd[freqs < max_freq], color=color, alpha=alpha) ax.set_ylabel('Power [dB]', fontsize=fs) ax.set_xlabel('Frequency [Hz]', fontsize=fs) - ax.set_xlim([0, max_freq]) def multi_resolution_psd(data, samplerate, fresolution=0.5, detrend=mlab.detrend_none, window=mlab.window_hanning, overlap=0.5, pad_to=None, sides='default', scale_by_freq=None): - """This function is performing the steps to calculate a powerspectrum on the basis of a given dataset, a given - samplingrate and a given frequencyresolution for the psd. Therefore two other functions are called to first - calculate the nfft value and second calculate the powerspectrum. + """Performs the steps to calculate a powerspectrum on the basis of a given dataset, a given samplingrate and a given + frequencyresolution for the psd. + + Two other functions are called to first calculate the nfft value and second calculate the powerspectrum. The given + frequencyresolution can be a float or a list/array of floats. (for further argument information see numpy.psd documentation) :param data: (1-D array) data array you want to calculate a psd of. diff --git a/thunderfish/thunderfish.py b/thunderfish/thunderfish.py index 3dd1d0a8..6473ae43 100644 --- a/thunderfish/thunderfish.py +++ b/thunderfish/thunderfish.py @@ -11,13 +11,136 @@ import matplotlib.pyplot as plt -def main(audio_file, channel=0, output_folder='', beat_plot=False, verbose=0): - # create figure and axis for the outputplot - fig = plt.figure(facecolor='white', figsize=(12., 8.)) - ax1 = fig.add_subplot(2, 2, (3, 4)) # axis for the psd - ax2 = fig.add_subplot(2, 2, 2) # axis for the mean eod - # TODO: add axis at the top showing the whole trace and the best window +def output_plot(audio_file, pulse_fish_width, pulse_fish_psd, EOD_count, mean_IPI, inter_eod_intervals, std_IPI, + raw_data, samplerate, idx0, idx1, filtered_fishlist, period, time_eod, mean_eod, std_eod, unit, + psd_data): + """ + Creates an output plot for the Thunderfish program. + + This output contains the raw trace where the analysis window is marked, the power-spectrum of this analysis window + where the detected fish are marked, a mean EOD plot, a histogram of the inter EOD interval and further information + about the recording that is analysed. + + :param axs: (list) list of axis fo plots. + :param audio_file: (string) path to and name of audiofile. + :param pulse_fish_width: (bool) True if a pulsefish has been detected by analysis of the EODs. + :param pulse_fish_psd: (bool) True if a pulsefish has been detected by analysis of the PSD. + :param EOD_count: (int) number of detected EODs. + :param mean_IPI: (float) mean inter EOD interval. + :param inter_eod_intervals: (array) time difference from one to another detected EOD. + :param std_IPI: (float) standard deviation of the inter EOD interval. + :param raw_data: (array) dataset. + :param samplerate: (float) samplerate of the dataset. + :param idx0: (float) index of the beginning of the analysis window in the dataset. + :param idx1: (float) index of the end of the analysis window in the dataset. + :param filtered_fishlist: (array) frequency and power of fundamental frequency/harmonics of several fish. + :param period: (float) mean EOD time difference. + :param time_eod: (array) time for the mean EOD plot. + :param mean_eod: (array) mean array of EODs + :param std_eod: (array) standard deviation array of EODs + :param unit: (string) unit of the trace and the mean EOD + :param psd_data: (array) power spectrum of the analysed data for different frequency resolutions. + """ + fig = plt.figure(facecolor='white', figsize=(18., 10.)) + ax1 = plt.subplot2grid((5, 6), (0, 0), colspan=6) # title + ax2 = plt.subplot2grid((5, 6), (1, 0), rowspan=2, colspan=3) # trace + ax3 = plt.subplot2grid((5, 6), (1, 3), rowspan=2, colspan=3) # psd + ax4 = plt.subplot2grid((5, 6), (3, 0), rowspan=2, colspan=2) # mean eod + ax5 = plt.subplot2grid((5, 6), (3, 2), rowspan=2, colspan=2) # meta data + ax6 = plt.subplot2grid((5, 6), (3, 4), rowspan=2, colspan=2) # inter EOD histogram + + # plot title + filename = audio_file.split('/')[-1] + ax1.text(0.5, 0.5, 'Thunderfish: %s' % filename, fontsize=30, horizontalalignment='center') + ax1.set_frame_on(False) + ax1.get_xaxis().set_visible(False) + ax1.get_yaxis().set_visible(False) + ############ + + # plot trace + time = np.arange(len(raw_data)) / samplerate + ax2.plot(time[:idx0], raw_data[:idx0], color='blue') + ax2.plot(time[idx1:], raw_data[idx1:], color='blue') + ax2.plot(time[idx0:idx1], raw_data[idx0:idx1], color='red', label='analysis window') + ax2.set_xlabel('Time [sec]') + ax2.set_ylabel('Amplitude [a.u.]') + ax2.legend(loc='upper right', frameon=False) + ############ + + # plot psd + ps.plot_decibel_psd(psd_data[0][0], psd_data[0][1], ax3, fs=12) + if not pulse_fish_width and not pulse_fish_psd: + cf.consistent_fishes_psd_plot(filtered_fishlist, ax=ax3) + ########## + + # plot mean EOD + ea.eod_waveform_plot(time_eod, mean_eod, std_eod, ax4, unit=unit) + if not pulse_fish_width and not pulse_fish_psd: + ax4.set_xlim([-600 * period, 600 * period]) # half a period in milliseconds + else: + ax4.set_xlim([-100 * period, 100 * period]) # half a period in milliseconds + # TODO: make xlim dependent on fish type! + ############### + + # plot meta data + try: + dom_freq = filtered_fishlist[np.argsort([filtered_fishlist[fish][0][1] for fish in range(len(filtered_fishlist))])[-1]][0][0] + fish_count = len(filtered_fishlist) + except IndexError: + dom_freq = 1./ period + fish_count = 1 + + ax5.set_frame_on(False) + ax5.get_xaxis().set_visible(False) + ax5.get_yaxis().set_visible(False) + + fishtype = 'pulse' if pulse_fish_width and pulse_fish_psd else 'wave' + ax5.text(0.1, 0.9, 'fishtype:', fontsize=14) + ax5.text(0.6, 0.9, '%s' %fishtype, fontsize=14) + + ax5.text(0.1, 0.7, '# detected fish:', fontsize=14) + ax5.text(0.6, 0.7, '%.0f' % fish_count, fontsize=14) + + if fishtype is 'wave': + ax5.text(0.1, 0.5, 'dominant frequency:', fontsize=14) + ax5.text(0.6, 0.5, '%.1f Hz' % dom_freq, fontsize=14) + else: + ax5.text(0.1, 0.5, 'Mean pulse frequency:', fontsize=14) + ax5.text(0.6, 0.5, '%.1f Hz' % dom_freq, fontsize=14) + + ax5.text(0.1, 0.3, '# detected EODs:', fontsize=14) + ax5.text(0.6, 0.3, '%.0f' %EOD_count, fontsize=14) + + ax5.text(0.1, 0.1, 'mean EOD interval:', fontsize=14) + ax5.text(0.6, 0.1, '%.2f ms' %mean_IPI, fontsize=14) + + ################ + + # plot inter EOD interval histogram + n, edges = np.histogram(inter_eod_intervals, bins=100) + + ax6.bar(edges[:-1], n, edges[1]-edges[0]) + ax6.plot([mean_IPI, mean_IPI], [0, max(n)], '--', color= 'red', lw=2, label='mean') + ax6.plot([mean_IPI - std_IPI, mean_IPI - std_IPI], [0, max(n)], '--', color= 'green', lw=2, label='std') + ax6.plot([mean_IPI + std_IPI, mean_IPI + std_IPI], [0, max(n)], '--', color= 'green', lw=2) + ax6.set_xlabel('inter EOD interval [ms]') + ax6.set_ylabel('n') + ax6.legend(loc= 'upper right', frameon=False) + + # cosmetics + for ax in [ax2, ax3, ax4, ax6]: + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.get_xaxis().tick_bottom() + ax.get_yaxis().tick_left() + + plt.tight_layout() + + # TODO: plot result in pdf! + plt.show() + +def main(audio_file, channel=0, output_folder='', beat_plot=False, verbose=0): # get config dictionary cfg = ct.get_config_dict() @@ -29,16 +152,15 @@ def main(audio_file, channel=0, output_folder='', beat_plot=False, verbose=0): # calculate best_window: clip_win_size = 0.5 min_clip, max_clip = bw.clip_amplitudes(raw_data, int(clip_win_size * samplerate)) - data, clip = bw.best_window(raw_data, samplerate, single=True, win_size=4.0, - min_clip=min_clip, max_clip=max_clip) + idx0, idx1, clipped = bw.best_window_indices(raw_data, samplerate, single=True, win_size=8.0, min_clip=min_clip, + max_clip=max_clip, w_cv_ampl=10.0, th_factor=0.8) + data = raw_data[idx0:idx1] # pulse-type fish? pulse_fish_width, pta_value = chp.check_pulse_width(data, samplerate) # calculate powerspectrums with different frequency resolutions psd_data = ps.multi_resolution_psd(data, samplerate, fresolution=[0.5, 2 * 0.5, 4 * 0.5]) - ps.plot_decibel_psd(psd_data[0][0], psd_data[0][1], ax1, fs=12) - # TODO: plot harmonic groups in spectrum! # find the fishes in the different powerspectrums: fishlists = [] @@ -50,25 +172,23 @@ def main(audio_file, channel=0, output_folder='', beat_plot=False, verbose=0): pulse_fish_psd, proportion = chp.check_pulse_psd(psd_data[0][0], psd_data[0][1]) # filter the different fishlists to get a fishlist with consistent fishes: - if pulse_fish_width and not pulse_fish_psd: + if not pulse_fish_width and not pulse_fish_psd: filtered_fishlist = cf.consistent_fishes(fishlists) - cf.consistent_fishes_psd_plot(filtered_fishlist, ax=ax1) + else: + filtered_fishlist = [] # analyse eod waveform: - mean_eod, std_eod, time, eod_times = ea.eod_waveform(data, samplerate) + mean_eod, std_eod, time, eod_times = ea.eod_waveform(data, samplerate, th_factor=0.6) period = np.mean(np.diff(eod_times)) - # TODO: analyse it! - # TODO: plot inter-peak interval histogram for pulse and wave fish, annotate with mean, standard deviation and CV. - # plot waveform: - ea.eod_waveform_plot(time, mean_eod, std_eod, ax2, unit=unit) - ax2.set_xlim(-500*period, 500*period) # half a period in milliseconds - # TODO: make xlim dependent on fish type! + # inter-peal interval + inter_peak_intervals = np.diff(eod_times)* 1000. # in ms + mean_IPI = np.mean(inter_peak_intervals) + std_IPI = np.std(inter_peak_intervals, ddof=1) - plt.tight_layout() - - # TODO: plot result in pdf! - plt.show() + output_plot(audio_file, pulse_fish_width, pulse_fish_psd, len(eod_times), mean_IPI, + inter_peak_intervals, std_IPI, raw_data, samplerate, idx0, idx1, filtered_fishlist, period, time, + mean_eod, std_eod, unit, psd_data) if __name__ == '__main__':